less String object creation during search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7756 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent ab5a16b957
commit 87082f407e

@ -447,7 +447,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", BlockRank.ranking(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", BlockRank.ranking(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(ASCII.String(entry.hash(), 6, 6)));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(entry.word().lastModified()))); prop.put("genUrlList_urlList_"+i+"_urlExists_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());

@ -384,7 +384,7 @@ public class yacysearch {
while (sitehost.endsWith(".")) { while (sitehost.endsWith(".")) {
sitehost = sitehost.substring(0, sitehost.length() - 1); sitehost = sitehost.substring(0, sitehost.length() - 1);
} }
sitehash = DigestURI.domhash(sitehost); sitehash = DigestURI.hosthash(sitehost);
} }
final int heuristicScroogle = querystring.indexOf("heuristic:scroogle"); final int heuristicScroogle = querystring.indexOf("heuristic:scroogle");

@ -226,7 +226,7 @@ public final class RankingProcess extends Thread {
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++; //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
// check site constraints // check site constraints
String hosthash = new String(iEntry.urlhash(), 6, 6); String hosthash = iEntry.hosthash();
if (query.sitehash == null) { if (query.sitehash == null) {
// no site constraint there; maybe collect host navigation information // no site constraint there; maybe collect host navigation information
if (nav_hosts && query.urlMask_isCatchall) { if (nav_hosts && query.urlMask_isCatchall) {
@ -330,13 +330,13 @@ public final class RankingProcess extends Thread {
} }
// check doubledom // check doubledom
final String domhash = ASCII.String(rwi.getElement().urlhash(), 6, 6); final String hosthash = rwi.getElement().hosthash();
synchronized (this.doubleDomCache) { synchronized (this.doubleDomCache) {
m = this.doubleDomCache.get(domhash); m = this.doubleDomCache.get(hosthash);
if (m == null) { if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned // first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); m = new WeakPriorityBlockingQueue<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(domhash, m); this.doubleDomCache.put(hosthash, m);
return rwi; return rwi;
} }
// second appearances of dom // second appearances of dom
@ -374,7 +374,7 @@ public final class RankingProcess extends Thread {
if (bestEntry == null) return null; if (bestEntry == null) return null;
// finally remove the best entry from the doubledom cache // finally remove the best entry from the doubledom cache
m = this.doubleDomCache.get(ASCII.String(bestEntry.getElement().urlhash()).substring(6)); m = this.doubleDomCache.get(bestEntry.getElement().hosthash());
bestEntry = m.poll(); bestEntry = m.poll();
} }
return bestEntry; return bestEntry;

@ -163,7 +163,7 @@ public class ReferenceOrder {
if (max == null) max = iEntry.clone(); else max.max(iEntry); if (max == null) max = iEntry.clone(); else max.max(iEntry);
out.put(iEntry); // must be after the min/max check to prevent that min/max is null in cardinal() out.put(iEntry); // must be after the min/max check to prevent that min/max is null in cardinal()
// update domcount // update domcount
dom = ASCII.String(iEntry.urlhash(), 6, 6); dom = iEntry.hosthash();
count = doms0.get(dom); count = doms0.get(dom);
if (count == null) { if (count == null) {
doms0.put(dom, int1); doms0.put(dom, int1);
@ -194,8 +194,9 @@ public class ReferenceOrder {
} }
} }
public int authority(final byte[] urlHash) { public int authority(final String hostHash) {
return (doms.get(ASCII.String(urlHash, 6, 6)) << 8) / (1 + this.maxdomcount); assert hostHash.length() == 6;
return (doms.get(hostHash) << 8) / (1 + this.maxdomcount);
} }
/** /**
@ -232,7 +233,7 @@ public class ReferenceOrder {
+ ((max.lother() == min.lother()) ? 0 : (((t.lother() - min.lother() ) << 8) / (max.lother() - min.lother()) ) << ranking.coeff_lother) + ((max.lother() == min.lother()) ? 0 : (((t.lother() - min.lother() ) << 8) / (max.lother() - min.lother()) ) << ranking.coeff_lother)
+ ((max.hitcount() == min.hitcount()) ? 0 : (((t.hitcount() - min.hitcount() ) << 8) / (max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount) + ((max.hitcount() == min.hitcount()) ? 0 : (((t.hitcount() - min.hitcount() ) << 8) / (max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ tf + tf
+ ((ranking.coeff_authority > 12) ? (authority(t.urlhash()) << ranking.coeff_authority) : 0) + ((ranking.coeff_authority > 12) ? (authority(t.hosthash()) << ranking.coeff_authority) : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0) + ((flags.get(WordReferenceRow.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0) + ((flags.get(WordReferenceRow.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0) + ((flags.get(WordReferenceRow.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0)

@ -59,7 +59,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
* @param host * @param host
* @return * @return
*/ */
public static String domhash(final String host) { public static String hosthash(final String host) {
String h = host; String h = host;
if (!h.startsWith("http://")) h = "http://" + h; if (!h.startsWith("http://")) h = "http://" + h;
DigestURI url = null; DigestURI url = null;

@ -74,7 +74,7 @@ public class WordReferenceFactory implements ReferenceFactory<WordReference> {
while (i.hasNext()) { while (i.hasNext()) {
iEntry = i.next(); iEntry = i.next();
if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.urlhash()) != null)) continue; // do not include urls that are in excludeContainer if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.urlhash()) != null)) continue; // do not include urls that are in excludeContainer
dom = ASCII.String(iEntry.urlhash(), 6, 6); dom = (iEntry instanceof WordReferenceVars) ? ((WordReferenceVars) iEntry).hosthash() : ASCII.String(iEntry.urlhash(), 6, 6);
mod = ASCII.String(iEntry.urlhash(), 0, 6); mod = ASCII.String(iEntry.urlhash(), 0, 6);
if ((paths = doms.get(dom)) == null) { if ((paths = doms.get(dom)) == null) {
doms.put(dom, new StringBuilder(30).append(mod)); doms.put(dom, new StringBuilder(30).append(mod));

@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
@ -58,6 +59,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
public long lastModified; public long lastModified;
public byte[] language; public byte[] language;
public byte[] urlHash; public byte[] urlHash;
private String hostHash = null;
public char type; public char type;
public int hitcount, llocal, lother, phrasesintext, public int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase, posinphrase, posofphrase,
@ -271,6 +273,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return urlHash; return urlHash;
} }
/**
 * Returns the host hash of this reference as a String.
 * <p>
 * The value is produced by {@code ASCII.String(urlHash, 6, 6)} on the first
 * call and stored in the {@code hostHash} field; later calls hand back the
 * stored String instead of building a new one.
 *
 * @return the stored host hash String
 */
public String hosthash() {
    if (hostHash == null) {
        // first request: build the String once and remember it
        hostHash = ASCII.String(urlHash, 6, 6);
    }
    return hostHash;
}
public int urlcomps() { public int urlcomps() {
return urlcomps; return urlcomps;
} }

Loading…
Cancel
Save