diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 1fe9dcf0b..1b9fc90e6 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -447,7 +447,7 @@ public class IndexControlRWIs_p { prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", BlockRank.ranking(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency()); - prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash())); + prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(ASCII.String(entry.hash(), 6, 6))); prop.put("genUrlList_urlList_"+i+"_urlExists_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(entry.word().lastModified()))); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext()); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 6e7bd6e38..2f763d89c 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -384,7 +384,7 @@ public class yacysearch { while (sitehost.endsWith(".")) { sitehost = sitehost.substring(0, sitehost.length() - 1); } - sitehash = DigestURI.domhash(sitehost); + sitehash = DigestURI.hosthash(sitehost); } final int heuristicScroogle = querystring.indexOf("heuristic:scroogle"); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 493153daa..3d2a142d5 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -226,7 +226,7 @@ public final class RankingProcess extends Thread { //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++; // check site constraints - String hosthash = new String(iEntry.urlhash(), 6, 6); + String hosthash = iEntry.hosthash(); if (query.sitehash == null) { // no site constraint there; maybe collect host navigation information if (nav_hosts && query.urlMask_isCatchall) { @@ -330,13 +330,13 @@ public final class RankingProcess extends Thread { } // check doubledom - final String domhash = ASCII.String(rwi.getElement().urlhash(), 6, 6); + final String hosthash = rwi.getElement().hosthash(); synchronized (this.doubleDomCache) { - m = this.doubleDomCache.get(domhash); + m = this.doubleDomCache.get(hosthash); if (m == null) { // first appearance of dom. we create an entry to signal that one of that domain was already returned m = new WeakPriorityBlockingQueue((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); - this.doubleDomCache.put(domhash, m); + this.doubleDomCache.put(hosthash, m); return rwi; } // second appearances of dom @@ -374,7 +374,7 @@ public final class RankingProcess extends Thread { if (bestEntry == null) return null; // finally remove the best entry from the doubledom cache - m = this.doubleDomCache.get(ASCII.String(bestEntry.getElement().urlhash()).substring(6)); + m = this.doubleDomCache.get(bestEntry.getElement().hosthash()); bestEntry = m.poll(); } return bestEntry; diff --git a/source/de/anomic/search/ReferenceOrder.java b/source/de/anomic/search/ReferenceOrder.java index 3961e447c..a82b70808 100644 --- a/source/de/anomic/search/ReferenceOrder.java +++ b/source/de/anomic/search/ReferenceOrder.java @@ -163,7 +163,7 @@ public class ReferenceOrder { if (max == null) max = iEntry.clone(); else max.max(iEntry); out.put(iEntry); // must be after the min/max check to prevent that min/max is null in cardinal() // update domcount - dom = ASCII.String(iEntry.urlhash(), 6, 6); + dom = iEntry.hosthash(); count = doms0.get(dom); if (count == null) { doms0.put(dom, int1); @@ -194,8 +194,9 @@ public class ReferenceOrder { } } - public int authority(final byte[] urlHash) { - return (doms.get(ASCII.String(urlHash, 6, 6)) << 8) / (1 + this.maxdomcount); + public int authority(final String hostHash) { + assert hostHash.length() == 6; + return (doms.get(hostHash) << 8) / (1 + this.maxdomcount); } /** @@ -232,7 +233,7 @@ public class ReferenceOrder { + ((max.lother() == min.lother()) ? 0 : (((t.lother() - min.lother() ) << 8) / (max.lother() - min.lother()) ) << ranking.coeff_lother) + ((max.hitcount() == min.hitcount()) ? 0 : (((t.hitcount() - min.hitcount() ) << 8) / (max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount) + tf - + ((ranking.coeff_authority > 12) ? (authority(t.urlhash()) << ranking.coeff_authority) : 0) + + ((ranking.coeff_authority > 12) ? (authority(t.hosthash()) << ranking.coeff_authority) : 0) + ((flags.get(WordReferenceRow.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0) + ((flags.get(WordReferenceRow.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0) + ((flags.get(WordReferenceRow.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0) diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index 184689e02..b66ca15e1 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -59,7 +59,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable { * @param host * @return */ - public static String domhash(final String host) { + public static String hosthash(final String host) { String h = host; if (!h.startsWith("http://")) h = "http://" + h; DigestURI url = null; diff --git a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java index 45800dd34..fa99371ec 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java @@ -74,7 +74,7 @@ public class WordReferenceFactory implements ReferenceFactory { while (i.hasNext()) { iEntry = i.next(); if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.urlhash()) != null)) continue; // do not include urls that are in excludeContainer - dom = ASCII.String(iEntry.urlhash(), 6, 6); + dom = (iEntry instanceof WordReferenceVars) ? ((WordReferenceVars) iEntry).hosthash() : ASCII.String(iEntry.urlhash(), 6, 6); mod = ASCII.String(iEntry.urlhash(), 0, 6); if ((paths = doms.get(dom)) == null) { doms.put(dom, new StringBuilder(30).append(mod)); diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index ff59353a2..ad043748c 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.Semaphore; +import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.order.Base64Order; @@ -58,6 +59,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc public long lastModified; public byte[] language; public byte[] urlHash; + private String hostHash = null; public char type; public int hitcount, llocal, lother, phrasesintext, posinphrase, posofphrase, @@ -271,6 +273,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return urlHash; } + public String hosthash() { + if (hostHash != null) return hostHash; + hostHash = ASCII.String(urlHash, 6, 6); + return hostHash; + } + public int urlcomps() { return urlcomps; }