From cdb8f3b10d15edc75f68c8170a967289f7c40981 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 8 Dec 2015 03:17:32 +0100 Subject: [PATCH] make current ranking score value avail. to search interface / api Update the result score result field with the result queue ranking value to reflect the actual calculated/used score, for rwi & solr stack results. (calc. etc. is unchanged, it's just that result entry carries the latest val as api retrieves the number from it) --- .../kelondro/data/meta/URIMetadataNode.java | 21 +++++++- source/net/yacy/search/query/SearchEvent.java | 48 +++++++++++-------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index bfa2325fb..b4f572967 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -151,7 +151,7 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable cs = new ArrayList(); cs.add(collection); this.setField(CollectionSchema.collection_sxt.name(), cs); @@ -166,8 +166,11 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable 0) + */ public float score() { return this.score; } + /** + * Set the YaCy ranking score to make it accessible in the search interface/api + * (should be set to the effective value of result queues getWeight) + * @param theScore YaCy ranking of search results + */ + public void setScore(float theScore) { + this.score = theScore; + } + public Date loaddate() { return getDate(CollectionSchema.load_date_dt); } diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 99d6c41d2..a93b112fa 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -978,9 +978,15 @@ public final class SearchEvent { this.urlhashes.putUnique(iEntry.hash()); rankingtryloop: while (true) { try { - long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly - //System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring()); - this.nodeStack.put(new ReverseElement(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest) + long score; + // determine nodestack ranking (will be altered by postranking) + // so far Solr score is used (with abitrary factor to get value similar to rwi ranking values) + Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result + if (scorex != null && scorex > 0) + score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly + else + score = this.order.cardinal(iEntry); + this.nodeStack.put(new ReverseElement(iEntry, score)); // inserts the element and removes the worst (which is smallest) break rankingtryloop; } catch (final ArithmeticException e ) { // this may happen if the concurrent normalizer changes values during cardinal computation @@ -1008,7 +1014,7 @@ public final class SearchEvent { * If the sjupDoubleDom option is selected, only different hosts are returned until no such rwi exists. * Then the best entry from domain stacks are returned. * @param skipDoubleDom - * @return a node from a rwi entry if one exist or null if not + * @return a node from a rwi entry if one exist or null if not (with score value set) */ private URIMetadataNode pullOneRWI(final boolean skipDoubleDom) { @@ -1119,7 +1125,7 @@ public final class SearchEvent { * the future by calling this.feedingIsFinished() * * @param skipDoubleDom should be true if it is wanted that double domain entries are skipped - * @return a metadata entry for a url + * @return a metadata entry for a url (with score value set) */ public URIMetadataNode pullOneFilteredFromRWI(final boolean skipDoubleDom) { // returns from the current RWI list the best URL entry and removes this entry from the list @@ -1324,8 +1330,8 @@ public final class SearchEvent { public boolean drainStacksToResult() { // we take one entry from both stacks at the same time boolean success = false; - Element localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null; - URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement(); + final Element localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null; + final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement(); if (node != null) { LinkedHashSet solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once if (solrsnippetlines != null && solrsnippetlines.size() > 0) { @@ -1341,16 +1347,16 @@ public final class SearchEvent { final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal()); final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal()); URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet); - addResult(re); + addResult(re, localEntryElement.getWeight()); success = true; } else { // we don't have a snippet from solr, try to get it in our way (by reloading, if necessary) if (SearchEvent.this.snippetFetchAlive.get() >= 10) { // too many concurrent processes - addResult(getSnippet(node, null)); + addResult(getSnippet(node, null), localEntryElement.getWeight()); success = true; } else { - final URIMetadataNode node1 = node; + new Thread() { @Override public void run() { @@ -1358,7 +1364,7 @@ public final class SearchEvent { try { SearchEvent.this.snippetFetchAlive.incrementAndGet(); try { - addResult(getSnippet(node1, SearchEvent.this.query.snippetCacheStrategy)); + addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight()); } catch (final Throwable e) {} finally { SearchEvent.this.snippetFetchAlive.decrementAndGet(); } @@ -1372,9 +1378,9 @@ public final class SearchEvent { } if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) { // too many concurrent processes - node = pullOneFilteredFromRWI(true); - if (node != null) { - addResult(getSnippet(node, null)); + final URIMetadataNode noderwi = pullOneFilteredFromRWI(true); + if (noderwi != null) { + addResult(getSnippet(noderwi, null), noderwi.score()); success = true; } } else { @@ -1383,11 +1389,11 @@ public final class SearchEvent { public void run() { SearchEvent.this.oneFeederStarted(); try { - final URIMetadataNode node = pullOneFilteredFromRWI(true); - if (node != null) { + final URIMetadataNode noderwi = pullOneFilteredFromRWI(true); + if (noderwi != null) { SearchEvent.this.snippetFetchAlive.incrementAndGet(); try { - addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy)); + addResult(getSnippet(noderwi, SearchEvent.this.query.snippetCacheStrategy), noderwi.score()); } catch (final Throwable e) { ConcurrentLog.logException(e); } finally { @@ -1406,12 +1412,14 @@ public final class SearchEvent { /** * place the result to the result vector and apply post-ranking - * @param resultEntry + * post-ranking is added to the current score, + * @param resultEntry to add + * @param score current ranking */ - public void addResult(URIMetadataNode resultEntry) { + public void addResult(URIMetadataNode resultEntry, final float score) { if (resultEntry == null) return; - float score = resultEntry.score(); final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/); + resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api this.resultList.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries. this.addTopics(resultEntry);