enhanced did-you-mean (a bit): can now remember previously searched words (plus small enhancements)
pull/1/head
orbiter 12 years ago
parent a725a4242f
commit 940c6849ee

@ -58,7 +58,7 @@ public class suggest {
final String ext = header.get("EXT", ""); final String ext = header.get("EXT", "");
final boolean json = ext.equals("json"); final boolean json = ext.equals("json");
final boolean xml = ext.equals("xml"); final boolean xml = ext.equals("xml");
final boolean more = post != null && post.containsKey("more"); final boolean more = sb.index.connectedRWI() || (post != null && post.containsKey("more")); // with RWIs connected the guessing is super-fast
// get query // get query
final String originalquerystring = (post == null) ? "" : post.get("query", post.get("q", "")).trim(); final String originalquerystring = (post == null) ? "" : post.get("query", post.get("q", "")).trim();

@ -26,6 +26,7 @@ import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.Collection;
import java.util.ConcurrentModificationException; import java.util.ConcurrentModificationException;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map; import java.util.Map;
@ -219,7 +220,7 @@ public class WordCache {
} }
} }
public static void learn(Set<String> wordset) { public static void learn(Collection<String> wordset) {
for (String s: wordset) { for (String s: wordset) {
learn(new StringBuilder(s)); learn(new StringBuilder(s));
} }
@ -320,8 +321,12 @@ public class WordCache {
} }
return size; return size;
} }
public static int sizeCommonWords() {
return commonWords.size();
}
public static void clear() { public static void clearCommonWords() {
commonWords.clear(); commonWords.clear();
} }

@ -432,7 +432,7 @@ public class DidYouMean {
StringBuilder s; StringBuilder s;
try { try {
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) { while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.segment.getWordCountGuess(s.toString()) > 0) { if (s.length() >= MinimumOutputWordLength && DidYouMean.this.segment.getWordCountGuess(s.toString()) > 2) {
DidYouMean.this.resultSet.add(s); DidYouMean.this.resultSet.add(s);
} }
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {

@ -93,7 +93,7 @@ public class ResourceObserver {
SearchEventCache.cleanupEvents(true); SearchEventCache.cleanupEvents(true);
this.sb.trail.clear(); this.sb.trail.clear();
Switchboard.urlBlacklist.clearblacklistCache(); Switchboard.urlBlacklist.clearblacklistCache();
WordCache.clear(); WordCache.clearCommonWords();
Domains.clear(); Domains.clear();
} }
} }

@ -2045,7 +2045,7 @@ public final class Switchboard extends serverSwitch {
PDFont.clearResources(); // eats up megabytes, see http://markmail.org/thread/quk5odee4hbsauhu PDFont.clearResources(); // eats up megabytes, see http://markmail.org/thread/quk5odee4hbsauhu
// clear caches // clear caches
WordCache.clear(); if (WordCache.sizeCommonWords() > 1000) WordCache.clearCommonWords();
Domains.clear(); Domains.clear();
// clean up image stack // clean up image stack

@ -287,15 +287,16 @@ public class Segment {
*/ */
public int getWordCountGuess(String word) { public int getWordCountGuess(String word) {
if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('/') >= 0) return 0; if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('/') >= 0) return 0;
if (this.termIndex == null) { if (this.termIndex != null) {
try { int count = this.termIndex.count(Word.word2hash(word));
return (int) this.fulltext.getDefaultConnector().getQueryCount(CollectionSchema.text_t.getSolrFieldName() + ':' + word); if (count > 0) return count;
} catch (Throwable e) { }
Log.logException(e); try {
return 0; return (int) this.fulltext.getDefaultConnector().getQueryCount(CollectionSchema.text_t.getSolrFieldName() + ':' + word);
} } catch (Throwable e) {
Log.logException(e);
return 0;
} }
return this.termIndex.count(Word.word2hash(word));
} }
public boolean exists(final String urlhash) { public boolean exists(final String urlhash) {

@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import java.util.SortedSet; import java.util.SortedSet;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration; import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType; import net.yacy.cora.federate.solr.SolrType;
@ -92,6 +93,9 @@ public class QueryGoal {
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words); for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words);
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words, this.all_words); for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words, this.all_words);
WordCache.learn(this.include_strings);
WordCache.learn(this.exclude_strings);
this.include_hashes = null; this.include_hashes = null;
this.exclude_hashes = null; this.exclude_hashes = null;
this.all_hashes = null; this.all_hashes = null;

Loading…
Cancel
Save