From 051a65f7afb5938bb83e7a89a8a838253a32a323 Mon Sep 17 00:00:00 2001 From: theli Date: Mon, 4 Jun 2007 05:27:46 +0000 Subject: [PATCH] *) Snippet fetching: Snippets are now fetched synchronously if the query parameter "fetchSnippet=" is appended to the query string on the YaCy search page. This is required for the RSS feed. See: http://www.yacy-forum.de/viewtopic.php?t=4051 *) Small changes in the XSLT stylesheet that is used to generate an HTML page from the RSS feed. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3787 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/xml/snippet.java | 4 ++-- htroot/yacysearch.java | 49 +++++++++++++++++++++++++++++++++++++---- htroot/yacysearch.rss | 22 ++++++++++-------- htroot/yacysearch.xsl | 13 ++++++----- 4 files changed, 68 insertions(+), 20 deletions(-) diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java index c8836afaf..0194d7449 100644 --- a/htroot/xml/snippet.java +++ b/htroot/xml/snippet.java @@ -25,8 +25,8 @@ public class snippet { //get the timeout for snippet-fetching int mediasnippet_timeout = 15000; int textsnippet_timeout = 10000; - mediasnippet_timeout = Integer.parseInt((env.getConfig("timeout_text", "15000"))); - textsnippet_timeout = Integer.parseInt((env.getConfig("timeout_media", "10000"))); + mediasnippet_timeout = Integer.parseInt(env.getConfig("timeout_text", "15000")); + textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000")); // getting url String urlString = post.get("url", ""); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index fdd747edd..bf2e30426 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -52,6 +52,7 @@ import java.net.MalformedURLException; import java.net.URLEncoder; import java.util.HashMap; import java.util.Iterator; +import java.util.Set; import java.util.regex.PatternSyntaxException; import java.util.TreeSet; @@ -69,6 +70,7 @@ import de.anomic.plasma.plasmaSearchPreOrder; import 
de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSearchTimingProfile; +import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSearchResults; @@ -266,9 +268,10 @@ public class yacysearch { final boolean globalsearch = (global) && (yacyonline) && (!samesearch); // do the search + TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]); plasmaSearchQuery thisSearch = new plasmaSearchQuery( querystring, - plasmaCondenser.words2hashes(query[0]), + queryHashes, plasmaCondenser.words2hashes(query[1]), maxDistance, prefermask, @@ -338,9 +341,47 @@ public class yacysearch { if (result.hasSnippet()) { prop.put("type_results_" + i + "_snippet", 1); prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java - } else { - prop.put("type_results_" + i + "_snippet", 0); - prop.put("type_results_" + i + "_snippet_text", ""); + } else { + if (post.containsKey("fetchSnippet")) { + /* fetch the snippet now */ + try { + // snippet fetch timeout + int textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000")); + + // boolean line_end_with_punctuation + boolean pre = post.get("pre", "false").equals("true"); + +// if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed + boolean remove = post.get("remove", "false").equals("true"); + + URL resultURL = new URL(result.getUrl()); + plasmaSnippetCache.TextSnippet snippet = sb.snippetCache.retrieveTextSnippet( + resultURL, + queryHashes, + true, + pre, + 260, + textsnippet_timeout + ); + + if (snippet.getErrorCode() < 11) { + // no problems occurred + //prop.put("text", (snippet.exists()) ? 
snippet.getLineMarked(queryHashes) : "unknown"); + prop.putASIS("type_results_" + i + "_snippet_text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown"); + } else { + // problems with snippet fetch + prop.put("type_results_" + i + "_snippet_text", (remove) ? sb.snippetCache.failConsequences(snippet, queryHashes) : snippet.getError()); + } + prop.put("type_results_" + i + "_snippet", 1); + } catch (MalformedURLException e) { + prop.put("type_results_" + i + "_snippet", 0); + prop.put("type_results_" + i + "_snippet_text", ""); + } + } else { + /* no snippet available (will be fetched later via ajax) */ + prop.put("type_results_" + i + "_snippet", 0); + prop.put("type_results_" + i + "_snippet_text", ""); + } } prop.put("type_results", results.numResults()); prop.put("references", results.getReferences()); diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss index 3659995ea..87bb1eea3 100644 --- a/htroot/yacysearch.rss +++ b/htroot/yacysearch.rss @@ -3,6 +3,10 @@ + YaCy P2P-Search for #[former]# Search for #[former]# @@ -11,21 +15,21 @@ Search for #[former]# #[type_results]# - 1 - #[type_results]# - diff --git a/htroot/yacysearch.xsl b/htroot/yacysearch.xsl index cd064375f..0eaf20038 100644 --- a/htroot/yacysearch.xsl +++ b/htroot/yacysearch.xsl @@ -1,6 +1,7 @@ + @@ -9,6 +10,7 @@ @@ -24,11 +26,12 @@ -

-
-
-
-

+
+

+

+

+

+
\ No newline at end of file