added more configuration options for search:

- removed configuration button for 'search only for admin' from index.html and added this to ConfigPortal
- added configuration of link verification options (iffresh, cacheonly, nocache, ifexist) to ConfigPortal
- added configuration of navigation options to ConfigPortal
- added an option to switch off automatic index cleaning in case that a link verification method fails


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7613 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent e0c7d490f9
commit ba03ca8620

@ -723,6 +723,30 @@ search.result.show.metadata = true
search.result.show.parser = true search.result.show.parser = true
search.result.show.pictures = true search.result.show.pictures = true
# search navigators: comma-separated list of default values for search navigation.
# can be temporarily different if the search string is given with different navigation values
# assigning no value(s) means that no navigation is shown
search.navigation=hosts,authors,namespace,topics
# search result verification and snippet fetch caching rules
# each search result can be verified by loading the link from the web
# this can be enhanced using a cache. In some cases it may be appropriate
# to not verify the link at all and not compute a snippet
# the possible cases are:
# nocache: no use of web cache, load all snippets online
# iffresh: use the cache if the cache exists and is fresh otherwise load online
# ifexist: use the cache if the cache exists, otherwise load online
# cacheonly: never go online, use all content from cache. If no cache entry exists,
# consider content nevertheless as available and show result without snippet
# false: no link verification and no snippet generation:
# all search results are valid without verification
search.verify = iffresh
# in case that a link verification fails then the corresponding index reference can be
# deleted to clean up the index. If this property is set then failed index verification in
# the cases of nocache, iffresh and ifexist causes an index deletion
search.verify.delete = true
# remote search details # remote search details
remotesearch.maxcount = 20 remotesearch.maxcount = 20
remotesearch.maxtime = 1000 remotesearch.maxtime = 1000

@ -15,7 +15,7 @@
and a link to a home page that is reached when the 'corporate identity'-images are clicked. and a link to a home page that is reached when the 'corporate identity'-images are clicked.
To change also colours and styles use the <a href="ConfigAppearance_p.html">Appearance Servlet</a> for different skins and languages. To change also colours and styles use the <a href="ConfigAppearance_p.html">Appearance Servlet</a> for different skins and languages.
</p> </p>
<form action="ConfigPortal.html" id="ConfigPortal" accept-charset="UTF-8"> <form action="ConfigPortal.html" method="post" enctype="multipart/form-data" id="ConfigPortal" accept-charset="UTF-8">
<fieldset> <fieldset>
<dl> <dl>
<dt>Greeting Line</dt> <dt>Greeting Line</dt>
@ -30,6 +30,12 @@
<dt>URL of a Large Corporate Image</dt> <dt>URL of a Large Corporate Image</dt>
<dd><input type="text" name="promoteSearchPageGreeting.largeImage" value="#[promoteSearchPageGreeting.largeImage]#" size="60" /></dd> <dd><input type="text" name="promoteSearchPageGreeting.largeImage" value="#[promoteSearchPageGreeting.largeImage]#" size="60" /></dd>
<dt>Enable Search for Everyone?</dt>
<dd>
<input type="radio" name="publicSearchpage" value="true" #(publicSearchpage)#::checked="checked"#(/publicSearchpage)# />Search is available for everyone&nbsp;
<input type="radio" name="publicSearchpage" value="false" #(publicSearchpage)#checked="checked"::#(/publicSearchpage)# />Only the administator is allowed to search
</dd>
<dt>Show Navigation Bar on Search Page?</dt> <dt>Show Navigation Bar on Search Page?</dt>
<dd> <dd>
<input type="radio" name="publicTopmenu" value="true" #(publicTopmenu)#::checked="checked"#(/publicTopmenu)# />Show Navigation Top-Menu&nbsp; <input type="radio" name="publicTopmenu" value="true" #(publicTopmenu)#::checked="checked"#(/publicTopmenu)# />Show Navigation Top-Menu&nbsp;
@ -42,6 +48,16 @@
<input type="radio" name="search.options" value="false" #(search.options)#checked="checked"::#(/search.options)# />do not show Advanced Search <input type="radio" name="search.options" value="false" #(search.options)#checked="checked"::#(/search.options)# />do not show Advanced Search
</dd> </dd>
<dt>Snippet Fetch Strategy &amp; Link Verification</dt>
<dd>
<input type="radio" name="search.verify" value="nocache" #(search.verify.nocache)#::checked="checked"#(/search.verify.nocache)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/>NOCACHE: no use of web cache, load all snippets online<br/>
<input type="radio" name="search.verify" value="iffresh" #(search.verify.iffresh)#::checked="checked"#(/search.verify.iffresh)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/>IFFRESH: use the cache if the cache exists and is fresh otherwise load online<br/>
<input type="radio" name="search.verify" value="ifexist" #(search.verify.ifexist)#::checked="checked"#(/search.verify.ifexist)# onclick="document.getElementById('search_verify_delete').disabled=false;document.getElementById('search_verify_delete').checked=true;"/>IFEXIST: use the cache if the cache exist or load online<br/>
<input type="checkbox" name="search.verify.delete" id="search_verify_delete" value="true" #(search.verify.delete)#::checked="checked"#(/search.verify.delete)# />If verification fails, delete index reference<br/><br/>
<input type="radio" name="search.verify" value="cacheonly" #(search.verify.cacheonly)#::checked="checked"#(/search.verify.cacheonly)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/>CACHEONLY: never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available and show result without snippet<br/>
<input type="radio" name="search.verify" value="false" #(search.verify.false)#::checked="checked"#(/search.verify.false)# onclick="document.getElementById('search_verify_delete').disabled=true;document.getElementById('search_verify_delete').checked=false;"/>FALSE: no link verification and not snippet generation: all search results are valid without verification
</dd>
<dt>Show Information Links for each Search Result Entry</dt> <dt>Show Information Links for each Search Result Entry</dt>
<dd> <dd>
<input type="checkbox" name="search.result.show.date" value="true" #(search.result.show.date)#::checked="checked"#(/search.result.show.date)# />Date&nbsp; <input type="checkbox" name="search.result.show.date" value="true" #(search.result.show.date)#::checked="checked"#(/search.result.show.date)# />Date&nbsp;
@ -51,6 +67,14 @@
<input type="checkbox" name="search.result.show.pictures" value="true" #(search.result.show.pictures)#::checked="checked"#(/search.result.show.pictures)# />Pictures <input type="checkbox" name="search.result.show.pictures" value="true" #(search.result.show.pictures)#::checked="checked"#(/search.result.show.pictures)# />Pictures
</dd> </dd>
<dt>Show Navigation on Side-Bar</dt>
<dd>
<input type="checkbox" name="search.navigation.hosts" value="true" #(search.navigation.hosts)#::checked="checked"#(/search.navigation.hosts)# />Host Navigation&nbsp;
<input type="checkbox" name="search.navigation.authors" value="true" #(search.navigation.authors)#::checked="checked"#(/search.navigation.authors)# />Author Navigation&nbsp;
<input type="checkbox" name="search.navigation.namespace" value="true" #(search.navigation.namespace)#::checked="checked"#(/search.navigation.namespace)# />Wiki Name-Space Navigation&nbsp;
<input type="checkbox" name="search.navigation.topics" value="true" #(search.navigation.topics)#::checked="checked"#(/search.navigation.topics)# />Topics (Tag-Cloud) Navigation&nbsp;
</dd>
<dt>Default Pop-Up Page</dt> <dt>Default Pop-Up Page</dt>
<dd> <dd>
<input type="radio" name="popup" value="status" #(popupStatus)#::checked="checked"#(/popupStatus)# />Status Page&nbsp; <input type="radio" name="popup" value="status" #(popupStatus)#::checked="checked"#(/popupStatus)# />Status Page&nbsp;

@ -71,12 +71,23 @@ public class ConfigPortal {
sb.setConfig(SwitchboardConstants.INDEX_FORWARD, post.get(SwitchboardConstants.INDEX_FORWARD, "")); sb.setConfig(SwitchboardConstants.INDEX_FORWARD, post.get(SwitchboardConstants.INDEX_FORWARD, ""));
HTTPDFileHandler.indexForward = post.get(SwitchboardConstants.INDEX_FORWARD, ""); HTTPDFileHandler.indexForward = post.get(SwitchboardConstants.INDEX_FORWARD, "");
sb.setConfig("publicTopmenu", post.getBoolean("publicTopmenu", true)); sb.setConfig("publicTopmenu", post.getBoolean("publicTopmenu", true));
sb.setConfig("publicSearchpage", post.getBoolean("publicSearchpage", true));
sb.setConfig("search.options", post.getBoolean("search.options", false)); sb.setConfig("search.options", post.getBoolean("search.options", false));
sb.setConfig("search.result.show.date", post.getBoolean("search.result.show.date", false)); sb.setConfig("search.result.show.date", post.getBoolean("search.result.show.date", false));
sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size", false)); sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size", false));
sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata", false)); sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata", false));
sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser", false)); sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser", false));
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures", false)); sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures", false));
sb.setConfig("search.verify", post.get("search.verify", "ifexist"));
sb.setConfig("search.verify.delete", post.getBoolean("search.verify.delete", false));
// construct navigation String
String nav = "";
if (post.getBoolean("search.navigation.hosts", false)) nav += "hosts,";
if (post.getBoolean("search.navigation.authors", false)) nav += "authors,";
if (post.getBoolean("search.navigation.namespace", false)) nav += "namespace,";
if (post.getBoolean("search.navigation.topics", false)) nav += "topics,";
if (nav.endsWith(",")) nav = nav.substring(0, nav.length() - 1);
sb.setConfig("search.navigation", nav);
} }
if (post.containsKey("searchpage_default")) { if (post.containsKey("searchpage_default")) {
sb.setConfig(SwitchboardConstants.GREETING, "P2P Web Search"); sb.setConfig(SwitchboardConstants.GREETING, "P2P Web Search");
@ -88,12 +99,16 @@ public class ConfigPortal {
HTTPDFileHandler.indexForward = ""; HTTPDFileHandler.indexForward = "";
sb.setConfig(SwitchboardConstants.SEARCH_TARGET, "_self"); sb.setConfig(SwitchboardConstants.SEARCH_TARGET, "_self");
sb.setConfig("publicTopmenu", true); sb.setConfig("publicTopmenu", true);
sb.setConfig("publicSearchpage", true);
sb.setConfig("search.navigation", "hosts,authors,namespace,topics");
sb.setConfig("search.options", true); sb.setConfig("search.options", true);
sb.setConfig("search.result.show.date", true); sb.setConfig("search.result.show.date", true);
sb.setConfig("search.result.show.size", true); sb.setConfig("search.result.show.size", true);
sb.setConfig("search.result.show.metadata", true); sb.setConfig("search.result.show.metadata", true);
sb.setConfig("search.result.show.parser", true); sb.setConfig("search.result.show.parser", true);
sb.setConfig("search.result.show.pictures", true); sb.setConfig("search.result.show.pictures", true);
sb.setConfig("search.verify", "iffresh");
sb.setConfig("search.verify.delete", "true");
} }
} }
@ -103,13 +118,27 @@ public class ConfigPortal {
prop.putHTML(SwitchboardConstants.GREETING_SMALL_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, "")); prop.putHTML(SwitchboardConstants.GREETING_SMALL_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
prop.putHTML(SwitchboardConstants.INDEX_FORWARD, sb.getConfig(SwitchboardConstants.INDEX_FORWARD, "")); prop.putHTML(SwitchboardConstants.INDEX_FORWARD, sb.getConfig(SwitchboardConstants.INDEX_FORWARD, ""));
prop.put("publicTopmenu", sb.getConfigBool("publicTopmenu", false) ? 1 : 0); prop.put("publicTopmenu", sb.getConfigBool("publicTopmenu", false) ? 1 : 0);
prop.put("publicSearchpage", sb.getConfigBool("publicSearchpage", false) ? 1 : 0);
prop.put("search.options", sb.getConfigBool("search.options", false) ? 1 : 0); prop.put("search.options", sb.getConfigBool("search.options", false) ? 1 : 0);
prop.put("search.result.show.date", sb.getConfigBool("search.result.show.date", false) ? 1 : 0); prop.put("search.result.show.date", sb.getConfigBool("search.result.show.date", false) ? 1 : 0);
prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0); prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0);
prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0); prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0);
prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0); prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0);
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0); prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts") >= 0 ? 1 : 0);
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors") >= 0 ? 1 : 0);
prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace") >= 0 ? 1 : 0);
prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics") >= 0 ? 1 : 0);
prop.put("search.verify.nocache", sb.getConfig("search.verify", "").equals("nocache") ? 1 : 0);
prop.put("search.verify.iffresh", sb.getConfig("search.verify", "").equals("iffresh") ? 1 : 0);
prop.put("search.verify.ifexist", sb.getConfig("search.verify", "").equals("ifexist") ? 1 : 0);
prop.put("search.verify.cacheonly", sb.getConfig("search.verify", "").equals("cacheonly") ? 1 : 0);
prop.put("search.verify.false", sb.getConfig("search.verify", "").equals("false") ? 1 : 0);
prop.put("search.verify.delete", sb.getConfigBool("search.verify.delete", true) ? 1 : 0);
final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html"); final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html");
prop.put("popupFront", 0); prop.put("popupFront", 0);
prop.put("popupSearch", 0); prop.put("popupSearch", 0);

@ -49,7 +49,7 @@
<fieldset class="maininput"> <fieldset class="maininput">
<input name="query" id="search" type="text" size="52" maxlength="80" value="#[former]#" /> <input name="query" id="search" type="text" size="52" maxlength="80" value="#[former]#" />
<input type="submit" name="Enter" value="Search" /> <input type="submit" name="Enter" value="Search" />
<input type="hidden" name="verify" value="true" /> <input type="hidden" name="verify" value="#[search.verify]#" />
#(searchdomswitches)#:: #(searchdomswitches)#::
<div class="yacysearch"> <div class="yacysearch">
#(searchtext)#::<input type="radio" id="text" name="contentdom" value="text" #(check)#::checked="checked"#(/check)# /><label for="text">Text</label>&nbsp;&nbsp;#(/searchtext)# #(searchtext)#::<input type="radio" id="text" name="contentdom" value="text" #(check)#::checked="checked"#(/check)# /><label for="text">Text</label>&nbsp;&nbsp;#(/searchtext)#
@ -59,7 +59,7 @@
#(searchapp)#::<input type="radio" id="app" name="contentdom" value="app" #(check)#::checked="checked"#(/check)# /><label for="app">Applications</label>#(/searchapp)# #(searchapp)#::<input type="radio" id="app" name="contentdom" value="app" #(check)#::checked="checked"#(/check)# /><label for="app">Applications</label>#(/searchapp)#
</div> </div>
#(/searchdomswitches)# #(/searchdomswitches)#
<input type="hidden" name="nav" value="all" /> <input type="hidden" name="nav" value="#[search.navigation]#" />
<input type="hidden" name="startRecord" value="0" /> <input type="hidden" name="startRecord" value="0" />
<input type="hidden" name="resource" value="global" /> <input type="hidden" name="resource" value="global" />
<input type="hidden" name="urlmaskfilter" value=".*" /> <input type="hidden" name="urlmaskfilter" value=".*" />
@ -70,7 +70,6 @@
#(searchoptions)#:: #(searchoptions)#::
</fieldset> </fieldset>
<p><a href="/index.html?searchoptions=2" onclick="this.href='/index.html?searchoptions=2&amp;former='+document.getElementById('searchform').search.value+'&amp;contentdom='+radioValue(document.getElementById('searchform').contentdom)">more options...</a></p> <p><a href="/index.html?searchoptions=2" onclick="this.href='/index.html?searchoptions=2&amp;former='+document.getElementById('searchform').search.value+'&amp;contentdom='+radioValue(document.getElementById('searchform').contentdom)">more options...</a></p>
<p><a href="http://www.yacy-websuche.de/wiki/index.php/En:SearchParameters">advanced parameters</a></p>
:: ::
</fieldset> </fieldset>
@ -132,26 +131,17 @@
<input type="checkbox" id="indexof" name="indexof" #[indexofChecked]# /> <label for="indexof">only index pages</label> <input type="checkbox" id="indexof" name="indexof" #[indexofChecked]# /> <label for="indexof">only index pages</label>
</td> </td>
</tr> </tr>
<tr><td></td><td></td></tr>
<tr>
<td>
<a href="http://www.yacy-websuche.de/wiki/index.php/En:SearchParameters">advanced parameters</a>
</td>
<td>
</td>
</tr>
</table> </table>
#(/searchoptions)# #(/searchoptions)#
</form> </form>
#(searchoptions)#::
<form action="index.html" method="get" class="search" accept-charset="UTF-8">
<p>
#(publicSearchpage)#
<button type="submit" name="publicPage" value="0">
<img src="/env/grafics/lock.gif" alt="authentication required" />
Disable search function for users without authorization
</button>
::
<button type="submit" name="publicPage" value="1">
<img src="/env/grafics/lock.gif" alt="authentication required" />
Enable web search to everyone
</button>
#(/publicSearchpage)#
</p>
</form>
#(/searchoptions)#
#(topmenu)# #(topmenu)#
#%env/templates/embeddedfooter.template%# #%env/templates/embeddedfooter.template%#
:: ::

@ -51,15 +51,12 @@ public class index {
} }
// access control // access control
boolean publicPage = sb.getConfigBool("publicSearchpage", true);
final boolean authorizedAccess = sb.verifyAuthentication(header, false); final boolean authorizedAccess = sb.verifyAuthentication(header, false);
if ((post != null) && (post.containsKey("publicPage"))) { if ((post != null) && (post.containsKey("publicPage"))) {
if (!authorizedAccess) { if (!authorizedAccess) {
prop.put("AUTHENTICATE", "admin log-in"); // force log-in prop.put("AUTHENTICATE", "admin log-in"); // force log-in
return prop; return prop;
} }
publicPage = post.get("publicPage", "0").equals("1");
sb.setConfig("publicSearchpage", publicPage);
} }
final boolean global = (post == null) ? true : post.get("resource", "global").equals("global"); final boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
@ -114,7 +111,6 @@ public class index {
prop.put("searchoptions_prefermaskoptions", "0"); prop.put("searchoptions_prefermaskoptions", "0");
prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter); prop.putHTML("searchoptions_prefermaskoptions_prefermaskfilter", prefermaskfilter);
prop.put("searchoptions_indexofChecked", ""); prop.put("searchoptions_indexofChecked", "");
prop.put("searchoptions_publicSearchpage", (publicPage) ? "0" : "1");
prop.put("results", ""); prop.put("results", "");
prop.putHTML("cat", cat); prop.putHTML("cat", cat);
prop.put("type", type); prop.put("type", type);
@ -132,6 +128,8 @@ public class index {
prop.put("searchdomswitches_searchvideo_check", (contentdom == ContentDomain.VIDEO) ? "1" : "0"); prop.put("searchdomswitches_searchvideo_check", (contentdom == ContentDomain.VIDEO) ? "1" : "0");
prop.put("searchdomswitches_searchimage_check", (contentdom == ContentDomain.IMAGE) ? "1" : "0"); prop.put("searchdomswitches_searchimage_check", (contentdom == ContentDomain.IMAGE) ? "1" : "0");
prop.put("searchdomswitches_searchapp_check", (contentdom == ContentDomain.APP) ? "1" : "0"); prop.put("searchdomswitches_searchapp_check", (contentdom == ContentDomain.APP) ? "1" : "0");
prop.put("search.navigation", sb.getConfig("search.navigation", "all") );
prop.put("search.verify", sb.getConfig("search.verify", "iffresh") );
// online caution timing // online caution timing
sb.localSearchLastAccess = System.currentTimeMillis(); sb.localSearchLastAccess = System.currentTimeMillis();

@ -121,9 +121,9 @@ $(function() {
<input type="hidden" name="former" value="#[former]#" /> <input type="hidden" name="former" value="#[former]#" />
<input type="hidden" name="maximumRecords" value="#[count]#" /> <input type="hidden" name="maximumRecords" value="#[count]#" />
<input type="hidden" name="startRecord" value="#[offset]#" /> <input type="hidden" name="startRecord" value="#[offset]#" />
<input type="hidden" name="verify" value="#[verify]#" /> <input type="hidden" name="verify" value="#[search.verify]#" />
<input type="hidden" name="resource" value="#[resource]#" /> <input type="hidden" name="resource" value="#[resource]#" />
<input type="hidden" name="nav" value="all" /> <input type="hidden" name="nav" value="#[search.navigation]#" />
<input type="hidden" name="urlmaskfilter" value="#[urlmaskfilter]#" /> <input type="hidden" name="urlmaskfilter" value="#[urlmaskfilter]#" />
<input type="hidden" name="prefermaskfilter" value="#[prefermaskfilter]#" /> <input type="hidden" name="prefermaskfilter" value="#[prefermaskfilter]#" />
<input type="hidden" name="depth" value="#[depth]#" /> <input type="hidden" name="depth" value="#[depth]#" />

@ -131,7 +131,8 @@ public class yacysearch {
prop.put("constraint", ""); prop.put("constraint", "");
prop.put("cat", "href"); prop.put("cat", "href");
prop.put("depth", "0"); prop.put("depth", "0");
prop.put("verify", (post == null) ? "true" : post.get("verify", "true")); prop.put("search.verify", (post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh"));
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put("contentdom", "text"); prop.put("contentdom", "text");
prop.put("contentdomCheckText", "1"); prop.put("contentdomCheckText", "1");
prop.put("contentdomCheckAudio", "0"); prop.put("contentdomCheckAudio", "0");
@ -403,7 +404,7 @@ public class yacysearch {
} }
// navigation // navigation
final String navigation = (post == null) ? "" : post.get("nav", ""); final String navigation = (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
// the query // the query
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
@ -707,7 +708,8 @@ public class yacysearch {
prop.putHTML("prefermaskfilter", prefermask); prop.putHTML("prefermaskfilter", prefermask);
prop.put("indexof", (indexof) ? "on" : "off"); prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64()); prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());
prop.put("verify", snippetFetchStrategy == null ? "false" : snippetFetchStrategy.toName()); prop.put("search.verify", snippetFetchStrategy == null ? sb.getConfig("search.verify", "iffresh") : snippetFetchStrategy.toName());
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text"))); prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text")));
prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0); prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0);
prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0); prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0);

@ -174,7 +174,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
return (WordReferenceVars) word; return (WordReferenceVars) word;
} }
public boolean hasTextSnippet() { public boolean hasTextSnippet() {
return (this.textSnippet != null) && (this.textSnippet.getErrorCode() < 11); return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
} }
public boolean hasMediaSnippets() { public boolean hasMediaSnippets() {
return (this.mediaSnippets != null) && (!this.mediaSnippets.isEmpty()); return (this.mediaSnippets != null) && (!this.mediaSnippets.isEmpty());

@ -66,6 +66,7 @@ public class ResultFetcher {
long urlRetrievalAllTime; long urlRetrievalAllTime;
long snippetComputationAllTime; long snippetComputationAllTime;
int taketimeout; int taketimeout;
private final boolean deleteIfSnippetFail;
public ResultFetcher( public ResultFetcher(
final LoaderDispatcher loader, final LoaderDispatcher loader,
@ -73,7 +74,8 @@ public class ResultFetcher {
final QueryParams query, final QueryParams query,
final yacySeedDB peers, final yacySeedDB peers,
final WorkTables workTables, final WorkTables workTables,
final int taketimeout) { final int taketimeout,
final boolean deleteIfSnippetFail) {
assert query != null; assert query != null;
this.loader = loader; this.loader = loader;
this.rankingProcess = rankedCache; this.rankingProcess = rankedCache;
@ -81,6 +83,7 @@ public class ResultFetcher {
this.peers = peers; this.peers = peers;
this.workTables = workTables; this.workTables = workTables;
this.taketimeout = taketimeout; this.taketimeout = taketimeout;
this.deleteIfSnippetFail = deleteIfSnippetFail;
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
@ -399,9 +402,9 @@ public class ResultFetcher {
Integer.MAX_VALUE, Integer.MAX_VALUE,
!query.isLocal()); !query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime; final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
if (snippet.getErrorCode() < 11) { if (!snippet.getErrorCode().fail()) {
// we loaded the file and found the snippet // we loaded the file and found the snippet
return new ResultEntry(page, query.getSegment(), peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached return new ResultEntry(page, query.getSegment(), peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached
} else if (cacheStrategy.mustBeOffline()) { } else if (cacheStrategy.mustBeOffline()) {
@ -411,7 +414,7 @@ public class ResultFetcher {
} else { } else {
// problems with snippet fetch // problems with snippet fetch
String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason); if (deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason); Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
return null; return null;
} }
@ -430,7 +433,7 @@ public class ResultFetcher {
} else { } else {
// problems with snippet fetch // problems with snippet fetch
String reason = "no media snippet"; String reason = "no media snippet";
this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason); if (deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason); Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
return null; return null;
} }

@ -91,7 +91,8 @@ public final class SearchEvent {
final int remote_maxcount, final int remote_maxcount,
final long remote_maxtime, final long remote_maxtime,
final int burstRobinsonPercent, final int burstRobinsonPercent,
final int burstMultiwordPercent) { final int burstMultiwordPercent,
final boolean deleteIfSnippetFail) {
if (MemoryControl.available() < 1024 * 1024 * 100) SearchEventCache.cleanupEvents(true); if (MemoryControl.available() < 1024 * 1024 * 100) SearchEventCache.cleanupEvents(true);
this.eventTime = System.currentTimeMillis(); // for lifetime check this.eventTime = System.currentTimeMillis(); // for lifetime check
this.peers = peers; this.peers = peers;
@ -155,7 +156,7 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000); this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail);
} else { } else {
// do a local search // do a local search
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation); this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
@ -199,7 +200,7 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500); this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail);
} }
// clean up events // clean up events

@ -130,7 +130,8 @@ public class SearchEventCache {
} }
if (event == null) { if (event == null) {
// start a new event // start a new event
// the null check must short-circuit (||): with the non-short-circuit | operator the right-hand getConfigBool call is still evaluated on a null Switchboard and throws a NullPointerException
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, burstRobinsonPercent, burstMultiwordPercent); boolean delete = Switchboard.getSwitchboard() == null || Switchboard.getSwitchboard().getConfigBool("search.verify.delete", true);
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, burstRobinsonPercent, burstMultiwordPercent, delete);
} }
return event; return event;

@ -1264,7 +1264,7 @@ public final class Switchboard extends serverSwitch {
File infile = new File(this.surrogatesInPath, s); File infile = new File(this.surrogatesInPath, s);
if (!infile.exists() || !infile.canWrite() || !infile.canRead()) return false; if (!infile.exists() || !infile.canWrite() || !infile.canRead()) return false;
File outfile = new File(this.surrogatesOutPath, s); File outfile = new File(this.surrogatesOutPath, s);
if (outfile.exists()) return false; //if (outfile.exists()) return false;
boolean moved = false; boolean moved = false;
if (s.endsWith("xml.zip")) { if (s.endsWith("xml.zip")) {
// open the zip file with all the xml files in it // open the zip file with all the xml files in it
@ -1332,7 +1332,7 @@ public final class Switchboard extends serverSwitch {
assert crawlStacker != null; assert crawlStacker != null;
final String urlRejectReason = crawlStacker.urlInAcceptedDomain(surrogate.getIdentifier(true)); final String urlRejectReason = crawlStacker.urlInAcceptedDomain(surrogate.getIdentifier(true));
if (urlRejectReason != null) { if (urlRejectReason != null) {
if (this.log.isFine()) this.log.logInfo("Rejected URL '" + surrogate.getIdentifier(true) + "': " + urlRejectReason); this.log.logWarning("Rejected URL '" + surrogate.getIdentifier(true) + "': " + urlRejectReason);
continue; continue;
} }

@ -56,17 +56,6 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
private static final int maxCache = 1000; private static final int maxCache = 1000;
public static final int SOURCE_CACHE = 0;
public static final int SOURCE_FILE = 1;
public static final int SOURCE_WEB = 2;
public static final int SOURCE_METADATA = 3;
public static final int ERROR_NO_HASH_GIVEN = 11;
public static final int ERROR_SOURCE_LOADING = 12;
public static final int ERROR_RESOURCE_LOADING = 13;
public static final int ERROR_PARSER_FAILED = 14;
public static final int ERROR_PARSER_NO_LINES = 15;
public static final int ERROR_NO_MATCH = 16;
/** /**
* <code>\\A[^\\p{L}\\p{N}].+</code> * <code>\\A[^\\p{L}\\p{N}].+</code>
@ -118,12 +107,32 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
public static final Cache snippetsCache = new Cache(); public static final Cache snippetsCache = new Cache();
/**
 * Outcome of a snippet computation: either the source a snippet was taken
 * from (SOURCE_*) or the reason why no snippet could be produced (ERROR_*).
 * Every constant carries a flag telling whether it counts as a failure.
 */
public enum ResultClass {
    SOURCE_CACHE(false),
    SOURCE_FILE(false),
    SOURCE_WEB(false),
    SOURCE_METADATA(false),
    ERROR_NO_HASH_GIVEN(true),     // no query hashes were given
    ERROR_SOURCE_LOADING(true),
    ERROR_RESOURCE_LOADING(true),
    ERROR_PARSER_FAILED(true),     // the document cannot be parsed
    ERROR_PARSER_NO_LINES(true),   // the parser returned no sentences
    ERROR_NO_MATCH(true);          // no matching snippet found

    /** true for the ERROR_* constants, false for the SOURCE_* constants */
    private final boolean fail;

    // enum constructors are implicitly private, so no modifier is needed
    ResultClass(final boolean fail) {
        this.fail = fail;
    }

    /**
     * @return true if this result class denotes a failure
     */
    public boolean fail() {
        return this.fail;
    }
}
private byte[] urlhash; private byte[] urlhash;
private String line; private String line;
private String error; private String error;
private int errorCode; private ResultClass resultStatus;
public TextSnippet(final byte[] urlhash, final String line, final int errorCode, final String errortext) { public TextSnippet(final byte[] urlhash, final String line, final ResultClass errorCode, final String errortext) {
init(urlhash, line, errorCode, errortext); init(urlhash, line, errorCode, errortext);
} }
@ -132,12 +141,12 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
final DigestURI url = comp.url(); final DigestURI url = comp.url();
if (queryhashes.isEmpty()) { if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url); //System.out.println("found no queryhashes for URL retrieve " + url);
init(url.hash(), null, ERROR_NO_HASH_GIVEN, "no query hashes given"); init(url.hash(), null, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given");
return; return;
} }
// try to get snippet from snippetCache // try to get snippet from snippetCache
int source = SOURCE_CACHE; ResultClass source = ResultClass.SOURCE_CACHE;
final String wordhashes = yacySearch.set2string(queryhashes); final String wordhashes = yacySearch.set2string(queryhashes);
final String urls = UTF8.String(url.hash()); final String urls = UTF8.String(url.hash());
String snippetLine = snippetsCache.get(wordhashes, urls); String snippetLine = snippetsCache.get(wordhashes, urls);
@ -161,19 +170,19 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
boolean useMetadata = !objectWasInCache && !cacheStrategy.mustBeOffline(); boolean useMetadata = !objectWasInCache && !cacheStrategy.mustBeOffline();
if (useMetadata && containsAllHashes(loc = comp.dc_title(), queryhashes)) { if (useMetadata && containsAllHashes(loc = comp.dc_title(), queryhashes)) {
// try to create the snippet from information given in the url itself // try to create the snippet from information given in the url itself
init(url.hash(), loc, SOURCE_METADATA, null); init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return; return;
} else if (useMetadata && containsAllHashes(loc = comp.dc_creator(), queryhashes)) { } else if (useMetadata && containsAllHashes(loc = comp.dc_creator(), queryhashes)) {
// try to create the snippet from information given in the creator metadata // try to create the snippet from information given in the creator metadata
init(url.hash(), loc, SOURCE_METADATA, null); init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return; return;
} else if (useMetadata && containsAllHashes(loc = comp.dc_subject(), queryhashes)) { } else if (useMetadata && containsAllHashes(loc = comp.dc_subject(), queryhashes)) {
// try to create the snippet from information given in the subject metadata // try to create the snippet from information given in the subject metadata
init(url.hash(), loc, SOURCE_METADATA, null); init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return; return;
} else if (useMetadata && containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) { } else if (useMetadata && containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
// try to create the snippet from information given in the url // try to create the snippet from information given in the url
init(url.hash(), loc, SOURCE_METADATA, null); init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return; return;
} else { } else {
// try to load the resource from the cache // try to load the resource from the cache
@ -181,23 +190,23 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (response == null) { if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online // in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy.mustBeOffline()) { if (cacheStrategy.mustBeOffline()) {
init(url.hash(), null, ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry"); init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry");
return; return;
} }
// if it is still not available, report an error // if it is still not available, report an error
init(url.hash(), null, ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry"); init(url.hash(), null, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry");
return; return;
} }
if (!objectWasInCache) { if (!objectWasInCache) {
// place entry on indexing queue // place entry on indexing queue
Switchboard.getSwitchboard().toIndexer(response); Switchboard.getSwitchboard().toIndexer(response);
source = SOURCE_WEB; source = ResultClass.SOURCE_WEB;
} }
} }
} catch (final Exception e) { } catch (final Exception e) {
//Log.logException(e); //Log.logException(e);
init(url.hash(), null, ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage()); init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage());
return; return;
} }
@ -208,11 +217,11 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
try { try {
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse()); document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) { } catch (final Parser.Failure e) {
init(url.hash(), null, ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
return; return;
} }
if (document == null) { if (document == null) {
init(url.hash(), null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
return; return;
} }
@ -224,7 +233,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// compute snippet from text // compute snippet from text
final Collection<StringBuilder> sentences = document.getSentences(pre); final Collection<StringBuilder> sentences = document.getSentences(pre);
if (sentences == null) { if (sentences == null) {
init(url.hash(), null, ERROR_PARSER_NO_LINES, "parser returned no sentences"); init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
return; return;
} }
final SnippetExtractor tsr; final SnippetExtractor tsr;
@ -235,7 +244,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
textline = tsr.getSnippet(); textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords(); remainingHashes = tsr.getRemainingWords();
} catch (UnsupportedOperationException e) { } catch (UnsupportedOperationException e) {
init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found"); init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
return; return;
} }
@ -254,7 +263,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (textline != null) snippetLine += (snippetLine.length() == 0) ? textline : "<br />" + textline; if (textline != null) snippetLine += (snippetLine.length() == 0) ? textline : "<br />" + textline;
if (snippetLine == null || !remainingHashes.isEmpty()) { if (snippetLine == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found"); init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
return; return;
} }
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength); if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
@ -266,10 +275,10 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
init(url.hash(), snippetLine, source, null); init(url.hash(), snippetLine, source, null);
} }
// Common initializer used by the constructors and by every exit path of the snippet computation.
// It stores the url hash, the snippet line (null when no snippet was produced),
// the result status passed in as errorCode, and the error text (null on success).
private void init(final byte[] urlhash, final String line, final int errorCode, final String errortext) { private void init(final byte[] urlhash, final String line, final ResultClass errorCode, final String errortext) {
this.urlhash = urlhash; this.urlhash = urlhash;
this.line = line; this.line = line;
this.errorCode = errorCode; this.resultStatus = errorCode;
this.error = errortext; this.error = errortext;
} }
@ -285,8 +294,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return (error == null) ? "" : error.trim(); return (error == null) ? "" : error.trim();
} }
// Returns the status stored by init(). Despite the method name this is not always an error:
// successful snippets carry one of the SOURCE_* values.
public int getErrorCode() { public ResultClass getErrorCode() {
return errorCode; return resultStatus;
} }
public String getLineMarked(final HandleSet queryHashes) { public String getLineMarked(final HandleSet queryHashes) {

Loading…
Cancel
Save