diff --git a/build.properties b/build.properties index 58349756e..6a15e6b57 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.546 +releaseVersion=0.547 releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFileParentDir=yacy diff --git a/htroot/CrawlStartExpert_p.html b/htroot/CrawlStartExpert_p.html index 136dfeda5..f38d9c72b 100644 --- a/htroot/CrawlStartExpert_p.html +++ b/htroot/CrawlStartExpert_p.html @@ -72,13 +72,13 @@ : - - + + Use filter  |   + Restrict to start domain - This is an emacs-like regular expression that must match with the URLs which are used to be crawled. - Use this i.e. to crawl a single domain. If you set this filter it makes sense to increase - the crawling depth. + The filter is an emacs-like regular expression that must match with the URLs which are used to be crawled; default is 'catch all'. + You can also use an automatic domain-restriction to fully crawl a single domain. diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 393751991..3fc7043b5 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -104,15 +104,15 @@ public class WatchCrawler_p { boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start String newcrawlingfilter = post.get("crawlingFilter", ".*"); + if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted + env.setConfig("crawlingFilter", newcrawlingfilter); if (fullDomain) try { newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*"; } catch (MalformedURLException e) {} - if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted - env.setConfig("crawlingFilter", newcrawlingfilter); int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8")); - if (fullDomain) newcrawlingdepth = 8; env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); + if ((fullDomain) && (newcrawlingdepth < 8)) newcrawlingdepth = 8; boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on"); int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1")); diff --git a/htroot/opensearchdescription.xml b/htroot/opensearchdescription.xml index d6290636d..4606d6ed5 100644 --- a/htroot/opensearchdescription.xml +++ b/htroot/opensearchdescription.xml @@ -1,5 +1,5 @@ - + YaCy/#[clientname]# YaCy.net - #[SearchPageGreeting]# http://#[thisaddress]#/env/grafics/yacy.gif diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 6f1cd7139..e5d7ab202 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -152,7 +152,7 @@ public class yacysearch { } if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {} - int count = post.getInt("count", 10); + int itemsPerPage = post.getInt("count", 10); int offset = post.getInt("offset", 0); boolean global = (post == null) ? true : post.get("resource", "global").equals("global"); final boolean indexof = post.get("indexof","").equals("on"); @@ -186,7 +186,7 @@ public class yacysearch { int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text")); // patch until better search profiles are available - if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 30)) count = 30; + if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (itemsPerPage <= 30)) itemsPerPage = 30; serverObjects prop = new serverObjects(); if (post.get("cat", "href").equals("href")) { @@ -257,7 +257,7 @@ public class yacysearch { prefermask, contentdomCode, true, - count, + itemsPerPage, offset, searchtime, urlmask, @@ -319,8 +319,9 @@ public class yacysearch { prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount()); prop.put("num-results_globalresults", 1); prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount()); - prop.put("num-results_offset", 0); + prop.put("num-results_offset", offset); prop.put("num-results_linkcount", 0); + prop.put("num-results_itemsPerPage", itemsPerPage); // compose page navigation StringBuffer resnav = new StringBuffer(); @@ -391,7 +392,7 @@ public class yacysearch { prop.putASIS("input_promoteSearchPageGreeting", promoteSearchPageGreeting); prop.put("input_former", querystring); prop.put("former", post.get("search", "")); - prop.put("input_count", count); + prop.put("input_count", itemsPerPage); prop.put("input_offset", offset); prop.put("input_resource", (global) ? "global" : "local"); prop.put("input_time", searchtime / 1000); diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss index 6b56bd4ec..c5b4960f8 100644 --- a/htroot/yacysearch.rss +++ b/htroot/yacysearch.rss @@ -11,9 +11,9 @@ #[rssYacyImageURL]# Search for #[former]# - #[results]# - 1 - #[results]# + #[num-results_totalcount]# + #[num-results_offset]# + #[num-results_itemsPerPage]#