- This is an emacs-like regular expression that must match with the URLs which are used to be crawled.
- Use this i.e. to crawl a single domain. If you set this filter it makes sense to increase
- the crawling depth.
+ The filter is an emacs-like regular expression that must match the URLs that are to be crawled; the default is 'catch all'.
+ You can also use an automatic domain-restriction to fully crawl a single domain.
diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java
index 393751991..3fc7043b5 100644
--- a/htroot/WatchCrawler_p.java
+++ b/htroot/WatchCrawler_p.java
@@ -104,15 +104,15 @@ public class WatchCrawler_p {
boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start
String newcrawlingfilter = post.get("crawlingFilter", ".*");
+ if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
+ env.setConfig("crawlingFilter", newcrawlingfilter);
if (fullDomain) try {
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*";
} catch (MalformedURLException e) {}
- if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
- env.setConfig("crawlingFilter", newcrawlingfilter);
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8"));
- if (fullDomain) newcrawlingdepth = 8;
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
+ if ((fullDomain) && (newcrawlingdepth < 8)) newcrawlingdepth = 8;
boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
diff --git a/htroot/opensearchdescription.xml b/htroot/opensearchdescription.xml
index d6290636d..4606d6ed5 100644
--- a/htroot/opensearchdescription.xml
+++ b/htroot/opensearchdescription.xml
@@ -1,5 +1,5 @@
-
+YaCy/#[clientname]#YaCy.net - #[SearchPageGreeting]#http://#[thisaddress]#/env/grafics/yacy.gif
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 6f1cd7139..e5d7ab202 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -152,7 +152,7 @@ public class yacysearch {
}
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
- int count = post.getInt("count", 10);
+ int itemsPerPage = post.getInt("count", 10);
int offset = post.getInt("offset", 0);
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
final boolean indexof = post.get("indexof","").equals("on");
@@ -186,7 +186,7 @@ public class yacysearch {
int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text"));
// patch until better search profiles are available
- if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 30)) count = 30;
+ if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (itemsPerPage <= 30)) itemsPerPage = 30;
serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) {
@@ -257,7 +257,7 @@ public class yacysearch {
prefermask,
contentdomCode,
true,
- count,
+ itemsPerPage,
offset,
searchtime,
urlmask,
@@ -319,8 +319,9 @@ public class yacysearch {
prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount());
prop.put("num-results_globalresults", 1);
prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount());
- prop.put("num-results_offset", 0);
+ prop.put("num-results_offset", offset);
prop.put("num-results_linkcount", 0);
+ prop.put("num-results_itemsPerPage", itemsPerPage);
// compose page navigation
StringBuffer resnav = new StringBuffer();
@@ -391,7 +392,7 @@ public class yacysearch {
prop.putASIS("input_promoteSearchPageGreeting", promoteSearchPageGreeting);
prop.put("input_former", querystring);
prop.put("former", post.get("search", ""));
- prop.put("input_count", count);
+ prop.put("input_count", itemsPerPage);
prop.put("input_offset", offset);
prop.put("input_resource", (global) ? "global" : "local");
prop.put("input_time", searchtime / 1000);
diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss
index 6b56bd4ec..c5b4960f8 100644
--- a/htroot/yacysearch.rss
+++ b/htroot/yacysearch.rss
@@ -11,9 +11,9 @@
#[rssYacyImageURL]#Search for #[former]#
- #[results]#
- 1
- #[results]#
+ #[num-results_totalcount]#
+ #[num-results_offset]#
+ #[num-results_itemsPerPage]#