diff --git a/htroot/CrawlStartExpert_p.html b/htroot/CrawlStartExpert_p.html
index 50f85dcc3..665f5a374 100644
--- a/htroot/CrawlStartExpert_p.html
+++ b/htroot/CrawlStartExpert_p.html
@@ -50,7 +50,7 @@
Each of these URLs is the root for a crawl start; existing start URLs are always re-loaded.
Other already visited URLs are sorted out as "double" unless re-loading them is allowed by the re-crawl option.
-
+
@@ -59,20 +59,20 @@
-
+
From Link-List of URL
-
+
From Sitemap
-
+
From File (enter a path within your local file system)
-
+
@@ -129,10 +129,10 @@
@@ -149,8 +149,8 @@
Crawls can be restricted to specific countries. This uses the country code that can be computed from
the IP of the server that hosts the page. The filter is not a regular expression but a comma-separated list of country codes.
- no country code restriction
- Use filter
+ no country code restriction
+ Use filter
@@ -187,24 +187,33 @@
After a crawl was done in the past, documents may become stale and eventually they are also deleted on the target host.
To remove old files from the search index it is not sufficient to just consider them for re-load; it may be necessary
to delete them because they simply do not exist any more. Use this in combination with re-crawl, where this time span should be the longer one.
- Do not delete any document before the crawl is started.
+ Do not delete any document before the crawl is started.
Delete sub-path
- For each host in the start url list, delete all documents (in the given subpath) from that host.
+ For each host in the start url list, delete all documents (in the given subpath) from that host.
Delete only old
- Treat documents that are loaded
+ Treat documents that are loaded
- 1 2 3
- 4 5 6
- 7
- 8 9 10
- 12 14 21
- 28 30
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 12
+ 14
+ 21
+ 28
+ 30
-
- years
- months
- days
- hours
+
+ years
+ months
+ days
+ hours
ago as stale and delete them before the crawl is started.
@@ -217,22 +226,31 @@
A web crawl performs a double-check on all links found in the internet against the internal database. If the same url is found again,
then the url is treated as double when you check the 'no doubles' option. A url may be loaded again when it has reached a specific age;
to use that check, choose the 're-load' option.
- Never load any page that is already known. Only the start-url may be loaded again.
+ Never load any page that is already known. Only the start-url may be loaded again.
Re-load
- Treat documents that are loaded
+ Treat documents that are loaded
- 1 2 3
- 4 5 6
- 7
- 8 9 10
- 12 14 21
- 28 30
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 12
+ 14
+ 21
+ 28
+ 30
-
- years
- months
- days
- hours
+
+ years
+ months
+ days
+ hours
ago as stale and load them again. If they are younger, they are ignored.
@@ -256,10 +274,10 @@
if exist : use the cache if the cache exists. Do not check freshness. Otherwise use the online source;
cache only : never go online, use all content from the cache. If no cache exists, treat content as unavailable
- no cache
- if fresh
- if exist
- cache only
+ no cache
+ if fresh
+ if exist
+ cache only
@@ -290,7 +308,7 @@
Do Local Indexing
- This enables indexing of the wepages the crawler will download. This should be switched on by default, unless you want to crawl only to fill the
+ This enables indexing of the webpages the crawler will download. This should be switched on by default, unless you want to crawl only to fill the
Document Cache without indexing.
index text :
@@ -315,7 +333,7 @@
Describe your intention to start this global crawl (optional) :
-
+
This message will appear in the 'Other Peer Crawl Start' table of other peers.
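The clean-up and double-check hunks above both express a document age as a number plus a unit (years, months, days, hours). For orientation, a minimal sketch of how such a (number, unit) pair could map to a cutoff timestamp; the class and method names and the 365/30-day approximations are illustrative assumptions, not code from this patch:

import java.util.concurrent.TimeUnit;

// Hypothetical helper, not part of this patch: converts the (number, unit)
// pair submitted by the form into a cutoff timestamp. Documents loaded
// before the cutoff would count as "stale" for the delete-old and re-load
// checks described above.
public final class StaleCutoff {
    public static long cutoffMillis(final int number, final String unit) {
        final long now = System.currentTimeMillis();
        if ("year".equalsIgnoreCase(unit)) {
            return now - TimeUnit.DAYS.toMillis(365L * number);
        } else if ("month".equalsIgnoreCase(unit)) {
            return now - TimeUnit.DAYS.toMillis(30L * number);
        } else if ("hour".equalsIgnoreCase(unit)) {
            return now - TimeUnit.HOURS.toMillis(number);
        } else { // "day" is the default unit, as in the servlet change below
            return now - TimeUnit.DAYS.toMillis(number);
        }
    }
}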
diff --git a/htroot/CrawlStartExpert_p.java b/htroot/CrawlStartExpert_p.java
index 3b235becf..6bb18d52a 100644
--- a/htroot/CrawlStartExpert_p.java
+++ b/htroot/CrawlStartExpert_p.java
@@ -43,35 +43,434 @@ public class CrawlStartExpert_p {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
- // define visible variables
- prop.put("starturl", /*(intranet) ? repository :*/ "");
- prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
- prop.put("crawlingDepth", Math.min(3, env.getConfigLong("crawlingDepth", 0)));
- prop.put("crawlingDepthExtension", CrawlProfile.MATCH_NEVER_STRING);
- prop.put("directDocByURLChecked", sb.getConfigBool("crawlingDirectDocByURL", true) ? "1" : "0");
- prop.put("mustmatch", /*(intranet) ? repository + ".*" :*/ CrawlProfile.MATCH_ALL_STRING);
- prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
- prop.put("indexmustmatch", CrawlProfile.MATCH_ALL_STRING);
- prop.put("indexmustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
- prop.put("indexcontentmustmatch", CrawlProfile.MATCH_ALL_STRING);
- prop.put("indexcontentmustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
- prop.put("ipMustmatch", sb.getConfig("crawlingIPMustMatch", CrawlProfile.MATCH_ALL_STRING));
- prop.put("ipMustnotmatch", sb.getConfig("crawlingIPMustNotMatch", CrawlProfile.MATCH_NEVER_STRING));
- prop.put("countryMustMatch", sb.getConfig("crawlingCountryMustMatch", ""));
+ // ---------- Start point
+ // crawl start URL
+ if (post != null && post.containsKey("crawlingURL")) {
+ prop.put("starturl", post.get("crawlingURL"));
+ // simple check for content since it may be empty
+ if (!post.get("crawlingURL").trim().isEmpty()) {
+ prop.put("has_url", "1");
+ }
+ } else {
+ prop.put("starturl", "");
+ }
+
+ // sitemap URL
+ if (post != null && post.containsKey("sitemapURL")) {
+ prop.put("sitemapURL", post.get("sitemapURL"));
+ // simple check for content since it may be empty
+ if (!post.get("sitemapURL").trim().isEmpty()) {
+ prop.put("has_sitemapURL", "1");
+ }
+ } else {
+ prop.put("sitemapURL", "");
+ }
+
+ // crawling file
+ if (post != null && post.containsKey("crawlingFile")) {
+ prop.put("crawlingFile", post.get("crawlingFile"));
+ // simple check for content since it may be empty
+ if (!post.get("crawlingFile").trim().isEmpty()) {
+ prop.put("has_crawlingFile", "1");
+ }
+ } else {
+ prop.put("crawlingFile", "");
+ }
+
+ // Crawling mode
+ if (post != null && post.containsKey("crawlingMode")) {
+ final String crawlingMode = post.get("crawlingMode");
+ boolean hasMode = false;
+ if (crawlingMode.equalsIgnoreCase("sitelist")
+ && prop.getBoolean("has_url")) {
+ // sitelist needs "crawlingURL" parameter, checked already
+ prop.put("crawlingMode_sitelist", "1");
+ hasMode = true;
+ } else if (crawlingMode.equalsIgnoreCase("sitemap")
+ && prop.getBoolean("has_sitemapURL")) {
+ // sitemap needs "sitemapURL" parameter, checked already
+ prop.put("crawlingMode_sitemap", "1");
+ hasMode = true;
+ } else if (crawlingMode.equalsIgnoreCase("file")
+ && prop.getBoolean("has_crawlingFile")) {
+ // sitemap needs "crawlingFile" parameter, checked already
+ prop.put("crawlingMode_file", "1");
+ hasMode = true;
+ }
+ // default to URL mode
+ if (!hasMode) {
+ prop.put("crawlingMode_url", "1");
+ }
+ } else {
+ // default to URL
+ prop.put("crawlingMode_url", "1");
+ }
+
+
+ // Bookmark title (set by script)
+ if (post != null && post.containsKey("bookmarkTitle")) {
+ prop.put("bookmarkTitle", post.get("bookmarkTitle"));
+ } else {
+ prop.put("bookmarkTitle", "");
+ }
+
+
+ // ---------- Crawling filter
+ final int crawlingDomMaxPages = env.getConfigInt(
+ "crawlingDomMaxPages", -1);
+
+ // crawling depth
+ if (post != null && post.containsKey("crawlingDepth")) {
+ final Integer depth = post.getInt("crawlingDepth", -1);
+ // depth is limited to two digits, zero allowed
+ if (depth >= 0 && depth < 100) {
+ prop.put("crawlingDepth", depth);
+ }
+ }
+ if (!prop.containsKey("crawlingDepth")) {
+ prop.put("crawlingDepth", Math.min(3,
+ env.getConfigLong("crawlingDepth", 0)));
+ }
+
+ // linked non-parseable documents?
+ if (post == null) {
+ prop.put("directDocByURLChecked",
+ sb.getConfigBool("crawlingDirectDocByURL", true) ? "1" : "0");
+ } else {
+ prop.put("directDocByURLChecked",
+ post.getBoolean("directDocByURL") ? "1" : "0");
+ }
+
+ // Unlimited crawl depth for URLs matching with
+ if (post != null && post.containsKey("crawlingDepthExtension")) {
+ prop.put("crawlingDepthExtension", post.get("crawlingDepthExtension"));
+ } else {
+ prop.put("crawlingDepthExtension", CrawlProfile.MATCH_NEVER_STRING);
+ }
+
+ // Limit by maximum Pages per Domain?
+ if (post == null) {
+ prop.put("crawlingDomMaxCheck",
+ (crawlingDomMaxPages == -1) ? "0" : "1");
+ } else {
+ prop.put("crawlingDomMaxCheck",
+ post.getBoolean("crawlingDomMaxCheck") ? "1" : "0");
+ }
+
+ // Maximum Pages per Domain
+ if (post != null && post.containsKey("crawlingDomMaxPages")) {
+ final Integer maxPages = post.getInt("crawlingDomMaxPages", -1);
+ // page count is limited to six digits, zero not allowed
+ if (maxPages > 0 && maxPages < 1000000) {
+ prop.put("crawlingDomMaxPages", maxPages);
+ }
+ }
+ if (!prop.containsKey("crawlingDomMaxPages")) {
+ prop.put("crawlingDomMaxPages",
+ (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
+ }
+
+ // Accept URLs with query-part?
+ // Obey html-robots-noindex?
+ if (post == null) {
+ prop.put("crawlingQChecked",
+ env.getConfigBool("crawlingQ", true) ? "1" : "0");
+ prop.put("obeyHtmlRobotsNoindexChecked",
+ env.getConfigBool("obeyHtmlRobotsNoindex", true) ? "1" : "0");
+ } else {
+ prop.put("crawlingQChecked", post.getBoolean("crawlingQ") ? "1" : "0");
+ prop.put("obeyHtmlRobotsNoindexChecked",
+ post.getBoolean("obeyHtmlRobotsNoindex") ? "1" : "0");
+ }
+
+ // Load Filter on URLs (range)
+ if (post != null && post.containsKey("range")) {
+ final String range = post.get("range");
+ if (range.equalsIgnoreCase("domain")) {
+ prop.put("range_domain", "1");
+ } else if (range.equalsIgnoreCase("subpath")) {
+ prop.put("range_subpath", "1");
+ } else if (range.equalsIgnoreCase("wide")) {
+ prop.put("range_wide", "1");
+ }
+ } else {
+ prop.put("range_wide", "1");
+ }
+
+ // Load Filter on URLs: must match
+ if (post != null && post.containsKey("mustmatch")) {
+ prop.put("mustmatch", post.get("mustmatch"));
+ } else {
+ prop.put("mustmatch", CrawlProfile.MATCH_ALL_STRING);
+ }
+
+ // Load Filter on URLs: must-not-match
+ if (post != null && post.containsKey("mustnotmatch")) {
+ prop.put("mustnotmatch", post.get("mustnotmatch"));
+ } else {
+ prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
+ }
+
+ // Load Filter on IPs: must match
+ if (post != null && post.containsKey("ipMustmatch")) {
+ prop.put("ipMustmatch", post.get("ipMustmatch"));
+ } else {
+ prop.put("ipMustmatch", sb.getConfig("crawlingIPMustMatch",
+ CrawlProfile.MATCH_ALL_STRING));
+ }
+
+ // Load Filter on IPs: must-not-match
+ if (post != null && post.containsKey("ipMustnotmatch")) {
+ prop.put("ipMustnotmatch", post.get("ipMustnotmatch"));
+ } else {
+ prop.put("ipMustnotmatch", sb.getConfig("crawlingIPMustNotMatch",
+ CrawlProfile.MATCH_NEVER_STRING));
+ }
+
+ // Use Country Codes Match-List?
+ if (post == null) {
+ // use the default that was set in the original template
+ prop.put("countryMustMatchSwitchChecked", "0");
+ } else {
+ prop.put("countryMustMatchSwitchChecked",
+ post.getBoolean("countryMustMatchSwitch") ? "1" : "0");
+ }
+
+ // Must-Match List for Country Codes
+ if (post != null && post.containsKey("countryMustMatchList")) {
+ prop.put("countryMustMatch", post.get("countryMustMatchList"));
+ } else {
+ prop.put("countryMustMatch",
+ sb.getConfig("crawlingCountryMustMatch", ""));
+ }
+
+
+ // ---------- Document filter
+ // Indexer filter on URLs: must match
+ if (post != null && post.containsKey("indexmustmatch")) {
+ prop.put("indexmustmatch", post.get("indexmustmatch"));
+ } else {
+ prop.put("indexmustmatch", CrawlProfile.MATCH_ALL_STRING);
+ }
+
+ // Indexer filter on URLs: must-not-match
+ if (post != null && post.containsKey("indexmustnotmatch")) {
+ prop.put("indexmustnotmatch", post.get("indexmustnotmatch"));
+ } else {
+ prop.put("indexmustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
+ }
+
+ // Filter on Content of Document: must match
+ if (post != null && post.containsKey("indexcontentmustmatch")) {
+ prop.put("indexcontentmustmatch", post.get("indexcontentmustmatch"));
+ } else {
+ prop.put("indexcontentmustmatch", CrawlProfile.MATCH_ALL_STRING);
+ }
+
+ // Filter on Content of Document: must-not-match
+ if (post != null && post.containsKey("indexcontentmustnotmatch")) {
+ prop.put("indexcontentmustnotmatch",
+ post.get("indexcontentmustnotmatch"));
+ } else {
+ prop.put("indexcontentmustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
+ }
+
+
+ // ---------- Clean-Up before Crawl Start
+ // delete if older settings: number value
+ if (post != null && post.containsKey("deleteIfOlderNumber")) {
+ final Integer olderNumber = post.getInt("deleteIfOlderNumber", -1);
+ if (olderNumber > 0 && olderNumber <= 12) {
+ prop.put("deleteIfOlderNumber_" + olderNumber, "1");
+ } else {
+ switch (olderNumber) {
+ case 14: prop.put("deleteIfOlderNumber_14", "1"); break;
+ case 21: prop.put("deleteIfOlderNumber_21", "1"); break;
+ case 28: prop.put("deleteIfOlderNumber_28", "1"); break;
+ case 30: prop.put("deleteIfOlderNumber_30", "1"); break;
+ default: prop.put("deleteIfOlderNumber_14", "1"); break;
+ }
+ }
+ } else {
+ prop.put("deleteIfOlderNumber_14", "1");
+ }
+
+ // delete if older settings: number unit
+ if (post != null && post.containsKey("deleteIfOlderUnit")) {
+ final String olderUnit = post.get("deleteIfOlderUnit");
+ if (olderUnit.equalsIgnoreCase("year")) {
+ prop.put("deleteIfOlderUnit_year", "1");
+ } else if (olderUnit.equalsIgnoreCase("month")) {
+ prop.put("deleteIfOlderUnit_month", "1");
+ } else if (olderUnit.equalsIgnoreCase("hour")) {
+ prop.put("deleteIfOlderUnit_hour", "1");
+ } else {
+ prop.put("deleteIfOlderUnit_day", "1");
+ }
+ } else {
+ prop.put("deleteIfOlderUnit_day", "1");
+ }
+
+ // delete any document before the crawl is started?
+ if (post != null && post.containsKey("deleteold")) {
+ final String deleteold = post.get("deletold");
+ if (deleteold.equalsIgnoreCase("on")){
+ post.put("deleteold_on", "1");
+ } else if (deleteold.equalsIgnoreCase("age")) {
+ post.put("deleteold_age", "1");
+ } else {
+ post.put("deleteold_off", "1");
+ }
+ } else {
+ prop.put("deleteold_off", "1");
+ }
+
+ // ---------- Double-Check Rules
+ // reload settings: number value
+ if (post != null && post.containsKey("reloadIfOlderNumber")) {
+ final Integer olderNumber = post.getInt("reloadIfOlderNumber", -1);
+ if (olderNumber > 0 && olderNumber <= 12) {
+ prop.put("reloadIfOlderNumber_" + olderNumber, "1");
+ } else {
+ switch (olderNumber) {
+ case 14: prop.put("reloadIfOlderNumber_14", "1"); break;
+ case 21: prop.put("reloadIfOlderNumber_21", "1"); break;
+ case 28: prop.put("reloadIfOlderNumber_28", "1"); break;
+ case 30: prop.put("reloadIfOlderNumber_30", "1"); break;
+ default: prop.put("reloadIfOlderNumber_14", "1"); break;
+ }
+ }
+ } else {
+ prop.put("reloadIfOlderNumber_14", "1");
+ }
+
+ // reload settings: number unit
+ if (post != null && post.containsKey("reloadIfOlderUnit")) {
+ final String olderUnit = post.get("reloadIfOlderUnit");
+ if (olderUnit.equalsIgnoreCase("year")) {
+ prop.put("reloadIfOlderUnit_year", "1");
+ } else if (olderUnit.equalsIgnoreCase("month")) {
+ prop.put("reloadIfOlderUnit_month", "1");
+ } else if (olderUnit.equalsIgnoreCase("hour")) {
+ prop.put("reloadIfOlderUnit_hour", "1");
+ } else {
+ prop.put("reloadIfOlderUnit_day", "1");
+ }
+ } else {
+ prop.put("reloadIfOlderUnit_day", "1");
+ }
+
+ if (post != null && post.containsKey("recrawl")) {
+ final String recrawl = post.get("recrawl");
+ if (recrawl.equalsIgnoreCase("reload")) {
+ prop.put("recrawl_reload", "1");
+ } else {
+ prop.put("recrawl_nodoubles", "1");
+ }
+ } else {
+ prop.put("recrawl_nodoubles", "1");
+ }
+
+
+ // ---------- Document Cache
+ // Store to Web Cache?
+ if (post == null) {
+ prop.put("storeHTCacheChecked",
+ env.getConfigBool("storeHTCache", true) ? "1" : "0");
+ } else {
+ prop.put("storeHTCacheChecked",
+ post.getBoolean("storeHTCache") ? "1" : "0");
+ }
+
+ // Policy for usage of Web Cache
+ if (post != null && post.containsKey("cachePolicy")) {
+ final String cachePolicy = post.get("chachePolicy");
+ if (cachePolicy.equalsIgnoreCase("nocache")) {
+ prop.put("cachePolicy_nocache", "1");
+ } else if (cachePolicy.equalsIgnoreCase("ifexist")) {
+ prop.put("cachePolicy_ifexist", "1");
+ } else if (cachePolicy.equalsIgnoreCase("cacheonly")) {
+ prop.put("cachePolicy_cacheonly", "1");
+ } else {
+ prop.put("cachePolicy_iffresh", "1");
+ }
+ } else {
+ prop.put("cachePolicy_iffresh", "1");
+ }
+
+
+ // ---------- Agent name (untested & untouched)
+ if (sb.isP2PMode()) {
+ prop.put("agentSelect", 0);
+ } else {
+ prop.put("agentSelect", 1);
+ List agentNames = new ArrayList();
+ if (sb.isIntranetMode()) {
+ agentNames.add(ClientIdentification.yacyIntranetCrawlerAgentName);
+ }
+ if (sb.isGlobalMode()) {
+ agentNames.add(ClientIdentification.yacyInternetCrawlerAgentName);
+ }
+ agentNames.add(ClientIdentification.googleAgentName);
+ if (sb.isAllIPMode()) {
+ agentNames.add(ClientIdentification.browserAgentName);
+ }
+ for (int i = 0; i < agentNames.size(); i++) {
+ prop.put("agentSelect_list_" + i + "_name", agentNames.get(i));
+ }
+ prop.put("agentSelect_list", agentNames.size());
+ }
+ prop.put("agentSelect_defaultAgentName",
+ ClientIdentification.yacyInternetCrawlerAgentName);
+
+
+ // ---------- Index Administration
+ // Do Local Indexing
+ if (post == null) {
+ // Local index text?
+ prop.put("indexingTextChecked",
+ env.getConfigBool("indexText", true) ? "1" : "0");
+ // Local index media?
+ prop.put("indexingMediaChecked",
+ env.getConfigBool("indexMedia", true) ? "1" : "0");
+ // Do Remote Indexing?
+ prop.put("crawlOrderChecked",
+ env.getConfigBool("crawlOrder", true) ? "1" : "0");
+ // Remote crawl intention
+ prop.put("intention", "");
+ } else {
+ prop.put("indexingTextChecked",
+ post.getBoolean("indexText") ? "1" : "0");
+ prop.put("indexingMediaChecked",
+ post.getBoolean("indexMedia") ? "1" : "0");
+ prop.put("crawlOrderChecked",
+ post.getBoolean("crawlOrder") ? "1" : "0");
+ prop.put("intention", post.get("intention"));
+ }
+
+ // Target collection
+ boolean collectionEnabled =
+ sb.index.fulltext().getDefaultConfiguration().isEmpty() ||
+ sb.index.fulltext().getDefaultConfiguration().contains(
+ CollectionSchema.collection_sxt);
+ prop.put("collectionEnabled", collectionEnabled ? 1 : 0);
+ if (collectionEnabled) {
+ if (post != null && post.containsKey("collection")) {
+ prop.put("collection", post.get("collection"));
+ } else {
+ prop.put("collection", collectionEnabled ? "user" : "");
+ }
+ }
+
+ /* probably unused (no corresponding entry in template)
+ prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
+
final int crawlingDomFilterDepth = env.getConfigInt("crawlingDomFilterDepth", -1);
prop.put("crawlingDomFilterCheck", (crawlingDomFilterDepth == -1) ? "0" : "1");
prop.put("crawlingDomFilterDepth", (crawlingDomFilterDepth == -1) ? 1 : crawlingDomFilterDepth);
- final int crawlingDomMaxPages = env.getConfigInt("crawlingDomMaxPages", -1);
- prop.put("crawlingDomMaxCheck", (crawlingDomMaxPages == -1) ? "0" : "1");
- prop.put("crawlingDomMaxPages", (crawlingDomMaxPages == -1) ? 10000 : crawlingDomMaxPages);
- prop.put("crawlingQChecked", env.getConfigBool("crawlingQ", true) ? "1" : "0");
+
prop.put("followFramesChecked", env.getConfigBool("followFrames", true) ? "1" : "0");
- prop.put("obeyHtmlRobotsNoindexChecked", env.getConfigBool("obeyHtmlRobotsNoindex", true) ? "1" : "0");
- prop.put("storeHTCacheChecked", env.getConfigBool("storeHTCache", true) ? "1" : "0");
- prop.put("indexingTextChecked", env.getConfigBool("indexText", true) ? "1" : "0");
- prop.put("indexingMediaChecked", env.getConfigBool("indexMedia", true) ? "1" : "0");
- prop.put("crawlOrderChecked", env.getConfigBool("crawlOrder", true) ? "1" : "0");
final long LCbusySleep = env.getConfigLong(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, 100L);
final int LCppm = (LCbusySleep == 0) ? 1000 : (int) (60000L / LCbusySleep);
@@ -83,25 +482,8 @@ public class CrawlStartExpert_p {
prop.put("xsstopwChecked", env.getConfigBool("xsstopw", true) ? "1" : "0");
prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0");
prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0");
+ */
- boolean collectionEnabled = sb.index.fulltext().getDefaultConfiguration().isEmpty() || sb.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.collection_sxt);
- prop.put("collectionEnabled", collectionEnabled ? 1 : 0);
- prop.put("collection", collectionEnabled ? "user" : "");
- if (sb.isP2PMode()) {
- prop.put("agentSelect", 0);
- } else {
- prop.put("agentSelect", 1);
- List agentNames = new ArrayList();
- if (sb.isIntranetMode()) agentNames.add(ClientIdentification.yacyIntranetCrawlerAgentName);
- if (sb.isGlobalMode()) agentNames.add(ClientIdentification.yacyInternetCrawlerAgentName);
- agentNames.add(ClientIdentification.googleAgentName);
- if (sb.isAllIPMode()) agentNames.add(ClientIdentification.browserAgentName);
- for (int i = 0; i < agentNames.size(); i++) {
- prop.put("agentSelect_list_" + i + "_name", agentNames.get(i));
- }
- prop.put("agentSelect_list", agentNames.size());
- }
- prop.put("agentSelect_defaultAgentName", ClientIdentification.yacyInternetCrawlerAgentName);
// return rewrite properties
return prop;
}
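A note on the shape of the new servlet code: almost every field follows the same pattern of echoing the POSTed value back into the template and falling back to the configured default otherwise. A hedged sketch of how that repetition could be collapsed into one helper; the method name is hypothetical, not part of this patch, and it relies only on the serverObjects/post calls already used above:

// Hypothetical refactoring sketch, not part of this patch: one helper for the
// recurring "keep the submitted value, otherwise use the default" pattern.
private static void putFormValue(final serverObjects prop, final serverObjects post,
        final String key, final String defaultValue) {
    if (post != null && post.containsKey(key)) {
        prop.put(key, post.get(key));
    } else {
        prop.put(key, defaultValue);
    }
}

// Usage, equivalent to the explicit blocks above:
// putFormValue(prop, post, "mustmatch", CrawlProfile.MATCH_ALL_STRING);
// putFormValue(prop, post, "mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);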
diff --git a/nbproject/project.xml b/nbproject/project.xml
index ca5a5d3bb..8160fd2a1 100644
--- a/nbproject/project.xml
+++ b/nbproject/project.xml
@@ -1,81 +1,81 @@
-
-
- org.netbeans.modules.ant.freeform
-
-
- YaCy
-
-
-
- YaCy
-
-
-
- source
- java
- source
- UTF-8
-
-
- htroot
- java
- htroot
- UTF-8
-
-
-
-
- compile
-
-
- clean
-
-
- javadoc
-
-
- run
-
-
- test
-
-
- clean
- compile
-
-
-
-
-
- source
- source
-
-
- htroot
- htroot
-
-
- build.xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- source
- htroot
- lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.7.1.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.15.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.7.1.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.2.1.jar;lib/lucene-analyzers-phonetic-4.2.1.jar;lib/lucene-core-4.2.1.jar;lib/lucene-misc-4.2.1.jar;lib/lucene-spatial-4.2.1.jar;lib/metadata-extractor-2.4.0-beta-1.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.7.1.jar;lib/poi-3.6-20091214.jar;lib/poi-scratchpad-3.6-20091214.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.2.1.jar;lib/solr-solrj-4.2.1.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar
- 1.6
-
-
-
-
+
+
+ org.netbeans.modules.ant.freeform
+
+
+ YaCy-clone
+
+
+
+ YaCy-clone
+
+
+
+ source
+ java
+ source
+ UTF-8
+
+
+ htroot
+ java
+ htroot
+ UTF-8
+
+
+
+
+ compile
+
+
+ clean
+
+
+ javadoc
+
+
+ run
+
+
+ test
+
+
+ clean
+ compile
+
+
+
+
+
+ source
+ source
+
+
+ htroot
+ htroot
+
+
+ build.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ source
+ htroot
+ lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.7.1.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.15.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.7.1.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.2.1.jar;lib/lucene-analyzers-phonetic-4.2.1.jar;lib/lucene-core-4.2.1.jar;lib/lucene-misc-4.2.1.jar;lib/lucene-spatial-4.2.1.jar;lib/metadata-extractor-2.4.0-beta-1.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.7.1.jar;lib/poi-3.6-20091214.jar;lib/poi-scratchpad-3.6-20091214.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.2.1.jar;lib/solr-solrj-4.2.1.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar
+ 1.6
+
+
+
+