diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java index 7ab881a48..f041ecd80 100644 --- a/htroot/CrawlProfileEditor_p.java +++ b/htroot/CrawlProfileEditor_p.java @@ -114,7 +114,7 @@ public class CrawlProfileEditor_p { for (final byte[] h : sb.crawler.getActive()) { selentry = sb.crawler.getActive(h); if (selentry != null && !CrawlProfile.ignoreNames.contains(selentry.name())) { - orderdHandles.put(selentry.name(), selentry.handle()); + orderdHandles.put(selentry.collectionName(), selentry.handle()); } } @@ -184,7 +184,7 @@ public class CrawlProfileEditor_p { prop.put("edit", "0"); } else { prop.put("edit", "1"); - prop.put("edit_name", selentry.name()); + prop.put("edit_name", selentry.collectionName()); prop.put("edit_handle", selentry.handle()); final Iterator lit = labels.iterator(); count = 0; diff --git a/htroot/IndexCreateQueues_p.java b/htroot/IndexCreateQueues_p.java index 13280211d..ce8da5799 100644 --- a/htroot/IndexCreateQueues_p.java +++ b/htroot/IndexCreateQueues_p.java @@ -63,7 +63,7 @@ public class IndexCreateQueues_p { final Pattern compiledPattern = Pattern.compile(deletepattern); if (option == PROFILE) { - // search and delete the crawl profile (_much_ faster, independant of queue size) + // search and delete the crawl profile (_much_ faster, independent of queue size) CrawlProfile entry; for (final byte[] handle: sb.crawler.getActive()) { entry = sb.crawler.getActive(handle); @@ -143,7 +143,7 @@ public class IndexCreateQueues_p { profileHandle = request.profileHandle(); profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes()); prop.putHTML("crawler_host_" + hc + "_list_" + count + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); - prop.put("crawler_host_" + hc + "_list_" + count + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); + prop.put("crawler_host_" + hc + "_list_" + count + "_profile", ((profileEntry == null) ? 
"unknown" : profileEntry.collectionName())); prop.put("crawler_host_" + hc + "_list_" + count + "_depth", request.depth()); prop.put("crawler_host_" + hc + "_list_" + count + "_modified", daydate(request.appdate()) ); prop.putHTML("crawler_host_" + hc + "_list_" + count + "_anchor", request.name()); diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index 2a918bfd8..34bf1c68b 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -482,12 +482,12 @@ public final class CrawlStacker { if (maxAllowedPagesPerDomain < Integer.MAX_VALUE && maxAllowedPagesPerDomain > 0) { final AtomicInteger dp = profile.getCount(url.getHost()); if (dp != null && dp.get() >= maxAllowedPagesPerDomain) { - if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed."); + if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed."); return "crawl stack domain counter exceeded"; } if (ResultURLs.domainCount(EventOrigin.LOCAL_CRAWLING, url.getHost()) >= maxAllowedPagesPerDomain) { - if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + profile.domMaxPages() + " is allowed."); + if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed."); return "result stack domain counter exceeded"; } } diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java index c09072efb..ba75b72d4 100644 --- a/source/net/yacy/crawler/CrawlSwitchboard.java +++ b/source/net/yacy/crawler/CrawlSwitchboard.java @@ -121,7 +121,7 @@ public final class CrawlSwitchboard { CrawlProfile p; try { p = new 
CrawlProfile(this.profilesPassiveCrawls.get(handle)); - Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name()); + Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.collectionName()); } catch ( final IOException e ) { continue; } catch ( final SpaceExceededException e ) { diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java index 4b8695494..17a9486d7 100644 --- a/source/net/yacy/crawler/data/CrawlProfile.java +++ b/source/net/yacy/crawler/data/CrawlProfile.java @@ -259,6 +259,15 @@ public class CrawlProfile extends ConcurrentHashMap implements M return r; } + /** + * Create a display name that uses the collection name, unless that collection is "user". + * @return the name of the collection if it is set and not "user", or name() otherwise + */ + public String collectionName() { + final String r = get(COLLECTIONS); + return r == null || r.length() == 0 || "user".equals(r) ? name() : r; + } + /** * Gets the regex which must be matched by URLs in order to be crawled. * @return regex which must be matched @@ -521,7 +530,7 @@ final int domlistlength) { prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0"); - prop.put(CRAWL_PROFILE_PREFIX + count + "_name", this.name()); + prop.put(CRAWL_PROFILE_PREFIX + count + "_name", this.collectionName()); prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(this.name())) ? "0" : "1"); prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", this.handle()); prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ?
"0" : "1"); diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index 7d61d16a3..d7eda49f5 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -538,7 +538,7 @@ public class Response { // check profile if (!profile().indexText() && !profile().indexMedia()) { - return "indexing not allowed - indexText and indexMedia not set (for proxy = " + this.profile.name()+ ")"; + return "indexing not allowed - indexText and indexMedia not set (for proxy = " + this.profile.collectionName()+ ")"; } // -CGI access in request @@ -683,7 +683,7 @@ public class Response { // check profile if (!profile().indexText() && !profile().indexMedia()) { - return "indexing not allowed - indexText and indexMedia not set (for crawler = " + this.profile.name() + ")"; + return "indexing not allowed - indexText and indexMedia not set (for crawler = " + this.profile.collectionName() + ")"; } // -CGI access in request diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 17993c2e3..5fc12c56a 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1988,7 +1988,7 @@ public final class Switchboard extends serverSwitch CrawlProfile selentry; for ( final byte[] handle : this.crawler.getActive() ) { selentry = this.crawler.getActive(handle); - assert selentry.handle() != null : "profile.name = " + selentry.name(); + assert selentry.handle() != null : "profile.name = " + selentry.collectionName(); if ( selentry.handle() == null ) { this.crawler.removeActive(handle); continue; @@ -2583,7 +2583,7 @@ public final class Switchboard extends serverSwitch "denied by profile rule, process case=" + processCase + ", profile name = " - + queueEntry.profile().name()); + + queueEntry.profile().collectionName()); return; }