diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index cf61e928c..0351f0620 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -148,7 +148,7 @@ public final class CrawlStacker { final String rejectReason = stackCrawl(entry); // if the url was rejected we store it into the error URL db - if (rejectReason != null) { + if (rejectReason != null && !rejectReason.startsWith("double in")) { final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle())); this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1); } @@ -436,11 +436,12 @@ public final class CrawlStacker { if (dbocc == null) { return "double in: LURL-DB, oldDate = " + oldDate.toString(); } - if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. " + "Stack processing time:"); if (dbocc == HarvestProcess.ERRORS) { final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash()); + if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.anycause()); return "double in: errors (" + errorEntry.anycause() + "), oldDate = " + oldDate.toString(); } + if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. "); return "double in: " + dbocc.toString() + ", oldDate = " + oldDate.toString(); } } diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 2e076d087..008cfa4f1 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -85,6 +85,7 @@ public class CrawlQueues { FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME)); this.errorURL = new ZURL(sb.index.fulltext(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); this.delegatedURL = new ZURL(sb.index.fulltext(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); + try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time } public void relocate(final File newQueuePath) { @@ -97,6 +98,7 @@ public class CrawlQueues { FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME)); this.errorURL = new ZURL(this.sb.index.fulltext(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); this.delegatedURL = new ZURL(this.sb.index.fulltext(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); + try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time } public synchronized void close() {