From 0d81731e8832b7fba7d73dfa4b6e6c67909c45fb Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 12 Aug 2010 01:29:56 +0000
Subject: [PATCH] fixed crawler bug caused by NPE in logging

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7033 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/crawler/CrawlStacker.java | 6 ++++--
 source/de/anomic/crawler/ZURL.java         | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 656d20f4b..0e3fce96e 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -28,6 +28,7 @@

 package de.anomic.crawler;

+import java.net.InetAddress;
 import java.net.UnknownHostException;
 import java.util.Date;

@@ -361,9 +362,10 @@ public final class CrawlStacker {
         // check if this is a local address and we are allowed to index local pages:
         //boolean local = hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress();
         //assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove the dnsResolve above!
+        InetAddress ia = Domains.dnsResolve(host);
         return (local) ?
-            ("the host '" + host + "' is local, but local addresses are not accepted: " + Domains.dnsResolve(host).getHostAddress()) :
-            ("the host '" + host + "' is global, but global addresses are not accepted: " + Domains.dnsResolve(host).getHostAddress());
+            ("the host '" + host + "' is local, but local addresses are not accepted: " + ((ia == null) ? "null" : ia.getHostAddress())) :
+            ("the host '" + host + "' is global, but global addresses are not accepted: " + ((ia == null) ? "null" : ia.getHostAddress()));
     }

     public String urlInAcceptedDomainHash(final byte[] urlhash) {
diff --git a/source/de/anomic/crawler/ZURL.java b/source/de/anomic/crawler/ZURL.java
index e95afb4da..11ce978c5 100755
--- a/source/de/anomic/crawler/ZURL.java
+++ b/source/de/anomic/crawler/ZURL.java
@@ -132,7 +132,7 @@ public class ZURL implements Iterable {
         Entry entry = new Entry(bentry, executor, workdate, workcount, anycause);
         put(entry);
         stack.add(entry.hash());
-        Log.logInfo("URL Errors", bentry.url().toNormalform(false, false) + " - " + anycause);
+        Log.logInfo("Rejected URL", bentry.url().toNormalform(false, false) + " - " + anycause);
         while (stack.size() > maxStackSize) stack.poll();
     }
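
For reference, a minimal standalone sketch of the null-guard pattern applied in the hunk above, assuming java.net.InetAddress.getByName as a stand-in for YaCy's Domains.dnsResolve (which returns null rather than throwing when resolution fails); the class name RejectMessageSketch and the rejectMessage helper are hypothetical and not part of the patch:

    import java.net.InetAddress;
    import java.net.UnknownHostException;

    public class RejectMessageSketch {

        // Resolve once into a local variable, then guard against a null result
        // before calling getHostAddress(), so building the rejection message
        // can never throw a NullPointerException.
        static String rejectMessage(final String host, final boolean local) {
            InetAddress ia;
            try {
                ia = InetAddress.getByName(host);   // hypothetical stand-in for Domains.dnsResolve(host)
            } catch (final UnknownHostException e) {
                ia = null;                          // mimic dnsResolve's null-on-failure behavior
            }
            final String address = (ia == null) ? "null" : ia.getHostAddress();
            return local ?
                ("the host '" + host + "' is local, but local addresses are not accepted: " + address) :
                ("the host '" + host + "' is global, but global addresses are not accepted: " + address);
        }

        public static void main(final String[] args) {
            // An unresolvable host now yields a readable message instead of an NPE.
            System.out.println(rejectMessage("nonexistent.invalid", false));
        }
    }

The point of resolving once and null-checking the shared result is that the rejection string, and the "Rejected URL" log line that ZURL builds from it, can no longer fail with a NullPointerException when DNS resolution returns nothing.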