From 042c2868dff4ee8924a81e2c239e868920b3622c Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 8 Nov 2016 23:42:12 +0100 Subject: [PATCH] del abandoned indexcleaner.html, servlet deleted with commit https://github.com/yacy/yacy_search_server/commit/3bcd9d622b1988e221d271bdb088fd1fbcd5e018 crawler servlet log warning line on failure in one of multiple urls (instead of exception msg) indexcontrolrwi skip not needed type conversion on ranking --- htroot/Crawler_p.java | 2 +- htroot/IndexCleaner_p.html | 60 ---------------------------------- htroot/IndexControlRWIs_p.java | 6 ++-- 3 files changed, 4 insertions(+), 64 deletions(-) delete mode 100644 htroot/IndexCleaner_p.html diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 1e8409da4..1f1294721 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -258,7 +258,7 @@ public class Crawler_p { if (crawlingStartURL != null && (crawlingStartURL.isFile() || crawlingStartURL.isSMB())) storeHTCache = false; } catch (final MalformedURLException e) { - ConcurrentLog.logException(e); + ConcurrentLog.warn("Crawler_p", "crawl start url invalid: " + e.getMessage()); } } else { crawlName = crawlingFile.getName(); diff --git a/htroot/IndexCleaner_p.html b/htroot/IndexCleaner_p.html deleted file mode 100644 index db9a3dfca..000000000 --- a/htroot/IndexCleaner_p.html +++ /dev/null @@ -1,60 +0,0 @@ - - - - YaCy '#[clientname]#': Index Cleaner - #%env/templates/metas.template%# - - - - #%env/templates/header.template%# - #%env/templates/submenuBlacklist.template%# -

Index Cleaner

- - #(urldb)# - :: -
URL-DB-Cleaner -
-
ThreadAlive: #[threadAlive]#
-
ThreadToString: #[threadToString]#
-
Total URLs searched: #[total]# (#[percentUrls]#%)
-
Blacklisted URLs found: #[blacklisted]#
-
Percentage blacklisted: #[percent]#%
-
last searched URL: #[lastUrl]# (#[lastHash]#)
-
last blacklisted URL found: #[lastBlacklistedUrl]# (#[lastBlacklistedHash]#)
-
-
- #(/urldb)# - #(rwidb)# - :: -
RWI-DB-Cleaner -
-
ThreadAlive: #[threadAlive]#
-
ThreadToString: #[threadToString]#
-
RWIs at Start: #[RWIcountstart]#
-
RWIs now: #[RWIcountnow]#
-
wordHash in Progress: #[wordHashNow]#
-
last wordHash with deleted URLs: #[lastWordHash]#
-
Number of deleted URLs in on this Hash: #[lastDeletionCounter]#
-
-
- #(/rwidb)# -

- URL-DB-Cleaner - Clean up the database by deletion of blacklisted urls:
- - Start/Resume - Stop - Pause -

-

- RWI-DB-Cleaner - Clean up the database by deletion of words with reference to blacklisted urls:
- Start/Resume - Stop - Pause -

- #%env/templates/footer.template%# - - \ No newline at end of file diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 2bcadaca5..69364b2ca 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -489,14 +489,14 @@ public class IndexControlRWIs_p { DigestURL url; URIMetadataNode entry; String us; - float rn = Float.MIN_VALUE; + long rn = Long.MIN_VALUE; while (!theSearch.rwiIsEmpty() && (entry = theSearch.pullOneFilteredFromRWI(false)) != null) { url = entry.url(); if ( url == null ) { continue; } us = url.toNormalform(true); - if ( rn == Float.MIN_VALUE ) { + if ( rn == Long.MIN_VALUE ) { rn = entry.score(); } prop.put("genUrlList_urlList_" + i + "_urlExists", "1"); @@ -507,7 +507,7 @@ public class IndexControlRWIs_p { prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlString", us); prop.put("genUrlList_urlList_" + i + "_urlExists_urlStringShort", (us.length() > 40) ? (us.substring(0, 20) + "
" + us.substring(20, 40) + "...") : ((us.length() > 30) ? (us.substring(0, 20) + "
" + us.substring(20)) : us)); - prop.putNum("genUrlList_urlList_" + i + "_urlExists_ranking", Float.toString(entry.score() - rn)); + prop.putNum("genUrlList_urlList_" + i + "_urlExists_ranking", entry.score() - rn); prop.putNum("genUrlList_urlList_" + i + "_urlExists_domlength", DigestURL.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_" + i + "_urlExists_tf", 1000.0 * entry.word().termFrequency()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_authority", (theSearch.getOrder() == null) ? -1 : theSearch.getOrder().authority(ASCII.String(entry.hash(), 6, 6)));