From dba02f399f09779ac54e271e271dae36de981e9b Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 17 Mar 2006 20:52:43 +0000 Subject: [PATCH] starting of re-design of kelondroTree iterator - new access to iterator - added many IOException handling in other Classes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1914 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControl_p.java | 52 ++++++------ source/de/anomic/kelondro/kelondroDyn.java | 10 +-- source/de/anomic/kelondro/kelondroMap.java | 4 +- source/de/anomic/kelondro/kelondroTables.java | 4 +- source/de/anomic/kelondro/kelondroTree.java | 42 +++++----- .../de/anomic/plasma/plasmaCrawlStacker.java | 10 ++- source/de/anomic/plasma/plasmaDHTChunk.java | 8 +- source/de/anomic/plasma/plasmaURL.java | 2 +- source/de/anomic/plasma/plasmaWordIndex.java | 80 +++++++++++-------- .../plasma/plasmaWordIndexAssortment.java | 2 +- .../plasmaWordIndexAssortmentCluster.java | 3 +- 11 files changed, 127 insertions(+), 90 deletions(-) diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 6d99487e1..27ff63657 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -284,19 +284,21 @@ public class IndexControl_p { // generate list if (post.containsKey("keyhashsimilar")) { + try { final Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, plasmaWordIndex.RL_WORDFILES, true, 256).iterator(); - StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:
"); - String hash; - int i = 0; - while (hashIt.hasNext() && i < 256) { - hash = (String) hashIt.next(); - result.append("").append(hash).append(" ") - .append(((i + 1) % 8 == 0) ? "
" : ""); - i++; + StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:
"); + String hash; + int i = 0; + while (hashIt.hasNext() && i < 256) { + hash = (String) hashIt.next(); + result.append("").append(hash).append(" ").append(((i + 1) % 8 == 0) ? "
" : ""); + i++; + } + prop.put("result", result); + } catch (IOException e) { + prop.put("result", "unknown keys: " + e.getMessage()); } - prop.put("result", result); } if (post.containsKey("urlstringsearch")) { @@ -329,19 +331,23 @@ public class IndexControl_p { // generate list if (post.containsKey("urlhashsimilar")) { - final Iterator hashIt = switchboard.urlPool.loadedURL.urlHashes(urlhash, true); - StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:
"); - String hash; - int i = 0; - while (hashIt.hasNext() && i < 256) { - hash = (String) hashIt.next(); - result.append("").append(hash).append(" ") - .append(((i + 1) % 8 == 0) ? "
" : ""); - i++; + try { + final Iterator hashIt = switchboard.urlPool.loadedURL.urlHashes(urlhash, true); + + StringBuffer result = new StringBuffer( + "Sequential List of URL-Hashes:
"); + String hash; + int i = 0; + while (hashIt.hasNext() && i < 256) { + hash = (String) hashIt.next(); + result.append("").append(hash).append(" ").append(((i + 1) % 8 == 0) ? "
" : ""); + i++; + } + prop.put("result", result.toString()); + } catch (IOException e) { + prop.put("result", "No Entries for URL hash " + urlhash); } - prop.put("result", result.toString()); } // list known hosts diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index a2e0740b6..352ecab2c 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -187,13 +187,13 @@ public class kelondroDyn extends kelondroTree { } public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating) throws IOException { - // iterates only the keys of the Nodes - // enumerated objects are of type String - return new dynKeyIterator(super.rows(up, rotating)); + // iterates only the keys of the Nodes + // enumerated objects are of type String + return new dynKeyIterator(super.rows(up, rotating)); } - public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) { - return new dynKeyIterator(super.rows(up, rotating, firstKey)); + public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return new dynKeyIterator(super.rows(up, rotating, firstKey)); } private byte[] getValueCached(byte[] key) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java index 6c5c71873..1cc4826d9 100644 --- a/source/de/anomic/kelondro/kelondroMap.java +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -287,7 +287,7 @@ public class kelondroMap { return dyn.dynKeys(up, rotating); } - public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating, final byte[] firstKey) { + public synchronized kelondroDyn.dynKeyIterator keys(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException { // simple enumeration of key names without special ordering return dyn.dynKeys(up, rotating, firstKey); } @@ -305,7 +305,7 @@ public class kelondroMap { return new mapIterator(keys(up, rotating)); } - public synchronized mapIterator maps(final boolean up, final boolean rotating, final byte[] firstKey) { + public synchronized mapIterator maps(final boolean up, final boolean rotating, final byte[] firstKey) throws IOException { return new mapIterator(keys(up, rotating, firstKey)); } diff --git a/source/de/anomic/kelondro/kelondroTables.java b/source/de/anomic/kelondro/kelondroTables.java index 8424eef2d..7ccd0ca4c 100644 --- a/source/de/anomic/kelondro/kelondroTables.java +++ b/source/de/anomic/kelondro/kelondroTables.java @@ -151,7 +151,7 @@ public class kelondroTables { return table.maps(up, rotating); } - public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating, byte[] firstKey) { + public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException { kelondroMap table = (kelondroMap) mTables.get(tablename); if (table == null) throw new RuntimeException("kelondroTables.maps: map table '" + tablename + "' does not exist."); return table.maps(up, rotating, firstKey); @@ -163,7 +163,7 @@ public class kelondroTables { return table.maps(up, field); } - public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) { + public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException { kelondroTree tree = (kelondroTree) tTables.get(tablename); if (tree == null) throw new RuntimeException("kelondroTables.bytes: tree table '" + tablename + "' does not exist."); return tree.rows(up, rotating, firstKey); diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 23d1c84cb..5db751190 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -807,6 +807,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { return node; } + /* private synchronized Iterator nodeIterator(boolean up, boolean rotating) { // iterates the elements in a sorted way. returns Node - type Objects try { @@ -824,6 +825,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { throw new RuntimeException("error creating an iteration: " + e.getMessage()); } } + */ private class nodeIterator implements Iterator { // we implement an iteration! (not a recursive function as the structure would suggest...) @@ -1011,11 +1013,11 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { // iterates the rows of the Nodes // enumerated objects are of type byte[][] // iterates the elements in a sorted way. - return new rowIterator(nodeIterator(up, rotating)); + return new rowIterator(new nodeIterator(up, rotating)); } - public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) { - return new rowIterator((firstKey == null) ? nodeIterator(up, rotating) : nodeIterator(up, rotating, firstKey)); + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return new rowIterator((firstKey == null) ? new nodeIterator(up, rotating) : new nodeIterator(up, rotating, firstKey, true)); } public class rowIterator implements Iterator { @@ -1045,15 +1047,15 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { } - public synchronized keyIterator keys(boolean up, boolean rotating) { - // iterates only the keys of the Nodes - // enumerated objects are of type String + public synchronized keyIterator keys(boolean up, boolean rotating) throws IOException { + // iterates only the keys of the Nodes + // enumerated objects are of type String // iterates the elements in a sorted way. - return new keyIterator(nodeIterator(up, rotating)); + return new keyIterator(new nodeIterator(up, rotating)); } - public Iterator keys(boolean up, boolean rotating, byte[] firstKey) { - return new keyIterator(nodeIterator(up, rotating, firstKey)); + public Iterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return new keyIterator(new nodeIterator(up, rotating, firstKey, true)); } public class keyIterator implements Iterator { @@ -1448,9 +1450,9 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { b = testWord('L'); tt.put(b, b); int c = countElements(tt); System.out.println("elements: " + c); - Iterator i = tt.nodeIterator(true, true, testWord('G')); + Iterator i = tt.rows(true, true, testWord('G')); for (int j = 0; j < c; j++) { - System.out.println("Node " + j + ": " + new String(((Node) i.next()).getKey())); + System.out.println("Row " + j + ": " + new String(((byte[][]) i.next())[0])); } System.out.println("TERMINATED"); } catch (IOException e) { @@ -1555,13 +1557,17 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { public static int countElements(kelondroTree t) { int count = 0; - Iterator iter = t.nodeIterator(true, false); - Node n; - while (iter.hasNext()) { - count++; - n = (Node) iter.next(); - if (n == null) System.out.println("ERROR! null element found"); - //else System.out.println("counted element: " + new String(n.getKey())); + try { + Iterator iter = t.rows(true, false); + byte[][] row; + while (iter.hasNext()) { + count++; + row = (byte[][]) iter.next(); + if (row == null) System.out.println("ERROR! null element found"); + // else System.out.println("counted element: " + new + // String(n.getKey())); + } + } catch (IOException e) { } return count; } diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 452f8eeed..e60d9712f 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -555,7 +555,15 @@ public final class plasmaCrawlStacker { } } catch (kelondroException e) { /* if we have an error, we start with a fresh database */ - plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue. Reseting DB.\n",e); + plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, kelondroException:" + e.getMessage() + ". Reseting DB.\n",e); + + // deleting old db and creating a new db + try {this.urlEntryCache.close();}catch(Exception ex){} + cacheFile.delete(); + this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, plasmaCrawlNURL.ce, true); + } catch (IOException e) { + /* if we have an error, we start with a fresh database */ + plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, IOException:" + e.getMessage() + ". Reseting DB.\n",e); // deleting old db and creating a new db try {this.urlEntryCache.close();}catch(Exception ex){} diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java index 31407ffc9..6daf0e643 100644 --- a/source/de/anomic/plasma/plasmaDHTChunk.java +++ b/source/de/anomic/plasma/plasmaDHTChunk.java @@ -239,9 +239,13 @@ public class plasmaDHTChunk { log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e); indexContainers = new plasmaWordIndexEntryContainer[0]; urlCache = new HashMap(); - this.status = chunkStatus_FAILED; - + return 0; + } catch (IOException e) { + log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e); + indexContainers = new plasmaWordIndexEntryContainer[0]; + urlCache = new HashMap(); + this.status = chunkStatus_FAILED; return 0; } } diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java index 2d0c1e7cb..2a27b73ec 100644 --- a/source/de/anomic/plasma/plasmaURL.java +++ b/source/de/anomic/plasma/plasmaURL.java @@ -564,7 +564,7 @@ public class plasmaURL { return hash; } - public Iterator urlHashes(String urlHash, boolean up) { + public Iterator urlHashes(String urlHash, boolean up) throws IOException { return urlHashCache.keys(up, false, urlHash.getBytes()); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 579b58334..11759a3ab 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -378,7 +378,7 @@ public final class plasmaWordIndex { public static final int RL_ASSORTMENTS = 2; public static final int RL_WORDFILES = 3; - public synchronized TreeSet wordHashes(String startHash, int resourceLevel, boolean rot, int count) { + public synchronized TreeSet wordHashes(String startHash, int resourceLevel, boolean rot, int count) throws IOException { kelondroOrder hashOrder = (kelondroOrder) indexOrder.clone(); if (rot) hashOrder.rotate(startHash.getBytes()); else hashOrder.rotate(null); TreeSet hashes = new TreeSet(hashOrder); @@ -391,12 +391,12 @@ public final class plasmaWordIndex { return hashes; } - public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) { + public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) throws IOException { if (rot) return new rotatingWordIterator(startHash, resourceLevel); else return new correctedWordIterator(startHash, resourceLevel, rot); // use correction until bug is found } - private Iterator wordHashesX(String startWordHash, int resourceLevel, boolean rot) { + private Iterator wordHashesX(String startWordHash, int resourceLevel, boolean rot) throws IOException { if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) { return ramCache.wordHashes(startWordHash, rot); } @@ -431,7 +431,7 @@ public final class plasmaWordIndex { Iterator iter; String nextWord; - public correctedWordIterator(String firstWord, int resourceLevel, boolean rotating) { + public correctedWordIterator(String firstWord, int resourceLevel, boolean rotating) throws IOException { iter = wordHashesX(firstWord, resourceLevel, rotating); try { nextWord = (iter.hasNext()) ? (String) iter.next() : null; @@ -481,7 +481,7 @@ public final class plasmaWordIndex { Iterator i; int resourceLevel; - public rotatingWordIterator(String startWordHash, int resourceLevel) { + public rotatingWordIterator(String startWordHash, int resourceLevel) throws IOException { this.resourceLevel = resourceLevel; i = new correctedWordIterator(startWordHash, resourceLevel, false); } @@ -492,9 +492,11 @@ public final class plasmaWordIndex { public boolean hasNext() { if (i.hasNext()) return true; - else { + else try { i = new correctedWordIterator("------------", resourceLevel, false); return i.hasNext(); + } catch (IOException e) { + return false; } } @@ -588,36 +590,42 @@ public final class plasmaWordIndex { plasmaWordIndexEntry entry = null; URL url = null; HashSet urlHashs = new HashSet(); - Iterator wordHashIterator = wordHashes(startHash, plasmaWordIndex.RL_WORDFILES, false); - while (wordHashIterator.hasNext() && run) { - waiter(); - wordHash = (String) wordHashIterator.next(); - wordContainer = getContainer(wordHash, true, -1); - Iterator containerIterator = wordContainer.entries(); - wordHashNow = wordHash; - while (containerIterator.hasNext() && run) { + try { + Iterator wordHashIterator = wordHashes(startHash, plasmaWordIndex.RL_WORDFILES, false); + while (wordHashIterator.hasNext() && run) { waiter(); - entry = (plasmaWordIndexEntry) containerIterator.next(); - //System.out.println("Wordhash: "+wordHash+" UrlHash: "+entry.getUrlHash()); - try { - url = lurl.getEntry(entry.getUrlHash(), null).url(); - if ((url == null) || - (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(),url.getPath())==true)) { + wordHash = (String) wordHashIterator.next(); + wordContainer = getContainer(wordHash, true, -1); + Iterator containerIterator = wordContainer.entries(); + wordHashNow = wordHash; + while (containerIterator.hasNext() && run) { + waiter(); + entry = (plasmaWordIndexEntry) containerIterator.next(); + // System.out.println("Wordhash: "+wordHash+" UrlHash: + // "+entry.getUrlHash()); + try { + url = lurl.getEntry(entry.getUrlHash(), null).url(); + if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath()) == true)) { + urlHashs.add(entry.getUrlHash()); + } + } catch (IOException e) { urlHashs.add(entry.getUrlHash()); } - } catch (IOException e) { - urlHashs.add(entry.getUrlHash()); + } + if (urlHashs.size() > 0) { + String[] urlArray; + urlArray = (String[]) urlHashs.toArray(new String[0]); + int removed = removeEntries(wordHash, urlArray, true); + serverLog.logFine("INDEXCLEANER", wordHash + ": " + removed + " of " + wordContainer.size() + " URL-entries deleted"); + lastWordHash = wordHash; + lastDeletionCounter = urlHashs.size(); + urlHashs.clear(); } } - if (urlHashs.size()>0) { - String [] urlArray; - urlArray = (String[]) urlHashs.toArray(new String[0]); - int removed = removeEntries(wordHash, urlArray, true); - serverLog.logFine("INDEXCLEANER", wordHash + ": " + removed + " of " + wordContainer.size() + " URL-entries deleted"); - lastWordHash = wordHash; - lastDeletionCounter = urlHashs.size(); - urlHashs.clear(); - } + } catch (IOException e) { + serverLog.logSevere("INDEXCLEANER", + "IndexCleaner-Thread: unable to start: " + + e.getMessage()); } serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread stopped"); } @@ -667,9 +675,13 @@ public final class plasmaWordIndex { // System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis())))); plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, new serverLog("TESTAPP")); - Iterator iter = index.wordHashes("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true); - while (iter.hasNext()) { - System.out.println("File: " + (String) iter.next()); + try { + Iterator iter = index.wordHashes("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true); + while (iter.hasNext()) { + System.out.println("File: " + (String) iter.next()); + } + } catch (IOException e) { + e.printStackTrace(); } } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 843e5d6de..b8ef187f8 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -243,7 +243,7 @@ public final class plasmaWordIndexAssortment { assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentLength), true); } - public Iterator hashes(String startWordHash, boolean up, boolean rot) { + public Iterator hashes(String startWordHash, boolean up, boolean rot) throws IOException { try { return assortments.keys(up, rot, startWordHash.getBytes()); } catch (kelondroException e) { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index aa769b740..c37ca1e35 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -47,6 +47,7 @@ package de.anomic.plasma; import java.io.File; +import java.io.IOException; import java.util.HashSet; import java.util.Iterator; @@ -235,7 +236,7 @@ public final class plasmaWordIndexAssortmentCluster { return size; } - public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) { + public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) throws IOException { HashSet iterators = new HashSet(); //if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!"); for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, rot));