diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index 2edf1590f..b3a63123b 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -401,20 +401,20 @@ public class URLAnalysis { plasmaWordIndex.wordReferenceFactory, Base64Order.enhancedCoder, WordReferenceRow.urlEntryRow); - System.out.println("COLLECTION INDEX REFERENCE COLLECTION starting dump of statistics"); + System.out.println("INDEX REFERENCE COLLECTION starting dump of statistics"); idx.dump(new File(statisticPath)); - System.out.println("COLLECTION INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath); + System.out.println("INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath); } catch (IOException e) { e.printStackTrace(); } } public static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException { - System.out.println("COLLECTION INDEX DIFF URL-COL startup"); + System.out.println("INDEX DIFF URL-COL startup"); IntegerHandleIndex idx = new IntegerHandleIndex(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, new File(statisticFile), 0); MetadataRepository mr = new MetadataRepository(new File(metadataPath)); HandleSet hs = new HandleSet(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, 0, 1000000); - System.out.println("COLLECTION INDEX DIFF URL-COL loaded dump, starting diff"); + System.out.println("INDEX DIFF URL-COL loaded dump, starting diff"); long start = System.currentTimeMillis(); long update = start - 7000; int c = 0; @@ -425,14 +425,14 @@ public class URLAnalysis { } c++; if (System.currentTimeMillis() - update > 10000) { - System.out.println("COLLECTION INDEX DIFF URL-COL running, checked " + c + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - c) / c) / 60000) + " minutes remaining"); + System.out.println("INDEX DIFF URL-COL running, checked " + c + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - c) / c) / 60000) + " minutes remaining"); update = System.currentTimeMillis(); } } mr.close(); - System.out.println("COLLECTION INDEX DIFF URL-COL finished diff, starting dump to " + diffFile); + System.out.println("INDEX DIFF URL-COL finished diff, starting dump to " + diffFile); c = hs.dump(new File(diffFile)); - System.out.println("COLLECTION INDEX DIFF URL-COL finished dump, wrote " + c + " references that occur in the URL-DB, but not in the collection-dump"); + System.out.println("INDEX DIFF URL-COL finished dump, wrote " + c + " references that occur in the URL-DB, but not in the collection-dump"); return c; } diff --git a/source/de/anomic/kelondro/text/ReferenceContainerArray.java b/source/de/anomic/kelondro/text/ReferenceContainerArray.java index 0f1a07e44..7ed2e63f6 100644 --- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java +++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java @@ -294,7 +294,7 @@ public final class ReferenceContainerArray { IntegerHandleIndex references = new IntegerHandleIndex(payloadrow.primaryKeyLength, termOrder, 0, 1000000); String[] files = heapLocation.list(); for (String f: files) { - if (f.length() < 22 && !f.startsWith("index") && !f.endsWith(".blob")) continue; + if (f.length() < 22 || !f.startsWith("index") || !f.endsWith(".blob")) continue; File fl = new File(heapLocation, f); System.out.println("CELL REFERENCE COLLECTION opening blob " + fl); CloneableIterator> ei = new ReferenceContainerCache.blobFileEntries(fl, factory, payloadrow);