removed assortments from indexing data structures

removed options to switch on assortments

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3041 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 2372b4fe0c
commit 052f28312a

@@ -3,11 +3,11 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.49
#releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseVersion=0.491
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
#releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseFileParentDir=yacy
releaseNr=$Revision$

@@ -49,7 +49,6 @@
import java.io.File;
import java.lang.reflect.Method;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.anomic.data.translator;

@@ -307,7 +307,6 @@ public class IndexControl_p {
// generate list
if (post.containsKey("keyhashsimilar")) {
try {
final Iterator containerIt = switchboard.wordIndex.indexContainerSet(keyhash, plasmaWordIndex.RL_WORDFILES, true, 256).iterator();
indexContainer container;
int i = 0;
@@ -327,9 +326,6 @@ public class IndexControl_p {
prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
prop.put("keyhashsimilar_rows", rows + 1);
prop.put("result", "");
} catch (IOException e) {
prop.put("result", "unknown keys: " + e.getMessage());
}
}
if (post.containsKey("urlstringsearch")) {
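The keyhashsimilar branch above walks an indexContainerSet and lays the word hashes out as a rows/cols grid of template properties. A minimal stand-alone sketch of that grid-filling idiom, with a plain Map standing in for YaCy's serverObjects (names here are illustrative, not the real servlet API):

import java.util.*;

class TemplateGridSketch {
    // Lay out word hashes in rows of fixed width, emitting the same kind of
    // keys the servlet writes: "keyhashsimilar_rows_<r>_cols_<c>_wordHash".
    static void fillGrid(Iterator<String> hashes, Map<String, String> prop, int width) {
        int rows = 0, cols = 0;
        while (hashes.hasNext()) {
            prop.put("keyhashsimilar_rows_" + rows + "_cols_" + cols + "_wordHash", hashes.next());
            if (++cols == width) {            // row full: close it, start the next
                prop.put("keyhashsimilar_rows_" + rows + "_cols", String.valueOf(cols));
                rows++;
                cols = 0;
            }
        }
        if (cols > 0) prop.put("keyhashsimilar_rows_" + rows + "_cols", String.valueOf(cols));
        prop.put("keyhashsimilar_rows", String.valueOf(rows + 1));
    }
}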

@@ -166,17 +166,7 @@ public class PerformanceMemory_p {
dfltTotal = 0;
bestTotal = 0;
if (sb.wordIndex.useCollectionIndex) {
prop.put("useRWICache", 0);
} else {
prop.put("useRWICache", 1);
req = sb.wordIndex.size();
chk = sb.wordIndex.assortmentsCacheChunkSizeAvg();
obj = sb.wordIndex.assortmentsCacheObjectSizeAvg();
slt = sb.wordIndex.assortmentsCacheNodeStatus();
ost = sb.wordIndex.assortmentsCacheObjectStatus();
putprop(prop, env, "useRWICache", "RWI", set);
}
req = sb.cacheManager.dbSize();
chk = sb.cacheManager.cacheNodeChunkSize();

@@ -205,25 +205,6 @@
</table>
</form>
<p>
<strong>Index Assortments:</strong>
</p>
<table border="0" cellpadding="5" cellspacing="1">
#{assortmentCluster}#
<tr valign="top" class="TableCellDark">
<td>Assortments #[assortmentSlots]#:</td>
<td align="right">#[assortmentSizeA]#</td>
<td align="right">#[assortmentSizeB]#</td>
<td align="right">#[assortmentSizeC]#</td>
<td align="right">#[assortmentSizeD]#</td>
<td align="right">#[assortmentSizeE]#</td>
<td align="right">#[assortmentSizeF]#</td>
<td align="right">#[assortmentSizeG]#</td>
<td align="right">#[assortmentSizeH]#</td>
</tr>
#{/assortmentCluster}#
</table>
<p>
<strong>Proxy Performance Settings:</strong>
</p>

@@ -280,24 +280,6 @@ public class PerformanceQueues_p {
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);
int[] asizes = switchboard.wordIndex.assortmentsSizes();
if (asizes != null) {
for (int i = 0; i < asizes.length; i += 8) {
prop.put("assortmentCluster_" + (i/8) + "_assortmentSlots", (i + 1) + "-" + (i + 8));
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeA", asizes[i]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeB", asizes[i + 1]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeC", asizes[i + 2]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeD", asizes[i + 3]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeE", asizes[i + 4]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeF", asizes[i + 5]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeG", asizes[i + 6]);
prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeH", asizes[i + 7]);
}
prop.put("assortmentCluster", asizes.length / 8);
} else {
prop.put("assortmentCluster", 0);
}
// table thread pool settings
GenericKeyedObjectPool.Config crawlerPoolConfig = switchboard.cacheLoader.getPoolConfig();
prop.put("pool_0_name","Crawler Pool");

@@ -448,7 +448,7 @@ public final class indexRAMRI implements indexRI {
public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, newEntry instanceof indexRWIEntryNew);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, true);
indexRWIEntry[] entries = new indexRWIEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
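The addEntry change above hard-wires the new RWI row format (the third indexContainer argument is now always true); the surrounding get-or-create cache idiom is unchanged. A compact sketch of that idiom with plain collections in place of indexContainer (hypothetical stand-ins, not the YaCy types):

import java.util.*;

class RamIndexSketch {
    private final Map<String, List<String>> cache = new HashMap<String, List<String>>();

    // fetch the word's container, creating it on first use,
    // append the new entry, and store the container back
    synchronized void addEntry(String wordHash, String newEntry) {
        List<String> container = cache.get(wordHash);
        if (container == null) container = new ArrayList<String>();
        container.add(newEntry);
        cache.put(wordHash, container);
    }
}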

@@ -80,6 +80,7 @@ import java.util.TreeSet;
import java.util.logging.Logger;
import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
public class kelondroRecords {
@@ -1175,7 +1176,13 @@ public class kelondroRecords {
public Node next0() {
// read Objects until a non-deleted Node appears
while (hasNext0()) {
Node nn = next00();
Node nn;
try {
nn = next00();
} catch (IOException e) {
serverLog.logSevere("kelondroRecords", filename + " failed with " + e.getMessage());
return null;
}
byte[] key = nn.getKey();
if ((key == null) ||
((key.length == 1) && (key[0] == (byte) 0x80)) || // the NUL pointer ('lost' chain terminator)
@@ -1193,8 +1200,7 @@ public class kelondroRecords {
return null;
}
public Node next00() {
try {
public Node next00() throws IOException {
// see if the next record is in the bulk, and if not re-fill the bulk
if ((pos.index - bulkstart) >= bulksize) {
bulkstart = pos.index;
@@ -1207,10 +1213,6 @@
pos.index++;
while ((markedDeleted.contains(pos)) && (pos.index < USAGE.allCount())) pos.index++;
return n;
} catch (IOException e) {
e.printStackTrace();
throw new kelondroException(filename, e.getMessage());
}
}
public void remove() {
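The kelondroRecords change above inverts the exception handling: next00() now declares IOException instead of wrapping it in a kelondroException, and next0() catches it at the iterator boundary, logs, and returns null. A self-contained sketch of that contract, assuming simplified stand-ins for Node and serverLog:

import java.io.IOException;
import java.util.logging.Logger;

class NodeIteratorSketch {
    private static final Logger log = Logger.getLogger("kelondroRecords");
    private final String filename = "records.db";
    private int index = 0;

    // skip deleted nodes; convert I/O failures into a logged null result
    Object next0() {
        while (hasNext0()) {
            Object nn;
            try {
                nn = next00();
            } catch (IOException e) {
                log.severe(filename + " failed with " + e.getMessage());
                return null;    // callers of the Iterator API never see the checked exception
            }
            if (nn != null) return nn;
        }
        return null;
    }

    // touches the file, so it propagates I/O failures to next0()
    Object next00() throws IOException {
        if (index >= 3) throw new IOException("simulated read failure");
        return "node-" + (index++);
    }

    boolean hasNext0() { return index < 5; }
}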

@@ -88,7 +88,7 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
// init noticeUrlDB
this.log.logInfo("Initializing the source noticeUrlDB");
this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime, false);
this.importNurlDB = new plasmaCrawlNURL(this.importPath, ((this.cacheSize*3)/4)/1024, preloadTime);
this.importStartSize = this.importNurlDB.size();
//int stackSize = this.importNurlDB.stackSize();

@@ -1,7 +1,6 @@
package de.anomic.plasma.dbImport;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;
@@ -76,14 +75,10 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
}
this.log.logFine("Initializing source word index db.");
try {
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, true, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false));
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, true, (this.cacheSize/2)/1024, preloadTime / 2, this.log);
this.log.logFine("Initializing import URL db.");
this.importUrlDB = new plasmaCrawlLURL(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, false);
this.importUrlDB = new plasmaCrawlLURL(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2);
this.importStartSize = this.importWordIndex.size();
}

@@ -46,7 +46,6 @@ package de.anomic.plasma.parser.swf;
import java.io.InputStream;
import de.anomic.net.URL;
import java.util.Hashtable;
import java.util.TreeSet;
import java.util.HashMap;
import pt.tumba.parser.swf.*;
@@ -102,7 +101,7 @@ public class swfParser extends AbstractParser implements Parser {
String longTitle = null;
String[] sections = null;
String abstrct = null;
TreeSet images = null;
//TreeSet images = null;
HashMap anchors = new HashMap();
int urls = 0;
int urlStart = -1;

@@ -150,11 +150,8 @@ public class plasmaCrawlEURL {
// the class object
private kelondroIndex urlIndexFile = null;
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) {
super();
if (newdb) {
String newCacheName = "urlErr3.table";
cachePath.mkdirs();
try {
@@ -163,11 +160,6 @@ public class plasmaCrawlEURL {
e.printStackTrace();
System.exit(-1);
}
} else {
File oldCacheFile = new File(cachePath, "urlErr0.db");
oldCacheFile.getParentFile().mkdirs();
urlIndexFile = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, rowdef);
}
}

@@ -66,7 +66,6 @@ import de.anomic.index.indexRWIEntry;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
import de.anomic.index.indexURLEntryNew;
import de.anomic.index.indexURLEntryOld;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexSplitTable;
@@ -93,23 +92,14 @@ public final class plasmaCrawlLURL {
private final LinkedList lcrawlResultStack; // 5 - local index: result of local crawling
private final LinkedList gcrawlResultStack; // 6 - local index: triggered external
private boolean newdb;
// the class object
private kelondroIndex urlIndexFile = null;
public plasmaCrawlLURL(File plasmaPath, File indexPath, int bufferkb, long preloadTime, boolean newdb) {
public plasmaCrawlLURL(File plasmaPath, File indexPath, int bufferkb, long preloadTime) {
super();
this.newdb = newdb;
try {
if (newdb) {
urlIndexFile = new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb * 0x400, preloadTime, indexURLEntryNew.rowdef, kelondroBase64Order.enhancedCoder);
} else {
File oldLURLDB = new File(plasmaPath, "urlHash.db");
oldLURLDB.getParentFile().mkdirs();
urlIndexFile = new kelondroCache(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, indexURLEntryOld.rowdef), bufferkb / 2 * 0x400, true, false);
}
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
@@ -216,10 +206,7 @@ public final class plasmaCrawlLURL {
try {
kelondroRow.Entry entry = urlIndexFile.get(urlHash.getBytes());
if (entry == null) return null;
if (newdb)
return new indexURLEntryNew(entry, searchedWord);
else
return new indexURLEntryOld(entry, searchedWord);
} catch (IOException e) {
return null;
}
@@ -250,10 +237,7 @@ public final class plasmaCrawlLURL {
public synchronized indexURLEntry newEntry(String propStr) {
if (propStr.startsWith("{") && propStr.endsWith("}")) {
if (newdb)
return new indexURLEntryNew(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
else
return new indexURLEntryOld(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)));
} else {
return null;
}
@@ -281,12 +265,8 @@ public final class plasmaCrawlLURL {
int limage,
int lvideo,
int lapp) {
if (newdb)
return new indexURLEntryNew(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
size, wc, dt, flags, lang, llocal, lother, laudio, limage, lvideo, lapp);
else
return new indexURLEntryOld(url, descr, author, tags, ETag, mod, load, fresh, referrer, md5,
size, wc, dt, flags, lang, llocal, lother, laudio, limage, lvideo, lapp);
}
public synchronized int getStackSize(int stack) {
@@ -411,14 +391,7 @@ public final class plasmaCrawlLURL {
public Object next() throws RuntimeException {
kelondroRow.Entry e = (kelondroRow.Entry) i.next();
if (e == null) return null;
try {
if (newdb)
return new indexURLEntryNew(e, null);
else
return new indexURLEntryOld(e, null);
} catch (IOException ex) {
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0, null));
}
}
public void remove() {
@@ -610,7 +583,7 @@ public final class plasmaCrawlLURL {
} catch (MalformedURLException e) {}
if (args[0].equals("-l")) try {
// arg 1 is path to URLCache
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), new File(args[2]), 1, 0, false);
final plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), new File(args[2]), 1, 0);
final Iterator enu = urls.entries(true, false, null);
while (enu.hasNext()) {
System.out.println(((indexURLEntry) enu.next()).toString());

@@ -106,13 +106,12 @@ public class plasmaCrawlNURL {
private File cacheStacksPath;
private int bufferkb;
private long preloadTime;
private boolean newdb;
initStackIndex initThead;
// the class object
private kelondroIndex urlIndexFile = null;
public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime, boolean newdb) {
public plasmaCrawlNURL(File cachePath, int bufferkb, long preloadTime) {
super();
this.cacheStacksPath = cachePath;
this.bufferkb = bufferkb;
@@ -120,7 +119,6 @@ public class plasmaCrawlNURL {
// create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path
this.newdb = newdb;
openHashCache();
File coreStackFile = new File(cachePath, "urlNoticeLocal0.stack");
@@ -195,7 +193,6 @@ public class plasmaCrawlNURL {
}
private void openHashCache() {
if (newdb) {
String newCacheName = "urlNotice5.table";
cacheStacksPath.mkdirs();
try {
@@ -204,16 +201,6 @@ public class plasmaCrawlNURL {
e.printStackTrace();
System.exit(-1);
}
} else {
File oldCacheFile = new File(cacheStacksPath, "urlNotice2.db");
oldCacheFile.getParentFile().mkdirs();
try {
urlIndexFile = new kelondroCache(kelondroTree.open(oldCacheFile, bufferkb / 2 * 0x400, preloadTime, rowdef), bufferkb / 2 * 0x400, true, true);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
}
}
private void resetHashCache() {

@@ -41,7 +41,6 @@
package de.anomic.plasma;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -278,12 +277,6 @@ public class plasmaDHTChunk {
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;
} catch (IOException e) {
log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
indexContainers = new indexContainer[0];
urlCache = new HashMap();
this.status = chunkStatus_FAILED;
return 0;
}
}

@@ -237,7 +237,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public dbImportManager dbImportManager;
public plasmaDHTFlush transferIdxThread = null;
private plasmaDHTChunk dhtTransferChunk = null;
private boolean newIndex;
/*
* Remote Proxy configuration
@@ -431,17 +430,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start indexing management
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, indexPath,
ramLURL, getConfigBool("useFlexTableForLURL", false),
ramNURL, getConfigBool("useFlexTableForNURL", false),
ramEURL, getConfigBool("useFlexTableForEURL", true),
ramLURL,
ramNURL,
ramEURL,
ramLURL_time);
newIndex = getConfigBool("useCollectionIndex", false);
try {
wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log, newIndex);
} catch (IOException e1) {
e1.printStackTrace();
System.exit(-1);
}
wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log);
// set a high maximum cache size to current size; this is adopted later automatically
int wordCacheMaxCount = Math.max((int) getConfigLong("wordCacheInitCount", 30000),

@@ -59,13 +59,13 @@ public class plasmaURLPool {
public final plasmaCrawlEURL errorURL;
public plasmaURLPool(File plasmaPath, File indexPath,
int ramLURL, boolean newLURL,
int ramNURL, boolean newNURL,
int ramEURL, boolean newEURL,
int ramLURL,
int ramNURL,
int ramEURL,
long preloadTime) {
loadedURL = new plasmaCrawlLURL(plasmaPath, indexPath, ramLURL, preloadTime, newLURL);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1, newNURL);
errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1, newEURL);
loadedURL = new plasmaCrawlLURL(plasmaPath, indexPath, ramLURL, preloadTime);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL, -1);
errorURL = new plasmaCrawlEURL(plasmaPath, ramEURL, -1);
}
public String exists(String hash) {

@@ -59,8 +59,6 @@ import de.anomic.yacy.yacyDHTAction;
public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
private static final kelondroRow payloadrowold = indexRWIEntryOld.urlEntryRow;
private static final kelondroRow payloadrownew = indexRWIEntryNew.urlEntryRow;
@@ -68,50 +66,28 @@ public final class plasmaWordIndex implements indexRI {
private final kelondroOrder indexOrder = kelondroBase64Order.enhancedCoder;
private final indexRAMRI dhtOutCache, dhtInCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private int assortmentBufferSize; // kb
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
private final plasmaWordIndexFileCluster backend; // old database structure, to be replaced by CollectionRI
public boolean busyCacheFlush; // shows if a cache flush is currently performed
public boolean useCollectionIndex; // flag for usage of new collectionIndex db
private int idleDivisor, busyDivisor;
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log) {
this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrowold, log);
File textindexcache = new File(newIndexRoot, "PUBLIC/TEXT/RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
if (useCollectionIndex) {
this.dhtOutCache = new indexRAMRI(textindexcache, payloadrownew, 1024, "dump1.array", log, true);
this.dhtInCache = new indexRAMRI(textindexcache, payloadrownew, 1024, "dump2.array", log, true);
} else {
this.dhtOutCache = new indexRAMRI(oldDatabaseRoot, payloadrowold, 64, "indexDump1.array", log, false);
this.dhtInCache = new indexRAMRI(oldDatabaseRoot, payloadrowold, 64, "indexDump2.array", log, false);
}
// create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
this.assortmentBufferSize = bufferkb;
// create collections storage path
File textindexcollections = new File(newIndexRoot, "PUBLIC/TEXT/RICOLLECTION");
if (!(textindexcollections.exists())) textindexcollections.mkdirs();
if (useCollectionIndex) {
this.collections = new indexCollectionRI(textindexcollections, "collection", bufferkb * 1024, preloadTime, payloadrownew);
this.assortmentCluster = null;
} else {
this.collections = null;
if (!(assortmentClusterPath.exists())) assortmentClusterPath.mkdirs();
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrowold, assortmentBufferSize, preloadTime, log);
}
busyCacheFlush = false;
this.useCollectionIndex = useCollectionIndex;
this.busyDivisor = 5000;
this.idleDivisor = 420;
}
public kelondroRow payloadrow() {
if (useCollectionIndex) return payloadrownew; else return payloadrowold;
return payloadrownew;
}
public indexRWIEntry newRWIEntry(
@@ -135,14 +111,9 @@ public final class plasmaWordIndex implements indexRI {
int outlinksSame,
int outlinksOther,
kelondroBitfield flags ) {
if (useCollectionIndex)
return new indexRWIEntryNew(urlHash, urlLength, urlComps, titleLength, hitcount, wordcount, phrasecount,
posintext, posinphrase, posofphrase, worddistance, sizeOfPage, lastmodified, updatetime, quality, language, doctype,
outlinksSame, outlinksOther, flags);
else
return new indexRWIEntryOld(urlHash, urlLength, urlComps, titleLength, hitcount, wordcount, phrasecount,
posintext, posinphrase, posofphrase, worddistance, sizeOfPage, lastmodified, updatetime, quality, language, doctype,
outlinksSame, outlinksOther, false);
}
public File getRoot() {
@@ -181,28 +152,6 @@ public final class plasmaWordIndex implements indexRI {
return dhtInCache.size();
}
public int[] assortmentsSizes() {
return (assortmentCluster == null) ? null : assortmentCluster.sizes();
}
public int assortmentsCacheChunkSizeAvg() {
return (assortmentCluster == null) ? 0 : assortmentCluster.cacheChunkSizeAvg();
}
public int assortmentsCacheObjectSizeAvg() {
return (assortmentCluster == null) ? 0 : assortmentCluster.cacheObjectSizeAvg();
}
public int[] assortmentsCacheNodeStatus() {
if (assortmentCluster != null) return assortmentCluster.cacheNodeStatus();
return new int[]{0,0,0,0,0,0,0,0,0,0};
}
public long[] assortmentsCacheObjectStatus() {
if (assortmentCluster != null) return assortmentCluster.cacheObjectStatus();
return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
}
public void setMaxWordCount(int maxWords) {
dhtOutCache.setMaxWordCount(maxWords);
}
@@ -235,11 +184,11 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, payloadrow(), useCollectionIndex);
return new indexContainer(wordHash, payloadrow(), true);
}
public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) {
if ((useCollectionIndex) && (entry instanceof indexRWIEntryOld)) {
if (entry instanceof indexRWIEntryOld) {
if (entry.urlHash() == null) return null;
entry = new indexRWIEntryNew((indexRWIEntryOld) entry);
}
@@ -259,7 +208,7 @@ public final class plasmaWordIndex implements indexRI {
private indexContainer convertOld2New(indexContainer entries) {
// convert old entries to new entries
indexContainer newentries = new indexContainer(entries.getWordHash(), payloadrownew, useCollectionIndex);
indexContainer newentries = new indexContainer(entries.getWordHash(), payloadrownew, true);
Iterator i = entries.entries();
indexRWIEntryOld old;
while (i.hasNext()) {
@@ -272,7 +221,7 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
if ((useCollectionIndex) && (entries.row().objectsize() == payloadrowold.objectsize())) entries = convertOld2New(entries);
if (entries.row().objectsize() == payloadrowold.objectsize()) entries = convertOld2New(entries);
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(entries.getWordHash()))) dhtInCase = true;
@@ -297,7 +246,7 @@ public final class plasmaWordIndex implements indexRI {
if (flushCount > 100) flushCount = 100;
if (flushCount < 1) flushCount = Math.min(1, ram.size());
flushCache(ram, flushCount);
while (ram.maxURLinCache() > ((useCollectionIndex) ? 1024 : 64)) flushCache(ram, 1);
while (ram.maxURLinCache() >= 2040) flushCache(ram, 1);
}
private void flushCache(indexRAMRI ram, int count) {
@@ -315,17 +264,10 @@ public final class plasmaWordIndex implements indexRI {
// flush the wordHash
indexContainer c = ram.deleteContainer(wordHash);
if (c != null) {
if (useCollectionIndex) {
indexContainer feedback = collections.addEntries(c, c.updated(), false);
if (feedback != null) {
throw new RuntimeException("indexCollectionRI shall not return feedback entries; feedback = " + feedback.toString());
}
} else {
indexContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
if (feedback != null) {
backend.addEntries(feedback, System.currentTimeMillis(), true);
}
}
}
// pause to next loop to give other processes a chance to use IO
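After this change the flush path has a single target: containers drained from the RAM cache go straight into the collection index, with the batch size clamped to [1, 100] and extra single-container flushes while any word holds 2040 or more URLs in RAM. A self-contained sketch of that policy, with plain maps standing in for indexRAMRI and indexCollectionRI (assumed simplifications, not the YaCy API):

import java.util.*;

class FlushPolicySketch {
    static final int MAX_URLS_PER_WORD = 2040;
    final Map<String, List<String>> ram = new HashMap<String, List<String>>();
    final Map<String, List<String>> collections = new HashMap<String, List<String>>();

    void flushCacheSome(int flushCount) {
        if (flushCount > 100) flushCount = 100;                  // cap the batch
        if (flushCount < 1) flushCount = Math.min(1, ram.size());
        flushCache(flushCount);
        // keep draining while one word's URL list is at or past the bound
        while (maxURLinCache() >= MAX_URLS_PER_WORD) flushCache(1);
    }

    void flushCache(int count) {
        Iterator<String> words = new ArrayList<String>(ram.keySet()).iterator();
        while (count-- > 0 && words.hasNext()) {
            String wordHash = words.next();
            List<String> c = ram.remove(wordHash);               // deleteContainer
            if (c == null) continue;
            List<String> target = collections.get(wordHash);     // addEntries
            if (target == null) collections.put(wordHash, c);
            else target.addAll(c);
        }
    }

    int maxURLinCache() {
        int max = 0;
        for (List<String> c : ram.values()) max = Math.max(max, c.size());
        return max;
    }
}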
@@ -413,7 +355,6 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
// get from cache
indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
@@ -424,34 +365,11 @@ public final class plasmaWordIndex implements indexRI {
}
// get from collection index
if (useCollectionIndex) {
if (container == null) {
container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
}
} else {
// get from assortments
if (assortmentCluster != null) {
if (container == null) {
container = assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
} else {
// add containers from assortment cluster
container.add(assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
}
}
// get from backend
if (maxTime > 0) {
maxTime = maxTime - (System.currentTimeMillis() - start);
if (maxTime < 0) maxTime = 100;
}
if (container == null) {
container = backend.getContainer(wordHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(backend.getContainer(wordHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1);
}
}
return container;
}
@@ -486,28 +404,14 @@ public final class plasmaWordIndex implements indexRI {
}
public int size() {
if (useCollectionIndex)
return java.lang.Math.max(collections.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()));
else
return java.lang.Math.max((assortmentCluster == null) ? 0 : assortmentCluster.size(),
java.lang.Math.max(backend.size(),
java.lang.Math.max(dhtInCache.size(), dhtOutCache.size())));
}
public int indexSize(String wordHash) {
int size = 0;
size += dhtInCache.indexSize(wordHash);
size += dhtOutCache.indexSize(wordHash);
if (useCollectionIndex) {
size += collections.indexSize(wordHash);
} else try {
size += (assortmentCluster == null) ? 0 : assortmentCluster.indexSize(wordHash);
plasmaWordIndexFile entity = backend.getEntity(wordHash, true, -1);
if (entity != null) {
size += entity.size();
entity.close();
}
} catch (IOException e) {}
return size;
}
@@ -515,25 +419,15 @@ public final class plasmaWordIndex implements indexRI {
synchronized (this) {
dhtInCache.close(waitingBoundSeconds);
dhtOutCache.close(waitingBoundSeconds);
if (useCollectionIndex) {
collections.close(-1);
} else {
if (assortmentCluster != null) assortmentCluster.close(-1);
backend.close(10);
}
}
}
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, payloadrow(), useCollectionIndex);
indexContainer c = new indexContainer(wordHash, payloadrow(), true);
c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
if (useCollectionIndex) {
c.add(collections.deleteContainer(wordHash), -1);
} else {
if (assortmentCluster != null) c.add(assortmentCluster.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1);
}
return c;
}
@@ -541,12 +435,7 @@ public final class plasmaWordIndex implements indexRI {
boolean removed = false;
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
if (useCollectionIndex) {
removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));
} else {
if (assortmentCluster != null) removed = removed | (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete));
removed = removed | backend.removeEntry(wordHash, urlHash, deleteComplete);
}
return removed;
}
@@ -554,12 +443,7 @@ public final class plasmaWordIndex implements indexRI {
int removed = 0;
removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
} else if (assortmentCluster != null) {
removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
return removed;
}
@@ -567,12 +451,7 @@ public final class plasmaWordIndex implements indexRI {
String removed = "";
removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
} else {
if (assortmentCluster != null) removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
return removed;
}
@@ -589,7 +468,7 @@ public final class plasmaWordIndex implements indexRI {
return dhtInCache.tryRemoveURLs(urlHash) | dhtOutCache.tryRemoveURLs(urlHash);
}
public TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) throws IOException {
public TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) {
// creates a set of indexContainers
// this does not use the dhtInCache
kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
@@ -610,62 +489,33 @@ public final class plasmaWordIndex implements indexRI {
public Iterator wordContainers(String startHash, boolean rot) {
// returns an iteration of indexContainers
try {
return wordContainers(startHash, RL_WORDFILES, rot);
} catch (IOException e) {
return new HashSet().iterator();
}
}
public Iterator wordContainers(String startHash, int resourceLevel, boolean rot) throws IOException {
public Iterator wordContainers(String startHash, int resourceLevel, boolean rot) {
if (rot) return new rotatingContainerIterator(startHash, resourceLevel);
else return wordContainers(startHash, resourceLevel);
}
private Iterator wordContainers(String startWordHash, int resourceLevel) throws IOException {
private Iterator wordContainers(String startWordHash, int resourceLevel) {
kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
containerOrder.rotate(startWordHash.getBytes());
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) {
return dhtOutCache.wordContainers(startWordHash, false);
}
if (useCollectionIndex) {
return new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
containerOrder,
indexContainer.containerMergeMethod,
true);
} else {
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
return new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
(assortmentCluster == null) ? null : assortmentCluster.wordContainers(startWordHash, true, false),
containerOrder,
indexContainer.containerMergeMethod,
true);
}
if (resourceLevel == plasmaWordIndex.RL_WORDFILES) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
(assortmentCluster == null) ? null : assortmentCluster.wordContainers(startWordHash, true, false),
containerOrder,
indexContainer.containerMergeMethod,
true),
backend.wordContainers(startWordHash, false),
containerOrder,
indexContainer.containerMergeMethod,
true);
}
}
return null;
}
public class rotatingContainerIterator implements Iterator {
Iterator i;
int resourceLevel;
public rotatingContainerIterator(String startWordHash, int resourceLevel) throws IOException {
public rotatingContainerIterator(String startWordHash, int resourceLevel) {
this.resourceLevel = resourceLevel;
i = wordContainers(startWordHash, resourceLevel);
}
@@ -676,11 +526,9 @@ public final class plasmaWordIndex implements indexRI {
public boolean hasNext() {
if (i.hasNext()) return true;
else try {
else {
i = wordContainers("------------", resourceLevel);
return i.hasNext();
} catch (IOException e) {
return false;
}
}
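With the file backend fallback gone, the rotatingContainerIterator needs no IOException handling: when the underlying iteration runs dry it simply restarts at the smallest word hash ("------------"). The wrap-around pattern, sketched generically over a sorted map (hypothetical names; callers must bound the iteration themselves, since a non-empty index never runs out):

import java.util.*;

class RotatingIteratorSketch implements Iterator<String> {
    private final NavigableMap<String, String> index;
    private Iterator<String> i;

    RotatingIteratorSketch(NavigableMap<String, String> index, String startKey) {
        this.index = index;
        this.i = index.tailMap(startKey, true).keySet().iterator();
    }

    public boolean hasNext() {
        if (i.hasNext()) return true;
        i = index.keySet().iterator();   // wrap around to the smallest key
        return i.hasNext();
    }

    public String next() { return i.next(); }

    public void remove() { throw new UnsupportedOperationException(); }
}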
@@ -693,57 +541,6 @@ public final class plasmaWordIndex implements indexRI {
}
} // class rotatingContainerIterator
public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend
File db = plasmaWordIndexFile.wordHash2path(oldDatabaseRoot, wordhash);
if (!(db.exists())) return "not available";
plasmaWordIndexFile entity = null;
try {
entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
int size = entity.size();
if (size > assortmentCluster.clusterCapacity) {
// this will be too big to integrate it
entity.close(); entity = null;
return "too big";
} else {
// take out all words from the assortment to see if it fits
// together with the extracted assortment
indexContainer container = assortmentCluster.deleteContainer(wordhash, -1);
if (size + container.size() > assortmentCluster.clusterCapacity) {
// this will also be too big to integrate, add to entity
entity.addEntries(container);
entity.close(); entity = null;
return new Integer(-container.size());
} else {
// the combined container will fit, read the container
try {
Iterator entries = entity.elements(true);
indexRWIEntry entry;
while (entries.hasNext()) {
entry = (indexRWIEntry) entries.next();
// System.out.println("ENTRY = " + entry.getUrlHash());
container.add(new indexRWIEntry[]{entry}, System.currentTimeMillis());
}
// we have read all elements, now delete the entity
entity.deleteComplete();
entity.close(); entity = null;
// integrate the container into the assortments; this will work
assortmentCluster.addEntries(container, container.updated(), false);
return new Integer(size);
} catch (kelondroException e) {
// database corrupted, we simply give up the database and delete it
try {entity.close();} catch (Exception ee) {} entity = null;
try {db.delete();} catch (Exception ee) {}
return "database corrupted; deleted";
}
}
}
} finally {
if (entity != null) try {entity.close();}catch(Exception e){}
}
}
public Object migrateWords2index(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend
@@ -753,7 +550,7 @@ public final class plasmaWordIndex implements indexRI {
try {
entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
int size = entity.size();
indexContainer container = new indexContainer(wordhash, payloadrow(), useCollectionIndex);
indexContainer container = new indexContainer(wordhash, payloadrow(), true);
try {
Iterator entries = entity.elements(true);
@@ -812,7 +609,6 @@ public final class plasmaWordIndex implements indexRI {
indexRWIEntry entry = null;
URL url = null;
HashSet urlHashs = new HashSet();
try {
Iterator indexContainerIterator = indexContainerSet(startHash, plasmaWordIndex.RL_WORDFILES, false, 100).iterator();
while (indexContainerIterator.hasNext() && run) {
waiter();
@@ -822,7 +618,8 @@ public final class plasmaWordIndex implements indexRI {
while (containerIterator.hasNext() && run) {
waiter();
entry = (indexRWIEntry) containerIterator.next();
// System.out.println("Wordhash: "+wordHash+" UrlHash: "+entry.getUrlHash());
// System.out.println("Wordhash: "+wordHash+" UrlHash:
// "+entry.getUrlHash());
indexURLEntry ue = lurl.load(entry.urlHash(), null);
if (ue == null) {
urlHashs.add(entry.urlHash());
@@ -850,11 +647,6 @@ public final class plasmaWordIndex implements indexRI {
}
}
}
} catch (IOException e) {
serverLog.logSevere("INDEXCLEANER",
"IndexCleaner-Thread: unable to start: "
+ e.getMessage());
}
serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread stopped");
}
@@ -903,16 +695,11 @@ public final class plasmaWordIndex implements indexRI {
// System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis()))));
File plasmadb = new File("D:\\dev\\proxy\\DATA\\PLASMADB");
File indexdb = new File("D:\\dev\\proxy\\DATA\\INDEX");
try {
plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, true, 555, 1000, new serverLog("TESTAPP"), false);
plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, true, 555, 1000, new serverLog("TESTAPP"));
Iterator containerIter = index.wordContainers("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true);
while (containerIter.hasNext()) {
System.out.println("File: " + (indexContainer) containerIter.next());
}
} catch (IOException e) {
e.printStackTrace();
}
}
}

@@ -68,7 +68,6 @@ import org.apache.axis.attachments.Attachments;
import org.w3c.dom.Document;
import de.anomic.data.listManager;
import de.anomic.http.httpd;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
@@ -501,11 +500,13 @@ public class BlacklistService extends AbstractService {
};
}
/* not used
private String[] getSharedBlacklistArray() {
String sharedBlacklists = this.switchboard.getConfig(BLACKLIST_SHARED, "");
String[] supportedBlacklistTypeArray = sharedBlacklists.split(",");
return supportedBlacklistTypeArray;
}
*/
private File getBlacklistFile(String blacklistName) {
File blacklistFile = new File(listManager.listsPath, blacklistName);
@@ -517,10 +518,12 @@ public class BlacklistService extends AbstractService {
return blacklistFile.exists();
}
/* not used
private HashSet getSharedBlacklistSet() {
HashSet supportedTypesSet = new HashSet(Arrays.asList(getSharedBlacklistArray()));
return supportedTypesSet;
}
*/
private String[] getSupportedBlacklistTypeArray() {
String supportedBlacklistTypesStr = this.switchboard.getConfig(BLACKLISTS_TYPES, "");
@@ -555,10 +558,12 @@ public class BlacklistService extends AbstractService {
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig(LIST_MANAGER_LISTS_PATH, "DATA/LISTS"));
}
/* not used
private void ativateBlacklistForAllTypes(String blacklistName) {
String[] supportedBlacklistTypes = getSupportedBlacklistTypeArray();
this.activateBlacklistForTypes(blacklistName,supportedBlacklistTypes);
}
*/
private void activateBlacklistForTypes(String blacklistName, String[] activateForBlacklistTypes) {
if (activateForBlacklistTypes == null) return;

@@ -552,7 +552,7 @@ public final class yacyClient {
}
// add the url entry to the word indexes
for (int m = 0; m < words; m++) {
if ((wordIndex.useCollectionIndex) && (entry instanceof indexRWIEntryOld)) {
if (entry instanceof indexRWIEntryOld) {
if (entry.urlHash() == null) continue;
entry = new indexRWIEntryNew((indexRWIEntryOld) entry);
}
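Since the collection index is now the only write target, every remote RWI entry arriving in the old row format is converted unconditionally before indexing (previously this was gated on wordIndex.useCollectionIndex). The convert-on-ingest shape, with hypothetical stand-in types for indexRWIEntryOld/indexRWIEntryNew:

interface RWIEntry { String urlHash(); }

class OldRWIEntry implements RWIEntry {
    private final String hash;
    OldRWIEntry(String hash) { this.hash = hash; }
    public String urlHash() { return hash; }
}

class NewRWIEntry implements RWIEntry {
    private final String hash;
    NewRWIEntry(OldRWIEntry old) { this.hash = old.urlHash(); }
    public String urlHash() { return hash; }
}

class IngestSketch {
    // reject legacy entries without a URL hash, upgrade the rest in place
    static RWIEntry normalize(RWIEntry entry) {
        if (entry instanceof OldRWIEntry) {
            if (entry.urlHash() == null) return null;   // the real loop uses 'continue' here
            entry = new NewRWIEntry((OldRWIEntry) entry);
        }
        return entry;
    }
}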

@@ -95,8 +95,6 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverMemory;
import de.anomic.server.serverPlainSwitch;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverSystem;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.enumerateFiles;
@@ -652,20 +650,13 @@ public final class yacy {
*/
public static void migrateWords(String homePath) {
// run with "java -classpath classes yacy -migratewords"
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX");
serverLog log = new serverLog("WORDMIGRATION");
log.logInfo("STARTING MIGRATION");
boolean useCollectionIndex = sps.getConfigBool("useCollectionIndex", false);
plasmaWordIndex wordIndexCache = null;
try {
wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, true, 20000, 10000, log, useCollectionIndex);
} catch (IOException e1) {
e1.printStackTrace();
System.exit(-1);
}
wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, true, 20000, 10000, log);
enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
String wordhash;
File wordfile;
@@ -675,10 +666,7 @@ public final class yacy {
wordfile = (File) words.nextElement();
wordhash = wordfile.getName().substring(0, 12);
// System.out.println("NOW: " + wordhash);
if (useCollectionIndex)
migrationStatus = wordIndexCache.migrateWords2index(wordhash);
else
migrationStatus = wordIndexCache.migrateWords2Assortment(wordhash);
if (migrationStatus instanceof Integer) {
int migrationCount = ((Integer) migrationStatus).intValue();
if (migrationCount == 0)
@@ -704,7 +692,6 @@ public final class yacy {
*/
public static void minimizeUrlDB(String homePath, int dbcache) {
// run with "java -classpath classes yacy -minimizeUrlDB"
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File plasmaroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX");
@@ -715,16 +702,16 @@ public final class yacy {
// db containing all currently loades urls
int cache = dbcache * 1024; // in KB
log.logFine("URLDB-Caches: "+cache+" bytes");
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(plasmaroot, indexRoot, cache, 10000, false);
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(plasmaroot, indexRoot, cache, 10000);
// db used to hold all neede urls
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(plasmaroot, "minimized"), indexRoot, cache, 10000, false);
plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(new File(plasmaroot, "minimized"), indexRoot, cache, 10000);
Runtime rt = Runtime.getRuntime();
int cacheMem = (int)((serverMemory.max-rt.totalMemory())/1024)-(2*cache + 8*1024);
if (cacheMem < 2048) throw new OutOfMemoryError("Not enough memory available to start clean up.");
plasmaWordIndex wordIndex = new plasmaWordIndex(plasmaroot, indexRoot, true, cacheMem, 10000, log, sps.getConfigBool("useCollectionIndex", false));
plasmaWordIndex wordIndex = new plasmaWordIndex(plasmaroot, indexRoot, true, cacheMem, 10000, log);
Iterator indexContainerIterator = wordIndex.wordContainers("------------", plasmaWordIndex.RL_WORDFILES, false);
long urlCounter = 0, wordCounter = 0;
@@ -954,7 +941,7 @@ public final class yacy {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, false, 1000, false, 1000, false, 10000);
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, 1000, 1000, 10000);
HashMap doms = new HashMap();
System.out.println("Started domain list extraction from " + pool.loadedURL.size() + " url entries.");
System.out.println("a dump will be written after double-check of all extracted domains.");
@@ -1070,7 +1057,7 @@ public final class yacy {
private static void urllist(String homePath, String source, boolean html, String targetName) {
File root = new File(homePath);
try {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, false, 1000, false, 1000, false, 10000);
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, 1000, 1000, 10000);
File file = new File(root, targetName);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
@@ -1131,7 +1118,7 @@ public final class yacy {
}
private static void migratelurls(File root, File urlHash) {
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, true, 1000, true, 1000, true, 10000);
plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, 1000, 1000, 10000);
kelondroTree oldindex = null;
try {
oldindex = new kelondroTree(urlHash, 1000, -1, indexURLEntryOld.rowdef);
@@ -1211,7 +1198,7 @@ public final class yacy {
serverLog log = new serverLog("URLDBCLEANUP");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
try {
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(plasmaroot, indexroot, 4194304, 10000, false);
plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(plasmaroot, indexroot, 4194304, 10000);
currentUrlDB.urldbcleanup();
currentUrlDB.close();
} catch (IOException e) {
@@ -1222,7 +1209,6 @@ public final class yacy {
private static void RWIHashList(String homePath, String targetName, String resource, String format) {
plasmaWordIndex WordIndex = null;
serverLog log = new serverLog("HASHLIST");
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX");
String wordChunkStartHash = "------------";
@@ -1232,7 +1218,7 @@ public final class yacy {
try {
Iterator indexContainerIterator = null;
if (resource.equals("all")) {
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, true, 8*1024*1024, 3000, log, sps.getConfigBool("useCollectionIndex", false));
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, true, 8*1024*1024, 3000, log);
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
} else if (resource.equals("assortments")) {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, indexRWIEntryOld.urlEntryRow, 16*1024*1024, 3000, log);

@@ -818,10 +818,6 @@ currentSkin=
# temporary flag for new database structure. set only true for testing
# ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION
# table-types: RAM = 0, TREE = 1, FLEX = 2;
useCollectionIndex=true
useFlexTableForNURL=true
useFlexTableForEURL=true
useFlexTableForLURL=true
tableTypeForPreNURL=2
# flag to show surftipps on index.html page
