- added kelondroTree index option to kelondroFlexTable

- automatic generation of index file when index is too large for RAM


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2261 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent dd2865178a
commit 6af70febef

@ -187,7 +187,7 @@ public class dbtest {
} }
if (dbe.equals("kelondroFlexTable")) { if (dbe.equals("kelondroFlexTable")) {
File tablepath = new File(tablename).getParentFile(); File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), testRow, true); table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, testRow, true);
} }
if (dbe.equals("mysql")) { if (dbe.equals("mysql")) {
table = new dbTable("mysql", testRow); table = new dbTable("mysql", testRow);

@ -24,45 +24,42 @@
package de.anomic.kelondro; package de.anomic.kelondro;
public class kelondroBytesIntMap extends kelondroRowBufferedSet { import java.io.IOException;
public class kelondroBytesIntMap {
private kelondroIndex ki;
public kelondroBytesIntMap(int keySize, int initSize) { public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
super(new kelondroRow(new int[]{keySize, 4}), initSize); assert (ki.row().columns() == 2); // must be a key/index relation
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
// initialize ordering this.ki = ki;
super.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
} }
public int geti(byte[] key) { public int geti(byte[] key) throws IOException {
kelondroRow.Entry indexentry = super.get(key); kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1); return (int) indexentry.getColLongB256(1);
} }
public int puti(byte[] key, int i) { public int puti(byte[] key, int i) throws IOException {
kelondroRow.Entry newentry = rowdef.newEntry(); kelondroRow.Entry newentry = ki.row().newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setColLongB256(1, i); newentry.setColLongB256(1, i);
kelondroRow.Entry oldentry = super.put(newentry); kelondroRow.Entry oldentry = ki.put(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
return (int) oldentry.getColLongB256(1); return (int) oldentry.getColLongB256(1);
} }
public void addi(byte[] key, int i) {
kelondroRow.Entry indexentry = rowdef.newEntry();
indexentry.setCol(0, key);
indexentry.setColLongB256(1, i);
add(indexentry);
}
public int removei(byte[] key) { public int removei(byte[] key) throws IOException {
if (size() == 0) { if (ki.size() == 0) return -1;
if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim(); kelondroRow.Entry indexentry = ki.remove(key);
return -1;
}
kelondroRow.Entry indexentry = removeMarked(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1); return (int) indexentry.getColLongB256(1);
} }
public int size() throws IOException {
return ki.size();
}
} }

@ -33,24 +33,58 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroBytesIntMap index; private kelondroBytesIntMap index;
public kelondroFlexTable(File path, String tablename, kelondroRow rowdef, boolean exitOnFail) throws IOException { public kelondroFlexTable(File path, String tablename, long buffersize, kelondroRow rowdef, boolean exitOnFail) throws IOException {
super(path, tablename, rowdef, exitOnFail); super(path, tablename, rowdef, exitOnFail);
File newpath = new File(path, tablename + ".table");
// fill the index File indexfile = new File(newpath, "col.000.index");
this.index = new kelondroBytesIntMap(super.row().width(0), 0); kelondroIndex ki = null;
/* String description = new String(this.col[0].getDescription());
kelondroFixedWidthArray indexArray = new kelondroFixedWidthArray(new File(path, colfilename(0,0))); System.out.println("*** Last Startup time: " + description.substring(4));
for (int i = 0; i < indexArray.size(); i++) index.put(indexArray.get(i).getColBytes(0), new Integer(i)); long start = System.currentTimeMillis();
indexArray.close();
*/ if (indexfile.exists()) {
System.out.print("*** Loading " + path); // use existing index file
System.out.println("*** Using File index " + indexfile);
ki = new kelondroTree(indexfile, buffersize, 10);
} else if (size() > 100000) {
// generate new index file
System.out.print("*** Genrating File index for " + size() + " entries from " + indexfile);
ki = initializeTreeIndex(indexfile, buffersize);
System.out.println(" -done-");
System.out.println(ki.size()
+ " entries indexed from "
+ super.col[0].size() + " keys.");
} else {
// fill the index
System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath);
ki = initializeRamIndex();
System.out.println(" -done-");
System.out.println(ki.size()
+ " index entries initialized and sorted from "
+ super.col[0].size() + " keys.");
}
// assign index to wrapper
index = new kelondroBytesIntMap(ki);
description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";";
super.col[0].setDescription(description.getBytes());
}
private kelondroIndex initializeRamIndex() throws IOException {
kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0);
ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
Iterator content = super.col[0].contentNodes(); Iterator content = super.col[0].contentNodes();
kelondroRecords.Node node; kelondroRecords.Node node;
kelondroRow.Entry indexentry;
int i; int i;
while (content.hasNext()) { while (content.hasNext()) {
node = (kelondroRecords.Node) content.next(); node = (kelondroRecords.Node) content.next();
i = node.handle().hashCode(); i = node.handle().hashCode();
index.addi(node.getValueRow(), i); indexentry = ri.rowdef.newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i);
ri.add(indexentry);
if ((i % 10000) == 0) { if ((i % 10000) == 0) {
System.out.print('.'); System.out.print('.');
System.out.flush(); System.out.flush();
@ -58,33 +92,36 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
} }
System.out.print(" -ordering- "); System.out.print(" -ordering- ");
System.out.flush(); System.out.flush();
this.index.setOrdering(kelondroNaturalOrder.naturalOrder, 0); ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
index.shape(); ri.shape();
System.out.println(" -done-"); return ri;
System.out.println(index.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys.");
} }
/* private kelondroIndex initializeTreeIndex(File indexfile, long buffersize) throws IOException {
private final static byte[] read(File source) throws IOException { kelondroTree index = new kelondroTree(indexfile, buffersize, 10, rowdef.width(0), 4, true);
byte[] buffer = new byte[(int) source.length()]; Iterator content = super.col[0].contentNodes();
InputStream fis = null; kelondroRecords.Node node;
try { kelondroRow.Entry indexentry;
fis = new FileInputStream(source); int i;
int p = 0, c; while (content.hasNext()) {
while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; node = (kelondroRecords.Node) content.next();
} finally { i = node.handle().hashCode();
if (fis != null) try { fis.close(); } catch (Exception e) {} indexentry = index.row().newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i);
index.put(indexentry);
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
}
} }
return buffer; return index;
} }
*/
public synchronized kelondroRow.Entry get(byte[] key) throws IOException { public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
synchronized (index) { synchronized (index) {
int i = index.geti(key); int i = index.geti(key);
if (i >= this.size()) { if (i >= this.size()) System.out.println("error");
System.out.println("errror");
}
if (i < 0) return null; if (i < 0) return null;
return super.get(i); return super.get(i);
} }

@ -24,7 +24,7 @@
package de.anomic.kelondro; package de.anomic.kelondro;
import java.util.Random; //import java.util.Random;
public class kelondroIntBytesMap extends kelondroRowBufferedSet { public class kelondroIntBytesMap extends kelondroRowBufferedSet {
@ -70,7 +70,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public static void main(String[] args) { public static void main(String[] args) {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
kelondroIntBytesMap c = new kelondroIntBytesMap(30, 0); kelondroIntBytesMap c = new kelondroIntBytesMap(30, 0);
Random random = new Random(0); //Random random = new Random(0);
int x; int x;
for (int i = 0; i < 100000; i++) { for (int i = 0; i < 100000; i++) {
//x = random.nextInt(100000); //x = random.nextInt(100000);

@ -462,7 +462,7 @@ public class kelondroRecords {
public String cacheNodeStatusString() { public String cacheNodeStatusString() {
return return
"cacheMaxSize=" + cacheSize + "cacheMaxSize=" + cacheSize +
", cacheCurrSize=" + cacheHeaders.size() + ", cacheCurrSize=" + ((cacheHeaders == null) ? 0 : cacheHeaders.size()) +
", readHit=" + readHit + ", readHit=" + readHit +
", readMiss=" + readMiss + ", readMiss=" + readMiss +
", writeUnique=" + writeUnique + ", writeUnique=" + writeUnique +

@ -62,6 +62,10 @@ public class kelondroRowCollection {
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();
} }
public kelondroRow row() {
return this.rowdef;
}
private final void ensureSize(int elements) { private final void ensureSize(int elements) {
int needed = elements * rowdef.objectsize(); int needed = elements * rowdef.objectsize();
if (chunkcache.length >= needed) return; if (chunkcache.length >= needed) return;

@ -28,7 +28,7 @@ import java.util.TreeSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Random; import java.util.Random;
public class kelondroRowSet extends kelondroRowCollection { public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex {
private static final int collectionReSortLimit = 90; private static final int collectionReSortLimit = 90;
private static final int removeMaxSize = 100; private static final int removeMaxSize = 100;
@ -87,6 +87,10 @@ public class kelondroRowSet extends kelondroRowCollection {
return super.size() - removeMarker.size(); return super.size() - removeMarker.size();
} }
public kelondroRow.Entry remove(byte[] a) {
return removeMarked(a);
}
public kelondroRow.Entry removeMarked(byte[] a) { public kelondroRow.Entry removeMarked(byte[] a) {
return removeMarked(a, 0, a.length); return removeMarked(a, 0, a.length);
} }

Loading…
Cancel
Save