From 6af70febefece33b8d6ccb3997cdea4cb2e9965e Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 30 Jun 2006 12:54:19 +0000 Subject: [PATCH] - added kelondroTree index option to kelondroFlexTable - automatic generation of index file when index is too large for RAM git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2261 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/dbtest.java | 2 +- .../anomic/kelondro/kelondroBytesIntMap.java | 47 +++++---- .../de/anomic/kelondro/kelondroFlexTable.java | 97 +++++++++++++------ .../anomic/kelondro/kelondroIntBytesMap.java | 4 +- .../de/anomic/kelondro/kelondroRecords.java | 2 +- .../kelondro/kelondroRowCollection.java | 4 + source/de/anomic/kelondro/kelondroRowSet.java | 6 +- 7 files changed, 102 insertions(+), 60 deletions(-) diff --git a/source/dbtest.java b/source/dbtest.java index 2c89fa74d..8662e3a8b 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -187,7 +187,7 @@ public class dbtest { } if (dbe.equals("kelondroFlexTable")) { File tablepath = new File(tablename).getParentFile(); - table = new kelondroFlexTable(tablepath, new File(tablename).getName(), testRow, true); + table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, testRow, true); } if (dbe.equals("mysql")) { table = new dbTable("mysql", testRow); diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index c9579c5c0..c68cb941c 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -24,45 +24,42 @@ package de.anomic.kelondro; -public class kelondroBytesIntMap extends kelondroRowBufferedSet { +import java.io.IOException; + +public class kelondroBytesIntMap { + + private kelondroIndex ki; - public kelondroBytesIntMap(int keySize, int initSize) { - super(new kelondroRow(new int[]{keySize, 4}), initSize); - - // initialize ordering - super.setOrdering(kelondroNaturalOrder.naturalOrder, 0); + public kelondroBytesIntMap(kelondroIndex ki) throws IOException { + assert (ki.row().columns() == 2); // must be a key/index relation + assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long + this.ki = ki; } - - public int geti(byte[] key) { - kelondroRow.Entry indexentry = super.get(key); + + public int geti(byte[] key) throws IOException { + kelondroRow.Entry indexentry = ki.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLongB256(1); } - public int puti(byte[] key, int i) { - kelondroRow.Entry newentry = rowdef.newEntry(); + public int puti(byte[] key, int i) throws IOException { + kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setColLongB256(1, i); - kelondroRow.Entry oldentry = super.put(newentry); + kelondroRow.Entry oldentry = ki.put(newentry); if (oldentry == null) return -1; return (int) oldentry.getColLongB256(1); } - - public void addi(byte[] key, int i) { - kelondroRow.Entry indexentry = rowdef.newEntry(); - indexentry.setCol(0, key); - indexentry.setColLongB256(1, i); - add(indexentry); - } - public int removei(byte[] key) { - if (size() == 0) { - if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim(); - return -1; - } - kelondroRow.Entry indexentry = removeMarked(key); + public int removei(byte[] key) throws IOException { + if (ki.size() == 0) return -1; + kelondroRow.Entry indexentry = ki.remove(key); if (indexentry == null) return -1; return (int) indexentry.getColLongB256(1); } + public int size() throws IOException { + return ki.size(); + } + } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 2eb31bbbe..f9c999126 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -33,24 +33,58 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr private kelondroBytesIntMap index; - public kelondroFlexTable(File path, String tablename, kelondroRow rowdef, boolean exitOnFail) throws IOException { + public kelondroFlexTable(File path, String tablename, long buffersize, kelondroRow rowdef, boolean exitOnFail) throws IOException { super(path, tablename, rowdef, exitOnFail); - - // fill the index - this.index = new kelondroBytesIntMap(super.row().width(0), 0); - /* - kelondroFixedWidthArray indexArray = new kelondroFixedWidthArray(new File(path, colfilename(0,0))); - for (int i = 0; i < indexArray.size(); i++) index.put(indexArray.get(i).getColBytes(0), new Integer(i)); - indexArray.close(); - */ - System.out.print("*** Loading " + path); + File newpath = new File(path, tablename + ".table"); + File indexfile = new File(newpath, "col.000.index"); + kelondroIndex ki = null; + String description = new String(this.col[0].getDescription()); + System.out.println("*** Last Startup time: " + description.substring(4)); + long start = System.currentTimeMillis(); + + if (indexfile.exists()) { + // use existing index file + System.out.println("*** Using File index " + indexfile); + ki = new kelondroTree(indexfile, buffersize, 10); + } else if (size() > 100000) { + // generate new index file + System.out.print("*** Genrating File index for " + size() + " entries from " + indexfile); + ki = initializeTreeIndex(indexfile, buffersize); + + System.out.println(" -done-"); + System.out.println(ki.size() + + " entries indexed from " + + super.col[0].size() + " keys."); + } else { + // fill the index + System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath); + ki = initializeRamIndex(); + + System.out.println(" -done-"); + System.out.println(ki.size() + + " index entries initialized and sorted from " + + super.col[0].size() + " keys."); + } + // assign index to wrapper + index = new kelondroBytesIntMap(ki); + description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";"; + super.col[0].setDescription(description.getBytes()); + } + + private kelondroIndex initializeRamIndex() throws IOException { + kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0); + ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0); Iterator content = super.col[0].contentNodes(); kelondroRecords.Node node; + kelondroRow.Entry indexentry; int i; while (content.hasNext()) { node = (kelondroRecords.Node) content.next(); i = node.handle().hashCode(); - index.addi(node.getValueRow(), i); + indexentry = ri.rowdef.newEntry(); + indexentry.setCol(0, node.getValueRow()); + indexentry.setColLongB256(1, i); + ri.add(indexentry); if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); @@ -58,33 +92,36 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } System.out.print(" -ordering- "); System.out.flush(); - this.index.setOrdering(kelondroNaturalOrder.naturalOrder, 0); - index.shape(); - System.out.println(" -done-"); - System.out.println(index.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys."); + ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0); + ri.shape(); + return ri; } - /* - private final static byte[] read(File source) throws IOException { - byte[] buffer = new byte[(int) source.length()]; - InputStream fis = null; - try { - fis = new FileInputStream(source); - int p = 0, c; - while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; - } finally { - if (fis != null) try { fis.close(); } catch (Exception e) {} + private kelondroIndex initializeTreeIndex(File indexfile, long buffersize) throws IOException { + kelondroTree index = new kelondroTree(indexfile, buffersize, 10, rowdef.width(0), 4, true); + Iterator content = super.col[0].contentNodes(); + kelondroRecords.Node node; + kelondroRow.Entry indexentry; + int i; + while (content.hasNext()) { + node = (kelondroRecords.Node) content.next(); + i = node.handle().hashCode(); + indexentry = index.row().newEntry(); + indexentry.setCol(0, node.getValueRow()); + indexentry.setColLongB256(1, i); + index.put(indexentry); + if ((i % 10000) == 0) { + System.out.print('.'); + System.out.flush(); + } } - return buffer; + return index; } - */ public synchronized kelondroRow.Entry get(byte[] key) throws IOException { synchronized (index) { int i = index.geti(key); - if (i >= this.size()) { - System.out.println("errror"); - } + if (i >= this.size()) System.out.println("error"); if (i < 0) return null; return super.get(i); } diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index 99a1ebaf1..ace851b8f 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -24,7 +24,7 @@ package de.anomic.kelondro; -import java.util.Random; +//import java.util.Random; public class kelondroIntBytesMap extends kelondroRowBufferedSet { @@ -70,7 +70,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet { public static void main(String[] args) { long start = System.currentTimeMillis(); kelondroIntBytesMap c = new kelondroIntBytesMap(30, 0); - Random random = new Random(0); + //Random random = new Random(0); int x; for (int i = 0; i < 100000; i++) { //x = random.nextInt(100000); diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index d4bb4e47b..4269a745e 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -462,7 +462,7 @@ public class kelondroRecords { public String cacheNodeStatusString() { return "cacheMaxSize=" + cacheSize + - ", cacheCurrSize=" + cacheHeaders.size() + + ", cacheCurrSize=" + ((cacheHeaders == null) ? 0 : cacheHeaders.size()) + ", readHit=" + readHit + ", readMiss=" + readMiss + ", writeUnique=" + writeUnique + diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index e3e610766..2b9c88c44 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -62,6 +62,10 @@ public class kelondroRowCollection { this.lastTimeWrote = System.currentTimeMillis(); } + public kelondroRow row() { + return this.rowdef; + } + private final void ensureSize(int elements) { int needed = elements * rowdef.objectsize(); if (chunkcache.length >= needed) return; diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index d188ea95d..bf0a2e13b 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -28,7 +28,7 @@ import java.util.TreeSet; import java.util.Iterator; import java.util.Random; -public class kelondroRowSet extends kelondroRowCollection { +public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex { private static final int collectionReSortLimit = 90; private static final int removeMaxSize = 100; @@ -87,6 +87,10 @@ public class kelondroRowSet extends kelondroRowCollection { return super.size() - removeMarker.size(); } + public kelondroRow.Entry remove(byte[] a) { + return removeMarked(a); + } + public kelondroRow.Entry removeMarked(byte[] a) { return removeMarked(a, 0, a.length); }