From 90b940e90ec4b423071aad76f23a7fd2b0656c0b Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 20 Jan 2006 00:26:55 +0000 Subject: [PATCH] fixed position storage problem. Now the word position is properly stored. No use of that now, but can be used for better ranking. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1378 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../anomic/plasma/plasmaWordIndexAssortment.java | 2 +- source/de/anomic/plasma/plasmaWordIndexCache.java | 2 +- source/de/anomic/plasma/plasmaWordIndexEntity.java | 14 +++++++------- source/de/anomic/plasma/plasmaWordIndexEntry.java | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 8001f1e9c..3b17634c3 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -138,7 +138,7 @@ public final class plasmaWordIndexAssortment { for (int i = 0; i < assortmentLength; i++) { entry = (plasmaWordIndexEntry) entries.next(); row[3 + 2 * i] = entry.getUrlHash().getBytes(); - row[4 + 2 * i] = entry.toEncodedForm(1).getBytes(); + row[4 + 2 * i] = entry.toEncodedForm().getBytes(); } byte[][] oldrow = null; try { diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index 35ac45f0c..a0e032519 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -153,7 +153,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { row[1] = kelondroRecords.long2bytes(container.size(), 4); row[2] = kelondroRecords.long2bytes(updateTime, 8); row[3] = wordEntry.getUrlHash().getBytes(); - row[4] = wordEntry.toEncodedForm(1).getBytes(); + row[4] = wordEntry.toEncodedForm().getBytes(); dumpArray.set((int) urlcount++, row); } } diff --git a/source/de/anomic/plasma/plasmaWordIndexEntity.java b/source/de/anomic/plasma/plasmaWordIndexEntity.java index 769351237..b339f0910 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntity.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntity.java @@ -95,10 +95,10 @@ public final class plasmaWordIndexEntity { kt = new kelondroTree(theLocation, cacheSize); } catch (IOException e) { theLocation.delete(); - kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort, false); + kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceLong, false); } else { // create new index file - kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort, false); + kt = new kelondroTree(theLocation, cacheSize, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceLong, false); } return kt; // everyone who get this should close it when finished! } @@ -167,11 +167,11 @@ public final class plasmaWordIndexEntity { public boolean addEntry(plasmaWordIndexEntry entry) throws IOException { if (entry == null) return false; - if (theTmpMap == null) { - return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(0).getBytes()) == null); - } else { - return (theTmpMap.put(entry.getUrlHash(), entry) == null); - } + if (theTmpMap == null) { + return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm().getBytes()) == null); + } else { + return (theTmpMap.put(entry.getUrlHash(), entry) == null); + } } public int addEntries(plasmaWordIndexEntryContainer container) throws IOException { diff --git a/source/de/anomic/plasma/plasmaWordIndexEntry.java b/source/de/anomic/plasma/plasmaWordIndexEntry.java index 96bf0d2bc..32a9840e2 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntry.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntry.java @@ -67,7 +67,7 @@ public final class plasmaWordIndexEntry { public static final int urlHashLength = yacySeedDB.commonHashLength; // 12 // the size of the index entry attributes - public static final int attrSpaceShort = 12; + //public static final int attrSpaceShort = 12; public static final int attrSpaceLong = 18; // the associated hash @@ -257,10 +257,10 @@ public final class plasmaWordIndexEntry { this.localflag = pr.getProperty("f", ""+LT_LOCAL).charAt(0); } - public String toEncodedForm(int outputFormat) { - // attention: this integrates NOT the URL into the encoding + public String toEncodedForm() { + // attention: this integrates NOT the URL hash into the encoding // if you need a complete dump, use toExternalForm() - StringBuffer buf = new StringBuffer((outputFormat >= 1) ? 18 : 12); + StringBuffer buf = new StringBuffer(attrSpaceLong); buf.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.quality, plasmaURL.urlQualityLength)) .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(plasmaWordIndex.microDateDays(this.lastModified), 3)) @@ -269,7 +269,7 @@ public final class plasmaWordIndexEntry { .append(this.doctype) .append(this.localflag); // 3 + 3 + 2 + 2 + 1 + 1 = 12 bytes - if (outputFormat >= 1) + buf.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posintext, 2)) .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posinphrase, 2)) .append(kelondroBase64Order.enhancedCoder.encodeLongSmart(this.posofphrase, 2));