From dc778659fb5e96d87f6e3eafff05e01fbb39322c Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 5 Dec 2005 15:48:45 +0000 Subject: [PATCH] fixed problem with time-out during result join which caused OR behavior instead of AND behavior git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1167 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaSearchQuery.java | 3 ++- source/de/anomic/plasma/plasmaWordIndexEntity.java | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 1aca17d61..0652f158f 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -128,8 +128,9 @@ public final class plasmaSearchQuery { } // the string is clean now, but we must generate a set out of it - final String[] a = words.split(" "); final TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator); + if (words.length() == 0) return query; // split returns always one element + final String[] a = words.split(" "); for (int i = 0; i < a.length; i++) { query.add(a[i]); } return query; } diff --git a/source/de/anomic/plasma/plasmaWordIndexEntity.java b/source/de/anomic/plasma/plasmaWordIndexEntity.java index c3f614451..c6508bb6b 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntity.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntity.java @@ -313,6 +313,9 @@ public final class plasmaWordIndexEntity { public static plasmaWordIndexEntity joinEntities(Set entities, long time) throws IOException { + // big problem here: there cannot be a time-out for join, since a time-out will leave the joined set too big. 
+ // this will result in a OR behavior of the search instead of an AND behavior + long stamp = System.currentTimeMillis(); // order entities by their size @@ -339,12 +342,12 @@ public final class plasmaWordIndexEntity { // we now must pairwise build up a conjunction of these sets Long k = (Long) map.firstKey(); // the smallest, which means, the one with the least entries plasmaWordIndexEntity searchA, searchB, searchResult = (plasmaWordIndexEntity) map.remove(k); - while ((map.size() > 0) && (searchResult.size() > 0) && (time > 0)) { + while ((map.size() > 0) && (searchResult.size() > 0)) { // take the first element of map which is a result and combine it with result k = (Long) map.firstKey(); // the next smallest... time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis(); - searchA = searchResult; - searchB = (plasmaWordIndexEntity) map.remove(k); + searchA = searchResult; + searchB = (plasmaWordIndexEntity) map.remove(k); searchResult = plasmaWordIndexEntity.joinConstructive(searchA, searchB, 2 * time / (map.size() + 1)); // close the input files/structures if (searchA != searchResult) searchA.close();