From 20a1b29ed397b0d46da898361ff381a68b0d7476 Mon Sep 17 00:00:00 2001 From: reger Date: Wed, 26 Oct 2016 01:38:40 +0200 Subject: [PATCH] add simple test case for ReferenceContainer helpful for debugging calculated ranking parameter --- .../kelondro/data/word/WordReferenceRow.java | 8 +- .../kelondro/rwi/ReferenceContainerTest.java | 100 ++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index a6e3f5e00..e80f2a9a7 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -49,7 +49,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef public static final Row urlEntryRow = new Row(new Column[]{ new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"), new Column("a", Column.celltype_cardinal, Column.encoder_b256, 2, "lastModified"), - new Column("s", Column.celltype_cardinal, Column.encoder_b256, 2, "freshUntil"), + new Column("s", Column.celltype_cardinal, Column.encoder_b256, 2, "freshUntil"), // TODO: unused (since 2009) new Column("u", Column.celltype_cardinal, Column.encoder_b256, 1, "wordsInTitle"), new Column("w", Column.celltype_cardinal, Column.encoder_b256, 2, "wordsInText"), new Column("p", Column.celltype_cardinal, Column.encoder_b256, 2, "phrasesInText"), @@ -247,11 +247,17 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format } + /** + * @return date recalculated from MicroDateDays (accuracy = 1 Day, time always 0:00) + */ @Override public long lastModified() { return MicroDate.reverseMicroDateDays(this.entry.getColLong(col_lastModified)); } + /** + * @return 
occurrences of word in text (in the range 0..255) + */ @Override public int hitcount() { return (0xff & this.entry.getColByte(col_hitcount)); diff --git a/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java new file mode 100644 index 000000000..995fd016f --- /dev/null +++ b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java @@ -0,0 +1,100 @@ +/** + * ReferenceContainerTest + * part of YaCy + * Copyright 2016 by reger24; https://github.com/reger24 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ +package net.yacy.kelondro.rwi; + +import java.util.Queue; +import java.util.concurrent.LinkedBlockingQueue; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.crawler.retrieval.Response; +import net.yacy.kelondro.data.word.Word; +import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.data.word.WordReferenceFactory; +import net.yacy.kelondro.data.word.WordReferenceVars; +import net.yacy.kelondro.util.Bitfield; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import org.junit.Test; + +/** + * Unit tests for ReferenceContainer class.
+ * + * @author reger24 + */ +public class ReferenceContainerTest { + + /** + * Test of add method, of class ReferenceContainer. This also demonstrates an + * issue with word.distance() used in ranking + */ + @Test + public void testAdd() throws Exception { + ReferenceFactory wordReferenceFactory = new WordReferenceFactory(); + byte[] termHash = Word.word2hash("test"); + + ReferenceContainer rc = new ReferenceContainer(wordReferenceFactory, termHash); + + // prepare a WordReference to be added to the container + DigestURL url = new DigestURL("http://test.org/test.html"); + int urlComps = MultiProtocolURL.urlComps(url.toNormalform(true)).length; + int urlLength = url.toNormalform(true).length(); + + Queue positions = new LinkedBlockingQueue(); + positions.add(10); + + WordReferenceVars wentry = new WordReferenceVars( + url.hash(), + urlLength, // byte-length of complete URL + urlComps, // number of path components + 0, // length of description/length (longer are better?) + 1, // how often appears this word in the text + 1, // total number of words + 1, // total number of phrases + 1, // first position of word in text + positions, // positions of words that are joined into the reference + 1, // position of word in its phrase + 1, // number of the phrase where word appears + 0, // last-modified time of the document where word appears + "en", // (guessed) language of document + Response.DT_TEXT, // type of document + 0, // outlinks to same domain + 0, // outlinks to other domain + new Bitfield(4), // attributes to the url and to the word according the url + 0.0d + ); + + rc.add(wentry); // add the ref + + assertTrue("size after add", rc.size() > 0); + + WordReference wc = rc.getReference(url.hash()); // retrieve the ref + + assertNotNull("getReference failed", wc); + + // TODO: ReferenceContainer used for rwi results.
As it stands, distance doesn't persist after adding a ref to the container, making the distance ranking obsolete -> remove or fix + System.out.println("-----------------------------------------------------------"); + System.out.println("WordReference (word distance) before add to container: " + wentry.distance()); + System.out.println("WordReference (word distance) after get from container: " + wc.distance()); + System.out.println("-----------------------------------------------------------"); + assertEquals("distance()", wentry.distance(), wc.distance()); + } + +}