From 1cd711d0054dcd886a5996c84bd21a6f15dacc72 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 24 Feb 2012 01:07:15 +0100 Subject: [PATCH] added classes for citation references (for new citation ranking) --- .../data/citation/CitationReference.java | 176 ++++++++++++++++++ .../citation/CitationReferenceFactory.java | 44 +++++ 2 files changed, 220 insertions(+) create mode 100644 source/net/yacy/kelondro/data/citation/CitationReference.java create mode 100644 source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java diff --git a/source/net/yacy/kelondro/data/citation/CitationReference.java b/source/net/yacy/kelondro/data/citation/CitationReference.java new file mode 100644 index 000000000..ce1fb73ce --- /dev/null +++ b/source/net/yacy/kelondro/data/citation/CitationReference.java @@ -0,0 +1,176 @@ +/** + * CitationReference + * Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 13.02.2012 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>.
+ */ + +package net.yacy.kelondro.data.citation; + +import java.util.Collection; + +import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.UTF8; +import net.yacy.kelondro.data.word.Word; +import net.yacy.kelondro.index.Column; +import net.yacy.kelondro.index.Row; +import net.yacy.kelondro.index.Row.Entry; +import net.yacy.kelondro.order.Base64Order; +import net.yacy.kelondro.order.MicroDate; +import net.yacy.kelondro.rwi.Reference; +import net.yacy.kelondro.util.ByteArray; + +public class CitationReference implements Reference /*, Cloneable*/ { + + // this object stores citation attributes to URL references + + public static final Row citationRow = new Row(new Column[]{ + new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"), + new Column("m", Column.celltype_cardinal, Column.encoder_b256, 2, "lastModified"), + new Column("r", Column.celltype_cardinal, Column.encoder_b256, 2, "reserve") + }, + Base64Order.enhancedCoder + ); + + // static properties + private static final int col_urlhash = 0; // h 12 the url hash b64-encoded + private static final int col_lastModified = 1; // a 2 last-modified time of the document where url appears + private static final int col_reserve = 2; // k 2 reserve2 + + private final Row.Entry entry; + + public CitationReference( + final byte[] urlHash, + final long lastmodified // last-modified time of the document where word appears + ) { + assert (urlHash.length == 12) : "urlhash = " + ASCII.String(urlHash); + this.entry = citationRow.newEntry(); + final int mddlm = MicroDate.microDateDays(lastmodified); + this.entry.setCol(col_urlhash, urlHash); + this.entry.setCol(col_lastModified, mddlm >> 2); + this.entry.setCol(col_reserve, 0); + } + + public CitationReference(final String urlHash, final String code) { + // the code is the external form of the row minus the leading urlHash entry + this.entry = citationRow.newEntry(UTF8.getBytes((urlHash + code))); + } + + public 
CitationReference(final String external) { + this.entry = citationRow.newEntry(external, true); + } + + public CitationReference(final byte[] row) { + this.entry = citationRow.newEntry(row); + } + + public CitationReference(final byte[] row, final int offset, final boolean clone) { + this.entry = citationRow.newEntry(row, offset, clone); + } + + public CitationReference(final Row.Entry rentry) { + // FIXME: see if cloning is necessary + this.entry = rentry; + } + + @Override + public CitationReference clone() { + final byte[] b = new byte[citationRow.objectsize]; + System.arraycopy(this.entry.bytes(), 0, b, 0, citationRow.objectsize); + return new CitationReference(b); + } + + @Override + public String toPropertyForm() { + return this.entry.toPropertyForm('=', true, true, false, false); + } + + @Override + public Entry toKelondroEntry() { + return this.entry; + } + + @Override + public byte[] urlhash() { + return this.entry.getColBytes(col_urlhash, true); + } + + public int virtualAge() { + return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format + } + + @Override + public long lastModified() { + return MicroDate.reverseMicroDateDays(((int) this.entry.getColLong(col_lastModified)) << 2); + } + + + @Override + public String toString() { + return toPropertyForm(); + } + + @Override + public boolean isOlder(final Reference other) { + if (other == null) return false; + if (this.lastModified() < other.lastModified()) return true; + return false; + } + + @Override + public int hashCode() { + return ByteArray.hashCode(this.urlhash()); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (!(obj instanceof CitationReference)) return false; + CitationReference other = (CitationReference) obj; + return Base64Order.enhancedCoder.equal(this.urlhash(), other.urlhash()); + } + + @Override + public int distance() { + throw new UnsupportedOperationException(); + } 
+ + @Override + public void join(Reference oe) { + throw new UnsupportedOperationException(); + } + + @Override + public int maxposition() { + throw new UnsupportedOperationException(); + } + + @Override + public int minposition() { + throw new UnsupportedOperationException(); + } + + public int position(int p) { + throw new UnsupportedOperationException(); + } + + @Override + public Collection positions() { + throw new UnsupportedOperationException(); + } + +} diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java b/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java new file mode 100644 index 000000000..4fe251ac6 --- /dev/null +++ b/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java @@ -0,0 +1,44 @@ +/** + * CitationReferenceFactory + * Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 13.02.2012 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */
+
+package net.yacy.kelondro.data.citation;
+
+import net.yacy.kelondro.index.Row;
+import net.yacy.kelondro.index.Row.Entry;
+import net.yacy.kelondro.rwi.ReferenceFactory;
+
+/**
+ * Factory that produces {@link CitationReference} objects from row entries
+ * and exposes the row layout they use.
+ */
+public class CitationReferenceFactory implements ReferenceFactory<CitationReference> {
+
+    /**
+     * Wraps the given row entry in a new {@link CitationReference}.
+     *
+     * @param e the row entry to wrap; it is handed to the reference as-is
+     * @return a citation reference backed by {@code e}
+     */
+    @Override
+    public CitationReference produceSlow(final Entry e) {
+        return new CitationReference(e);
+    }
+
+    /**
+     * Not implemented for citation references.
+     *
+     * @param r ignored
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public CitationReference produceFast(final CitationReference r) {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns the row layout of citation references.
+     *
+     * @return {@link CitationReference#citationRow}
+     */
+    @Override
+    public Row getRow() {
+        return CitationReference.citationRow;
+    }
+
+}