diff --git a/htroot/Vocabulary_p.html b/htroot/Vocabulary_p.html index 09ce3884c..c3bbac0e9 100644 --- a/htroot/Vocabulary_p.html +++ b/htroot/Vocabulary_p.html @@ -68,28 +68,31 @@ Delete Literal Synonyms + Object Link #{terms}# #(editable)# ::#(/editable)# #(editable)# ::#(/editable)# #[term]# - #(editable)##[synonyms]#::#(/editable)# + #(editable)##[synonyms]#::#(/editable)# + #(editable)##[objectlink]#::#(/editable)# #{/terms}# #(editable)#:: add - + + - clear table (remove all terms) + clear table (remove all terms) - delete vocabulary + delete vocabulary #(/editable)# diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index c88775156..b43a7249a 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -30,6 +30,7 @@ import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.lod.vocabulary.DCTerms; import net.yacy.cora.lod.vocabulary.Owl; import net.yacy.cora.lod.vocabulary.Tagging; +import net.yacy.cora.lod.vocabulary.Tagging.SOTuple; import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.cora.protocol.RequestHeader; import net.yacy.document.LibraryProvider; @@ -50,6 +51,7 @@ public class Vocabulary_p { String vocabularyName = (post == null) ? null : post.get("vocabulary", null); Tagging vocabulary = vocabularyName == null ? null : LibraryProvider.autotagging.getVocabulary(vocabularyName); + if (vocabulary == null) vocabularyName = null; int count = 0; for (Tagging v: vocs) { prop.put("vocabularyset_" + count + "_name", v.getName()); @@ -70,11 +72,12 @@ public class Vocabulary_p { String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); Segment segment = sb.indexSegments.segment(segmentName); Iterator ui = segment.urlSelector(discoveruri); - Map table = new TreeMap(); + Map table = new TreeMap(); File propFile = LibraryProvider.autotagging.getVocabularyFile(discovername); while (ui.hasNext()) { DigestURI u = ui.next(); - String t = u.toNormalform(false, false).substring(discoverobjectspace.length()); + String u0 = u.toNormalform(false, false); + String t = u0.substring(discoverobjectspace.length()); if (t.indexOf('/') >= 0) continue; int p = t.indexOf('.'); if (p >= 0) t = t.substring(0, p); @@ -82,7 +85,7 @@ public class Vocabulary_p { while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1); if (p >= 0) t = t.substring(p + 1); if (t.length() == 0) continue; - table.put(t, ""); + table.put(t, new Tagging.SOTuple("", u0)); } if (table.size() > 0) { Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table); @@ -96,7 +99,7 @@ public class Vocabulary_p { // check if a term was added if (post.get("add_new", "").equals("checked") && post.get("newterm", "").length() > 0) { - vocabulary.put(post.get("newterm", ""), post.get("newsynonyms", "")); + vocabulary.put(post.get("newterm", ""), post.get("newsynonyms", ""), post.get("newobjectlink", "")); } // check if a term was modified @@ -104,7 +107,8 @@ public class Vocabulary_p { if (e.getKey().startsWith("modify_") && e.getValue().equals("checked")) { String term = e.getKey().substring(7); String synonyms = post.get("synonyms_" + term, ""); - vocabulary.put(term, synonyms); + String objectlink = post.get("objectlink_" + term, ""); + vocabulary.put(term, synonyms, objectlink); } } @@ -150,15 +154,16 @@ public class Vocabulary_p { prop.putHTML("edit_editable_objectspacepredicate", DCTerms.references.getPredicate()); prop.putHTML("edit_triple1", "<" + yacyurl + "> <" + vocabulary.getPredicate() + "> \"[discovered-tags-commaseparated]\""); prop.putHTML("edit_triple2", "<" + yacyurl + "> <" + Owl.SameAs.getPredicate() + "> <[document-url]>"); - prop.putHTML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"" + vocabulary.getObjectspace() + "[discovered-tag]\""); + prop.putHTML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"[reference-link]#[tag]\" ."); int c = 0; boolean dark = false; - for (Map.Entry entry: vocabulary.list().entrySet()) { + for (Map.Entry entry: vocabulary.list().entrySet()) { prop.put("edit_terms_" + c + "_editable", editable ? 1 : 0); prop.put("edit_terms_" + c + "_dark", dark ? 1 : 0); dark = !dark; prop.putHTML("edit_terms_" + c + "_term", entry.getKey()); prop.putHTML("edit_terms_" + c + "_editable_term", entry.getKey()); - prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue()); + prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue().getSynonymsCSV()); + prop.putHTML("edit_terms_" + c + "_editable_objectlink", entry.getValue().getObjectlink()); c++; } prop.put("edit_terms", c); diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index b23b0f53a..5f42af68a 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -46,6 +46,7 @@ public class Tagging { private final String navigatorName; private final Map synonym2term; private final Map term2synonym; + private final Map term2objectlink; private final Map> synonym2synonyms; private File propFile; @@ -55,6 +56,7 @@ public class Tagging { this.navigatorName = name; this.synonym2term = new ConcurrentHashMap(); this.term2synonym = new ConcurrentHashMap(); + this.term2objectlink = new ConcurrentHashMap(); this.synonym2synonyms = new ConcurrentHashMap>(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + name; @@ -62,32 +64,68 @@ public class Tagging { this.propFile = null; } - public Tagging(String name, File propFile) throws IOException { + public Tagging(String name, File propFile) throws IOException { this(name); this.propFile = propFile; init(); } - /** - * initialize a new Tagging file with a given table and objectspace url stub - * @param name - * @param propFile - * @param objectspace - * @param table - * @throws IOException - */ - public Tagging(String name, File propFile, String objectspace, Map table) throws IOException { - this(name); - this.propFile = propFile; - this.objectspace = objectspace; - BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); - w.write("#objectspace:" + objectspace + "\n"); - for (Map.Entry e: table.entrySet()) { - w.write(e.getKey() + (e.getValue() == null || e.getValue().length() == 0 ? "" : ":" + e.getValue()) + "\n"); - } - w.close(); - init(); - } + /** + * initialize a new Tagging file with a given table and objectspace url stub + * @param name + * @param propFile + * @param objectspace + * @param table + * @throws IOException + */ + public Tagging(String name, File propFile, String objectspace, Map table) throws IOException { + this(name); + this.propFile = propFile; + this.objectspace = objectspace; + BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); + w.write("#objectspace:" + objectspace + "\n"); + for (Map.Entry e: table.entrySet()) { + String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); + String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); + w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); + } + w.close(); + init(); + } + + /** + * helper class: Synonym and Objectlink tuple + */ + public static class SOTuple { + private final String synonyms; + private final String objectlink; + + public SOTuple(String synonyms, String objectlink) { + this.synonyms = synonyms; + this.objectlink = objectlink; + } + + public SOTuple(String[] synonyms, String objectlink) { + StringBuilder sb = new StringBuilder(synonyms.length * 10); + for (String s: synonyms) sb.append(',').append(s); + this.synonyms = sb.substring(1); + this.objectlink = objectlink; + } + + public String getSynonymsCSV() { + return this.synonyms; + } + + public String[] getSynonymsList() { + return this.synonyms.split(","); + } + + public String getObjectlink() { + return this.objectlink; + } + + } + public void updateTerm(String term, String[] synonyms) { @@ -98,7 +136,7 @@ public class Tagging { return new File(this.propFile.getAbsolutePath() + ".tmp"); } - public void put(String term, String synonyms) throws IOException { + public void put(String term, String synonyms, String objectlink) throws IOException { if (this.propFile == null) return; File tmp = tmpFile(); BufferedWriter w = new BufferedWriter(new FileWriter(tmp)); @@ -114,14 +152,14 @@ public class Tagging { continue vocloop; } if (pl[0].equals(term)) { - w.write(term + (synonyms == null || synonyms.length() == 0 ? "" : ":" + synonyms) + "\n"); + w.write(term + (synonyms == null || synonyms.length() == 0 ? "" : ":" + synonyms) + (objectlink == null || objectlink.length() == 0 || objectlink.equals(this.objectspace + term) ? "" : "#" + objectlink) + "\n"); written = true; } else { - w.write(pl[0] + (pl[1] == null ? "" : ":" + pl[1]) + "\n"); + w.write(pl[0] + (pl[1] == null || pl[1].length() == 0 ? "" : ":" + pl[1]) + (pl[2] == null || pl[2].length() == 0 || pl[2].equals(this.objectspace + pl[0]) ? "" : "#" + pl[2]) + "\n"); } } if (!written) { - w.write(term + (synonyms == null || synonyms.length() == 0 ? "" : ":" + synonyms) + "\n"); + w.write(term + (synonyms == null || synonyms.length() == 0 ? "" : ":" + synonyms) + (objectlink == null || objectlink.length() == 0 || objectlink.equals(this.objectspace + term) ? "" : "#" + objectlink) + "\n"); } } catch (InterruptedException e) { } @@ -148,7 +186,7 @@ public class Tagging { if (pl[0].equals(term)) { continue vocloop; } else { - w.write(pl[0] + (pl[1] == null ? "" : ":" + pl[1]) + "\n"); + w.write(pl[0] + (pl[1] == null || pl[1].length() == 0 ? "" : ":" + pl[1]) + (pl[2] == null || pl[2].length() == 0 || pl[2].equals(this.objectspace + pl[0]) ? "" : "#" + pl[2]) + "\n"); } } } catch (InterruptedException e) { @@ -187,7 +225,7 @@ public class Tagging { if (pl == null) { continue vocloop; } - w.write(pl[0] + (pl[1] == null ? "" : ":" + pl[1]) + "\n"); + w.write(pl[0] + (pl[1] == null || pl[1].length() == 0 ? "" : ":" + pl[1]) + (pl[2] == null || pl[2].length() == 0 || pl[2].equals(this.objectspace + pl[0]) ? "" : "#" + pl[2]) + "\n"); } } catch (InterruptedException e) { } @@ -218,23 +256,25 @@ public class Tagging { return r; } - public Map reconstructionLists() { + public Map reconstructionLists() { Map> r = reconstructionSets(); - Map map = new TreeMap(); + Map map = new TreeMap(); for (Map.Entry> e: r.entrySet()) { - StringBuilder sb = new StringBuilder(e.getValue().size() * 10); - for (String s: e.getValue()) sb.append(',').append(s); - map.put(e.getKey(), sb.substring(1)); + map.put(e.getKey(), new SOTuple(e.getValue().toArray(new String[e.getValue().size()]), "")); } return map; } - public Map list() { + public String getObjectlink(String term) { + return this.term2objectlink.get(term); + } + + public Map list() { if (this.propFile == null) { // create a virtual map for automatically generated vocabularies return reconstructionLists(); } - Map map = new LinkedHashMap(); + Map map = new LinkedHashMap(); BlockingQueue list; try { list=Files.concurentLineReader(this.propFile, 1000); @@ -248,7 +288,7 @@ public class Tagging { if (pl == null) { continue vocloop; } - map.put(pl[0], pl[1] == null ? "" : pl[1]); + map.put(pl[0], new SOTuple(pl[1] == null || pl[1].length() == 0 ? "" : pl[1], pl[2] == null || pl[2].length() == 0 || pl[2].equals(this.objectspace + pl[0]) ? "" : pl[2])); } } catch (InterruptedException e) { } @@ -258,7 +298,9 @@ public class Tagging { private final static String[] parseLine(String line) { line = line.trim(); int p = line.indexOf('#'); + String c = ""; if (p >= 0) { + c = line.substring(p + 1); line = line.substring(0, p).trim(); } if (line.length() == 0) { @@ -272,15 +314,16 @@ public class Tagging { p = line.indexOf('\t'); } if (p < 0) { - return new String[]{line, null}; + return new String[]{line, null, c}; } - return new String[]{line.substring(0, p), line.substring(p + 1)}; + return new String[]{line.substring(0, p), line.substring(p + 1), c}; } public void init() throws IOException { if (this.propFile == null) return; this.synonym2term.clear(); this.term2synonym.clear(); + this.term2objectlink.clear(); this.synonym2synonyms.clear(); this.namespace = DEFAULT_NAMESPACE; this.predicate = this.namespace + this.navigatorName; @@ -301,12 +344,13 @@ public class Tagging { this.namespace = comment.substring(10).trim(); if (!this.namespace.endsWith("/") && !this.namespace.endsWith("#") && this.namespace.length() > 0) this.namespace += "#"; this.predicate = this.namespace + this.navigatorName; + continue vocloop; } if (comment.startsWith("objectspace:")) { this.objectspace = comment.substring(12).trim(); if (!this.objectspace.endsWith("/") && !this.objectspace.endsWith("#") && this.objectspace.length() > 0) this.objectspace += "#"; + continue vocloop; } - line = line.substring(0, p).trim(); } String[] pl = parseLine(line); if (pl == null) { @@ -317,6 +361,7 @@ public class Tagging { v = normalizeWord(pl[0]); this.synonym2term.put(v, term); this.term2synonym.put(term, v); + if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); continue vocloop; } term = normalizeKey(pl[0]); @@ -336,6 +381,7 @@ public class Tagging { String synonym = normalizeWord(term); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); + if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); synonyms.add(synonym); for (String s: synonyms) { this.synonym2synonyms.put(s, synonyms); diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 97c7cbeec..bf2210bb0 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -218,11 +218,12 @@ dc_rights */ public void addMetatags(Map> tags) { String subject = YaCyMetadata.hashURI(this.source.hash()); - for (String s: this.keywords) { - tags.remove(s); - } + //for (String s: this.keywords) { + // tags.remove(s); + //} for (Map.Entry> e: tags.entrySet()) { Tagging vocabulary = LibraryProvider.autotagging.getVocabulary(e.getKey()); + if (vocabulary == null) continue; String objectspace = vocabulary.getObjectspace(); StringBuilder sb = new StringBuilder(e.getValue().size() * 20); for (Tagging.Metatag s: e.getValue()) { @@ -231,8 +232,9 @@ dc_rights this.keywords.add(t); } sb.append(',').append(s.getObject()); - if (objectspace != null) { - JenaTripleStore.addTriple(subject, DCTerms.references.getPredicate(), objectspace + s.getObject()); + String objectlink = vocabulary.getObjectlink(s.getObject()); + if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) { + JenaTripleStore.addTriple(subject, DCTerms.references.getPredicate(), objectlink == null || objectlink.length() == 0 ? objectspace + s.getObject() + "#" + s.getObject() : objectlink + "#" + s.getObject()); } } // put to triplestore