From 2d67f2924412747d9de8ee634dc98ceeba947c20 Mon Sep 17 00:00:00 2001 From: reger Date: Thu, 26 Jun 2014 22:16:15 +0200 Subject: [PATCH] adjust mergeDocument after parsing to - preserve charset and languages - fix merge of author --- source/net/yacy/document/Document.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 7ff31606b..d687b64d2 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -817,17 +817,22 @@ dc_rights final List anchors = new ArrayList(); final LinkedHashMap rss = new LinkedHashMap(); final LinkedHashMap images = new LinkedHashMap(); + final Set languages = new HashSet(); double lon = 0.0d, lat = 0.0d; Date date = new Date(); + String charset = null; int mindepth = 999; for (final Document doc: docs) { - if (doc == null) continue; + if (doc == null) continue; + + if (charset == null) charset = doc.charset; // TODO: uses this charset for merged content + final String author = doc.dc_creator(); if (author.length() > 0) { if (authors.length() > 0) authors.append(","); - subjects.append(author); + authors.append(author); } final String publisher = doc.dc_publisher(); @@ -861,6 +866,7 @@ dc_rights if (doc.date.before(date)) date = doc.date; if (doc.getDepth() < mindepth) mindepth = doc.getDepth(); + if (doc.dc_language() != null) languages.add(doc.dc_language()); } // clean up parser data @@ -878,9 +884,9 @@ dc_rights Document newDoc = new Document( location, globalMime, + charset, null, - null, - null, + languages, subjects.toString().split(" |,"), titlesa, authors.toString(),