diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 00a3ee3fd..1f68239a2 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2022,7 +2022,9 @@ public final class Switchboard extends serverSwitch { log.warn("IO Error processing warc file " + infile); } return moved; - } else if (s.endsWith(".flatjson")) { + } else if (s.endsWith(".jsonlist") || s.endsWith(".flatjson")) { + // parse a file that can be generated with yacy_grid_parser + // see https://github.com/yacy/yacy_grid_parser/blob/master/README.md try { InputStream is = new BufferedInputStream(new FileInputStream(infile)); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); @@ -2035,9 +2037,27 @@ public final class Switchboard extends serverSwitch { for (String key: json.keySet()) { Object o = json.get(key); if (o instanceof JSONArray) { - // todo: ass array + // transform this into a list + JSONArray a = (JSONArray) o; + List list = new ArrayList<>(); + for (int i = 0; i < a.length(); i++) list.add(a.get(i)); + CollectionSchema schema = CollectionSchema.valueOf(key); + schema.add(surrogate, list); } else { - surrogate.put(key, new SolrInputField(o.toString())); + // patch yacy grid altered schema (yacy grid does not have IDs any more, but they can be re-computed here) + if (key.equals("url_s")) { + DigestURL durl = new DigestURL(o.toString()); + String id = ASCII.String(durl.hash()); + surrogate.setField(CollectionSchema.sku.getSolrFieldName(), durl.toNormalform(true)); + surrogate.setField(CollectionSchema.id.getSolrFieldName(), id); + surrogate.setField(CollectionSchema.host_id_s.getSolrFieldName(), id.substring(6)); + } else if (key.equals("referrer_url_s")) { + DigestURL durl = new DigestURL(o.toString()); + String id = ASCII.String(durl.hash()); + surrogate.setField(CollectionSchema.referrer_id_s.getSolrFieldName(), id); + } else { + surrogate.setField(key, o.toString()); + } } } Switchboard.this.index.putDocument(surrogate); @@ -2219,7 +2239,9 @@ public final class Switchboard extends serverSwitch { || surrogate.endsWith(".xml.gz") || surrogate.endsWith(".xml.zip") || surrogate.endsWith(".warc") - || surrogate.endsWith(".warc.gz") ) { + || surrogate.endsWith(".warc.gz") + || surrogate.endsWith(".jsonlist") + || surrogate.endsWith(".flatjson") ) { // read the surrogate file and store entry in index if ( processSurrogate(surrogate) ) { return true; diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index fdba2e39e..59247ee57 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -26,6 +26,8 @@ import java.util.List; import net.yacy.cora.federate.solr.SchemaDeclaration; import net.yacy.cora.federate.solr.SolrType; +import org.apache.poi.ss.formula.atp.DateParser; +import org.apache.poi.ss.formula.eval.EvaluationException; import org.apache.solr.common.SolrInputDocument; public enum CollectionSchema implements SchemaDeclaration { @@ -424,6 +426,8 @@ public enum CollectionSchema implements SchemaDeclaration { doc.setField(this.getSolrFieldName(), new Integer[0]); } else if (this.type == SolrType.string || this.type == SolrType.text_general) { doc.setField(this.getSolrFieldName(), new String[0]); + } else if (this.type == SolrType.date) { + doc.setField(this.getSolrFieldName(), new Date[0]); } else { assert false : "ADD(1): type is " + this.type.name(); doc.setField(this.getSolrFieldName(), new Object[0]); @@ -436,6 +440,21 @@ public enum CollectionSchema implements SchemaDeclaration { } else if (this.type == SolrType.string || this.type == SolrType.text_general) { assert (value.iterator().next() instanceof String); doc.setField(this.getSolrFieldName(), value.toArray(new String[value.size()])); + } else if (this.type == SolrType.date) { + assert (value.iterator().next() instanceof String) || (value.iterator().next() instanceof Date); + if (value.iterator().next() instanceof String) { + Date[] da = new Date[value.size()]; + for (int i = 0; i < value.size(); i++) { + try { + da[i] = DateParser.parseDate((String) value.get(i)).getTime(); + } catch (EvaluationException e) { + da[i] = null; + } + } + doc.setField(this.getSolrFieldName(), da); + } else { + doc.setField(this.getSolrFieldName(), value.toArray(new Date[value.size()])); + } } else { assert false : "ADD(2): type is " + this.type.name(); doc.setField(this.getSolrFieldName(), value.toArray(new Object[value.size()]));