From 6f1ddb2519fa0fde0041ec7061fe68e116468441 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Wed, 25 Jul 2012 01:53:47 +0200
Subject: [PATCH] Moved solr index-add method to the same method where the YaCy
 index is written. Also did some code cleanup.

---
 htroot/IndexFederated_p.java                  |   6 +-
 htroot/api/schema_p.java                      |   4 +-
 source/de/anomic/crawler/CrawlQueues.java     |   8 +-
 .../yacy/cora/protocol/ResponseHeader.java    |   8 +
 source/net/yacy/peers/Protocol.java           |   3 +-
 .../net/yacy/search/IndexingQueueEntry.java   |  41 +++
 source/net/yacy/search/Shutdown.java          |  47 ++++
 source/net/yacy/search/Switchboard.java       |  83 ++----
 .../net/yacy/search/index/DocumentIndex.java  |   9 +-
 source/net/yacy/search/index/Segment.java     | 255 +++++++++---------
 .../yacy/search/index/SolrConfiguration.java  |  20 +-
 source/net/yacy/search/query/RWIProcess.java  |  12 +-
 .../net/yacy/search/query/SnippetProcess.java |   2 +-
 source/net/yacy/yacy.java                     |   4 +-
 14 files changed, 275 insertions(+), 227 deletions(-)
 create mode 100644 source/net/yacy/search/IndexingQueueEntry.java
 create mode 100644 source/net/yacy/search/Shutdown.java

diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java
index 2d43aff42..08b842bd4 100644
--- a/htroot/IndexFederated_p.java
+++ b/htroot/IndexFederated_p.java
@@ -137,7 +137,7 @@ public class IndexFederated_p {
             }
 
             // read index scheme table flags
-            final Iterator<ConfigurationSet.Entry> i = sb.solrScheme.entryIterator();
+            final Iterator<ConfigurationSet.Entry> i = sb.index.getSolrScheme().entryIterator();
             ConfigurationSet.Entry entry;
             boolean modified = false; // flag to remember changes
             while (i.hasNext()) {
@@ -160,7 +160,7 @@ public class IndexFederated_p {
             }
             if (modified) { // save settings to config file if modified
                 try {
-                    sb.solrScheme.commit();
+                    sb.index.getSolrScheme().commit();
                     modified = false;
                 } catch (IOException ex) {}
             }
@@ -191,7 +191,7 @@ public class IndexFederated_p {
         // use enum SolrField to keep defined order
         for(SolrField field : SolrField.values()) {
             prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
-            prop.put("scheme_" + c + "_checked", sb.solrScheme.contains(field.name()) ? 1 : 0);
+            prop.put("scheme_" + c + "_checked", sb.index.getSolrScheme().contains(field.name()) ? 1 : 0);
             prop.putHTML("scheme_" + c + "_key", field.name());
             prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
             if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment());
diff --git a/htroot/api/schema_p.java b/htroot/api/schema_p.java
index 69ba5525f..5a224eb27 100644
--- a/htroot/api/schema_p.java
+++ b/htroot/api/schema_p.java
@@ -24,6 +24,7 @@
 
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.search.Switchboard;
+import net.yacy.search.index.SolrConfiguration;
 import net.yacy.search.index.SolrField;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -37,8 +38,9 @@ public class schema_p {
 
         // write scheme
         int c = 0;
+        SolrConfiguration solrScheme = sb.index.getSolrScheme();
         for (SolrField field : SolrField.values()) {
-            if (sb.solrScheme.contains(field.name())) {
+            if (solrScheme.contains(field.name())) {
                 prop.put("fields_" + c + "_solrname", field.getSolrFieldName());
                 prop.put("fields_" + c + "_type", field.getType().printName());
                 prop.put("fields_" + c + "_comment", field.getComment());
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index c0241818c..88037837d 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -81,8 +81,8 @@ public class CrawlQueues {
         this.log.logConfig("Starting Crawling Management");
         this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
         FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(sb.index.getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        this.delegatedURL = new ZURL(sb.index.getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+        this.errorURL = new ZURL(sb.index.getSolr(), sb.index.getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+        this.delegatedURL = new ZURL(sb.index.getSolr(), sb.index.getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
     }
 
     public void relocate(final File newQueuePath) {
@@ -93,8 +93,8 @@ public class CrawlQueues {
 
         this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
         FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(this.sb.index.getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-        this.delegatedURL = new ZURL(this.sb.index.getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
+        this.errorURL = new ZURL(this.sb.index.getSolr(), this.sb.index.getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
+        this.delegatedURL = new ZURL(this.sb.index.getSolr(), this.sb.index.getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
     }
 
     public synchronized void close() {
diff --git a/source/net/yacy/cora/protocol/ResponseHeader.java b/source/net/yacy/cora/protocol/ResponseHeader.java
index 328c10722..e4185ad4a 100644
--- a/source/net/yacy/cora/protocol/ResponseHeader.java
+++ b/source/net/yacy/cora/protocol/ResponseHeader.java
@@ -159,4 +159,12 @@ public class ResponseHeader extends HeaderFramework {
         }
         return Charset.forName(charSetName);
     }
+
+    /** Returns the X-Robots-Tag header value, or the X-Robots header value when the former is empty. */
+    public String getXRobotsTag() {
+        final String primary = this.get(HeaderFramework.X_ROBOTS_TAG, "");
+        if (!primary.isEmpty()) return primary;
+        // nothing under the primary name: use the alternative header name instead
+        return this.get(HeaderFramework.X_ROBOTS, "");
+    }
 }
diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java
index 11d195525..b5902dc54 100644
--- a/source/net/yacy/peers/Protocol.java
+++ b/source/net/yacy/peers/Protocol.java
@@ -786,7 +786,8 @@ public final class Protocol
         // store remote result to local result container
         // insert one container into the search result buffer
         // one is enough, only the references are used, not the word
-        containerCache.add(container.get(0), false, target.getName() + "/" + target.hash, result.joincount, true, time);
+        containerCache.add(container.get(0), false, target.getName() + "/" + target.hash, result.joincount, time);
+        containerCache.addFinalize();
         containerCache.addExpectedRemoteReferences(-count);
 
         // insert the containers to the index
diff --git a/source/net/yacy/search/IndexingQueueEntry.java b/source/net/yacy/search/IndexingQueueEntry.java
new file mode 100644
index 000000000..250921688
--- /dev/null
+++ b/source/net/yacy/search/IndexingQueueEntry.java
@@ -0,0 +1,41 @@
+/**
+ *  IndexingQueueEntry
+ *  Copyright 2012 by Michael Peter Christen
+ *  First released 24.07.2012 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.search;
+
+import net.yacy.document.Condenser;
+import net.yacy.document.Document;
+import net.yacy.kelondro.workflow.WorkflowJob;
+import de.anomic.crawler.retrieval.Response;
+
+public class IndexingQueueEntry extends WorkflowJob {
+    // Work item type of Switchboard's indexing WorkflowProcessor queues; all fields are public and non-final.
+    public Response queueEntry;
+    public Document[] documents;
+    public Condenser[] condenser; // Switchboard passes null here when a resource is not condensed
+    // Stores the three references exactly as given (no copying, no null checks).
+    public IndexingQueueEntry(final Response queueEntry, final Document[] documents, final Condenser[] condenser) {
+        super();
+        this.queueEntry = queueEntry;
+        this.documents = documents;
+        this.condenser = condenser;
+    }
+}
diff --git a/source/net/yacy/search/Shutdown.java b/source/net/yacy/search/Shutdown.java
new file mode 100644
index 000000000..b139f398f
--- /dev/null
+++ b/source/net/yacy/search/Shutdown.java
@@ -0,0 +1,47 @@
+/**
+ *  Shutdown
+ *  Copyright 2012 by Michael Peter Christen
+ *  First released 24.07.2012 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search;
+
+import net.yacy.kelondro.logging.Log;
+
+public class Shutdown extends Thread {
+    private final Switchboard switchboard; // terminated at the end of run()
+    private final long waitMillis;         // passed to Thread.sleep before terminating
+    private final String cause;            // handed to Switchboard.terminate
+    // Only stores its arguments; nothing happens until the thread is run.
+    public Shutdown(final Switchboard sb, final long delay, final String reason) {
+        this.switchboard = sb;
+        this.waitMillis = delay;
+        this.cause = reason;
+    }
+    // Sleeps, then terminates; terminate is still called if the sleep is interrupted or throws.
+    @Override
+    public void run() {
+        try {
+            Thread.sleep(this.waitMillis);
+        } catch ( final InterruptedException e ) {
+            this.switchboard.getLog().logInfo("interrupted delayed shutdown");
+        } catch ( final Exception e ) {
+            Log.logException(e);
+        }
+        this.switchboard.terminate(this.cause);
+    }
+}
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 245b4da89..8a29de350 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -97,7 +97,6 @@ import net.yacy.cora.protocol.http.ProxySettings;
 import net.yacy.cora.services.federated.solr.ShardSelection;
 import net.yacy.cora.services.federated.solr.ShardSolrConnector;
 import net.yacy.cora.services.federated.solr.SolrConnector;
-import net.yacy.cora.services.federated.solr.SolrDoc;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -251,7 +250,6 @@ public final class Switchboard extends serverSwitch
     public SeedDB peers;
     public WorkTables tables;
     public Tray tray;
-    public SolrConfiguration solrScheme;
 
     public WorkflowProcessor<IndexingQueueEntry> indexingDocumentProcessor;
     public WorkflowProcessor<IndexingQueueEntry> indexingCondensementProcessor;
@@ -376,16 +374,6 @@ public final class Switchboard extends serverSwitch
         this.networkRoot.mkdirs();
         this.queuesRoot.mkdirs();
 
-        // initialize index
-        ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
-        final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
-        this.index = new Segment(this.log, new File(segmentsPath, "default"));
-        final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_URLDB, true)) this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_SOLR, true)) this.index.connectLocalSolr(connectWithinMs);
-
         // prepare a solr index profile switch list
         final File solrBackupProfile = new File("defaults/solr.keys.list");
         final String schemename = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list");
@@ -395,11 +383,21 @@ public final class Switchboard extends serverSwitch
         }
         final boolean solrlazy = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true);
         final SolrConfiguration backupScheme = new SolrConfiguration(solrBackupProfile, solrlazy);
-        this.solrScheme = new SolrConfiguration(solrWorkProfile, solrlazy);
-
+        final SolrConfiguration solrScheme = new SolrConfiguration(solrWorkProfile, solrlazy);
         // update the working scheme with the backup scheme. This is necessary to include new features.
         // new features are always activated by default (if activated in input-backupScheme)
-        this.solrScheme.fill(backupScheme, true);
+        solrScheme.fill(backupScheme, true);
+
+        // initialize index
+        ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
+        final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
+        this.index = new Segment(this.log, new File(segmentsPath, "default"), solrScheme);
+        final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_URLDB, true)) this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_SOLR, true)) this.index.connectLocalSolr(connectWithinMs);
+
 
         // set up the solr interface
         final String solrurls = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr");
@@ -1133,6 +1131,9 @@ public final class Switchboard extends serverSwitch
         // switch the networks
         synchronized ( this ) {
 
+            // remember the solr scheme
+            SolrConfiguration solrScheme = this.index.getSolrScheme();
+
             // shut down
             this.crawler.close();
             if ( this.dhtDispatcher != null ) {
@@ -1179,7 +1180,7 @@ public final class Switchboard extends serverSwitch
                 partitionExponent,
                 this.useTailCache,
                 this.exceed134217727);
-            this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"));
+            this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"), solrScheme);
             final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
             if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
             if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
@@ -2395,55 +2396,8 @@ public final class Switchboard extends serverSwitch
             return new IndexingQueueEntry(in.queueEntry, in.documents, null);
         }
 
-        boolean localSolr = this.index.connectedLocalSolr();
-        boolean remoteSolr = this.index.connectedRemoteSolr();
-        if (localSolr || remoteSolr) {
-            // send the documents to solr
-            for ( final Document doc : in.documents ) {
-                try {
-                    final String id = UTF8.String(new DigestURI(doc.dc_identifier()).hash());
-                    final String iquh = UTF8.String(in.queueEntry.url().hash());
-                    if ( !id.equals(iquh) ) {
-                        this.log.logWarning("condenseDocument consistency check doc="
-                            + id
-                            + ":"
-                            + doc.dc_identifier()
-                            + ", query="
-                            + iquh
-                            + ":"
-                            + in.queueEntry.url());
-                        // in case that this happens it appears that the doc id is the right one
-                    }
-                    try {
-                        SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
-                        this.index.getSolr().add(solrDoc);
-                    } catch ( final IOException e ) {
-                        Log.logWarning(
-                            "SOLR",
-                            "failed to send "
-                                + in.queueEntry.url().toNormalform(true, false)
-                                + " to solr: "
-                                + e.getMessage());
-                    }
-                } catch ( final MalformedURLException e ) {
-                    Log.logException(e);
-                    continue;
-                }
-            }
-        }
-
-        // check if we should accept the document for our index
-        if (!this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) {
-            if ( this.log.isInfo() ) {
-                this.log.logInfo("Not Condensed Resource '"
-                    + in.queueEntry.url().toNormalform(false, true)
-                    + "': indexing not wanted by federated rule for YaCy");
-            }
-            return new IndexingQueueEntry(in.queueEntry, in.documents, null);
-        }
-        final List<Document> doclist = new ArrayList<Document>();
-
         // check which files may take part in the indexing process
+        final List<Document> doclist = new ArrayList<Document>();
         for ( final Document document : in.documents ) {
             if ( document.indexingDenied() ) {
                 if ( this.log.isInfo() ) {
@@ -2569,6 +2523,7 @@ public final class Switchboard extends serverSwitch
                     queueEntry.lastModified(),
                     new Date(),
                     queueEntry.size(),
+                    queueEntry.getResponseHeader(),
                     document,
                     condenser,
                     searchEvent,
diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java
index 21ebf5878..518d2c08f 100644
--- a/source/net/yacy/search/index/DocumentIndex.java
+++ b/source/net/yacy/search/index/DocumentIndex.java
@@ -73,9 +73,9 @@ public class DocumentIndex extends Segment
 
     static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
 
-    public DocumentIndex(final File segmentPath, final CallbackListener callback, final int cachesize)
+    public DocumentIndex(final File segmentPath, final File schemePath, final CallbackListener callback, final int cachesize)
         throws IOException {
-        super(new Log("DocumentIndex"), segmentPath);
+        super(new Log("DocumentIndex"), segmentPath, schemePath == null ? null : new SolrConfiguration(schemePath, true));
         super.connectRWI(cachesize, targetFileSize * 4 - 1);
         super.connectCitation(cachesize, targetFileSize * 4 - 1);
         super.connectUrlDb(
@@ -174,6 +174,7 @@ public class DocumentIndex extends Segment
                     new Date(url.lastModified()),
                     new Date(),
                     url.length(),
+                    null,
                     document,
                     condenser,
                     null,
@@ -306,7 +307,7 @@ public class DocumentIndex extends Segment
         try {
             if ( args[1].equals("add") ) {
                 final DigestURI f = new DigestURI(args[2]);
-                final DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
+                final DocumentIndex di = new DocumentIndex(segmentPath, null, callback, 100000);
                 di.addConcurrent(f);
                 di.close();
             } else {
@@ -315,7 +316,7 @@ public class DocumentIndex extends Segment
                     query += args[i];
                 }
                 query.trim();
-                final DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
+                final DocumentIndex di = new DocumentIndex(segmentPath, null, callback, 100000);
                 final ArrayList<DigestURI> results = di.find(query, 100);
                 for ( final DigestURI f : results ) {
                     if ( f != null ) {
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index 2faf13d25..f3c16058d 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -39,7 +39,9 @@ import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.order.ByteOrder;
+import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.services.federated.solr.SolrConnector;
+import net.yacy.cora.services.federated.solr.SolrDoc;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -100,15 +102,16 @@ public class Segment {
 
     private   final Log                            log;
     private   final File                           segmentPath;
+    private   final SolrConfiguration              solrScheme;
     protected final MetadataRepository             urlMetadata;
     protected       IndexCell<WordReference>       termIndex;
     protected       IndexCell<CitationReference>   urlCitationIndex;
 
-    public Segment(final Log log, final File segmentPath) {
-
+    public Segment(final Log log, final File segmentPath, final SolrConfiguration solrScheme) {
         log.logInfo("Initializing Segment '" + segmentPath + ".");
         this.log = log;
         this.segmentPath = segmentPath;
+        this.solrScheme = solrScheme;
 
         // create LURL-db
         this.urlMetadata = new MetadataRepository(segmentPath);
@@ -197,10 +200,15 @@ public class Segment {
     public void disconnectLocalSolr() {
         this.urlMetadata.disconnectLocalSolr();
     }
+
     public SolrConnector getSolr() {
         return this.urlMetadata.getSolr();
     }
 
+    public SolrConfiguration getSolrScheme() { // scheme given to the constructor; may be null (DocumentIndex passes null without a scheme path)
+        return this.solrScheme;
+    }
+
     public SolrConnector getRemoteSolr() {
         return this.urlMetadata.getRemoteSolr();
     }
@@ -318,94 +326,6 @@ public class Segment {
         return this.segmentPath;
     }
 
-    /**
-     * this is called by the switchboard to put in a new page into the index
-     * use all the words in one condenser object to simultanous create index entries
-     *
-     * @param url
-     * @param urlModified
-     * @param document
-     * @param condenser
-     * @param language
-     * @param doctype
-     * @param outlinksSame
-     * @param outlinksOther
-     * @return
-     */
-    private int addPageIndex(
-            final DigestURI url,
-            final Date urlModified,
-            final Document document,
-            final Condenser condenser,
-            final String language,
-            final char doctype,
-            final int outlinksSame,
-            final int outlinksOther,
-            final SearchEvent searchEvent,
-            final String sourceName) {
-        final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
-        int wordCount = 0;
-        final int urlLength = url.toNormalform(true, true).length();
-        final int urlComps = MultiProtocolURI.urlComps(url.toString()).length;
-
-        // iterate over all words of content text
-        final Iterator<Map.Entry<String, Word>> i = condenser.words().entrySet().iterator();
-        Map.Entry<String, Word> wentry;
-        String word;
-        final int len = (document == null) ? urlLength : document.dc_title().length();
-        final WordReferenceRow ientry = new WordReferenceRow(url.hash(),
-                                urlLength, urlComps, len,
-                                condenser.RESULT_NUMB_WORDS,
-                                condenser.RESULT_NUMB_SENTENCES,
-                                urlModified.getTime(),
-                                System.currentTimeMillis(),
-                                UTF8.getBytes(language),
-                                doctype,
-                                outlinksSame, outlinksOther);
-        Word wprop = null;
-        byte[] wordhash;
-        while (i.hasNext()) {
-            wentry = i.next();
-            word = wentry.getKey();
-            wprop = wentry.getValue();
-            assert (wprop.flags != null);
-            ientry.setWord(wprop);
-            wordhash = Word.word2hash(word);
-            if (this.termIndex != null) try {
-                this.termIndex.add(wordhash, ientry);
-            } catch (final Exception e) {
-                Log.logException(e);
-            }
-            wordCount++;
-
-            // during a search event it is possible that a heuristic is used which aquires index
-            // data during search-time. To transfer indexed data directly to the search process
-            // the following lines push the index data additionally to the search process
-            // this is done only for searched words
-            if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) {
-                // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
-                ReferenceContainer<WordReference> container;
-                try {
-                    container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
-                    container.add(ientry);
-                    rankingProcess.add(container, true, sourceName, -1, !i.hasNext(), 5000);
-                } catch (final RowSpaceExceededException e) {
-                    continue;
-                }
-            }
-        }
-
-        // assign the catchall word
-        ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics
-        if (this.termIndex != null) try {
-            this.termIndex.add(catchallHash, ientry);
-        } catch (final Exception e) {
-            Log.logException(e);
-        }
-
-        return wordCount;
-    }
-
     private int addCitationIndex(final DigestURI url, final Date urlModified, final Map<MultiProtocolURI, Properties> anchors) {
     	if (anchors == null) return 0;
     	int refCount = 0;
@@ -433,25 +353,12 @@ public class Segment {
         if (this.urlCitationIndex != null) this.urlCitationIndex.close();
     }
 
-    public URIMetadataRow storeDocument(
-            final DigestURI url,
-            final DigestURI referrerURL,
-            Date modDate,
-            final Date loadDate,
-            final long sourcesize,
-            final Document document,
-            final Condenser condenser,
-            final SearchEvent searchEvent,
-            final String sourceName
-            ) throws IOException {
-        final long startTime = System.currentTimeMillis();
-
-        // CREATE INDEX
-
-        // load some document metadata
-        final String dc_title = document.dc_title();
-
-        // do a identification of the language
+    private String votedLanguage(
+                    final DigestURI url,
+                    final String urlNormalform,
+                    final Document document,
+                    final Condenser condenser) {
+        // identify the language of the document
         String language = condenser.language(); // this is a statistical analysation of the content: will be compared with other attributes
         final String bymetadata = document.dc_language(); // the languageByMetadata may return null if there was no declaration
         if (language == null) {
@@ -466,7 +373,7 @@ public class Segment {
                 else {
                     final String error = "LANGUAGE-BY-STATISTICS: " + url + " CONFLICTING: " + language + " (the language given by the TLD is " + url.language() + ")";
                     // see if we have a hint in the url that the statistic was right
-                    final String u = url.toNormalform(true, false).toLowerCase();
+                    final String u = urlNormalform.toLowerCase();
                     if (!u.contains("/" + language + "/") && !u.contains("/" + ISO639.country(language).toLowerCase() + "/")) {
                         // no confirmation using the url, use the TLD
                         language = url.language();
@@ -491,9 +398,46 @@ public class Segment {
                 }
             }
         }
+        return language;
+    }
 
-        // create a new loaded URL db entry
-        if (modDate.getTime() > loadDate.getTime()) modDate = loadDate;
+    public URIMetadataRow storeDocument(
+            final DigestURI url,
+            final DigestURI referrerURL,
+            Date modDate,
+            final Date loadDate,
+            final long sourcesize,
+            final ResponseHeader responseHeader,
+            final Document document,
+            final Condenser condenser,
+            final SearchEvent searchEvent,
+            final String sourceName
+            ) throws IOException {
+        final long startTime = System.currentTimeMillis();
+
+        // CREATE INDEX
+
+        // load some document metadata
+        final String id = ASCII.String(url.hash());
+        final String dc_title = document.dc_title();
+        final String urlNormalform = url.toNormalform(true, false);
+        final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language
+
+        // STORE TO SOLR
+        boolean localSolr = this.connectedLocalSolr();
+        boolean remoteSolr = this.connectedRemoteSolr();
+        if (localSolr || remoteSolr) {
+            try {
+                SolrDoc solrDoc = this.solrScheme.yacy2solr(id, responseHeader, document);
+                this.getSolr().add(solrDoc);
+            } catch ( final IOException e ) {
+                Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage());
+            }
+        }
+
+        // STORE URL TO LOADED-URL-DB
+        if (modDate.getTime() > loadDate.getTime()) modDate = loadDate; // TODO: compare with modTime from responseHeader
+        char docType = Response.docType(document.dc_format());
         final URIMetadataRow newEntry = new URIMetadataRow(
                 url,                                       // URL
                 dc_title,                                  // document description
@@ -509,7 +453,7 @@ public class Segment {
                 new byte[0],                               // md5
                 (int) sourcesize,                          // size
                 condenser.RESULT_NUMB_WORDS,               // word count
-                Response.docType(document.dc_format()),    // doctype
+                docType,                                   // doctype
                 condenser.RESULT_FLAGS,                    // flags
                 UTF8.getBytes(language),                   // language
                 document.inboundLinks().size(),            // inbound links
@@ -519,25 +463,72 @@ public class Segment {
                 document.getVideolinks().size(),           // lvideo
                 document.getApplinks().size()              // lapp
         );
-
-        // STORE URL TO LOADED-URL-DB
-        this.urlMetadata.store(newEntry); // TODO: should be serialized; integrated in IODispatcher
-
+        this.urlMetadata.store(newEntry);
         final long storageEndTime = System.currentTimeMillis();
 
         // STORE PAGE INDEX INTO WORD INDEX DB
-        final int words = addPageIndex(
-                url,                                          // document url
-                modDate,                                      // document mod date
-                document,                                     // document content
-                condenser,                                    // document condenser
-                language,                                     // document language
-                Response.docType(document.dc_format()),       // document type
-                document.inboundLinks().size(),               // inbound links
-                document.outboundLinks().size(),              // outbound links
-                searchEvent,                                  // a search event that can have results directly
-                sourceName                                    // the name of the source where the index was created
-        );
+        int outlinksSame = document.inboundLinks().size();
+        int outlinksOther = document.outboundLinks().size();
+        final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
+        int wordCount = 0;
+        final int urlLength = urlNormalform.length();
+        final int urlComps = MultiProtocolURI.urlComps(url.toString()).length;
+
+        // create a word prototype which is re-used for all entries
+        final int len = (document == null) ? urlLength : document.dc_title().length();
+        final WordReferenceRow ientry = new WordReferenceRow(
+                        url.hash(),
+                        urlLength, urlComps, len,
+                        condenser.RESULT_NUMB_WORDS,
+                        condenser.RESULT_NUMB_SENTENCES,
+                        modDate.getTime(),
+                        System.currentTimeMillis(),
+                        UTF8.getBytes(language),
+                        docType,
+                        outlinksSame, outlinksOther);
+
+        // iterate over all words of content text
+        Word wprop = null;
+        byte[] wordhash;
+        String word;
+        for (Map.Entry<String, Word> wentry: condenser.words().entrySet()) {
+            word = wentry.getKey();
+            wprop = wentry.getValue();
+            assert (wprop.flags != null);
+            ientry.setWord(wprop);
+            wordhash = Word.word2hash(word);
+            if (this.termIndex != null) try {
+                this.termIndex.add(wordhash, ientry);
+            } catch (final Exception e) {
+                Log.logException(e);
+            }
+            wordCount++;
+
+            // during a search event it is possible that a heuristic is used which acquires index
+            // data during search-time. To transfer indexed data directly to the search process
+            // the following lines push the index data additionally to the search process
+            // this is done only for searched words
+            if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) {
+                // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
+                ReferenceContainer<WordReference> container;
+                try {
+                    container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
+                    container.add(ientry);
+                    rankingProcess.add(container, true, sourceName, -1, 5000);
+                } catch (final RowSpaceExceededException e) {
+                    continue;
+                }
+            }
+        }
+        if (rankingProcess != null) rankingProcess.addFinalize();
+
+        // assign the catchall word
+        ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics
+        if (this.termIndex != null) try {
+            this.termIndex.add(catchallHash, ientry);
+        } catch (final Exception e) {
+            Log.logException(e);
+        }
 
         // STORE PAGE REFERENCES INTO CITATION INDEX
         final int refs = addCitationIndex(url, modDate, document.getAnchors());
@@ -546,10 +537,8 @@ public class Segment {
         final long indexingEndTime = System.currentTimeMillis();
 
         if (this.log.isInfo()) {
-            // TODO: UTF-8 docDescription seems not to be displayed correctly because
-            // of string concatenation
-            this.log.logInfo("*Indexed " + words + " words in URL " + url +
-                    " [" + ASCII.String(url.hash()) + "]" +
+            this.log.logInfo("*Indexed " + wordCount + " words in URL " + url +
+                    " [" + id + "]" +
                     "\n\tDescription:  " + dc_title +
                     "\n\tMimeType: "  + document.dc_format() + " | Charset: " + document.getCharset() + " | " +
                     "Size: " + document.getTextLength() + " bytes | " +
diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java
index 5fc5efe4f..ef7a3b093 100644
--- a/source/net/yacy/search/index/SolrConfiguration.java
+++ b/source/net/yacy/search/index/SolrConfiguration.java
@@ -106,7 +106,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
     protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String[] value) {
         if ((isEmpty() || contains(key.name())) && (!this.lazy || (value != null && value.length > 0))) solrdoc.addSolr(key, value);
     }
-    
+
     protected void addSolr(final SolrDoc solrdoc, final SolrField key, final List<String> value) {
         if ((isEmpty() || contains(key.name())) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value);
     }
@@ -163,7 +163,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
         addSolr(solrdoc, SolrField.author, yacydoc.dc_creator());
         addSolr(solrdoc, SolrField.description, yacydoc.dc_description());
         addSolr(solrdoc, SolrField.content_type, yacydoc.dc_format());
-        addSolr(solrdoc, SolrField.last_modified, header.lastModified());
+        addSolr(solrdoc, SolrField.last_modified, header == null ? new Date() : header.lastModified());
         addSolr(solrdoc, SolrField.keywords, yacydoc.dc_subject(' '));
         final String content = yacydoc.getTextString();
         addSolr(solrdoc, SolrField.text_t, content);
@@ -224,10 +224,14 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
                 if (robots_meta.indexOf("noindex",0) >= 0) b += 4;  // set bit 2
                 if (robots_meta.indexOf("nofollow",0) >= 0) b += 8; // set bit 3
             }
-            String x_robots_tag = header.get(HeaderFramework.X_ROBOTS_TAG, "");
-            if (x_robots_tag.isEmpty()) {
-            	x_robots_tag = header.get(HeaderFramework.X_ROBOTS, "");
-            } else {
+            String x_robots_tag = "";
+            if (header != null) {
+                x_robots_tag = header.get(HeaderFramework.X_ROBOTS_TAG, "");
+                if (x_robots_tag.isEmpty()) {
+                    x_robots_tag = header.get(HeaderFramework.X_ROBOTS, "");
+                }
+            }
+            if (!x_robots_tag.isEmpty()) {
                 // this tag may have values: noarchive, nosnippet, noindex, unavailable_after
                 if (x_robots_tag.indexOf("noarchive",0) >= 0) b += 256;         // set bit 8
                 if (x_robots_tag.indexOf("nosnippet",0) >= 0) b += 512;         // set bit 9
@@ -398,7 +402,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
             }
 
             // response time
-            addSolr(solrdoc, SolrField.responsetime_i, header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0"));
+            addSolr(solrdoc, SolrField.responsetime_i, header == null ? 0 : Integer.parseInt(header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0")));
         }
 
         // list all links
@@ -487,7 +491,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
             addSolr(solrdoc, SolrField.lon_coordinate, yacydoc.lon());
             addSolr(solrdoc, SolrField.lat_coordinate, yacydoc.lat());
         }
-        addSolr(solrdoc, SolrField.httpstatus_i, header.getStatusCode());
+        addSolr(solrdoc, SolrField.httpstatus_i, header == null ? 200 : header.getStatusCode());
 
         return solrdoc;
     }
diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java
index 54ddf747b..3248598d8 100644
--- a/source/net/yacy/search/query/RWIProcess.java
+++ b/source/net/yacy/search/query/RWIProcess.java
@@ -221,7 +221,8 @@ public final class RWIProcess extends Thread
                     System.currentTimeMillis() - timer),
                 false);
             if ( !index.isEmpty() ) {
-                add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, true, this.maxtime);
+                add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, this.maxtime);
+                addFinalize();
             }
         } catch ( final Exception e ) {
             Log.logException(e);
@@ -230,12 +231,15 @@ public final class RWIProcess extends Thread
         }
     }
 
+    public void addFinalize() { // signals that no further add(...) calls follow by clearing the addRunning flag; replaces the former finalizeAddAtEnd parameter of add(...)
+        this.addRunning = false;
+    }
+
     public void add(
         final ReferenceContainer<WordReference> index,
         final boolean local,
         final String resourceName,
         final int fullResource,
-        final boolean finalizeAddAtEnd,
         final long maxtime) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not terminate within the maxTime
@@ -422,10 +426,6 @@ public final class RWIProcess extends Thread
 
         } catch ( final InterruptedException e ) {
         } catch ( final RowSpaceExceededException e ) {
-        } finally {
-            if ( finalizeAddAtEnd ) {
-                this.addRunning = false;
-            }
         }
 
         //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java
index e2d4b24e9..d381d7d9e 100644
--- a/source/net/yacy/search/query/SnippetProcess.java
+++ b/source/net/yacy/search/query/SnippetProcess.java
@@ -503,7 +503,7 @@ public class SnippetProcess {
                             sd = sdl.get(0);
                         }
                         if (sd != null) {
-                            solrContent = Switchboard.getSwitchboard().solrScheme.solrGetText(sd);
+                            solrContent = Switchboard.getSwitchboard().index.getSolrScheme().solrGetText(sd);
                         }
                     }
 
diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java
index 62564b8b4..d10281b63 100644
--- a/source/net/yacy/yacy.java
+++ b/source/net/yacy/yacy.java
@@ -666,7 +666,7 @@ public final class yacy {
             final int cacheMem = (int)(MemoryControl.maxMemory() - MemoryControl.total());
             if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
 
-            final Segment wordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"));
+            final Segment wordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), null);
             wordIndex.connectRWI(10000, Integer.MAX_VALUE);
             wordIndex.connectUrlDb(false, false);
             final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false);
@@ -845,7 +845,7 @@ public final class yacy {
         try {
             Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
             if (resource.equals("all")) {
-                WordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"));
+                WordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), null);
                 WordIndex.connectRWI(10000, Integer.MAX_VALUE);
                 WordIndex.connectUrlDb(false, false);
                 indexContainerIterator = WordIndex.termIndex().referenceContainerIterator(wordChunkStartHash.getBytes(), false, false);