diff --git a/defaults/yacy.init b/defaults/yacy.init index ac0466a7a..980f8147e 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1056,7 +1056,7 @@ color_searchurlhover = #008000 # - to check whats in solr after indexing, open http://localhost:8983/solr/admin/ federated.service.solr.indexing.enabled = false federated.service.solr.indexing.url = http://127.0.0.1:8983/solr -federated.service.solr.indexing.commitWithinMs = 180000 +federated.service.solr.indexing.commitWithinMs = -1 federated.service.solr.indexing.sharding = MODULO_HOST_MD5 federated.service.solr.indexing.schemefile = solr.keys.default.list # the lazy attribute causes that fields containing "" or 0 are not added and not written diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 44063e4d7..389860108 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -371,7 +371,7 @@ public class Crawler_p { try { sb.crawlQueues.errorURL.removeHost(ASCII.getBytes(hosthash)); sb.index.fulltext().getSolr().deleteByQuery(YaCySchema.host_id_s.getSolrFieldName() + ":\"" + hosthash + "\" AND " + YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]"); - sb.index.fulltext().commit(); + sb.index.fulltext().commit(true); } catch (IOException e) {Log.logException(e);} } diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 550494fa8..d6b73f081 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -96,7 +96,7 @@ public class HostBrowser { } String path = post == null ? "" : post.get("path", "").trim(); - if (admin && path.length() == 0) sb.index.fulltext().commit(); + sb.index.fulltext().commit(true); if (post == null || env == null) { return prop; } diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index a0a585a8e..333846b4d 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -77,7 +77,7 @@ public class IndexFederated_p { final boolean previous_core_fulltext = sb.index.fulltext().connectedLocalSolr() && env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false); env.setConfig(SwitchboardConstants.CORE_SERVICE_FULLTEXT, post_core_fulltext); - final int commitWithinMs = post.getInt("solr.indexing.commitWithinMs", env.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000)); + final int commitWithinMs = post.getInt("solr.indexing.commitWithinMs", env.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1)); if (previous_core_fulltext && !post_core_fulltext) { // switch off sb.index.fulltext().disconnectLocalSolr(); diff --git a/htroot/index.java b/htroot/index.java index 845b8e78c..d4f3ec44a 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -61,10 +61,6 @@ public class index { return prop; } } - - if (authorizedAccess) { - sb.index.fulltext().commit(); // call this only as superuser to prevent that this can be misused for DoS - } boolean global = (post == null) ? true : post.get("resource", "global").equals("global"); final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1"); diff --git a/htroot/yacyinteractive.java b/htroot/yacyinteractive.java index 27b794fd8..8a050dcd2 100644 --- a/htroot/yacyinteractive.java +++ b/htroot/yacyinteractive.java @@ -47,11 +47,6 @@ public class yacyinteractive { prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, "")); prop.put("promoteSearchPageGreeting.smallImage", sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, "")); - final boolean admin = sb.verifyAuthentication(header); - if (admin) { - sb.index.fulltext().commit(); - } - final String query = (post == null) ? "" : post.get("query", ""); final String startRecord = (post == null) ? "0" : post.get("startRecord", ""); final String maximumRecords = (post == null) ? "10" : post.get("maximumRecords", ""); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index e5395e6bd..f7f42da98 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -117,7 +117,6 @@ public class yacysearch { final String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim(); String querystring = originalquerystring.replace('+', ' ').trim(); CacheStrategy snippetFetchStrategy = (post == null) ? null : CacheStrategy.parse(post.get("verify", sb.getConfig("search.verify", ""))); - if (authenticated && originalquerystring.length() == 0) sb.index.fulltext().commit(); final servletProperties prop = new servletProperties(); prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0); diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 7293a852f..6a204f4c5 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -146,7 +146,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo @Override public synchronized void close() { - try {this.commit();} catch (Throwable e) {Log.logException(e);} + try {this.commit(false);} catch (Throwable e) {Log.logException(e);} try {super.close();} catch (Throwable e) {Log.logException(e);} try {this.defaultCore.close();} catch (Throwable e) {Log.logException(e);} try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);} @@ -213,7 +213,6 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo storage.mkdirs(); try { EmbeddedSolrConnector solr = new EmbeddedSolrConnector(storage, solr_config); - solr.setCommitWithinMs(100); SolrInputDocument doc = new SolrInputDocument(); doc.addField(YaCySchema.id.name(), "ABCD0000abcd"); doc.addField(YaCySchema.title.name(), "Lorem ipsum"); diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index e38a8bb3c..c5b95db64 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -171,9 +171,18 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo } @Override - public void commit() { - if (this.solr0 != null) this.solr0.commit(); - if (this.solr1 != null) this.solr1.commit(); + public void commit(boolean softCommit) { + if (this.solr0 != null) this.solr0.commit(softCommit); + if (this.solr1 != null) this.solr1.commit(softCommit); + } + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments) { + if (this.solr0 != null) this.solr0.optimize(maxSegments); + if (this.solr1 != null) this.solr1.optimize(maxSegments); } @Override @@ -320,7 +329,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo // check if there is a autocommit problem if (c.hitCache.containsKey(key)) { // the document should be there, therefore make a commit and check again - this.commit(); + this.commit(true); if ((solr0 != null && ((doc = solr0.getById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getById(key, fields)) != null))) { addToCache(doc, fields.length == 0); return doc; diff --git a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java index 2ee21794a..d34518901 100644 --- a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java @@ -47,7 +47,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr this.solr = new RemoteSolrConnector(url); this.queue = new ArrayBlockingQueue(1000); this.worker = new AddWorker[connections]; - this.commitWithinMs = 180000; + this.commitWithinMs = -1; for (int i = 0; i < connections; i++) { this.worker[i] = new AddWorker(url); this.worker[i].start(); @@ -58,7 +58,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr private final SolrConnector solr; public AddWorker(final String url) throws IOException { this.solr = new RemoteSolrConnector(url); - this.solr.setCommitWithinMs(MultipleSolrConnector.this.commitWithinMs); + if (MultipleSolrConnector.this.commitWithinMs >= 0 ) this.solr.setCommitWithinMs(MultipleSolrConnector.this.commitWithinMs); } @Override public void run() { @@ -97,8 +97,16 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr } @Override - public void commit() { - this.solr.commit(); + public void commit(boolean softCommit) { + this.solr.commit(softCommit); + } + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments) { + this.solr.optimize(maxSegments); } @Override diff --git a/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java index 6016e1f8d..f9de62b97 100644 --- a/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/RetrySolrConnector.java @@ -59,8 +59,16 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon } @Override - public void commit() { - this.solrConnector.commit(); + public void commit(boolean softCommit) { + this.solrConnector.commit(softCommit); + } + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments) { + this.solrConnector.optimize(maxSegments); } @Override diff --git a/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java index 1b7c5089f..386a1c34b 100644 --- a/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ShardSolrConnector.java @@ -73,8 +73,16 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon } @Override - public void commit() { - for (final SolrConnector connector: this.connectors) connector.commit(); + public void commit(boolean softCommit) { + for (final SolrConnector connector: this.connectors) connector.commit(softCommit); + } + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments) { + for (final SolrConnector connector: this.connectors) connector.optimize(maxSegments); } @Override diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 8a7ae6ba6..b05094939 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -51,7 +51,13 @@ public interface SolrConnector extends Iterable /* Iterable of document /** * force a commit */ - public void commit(); + public void commit(boolean softCommit); + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments); /** * close the server connection diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index 74bcfb241..a0f65a5df 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -91,6 +91,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen /** * set the solr autocommit delay + * when doing continuous inserts, don't set this value because it would cause continuous commits * @param c the maximum waiting time after a solr command until it is transported to the server */ @Override @@ -99,9 +100,21 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen } @Override - public synchronized void commit() { + public synchronized void commit(final boolean softCommit) { try { - this.server.commit(); + this.server.commit(true, true, softCommit); + } catch (SolrServerException e) { + } catch (IOException e) { + } + } + + /** + * force an explicit merge of segments + * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization + */ + public void optimize(int maxSegments) { + try { + this.server.optimize(true, true, maxSegments); } catch (SolrServerException e) { } catch (IOException e) { } @@ -110,7 +123,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen @Override public synchronized void close() { try { - if (this.server != null) synchronized (this.server) {this.server.commit();} + if (this.server != null) synchronized (this.server) {this.server.commit(true, true, false);} this.server = null; } catch (SolrServerException e) { log.warn(e); @@ -194,7 +207,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen try { synchronized (this.server) { this.server.deleteByQuery("*:*"); - this.server.commit(); + this.server.commit(true, true, false); } } catch (final Throwable e) { throw new IOException(e); @@ -234,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen synchronized (this.server) { long c0 = this.getQueryCount(querystring); this.server.deleteByQuery(querystring, this.commitWithinMs); - this.commit(); + this.commit(true); long c1 = this.getQueryCount(querystring); return (int) (c1 - c0); } @@ -254,7 +267,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen try { synchronized (this.server) { this.server.request(up); - //this.server.commit(); } } catch (final Throwable e) { throw new IOException(e); @@ -273,7 +285,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen // catches "version conflict for": try this again and delete the document in advance try { this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName())); - //this.server.commit(); } catch (SolrServerException e1) {} try { synchronized (this.server) { diff --git a/source/net/yacy/migration.java b/source/net/yacy/migration.java index f7c446356..e7333246e 100644 --- a/source/net/yacy/migration.java +++ b/source/net/yacy/migration.java @@ -319,7 +319,7 @@ public class migration { } Log.logInfo("migrateUrldbtoSolr", Integer.toString(i) + " entries left (convert next chunk of 1000 entries)"); } - ft.commit(); + ft.commit(true); } catch (IOException ex) { Log.logInfo("migrateUrldbtoSolr", "error reading old urldb index"); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 56940be57..1f78ba0d2 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -431,7 +431,7 @@ public final class Switchboard extends serverSwitch { ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0); final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); this.index = new Segment(this.log, new File(segmentsPath, "default"), solrScheme); - final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000); + final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) { @@ -1288,7 +1288,7 @@ public final class Switchboard extends serverSwitch { this.useTailCache, this.exceed134217727); this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"), solrScheme); - final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000); + final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) { @@ -1306,7 +1306,7 @@ public final class Switchboard extends serverSwitch { solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true); - solr.setCommitWithinMs(connectWithinMs); + if (connectWithinMs >= 0) solr.setCommitWithinMs(connectWithinMs); this.index.fulltext().connectRemoteSolr(solr); } catch ( final IOException e ) { Log.logException(e); @@ -2232,7 +2232,7 @@ public final class Switchboard extends serverSwitch { // execute the (post-) processing steps for all entries that have a process tag assigned if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) { // that means we must search for those entries. - index.fulltext().getSolr().commit(); // make sure that we have latest information that can be found + index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found BlockingQueue docs = index.fulltext().getSolr().concurrentQuery(YaCySchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10); SolrDocument doc; int proccount_clickdepth = 0; diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index 518bf21a7..bdebf8c30 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -70,7 +70,7 @@ public class DocumentIndex extends Segment { false, // useTailCache false // exceed134217727 ); - super.fulltext().connectLocalSolr(1000); + super.fulltext().connectLocalSolr(-1); final int cores = Runtime.getRuntime().availableProcessors() + 1; this.callback = callback; this.queue = new LinkedBlockingQueue(cores * 300); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 036269e1a..126536ed3 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -77,8 +77,6 @@ public final class Fulltext implements Iterable { private static final String SOLR_PATH = "solr_40"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml private static final String SOLR_OLD_PATH[] = new String[]{"solr_36"}; - private static final long forcedCommitTimeout = 3000; // wait this time until a next forced commit is executed - // class objects private final File location; private Index urlIndexFile; @@ -87,7 +85,6 @@ public final class Fulltext implements Iterable { private ArrayList statsDump; private final MirrorSolrConnector solr; private final SolrConfiguration solrScheme; - private long forcedCommitTime; protected Fulltext(final File path, final SolrConfiguration solrScheme) { this.location = path; @@ -97,7 +94,6 @@ public final class Fulltext implements Iterable { this.statsDump = null; this.solr = new MirrorSolrConnector(10000, 10000, 100); this.solrScheme = solrScheme; - this.forcedCommitTime = 0; } /** @@ -158,7 +154,7 @@ public final class Fulltext implements Iterable { if (oldLocation.exists()) oldLocation.renameTo(solrLocation); } EmbeddedSolrConnector esc = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr")); - esc.setCommitWithinMs(commitWithin); + if (commitWithin >= 0) esc.setCommitWithinMs(commitWithin); Version luceneVersion = esc.getConfig().getLuceneVersion("luceneMatchVersion"); String lvn = luceneVersion.name(); int p = lvn.indexOf('_'); @@ -239,11 +235,8 @@ public final class Fulltext implements Iterable { return this.solr.getCommitWithinMs(); } - public void commit() { - if (this.forcedCommitTime + forcedCommitTimeout > System.currentTimeMillis()) return; - this.forcedCommitTime = Long.MAX_VALUE - forcedCommitTimeout; // set the time high to prevent that other processes get to this point meanwhile - this.solr.commit(); - this.forcedCommitTime = System.currentTimeMillis(); // set the exact time + public void commit(boolean softCommit) { + this.solr.commit(softCommit); } public Date getLoadDate(final String urlHash) { @@ -378,7 +371,7 @@ public final class Fulltext implements Iterable { synchronized (Fulltext.this.solr) { try { count.addAndGet(Fulltext.this.solr.deleteByQuery(q)); - if (count.get() > 0) Fulltext.this.solr.commit(); + if (count.get() > 0) Fulltext.this.solr.commit(true); } catch (IOException e) {} } @@ -444,7 +437,7 @@ public final class Fulltext implements Iterable { count.incrementAndGet(); } } - if (count.get() > 0) Fulltext.this.solr.commit(); + if (count.get() > 0) Fulltext.this.solr.commit(true); } catch (InterruptedException e) {} } }; @@ -466,7 +459,7 @@ public final class Fulltext implements Iterable { for (byte[] urlHash: deleteIDs) { Fulltext.this.solr.delete(ASCII.String(urlHash)); } - Fulltext.this.solr.commit(); + Fulltext.this.solr.commit(true); } } catch (final Throwable e) { Log.logException(e); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index fe30c713e..d9a0709b2 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -35,6 +35,9 @@ import java.util.Properties; import java.util.Set; import java.util.concurrent.BlockingQueue; +import org.apache.solr.client.solrj.util.ClientUtils; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import net.yacy.cora.document.ASCII; @@ -68,6 +71,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.ISO639; +import net.yacy.kelondro.util.MemoryControl; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; @@ -347,6 +351,21 @@ public class Segment { ) { final long startTime = System.currentTimeMillis(); + // DO A SOFT/HARD COMMIT IF NEEDED + if (MemoryControl.shortStatus()) { + // do a 'hard' commit to flush index caches + this.fulltext.getSolr().commit(false); + } else { + if ( + (this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_unique_b)) || + (this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_unique_b)) || + this.fulltext.getSolrScheme().contains(YaCySchema.title_unique_b) || + this.fulltext.getSolrScheme().contains(YaCySchema.description_unique_b) + ) { + this.fulltext.getSolr().commit(true); // make sure that we have latest information for the postprocessing steps + } + } + // CREATE INDEX // load some document metadata @@ -368,13 +387,13 @@ public class Segment { for (YaCySchema[] checkfields: new YaCySchema[][]{ {YaCySchema.exact_signature_l, YaCySchema.exact_signature_unique_b}, {YaCySchema.fuzzy_signature_l, YaCySchema.fuzzy_signature_unique_b}}) { - YaCySchema hashfield = checkfields[0]; + YaCySchema checkfield = checkfields[0]; YaCySchema uniquefield = checkfields[1]; - if (this.fulltext.getSolrScheme().contains(hashfield) && this.fulltext.getSolrScheme().contains(uniquefield)) { + if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) { // lookup the document with the same signature - long signature = ((Long) solrInputDoc.getField(hashfield.getSolrFieldName()).getValue()).longValue(); + long signature = ((Long) solrInputDoc.getField(checkfield.getSolrFieldName()).getValue()).longValue(); try { - if (this.fulltext.getSolr().exists(hashfield.getSolrFieldName(), Long.toString(signature))) { + if (this.fulltext.getSolr().exists(checkfield.getSolrFieldName(), Long.toString(signature))) { // change unique attribut in content solrInputDoc.setField(uniquefield.getSolrFieldName(), false); } @@ -382,13 +401,44 @@ public class Segment { } } + // CHECK IF TITLE AND DESCRIPTION IS UNIQUE (this is by default not switched on) + uniquecheck: for (YaCySchema[] checkfields: new YaCySchema[][]{ + {YaCySchema.title, YaCySchema.title_unique_b}, + {YaCySchema.description, YaCySchema.description_unique_b}}) { + YaCySchema checkfield = checkfields[0]; + YaCySchema uniquefield = checkfields[1]; + if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) { + // lookup in the index for the same title + String checkstring = checkfield == YaCySchema.title ? document.dc_title() : document.dc_description(); + if (checkstring.length() == 0) { + solrInputDoc.setField(uniquefield.getSolrFieldName(), false); + continue uniquecheck; + } + checkstring = ClientUtils.escapeQueryChars("\"" + checkstring + "\""); + try { + if (this.fulltext.getSolr().exists(checkfield.getSolrFieldName(), checkstring)) { + // switch unique attribute in new document + solrInputDoc.setField(uniquefield.getSolrFieldName(), false); + // switch attribute also in all existing documents (which should be exactly only one!) + SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000, YaCySchema.id.getSolrFieldName()); + for (SolrDocument doc: docs) { + SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc); + sid.setField(uniquefield.getSolrFieldName(), false); + this.fulltext.getSolr().add(sid); + } + } else { + solrInputDoc.setField(uniquefield.getSolrFieldName(), true); + } + } catch (IOException e) {} + } + } + // ENRICH DOCUMENT WITH RANKING INFORMATION if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) { int references = this.urlCitationIndex.count(url.hash()); if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references); } - // STORE TO SOLR String error = null; tryloop: for (int i = 0; i < 20; i++) { @@ -399,7 +449,7 @@ public class Segment { } catch ( final IOException e ) { error = "failed to send " + urlNormalform + " to solr"; Log.logWarning("SOLR", error + e.getMessage()); - if (i == 10) this.fulltext.commit(); + if (i == 10) this.fulltext.commit(false); try {Thread.sleep(1000);} catch (InterruptedException e1) {} continue tryloop; } diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 90be0c28b..076da8d48 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -645,6 +645,10 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable inboundLinks.remove(canonical); outboundLinks.remove(canonical); add(doc, YaCySchema.canonical_t, canonical.toNormalform(false)); + // set a flag if this is equal to sku + if (contains(YaCySchema.canonical_equal_sku_b) && canonical.equals(docurl)) { + add(doc, YaCySchema.canonical_equal_sku_b, true); + } } } diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 896afe5e5..95b81235d 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -210,6 +210,9 @@ public final class SearchEvent { .getFlagAcceptRemoteIndex())); final long start = System.currentTimeMillis(); + // do a soft commit for fresh results + query.getSegment().fulltext().commit(true); + // prepare a local RWI search // initialize a ranking process that is the target for data // that is generated concurrently from local and global search threads