diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index e36c123f1..4528ad6ad 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -112,7 +112,7 @@ public class searchresult { // get a solr query string QueryGoal qg = new QueryGoal(originalQuery, originalQuery); - StringBuilder solrQ = qg.collectionQueryString(sb.index.fulltext().getDefaultConfiguration(), 0); + StringBuilder solrQ = qg.collectionTextQueryString(sb.index.fulltext().getDefaultConfiguration(), 0); post.put("defType", "edismax"); post.put(CommonParams.Q, solrQ.toString()); post.put(CommonParams.ROWS, post.remove("num")); diff --git a/htroot/solr/select.java b/htroot/solr/select.java index f5af3bff1..311bec918 100644 --- a/htroot/solr/select.java +++ b/htroot/solr/select.java @@ -168,7 +168,7 @@ public class select { querystring = modifier.parse(querystring); modifier.apply(post); QueryGoal qg = new QueryGoal(querystring, querystring); - StringBuilder solrQ = qg.collectionQueryString(sb.index.fulltext().getDefaultConfiguration(), profileNr); + StringBuilder solrQ = qg.collectionTextQueryString(sb.index.fulltext().getDefaultConfiguration(), profileNr); post.put(CommonParams.Q, solrQ.toString()); // sru patch } String q = post.get(CommonParams.Q, ""); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 8cd24a7d1..80c2f6628 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -277,32 +277,32 @@ public class yacysearchitem { // image search; shows thumbnails prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content - //final MediaSnippet ms = theSearch.result().oneImage(item); - final ResultEntry ms = theSearch.oneResult(item, timeout); - if (ms == null) { - prop.put("content_item", "0"); - } else { - final String resultUrlstring = ms.url().toNormalform(true); - final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self"); + SearchEvent.ImageResult image = null; + try { + image = theSearch.oneImageResult(item, timeout); + final String imageUrlstring = image.imageUrl.toNormalform(true); + final String target = sb.getConfig(imageUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self"); - final String license = URLLicense.aquireLicense(ms.url()); - sb.loader.loadIfNotExistBackground(ms.url(), 1024 * 1024 * 10, null, ClientIdentification.yacyIntranetCrawlerAgent); - prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + resultUrlstring : resultUrlstring); - prop.putHTML("content_item_href", resultUrlstring); + final String license = URLLicense.aquireLicense(image.imageUrl); + sb.loader.loadIfNotExistBackground(image.imageUrl, 1024 * 1024 * 10, null, ClientIdentification.yacyIntranetCrawlerAgent); + prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + imageUrlstring : imageUrlstring); + prop.putHTML("content_item_href", imageUrlstring); prop.putHTML("content_item_target", target); prop.put("content_item_code", license); - prop.putHTML("content_item_name", shorten(ms.title(), MAX_NAME_LENGTH)); - prop.put("content_item_mimetype", ""); + prop.putHTML("content_item_name", shorten(image.imagetext, MAX_NAME_LENGTH)); + prop.put("content_item_mimetype", image.mimetype); prop.put("content_item_fileSize", 0); - prop.put("content_item_width", 0); - prop.put("content_item_height", 0); + prop.put("content_item_width", image.width); + prop.put("content_item_height", image.height); prop.put("content_item_attr", ""/*(ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"*/); // attributes, here: original size of image - prop.put("content_item_urlhash", ASCII.String(ms.url().hash())); - prop.put("content_item_source", ms.url().toNormalform(true)); - prop.putXML("content_item_source-xml", ms.url().toNormalform(true)); - prop.put("content_item_sourcedom", ms.url().getHost()); + prop.put("content_item_urlhash", ASCII.String(image.imageUrl.hash())); + prop.put("content_item_source", image.sourceUrl.toNormalform(true)); + prop.putXML("content_item_source-xml", image.sourceUrl.toNormalform(true)); + prop.put("content_item_sourcedom", image.sourceUrl.getHost()); prop.put("content_item_nl", (item == theSearch.query.offset) ? 0 : 1); prop.put("content_item", 1); + } catch (MalformedURLException e) { + prop.put("content_item", "0"); } theSearch.query.transmitcount = item + 1; return prop; diff --git a/source/net/yacy/kelondro/util/SetTools.java b/source/net/yacy/kelondro/util/SetTools.java index 604f4a8ce..9cd054b42 100644 --- a/source/net/yacy/kelondro/util/SetTools.java +++ b/source/net/yacy/kelondro/util/SetTools.java @@ -559,6 +559,13 @@ public final class SetTools { return sb.toString(); } + public static Object nth(Collection c, int n) { + if (c == null || c.size() <= n) return null; + int i = 0; + for (Object o: c) if (i++ == n) return o; + return null; + } + // ------------------------------------------------------------------------------------------------ diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 63e701b23..fe11e4512 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -69,6 +69,7 @@ import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSReader; import net.yacy.cora.document.UTF8; +import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.federate.opensearch.SRURSSConnector; import net.yacy.cora.federate.solr.connector.RemoteSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; @@ -1017,14 +1018,17 @@ public final class Protocol { solrQuery.setRows(count); // set highlighting query attributes - solrQuery.setHighlight(true); - solrQuery.setHighlightFragsize(SearchEvent.SNIPPET_MAX_LENGTH); - //solrQuery.setHighlightRequireFieldMatch(); - solrQuery.setHighlightSimplePost(""); - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSnippets(1); - for (CollectionSchema field: snippetFields) solrQuery.addHighlightField(field.getSolrFieldName()); - + if (event.query.contentdom == Classification.ContentDomain.TEXT || event.query.contentdom == Classification.ContentDomain.ALL) { + solrQuery.setHighlight(true); + solrQuery.setHighlightFragsize(SearchEvent.SNIPPET_MAX_LENGTH); + //solrQuery.setHighlightRequireFieldMatch(); + solrQuery.setHighlightSimplePost(""); + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSnippets(1); + for (CollectionSchema field: snippetFields) solrQuery.addHighlightField(field.getSolrFieldName()); + } else { + solrQuery.setHighlight(false); + } boolean localsearch = target == null || target.equals(event.peers.mySeed()); if (localsearch && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL, false)) { target = event.peers.mySeed(); diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 8233cfc58..600d63474 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -172,7 +172,7 @@ public class RemoteSearch extends Thread { nodePeers.add(event.peers.mySeed()); } if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_OFF, false)) { - final SolrQuery solrQuery = event.query.solrQuery(start == 0); + final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, start == 0); for (Seed s: nodePeers) { Thread t = solrRemoteSearch(event, solrQuery, start, count, s, blacklist); event.nodeSearchThreads.add(t); diff --git a/source/net/yacy/peers/SeedDB.java b/source/net/yacy/peers/SeedDB.java index 9b61b2abf..06d7e9f07 100644 --- a/source/net/yacy/peers/SeedDB.java +++ b/source/net/yacy/peers/SeedDB.java @@ -32,10 +32,8 @@ import java.io.PrintWriter; import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; -import java.util.HashSet; import java.util.Iterator; import java.util.Map; -import java.util.Set; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index 4a72f110e..4f5c06921 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -179,6 +179,14 @@ public class QueryGoal { return exclude_strings; } + public boolean matches(String text) { + if (text == null || text.length() == 0) return false; + String t = text.toLowerCase(); + for (String i: this.include_strings) if (t.indexOf(i.toLowerCase()) < 0) return false; + for (String e: this.exclude_strings) if (t.indexOf(e.toLowerCase()) >= 0) return false; + return true; + } + public ArrayList getAllStrings() { return all_strings; } @@ -189,30 +197,22 @@ public class QueryGoal { final HandleSet blues = Word.words2hashesHandles(blueList); for (final byte[] b: blues) this.include_hashes.remove(b); } - - public StringBuilder collectionQueryString(CollectionConfiguration configuration, int rankingProfile) { + + public StringBuilder collectionTextQueryString(CollectionConfiguration configuration, int rankingProfile) { final StringBuilder q = new StringBuilder(80); + // add filter to prevent that results come from failed urls + q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND ("); + // parse special requests if (include_strings.size() == 1 && exclude_strings.size() == 0) { String w = include_strings.get(0); if (Segment.catchallString.equals(w)) return new StringBuilder(AbstractSolrConnector.CATCHALL_TERM); } - // add text query + // add goal query int wc = 0; - StringBuilder w = new StringBuilder(80); - for (String s: include_strings) { - if (wc > 0) w.append(" AND "); - w.append(dq).append(s).append(dq); - wc++; - } - for (String s: exclude_strings){ - if (wc > 0) w.append(" AND -"); - w.append(dq).append(s).append(dq); - wc++; - } - if (wc > 1) {w.insert(0, '('); w.append(')');} + StringBuilder w = getGoalQuery(); // combine these queries for all relevant fields wc = 0; @@ -231,14 +231,52 @@ public class QueryGoal { q.append(')'); wc++; } - q.insert(0, '('); q.append(')'); + return q; + } + + public StringBuilder collectionImageQueryString() { + final StringBuilder q = new StringBuilder(80); + // add filter to prevent that results come from failed urls - q.append(" AND ").append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200"); - //q.append(" AND -").append(YaCySchema.failreason_s.getSolrFieldName()).append(":[* TO *]"); + q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND "); + q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *]").append(" AND ("); + + // parse special requests + if (include_strings.size() == 1 && exclude_strings.size() == 0) { + String w = include_strings.get(0); + if (Segment.catchallString.equals(w)) return new StringBuilder(AbstractSolrConnector.CATCHALL_TERM); + } + + // add goal query + StringBuilder w = getGoalQuery(); + + // combine these queries for all relevant fields + q.append('(').append(CollectionSchema.images_alt_txt.getSolrFieldName()).append(':').append(w).append("^20.0) OR "); + q.append('(').append(CollectionSchema.images_text_t.getSolrFieldName()).append(':').append(w).append("^10.0) OR "); + q.append('(').append(CollectionSchema.text_t.getSolrFieldName()).append(':').append(w).append(')'); + + q.append(')'); return q; } + + private StringBuilder getGoalQuery() { + int wc = 0; + StringBuilder w = new StringBuilder(80); + for (String s: include_strings) { + if (wc > 0) w.append(" AND "); + w.append(dq).append(s).append(dq); + wc++; + } + for (String s: exclude_strings){ + if (wc > 0) w.append(" AND -"); + w.append(dq).append(s).append(dq); + wc++; + } + if (wc > 1) {w.insert(0, '('); w.append(')');} + return w; + } } diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 609e79f76..c59ade8d7 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -250,7 +250,7 @@ public final class QueryParams { this.constraint = constraint; this.allofconstraint = allofconstraint; this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? null: siteexcludes; - this.snippetCacheStrategy = snippetCacheStrategy; + this.snippetCacheStrategy = contentdom == ContentDomain.TEXT ? snippetCacheStrategy : contentdom == null ? null : CacheStrategy.CACHEONLY; this.clienthost = host; this.remotepeer = null; this.starttime = Long.valueOf(System.currentTimeMillis()); @@ -376,26 +376,124 @@ public final class QueryParams { return SetTools.anymatch(wordhashes, keyhashes); } - public SolrQuery solrQuery(boolean getFacets) { + public SolrQuery solrQuery(ContentDomain cd, boolean getFacets) { + if (cd == ContentDomain.IMAGE) return solrImageQuery(getFacets); + return solrTextQuery(getFacets); + } + + private SolrQuery solrTextQuery(boolean getFacets) { if (this.cachedQuery != null) { this.cachedQuery.setStart(this.offset); return this.cachedQuery; } if (this.queryGoal.getIncludeStrings().size() == 0) return null; + // construct query - final SolrQuery params = new SolrQuery(); + final SolrQuery params = getBasicParams(getFacets); int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0; - params.setQuery(this.queryGoal.collectionQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile).toString()); - params.setParam("defType", "edismax"); + params.setQuery(this.queryGoal.collectionTextQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile).toString()); Ranking ranking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile String bq = ranking.getBoostQuery(); String bf = ranking.getBoostFunction(); if (bq.length() > 0) params.setParam("bq", bq); if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 + + /* + if (this.contentdom == ContentDomain.IMAGE) { + fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\")"); + } + + if (this.contentdom == ContentDomain.AUDIO) { + fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"aif\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"aiff\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mp3\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"ogg\")"); + } + + if (this.contentdom == ContentDomain.VIDEO) { + fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mpg\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"avi\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mp4\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mkv\")"); + } + + if (this.contentdom == ContentDomain.APP) { + fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"apk\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"exe\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"dmg\""); + fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"gz\")"); + } + */ + + // prepare result + ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); + this.cachedQuery = params; + return params; + } + + private SolrQuery solrImageQuery(boolean getFacets) { + if (this.cachedQuery != null) { + this.cachedQuery.setStart(this.offset); + return this.cachedQuery; + } + if (this.queryGoal.getIncludeStrings().size() == 0) return null; + + // construct query + final SolrQuery params = getBasicParams(getFacets); + params.setQuery(this.queryGoal.collectionImageQueryString().toString()); + + // set boosts + StringBuilder bq = new StringBuilder(); + bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\""); + params.setParam("bq", bq.toString()); + + // prepare result + ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); + this.cachedQuery = params; + return params; + } + + private SolrQuery getBasicParams(boolean getFacets) { + final SolrQuery params = new SolrQuery(); + params.setParam("defType", "edismax"); params.setStart(this.offset); params.setRows(this.itemsPerPage); params.setFacet(false); + + if (this.ranking.coeff_date == RankingProfile.COEFF_MAX) { + // set a most-recent ordering + params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); + //params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // deprecated in Solr 4.2 + } + + // add site facets + final String fq = getFacets(); + if (fq.length() > 0) { + params.setFilterQueries(fq); + } + + // set facet query attributes + if (getFacets && this.facetfields.size() > 0) { + params.setFacet(true); + params.setFacetLimit(this.maxfacets); + params.setFacetSort(FacetParams.FACET_SORT_COUNT); + params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fcs); + for (String field: this.facetfields) params.addFacetField(field); + } else { + params.setFacet(false); + } + params.setFields("*", "score"); // we need the score for post-ranking + return params; + } + + private String getFacets() { // add site facets final StringBuilder fq = new StringBuilder(); @@ -441,34 +539,6 @@ public final class QueryParams { fq.append(" AND ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.modifier.filetype).append('\"'); } - if (this.contentdom == ContentDomain.IMAGE) { - fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\")"); - } - - if (this.contentdom == ContentDomain.AUDIO) { - fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"aif\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"aiff\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mp3\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"ogg\")"); - } - - if (this.contentdom == ContentDomain.VIDEO) { - fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mpg\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"avi\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mp4\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"mkv\")"); - } - - if (this.contentdom == ContentDomain.APP) { - fq.append(" AND (").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"apk\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"exe\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"dmg\""); - fq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"gz\")"); - } - if (this.inlink != null) { fq.append(" AND ").append(CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName()).append(":\"").append(this.inlink).append('\"'); } @@ -495,45 +565,15 @@ public final class QueryParams { //params.set("d", GeoLocation.degreeToKm(this.radius)); fq.append(" AND ").append("{!bbox sfield=" + CollectionSchema.coordinate_p.getSolrFieldName() + " pt=" + Double.toString(this.lat) + "," + Double.toString(this.lon) + " d=" + GeoLocation.degreeToKm(this.radius) + "}"); //params.setRows(Integer.MAX_VALUE); - } else { - // set ranking - if (this.ranking.coeff_date == RankingProfile.COEFF_MAX) { - // set a most-recent ordering - params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); - //params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // deprecated in Solr 4.2 - } } if (this.modifier.collection != null && this.modifier.collection.length() > 0) { fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection)); } - if (fq.length() > 0) { - params.setFilterQueries(fq.substring(5)); - } - - params.setStart(offset); - params.setRows(itemsPerPage); - - // set facet query attributes - if (getFacets && this.facetfields.size() > 0) { - params.setFacet(true); - params.setFacetLimit(this.maxfacets); - params.setFacetSort(FacetParams.FACET_SORT_COUNT); - params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fcs); - for (String field: this.facetfields) params.addFacetField(field); - } else { - params.setFacet(false); - } - - params.setFields("*", "score"); // we need the score for post-ranking - - // prepare result - ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); - this.cachedQuery = params; - return params; + return fq.length() > 0 ? fq.substring(5) : fq.toString(); } - + public QueryGoal getQueryGoal() { return this.queryGoal; } diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index f3d8cf98a..b1961d769 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -26,11 +26,13 @@ package net.yacy.search.query; +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -66,6 +68,7 @@ import net.yacy.document.Condenser; import net.yacy.document.LargeNumberCache; import net.yacy.document.LibraryProvider; import net.yacy.document.TextParser; +import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; @@ -77,6 +80,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.TermSearch; import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.MemoryControl; +import net.yacy.kelondro.util.SetTools; import net.yacy.peers.RemoteSearch; import net.yacy.peers.SeedDB; import net.yacy.peers.graphics.ProfilingGraph; @@ -278,7 +282,7 @@ public final class SearchEvent { // start a local solr search if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { - this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(true), 0, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist); + this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, true), 0, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist); } this.localsolroffset = this.query.itemsPerPage; @@ -1322,7 +1326,6 @@ public final class SearchEvent { return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); // result without snippet } - public ResultEntry oneResult(final int item, final long timeout) { // check if we already retrieved this item // (happens if a search pages is accessed a second time) @@ -1337,7 +1340,7 @@ public final class SearchEvent { int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded. if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}} if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { - this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.localsolroffset == 0), this.localsolroffset, nextitems, null /*this peer*/, Switchboard.urlBlacklist); + this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, this.localsolroffset == 0), this.localsolroffset, nextitems, null /*this peer*/, Switchboard.urlBlacklist); } this.localsolroffset += nextitems; } @@ -1358,7 +1361,7 @@ public final class SearchEvent { if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) { // at the end of a list, trigger a next solr search if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { - this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.localsolroffset == 0), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist); + this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, this.localsolroffset == 0), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist); } this.localsolroffset += this.query.itemsPerPage; } @@ -1370,6 +1373,81 @@ public final class SearchEvent { return null; } + private LinkedHashMap imageViewed = new LinkedHashMap(); + private LinkedHashMap imageSpare = new LinkedHashMap(); + private ImageResult nthImage(int item) { + Object o = SetTools.nth(this.imageViewed.values(), item); + if (o == null) return null; + return (ImageResult) o; + } + private ImageResult nextSpare() { + Map.Entry next = imageSpare.entrySet().iterator().next(); + imageViewed.put(next.getKey(), next.getValue()); + imageSpare.remove(next.getKey()); + return next.getValue(); + } + + public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException { + if (item < imageViewed.size()) return nthImage(item); + if (imageSpare.size() > 0) return nextSpare(); + + ResultEntry ms = oneResult(item, timeout); + // check if the match was made in the url or in the image links + if (ms == null) throw new MalformedURLException("nUll"); + int height = 0, width = 0, fileSize = 0; + SolrDocument doc = ms.getNode().getDocument(); + Collection alt = doc.getFieldValues(CollectionSchema.images_alt_txt.getSolrFieldName()); + Collection img = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName()); + Collection prt = doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()); + if (img != null) { + int c = 0; + for (Object i: img) { + String a = alt != null && alt.size() > c ? (String) SetTools.nth(alt, c) : ""; + if (query.getQueryGoal().matches((String) i) || query.getQueryGoal().matches(a)) { + try { + DigestURI imageUrl = new DigestURI((prt != null && prt.size() > c ? SetTools.nth(prt, c) : "http") + "://" + i); + Object heightO = SetTools.nth(doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName()), c); + Object widthO = SetTools.nth(doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName()), c); + if (heightO != null) height = (Integer) heightO; + if (widthO != null) width = (Integer) widthO; + String id = ASCII.String(imageUrl.hash()); + if (!imageViewed.containsKey(id) && !imageSpare.containsKey(id)) imageSpare.put(id, new ImageResult(ms.url(), imageUrl, "", a, width, height, fileSize)); + } catch (MalformedURLException e) { + continue; + } + } + c++; + } + } + if (MultiProtocolURI.isImage(MultiProtocolURI.getFileExtension(ms.url().getFileName()))) { + String id = ASCII.String(ms.hash()); + if (!imageViewed.containsKey(id) && !imageSpare.containsKey(id)) imageSpare.put(id, new ImageResult(ms.url(), ms.url(), "", ms.title(), width, height, fileSize)); + } + if (img != null && img.size() > 0) { + DigestURI imageUrl = new DigestURI((prt != null && prt.size() > 0 ? SetTools.nth(prt, 0) : "http") + "://" + SetTools.nth(img, 0)); + String imagetext = alt != null && alt.size() > 0 ? (String) SetTools.nth(alt, 0) : ""; + String id = ASCII.String(imageUrl.hash()); + if (!imageViewed.containsKey(id) && !imageSpare.containsKey(id)) imageSpare.put(id, new ImageResult(ms.url(), imageUrl, "", imagetext, width, height, fileSize)); + } + if (imageSpare.size() > 0) return nextSpare(); + throw new MalformedURLException("no image url found"); + } + + public class ImageResult { + public DigestURI imageUrl, sourceUrl; + public String mimetype = "", imagetext = ""; + public int width = 0, height = 0, fileSize = 0; + public ImageResult(DigestURI sourceUrl, DigestURI imageUrl, String mimetype, String imagetext, int width, int height, int fileSize) { + this.sourceUrl = sourceUrl; + this.imageUrl = imageUrl; + this.mimetype = mimetype; + this.imagetext = imagetext; + this.width = width; + this.height = height; + this.fileSize = fileSize; + } + } + public ArrayList> completeResults(final long waitingtime) { final long timeout = waitingtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + waitingtime; int i = 0; diff --git a/source/net/yacy/search/snippet/ResultEntry.java b/source/net/yacy/search/snippet/ResultEntry.java index 8f0c1a944..fe8099416 100644 --- a/source/net/yacy/search/snippet/ResultEntry.java +++ b/source/net/yacy/search/snippet/ResultEntry.java @@ -122,6 +122,9 @@ public class ResultEntry implements Comparable, Comparator