From 07222b3e1a25d0c8b882bbc345d85bd46bb45c84 Mon Sep 17 00:00:00 2001 From: luc Date: Fri, 5 Feb 2016 17:05:36 +0100 Subject: [PATCH] Added favicon url transmission in RWI chunks. --- htroot/yacysearchitem.java | 46 ++-------- .../kelondro/data/meta/URIMetadataNode.java | 88 ++++++++++++++++++- source/net/yacy/search/query/SearchEvent.java | 4 + .../data/meta/URIMetadataNodeTest.java | 66 ++++++++++++-- 4 files changed, 161 insertions(+), 43 deletions(-) diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 105162c67..cce332601 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -356,49 +356,21 @@ public class yacysearchitem { * We look preferably for a standard icon with preferred size, but * accept as a fallback other icons below 128x128 or with no known size */ - IconEntry faviconEntry = null; - boolean foundStandard = false; - double closestDistance = Double.MAX_VALUE; - for (IconEntry icon : result.getIcons()) { - boolean isStandard = icon.isStandardIcon(); - double distance = IconEntry.getDistance(icon.getClosestSize(preferredSize), preferredSize); - boolean match = false; - if (foundStandard) { - /* - * Already found a standard icon : now must find a standard icon - * with closer size - */ - match = isStandard && distance < closestDistance; - } else { - /* - * No standard icon yet found : prefer a standard icon, or check - * size - */ - match = isStandard || distance < closestDistance; - } - if (match) { - faviconEntry = icon; - closestDistance = distance; - foundStandard = isStandard; - if (isStandard && distance == 0.0) { - break; - } - } - } + IconEntry faviconEntry = result.getFavicon(preferredSize); DigestURL faviconURL; - try { - if (faviconEntry == null) { + if (faviconEntry == null) { + try { String defaultFaviconURL = result.url().getProtocol() + "://" + result.url().getHost() + ((result.url().getPort() != -1) ? (":" + result.url().getPort()) : "") + "/favicon.ico"; faviconURL = new DigestURL(defaultFaviconURL); - } else { - faviconURL = faviconEntry.getUrl(); + } catch (final MalformedURLException e1) { + ConcurrentLog.logException(e1); + faviconURL = null; } - - } catch (final MalformedURLException e1) { - ConcurrentLog.logException(e1); - faviconURL = null; + } else { + faviconURL = faviconEntry.getUrl(); } + return faviconURL; } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index b97429d19..785ffe62a 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -57,6 +57,7 @@ import net.yacy.document.Tokenizer; import net.yacy.document.parser.pdfParser; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.IconEntry; +import net.yacy.document.parser.html.IconLinkRelations; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceVars; @@ -96,6 +97,12 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable protocols = new ArrayList(1); + final List sizes = new ArrayList(1); + final List stubs = new ArrayList(1); + final List rels = new ArrayList(1); + + if (iconURL != null) { + String protocol = iconURL.getProtocol(); + protocols.add(protocol); + + sizes.add(""); + stubs.add(iconURL.toString().substring(protocol.length() + 3)); + rels.add(IconLinkRelations.ICON.getRelValue()); + } + + this.setField(CollectionSchema.icons_protocol_sxt.name(), protocols); + this.setField(CollectionSchema.icons_urlstub_sxt.name(), stubs); + this.setField(CollectionSchema.icons_rel_sxt.name(), rels); + this.setField(CollectionSchema.icons_sizes_sxt.name(), sizes); + } + /** * @param name field name * @return field values from field name eventually immutable empty list when field has no values or is not a List @@ -673,6 +752,13 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that // generalize above hack (regarding url with file extension but beeing a html (with html mime) if (doc.doctype() == Response.DT_IMAGE) { + /* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents, + * or documents coming from previous versions peers */ if (!doc.url().getFileName().endsWith(".ico")) { // we don't want favicons final String id = ASCII.String(doc.hash()); // check image size @@ -1657,6 +1659,8 @@ public final class SearchEvent { List width = widthO == null ? null : (List) widthO; for (int c = 0; c < img.size(); c++) { String image_urlstub = (String) img.get(c); + /* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents, + * or documents coming from previous versions peers */ if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic try { int h = height == null ? 0 : (Integer) height.get(c); diff --git a/test/java/net/yacy/kelondro/data/meta/URIMetadataNodeTest.java b/test/java/net/yacy/kelondro/data/meta/URIMetadataNodeTest.java index cf56030cd..7eadb6cba 100644 --- a/test/java/net/yacy/kelondro/data/meta/URIMetadataNodeTest.java +++ b/test/java/net/yacy/kelondro/data/meta/URIMetadataNodeTest.java @@ -65,8 +65,8 @@ public class URIMetadataNodeTest { Collection icons = metadataNode.getIcons(); int nb = 0; /* Check results consistency */ - for(IconEntry icon : icons) { - if("http://somehost.org/static/images/icon16.png".equals(icon.getUrl().toNormalform(false))) { + for (IconEntry icon : icons) { + if ("http://somehost.org/static/images/icon16.png".equals(icon.getUrl().toNormalform(false))) { Assert.assertEquals(1, icon.getSizes().size()); Dimension size = icon.getSizes().iterator().next(); Assert.assertEquals(16, size.width); @@ -74,7 +74,7 @@ public class URIMetadataNodeTest { Assert.assertEquals(1, icon.getRel().size()); Assert.assertEquals("icon", icon.getRel().iterator().next()); nb++; - } else if("https://somehost.org/static/images/icon32.png".equals(icon.getUrl().toNormalform(false))) { + } else if ("https://somehost.org/static/images/icon32.png".equals(icon.getUrl().toNormalform(false))) { Assert.assertEquals(1, icon.getSizes().size()); Dimension size = icon.getSizes().iterator().next(); Assert.assertEquals(32, size.width); @@ -82,7 +82,7 @@ public class URIMetadataNodeTest { Assert.assertEquals(1, icon.getRel().size()); Assert.assertEquals("icon", icon.getRel().iterator().next()); nb++; - } else if("https://somehost.org/static/images/icon64.png".equals(icon.getUrl().toNormalform(false))) { + } else if ("https://somehost.org/static/images/icon64.png".equals(icon.getUrl().toNormalform(false))) { Assert.assertEquals(1, icon.getSizes().size()); Dimension size = icon.getSizes().iterator().next(); Assert.assertEquals(58, size.width); @@ -90,7 +90,7 @@ public class URIMetadataNodeTest { Assert.assertEquals(1, icon.getRel().size()); Assert.assertEquals("icon", icon.getRel().iterator().next()); nb++; - } else if("http://somehost.org/static/images/iconApple128.png".equals(icon.getUrl().toNormalform(false))) { + } else if ("http://somehost.org/static/images/iconApple128.png".equals(icon.getUrl().toNormalform(false))) { Assert.assertEquals(1, icon.getSizes().size()); Dimension size = icon.getSizes().iterator().next(); Assert.assertEquals(128, size.width); @@ -154,4 +154,60 @@ public class URIMetadataNodeTest { Assert.assertEquals(0, icons.size()); } + /** + * Check encoding/decoding consistency + * + * @throws MalformedURLException + */ + @Test + public final void testEncodeDecode() throws MalformedURLException { + URIMetadataNode metadataNode = new URIMetadataNode(new DigestURL("http://somehost.org")); + metadataNode + .setField(CollectionSchema.icons_urlstub_sxt.getSolrFieldName(), + new String[] { "somehost.org/static/images/icon16.png", "somehost.org/static/images/icon32.png", + "somehost.org/static/images/icon64.png", + "somehost.org/static/images/iconApple128.png" }); + List protocols = CollectionConfiguration + .protocolList2indexedList(Arrays.asList(new String[] { "http", "https", "https", "http" })); + metadataNode.setField(CollectionSchema.icons_protocol_sxt.getSolrFieldName(), protocols); + metadataNode.setField(CollectionSchema.icons_rel_sxt.getSolrFieldName(), + new String[] { "icon", "icon", "icon", "apple-touch-icon" }); + metadataNode.setField(CollectionSchema.icons_sizes_sxt.getSolrFieldName(), + new String[] { "16x24", "32x32", "58x64", "128x128" }); + + String encoded = metadataNode.toString(); + URIMetadataNode decoded = URIMetadataNode.importEntry(encoded, "dht"); + Collection icons = decoded.getIcons(); + + /* + * Only icon which is the closest to 16x16 pixels is encoded, and sizes + * and rel attribute are not encoded + */ + Assert.assertEquals(1, icons.size()); + IconEntry icon = icons.iterator().next(); + + Assert.assertEquals(0, icon.getSizes().size()); + + Assert.assertEquals("http://somehost.org/static/images/icon16.png", icon.getUrl().toNormalform(false)); + + Assert.assertEquals(1, icon.getRel().size()); + Assert.assertEquals("icon", icon.getRel().iterator().next()); + } + + /** + * Check encoding/decoding consistency when document has no indexed icon + * + * @throws MalformedURLException + */ + @Test + public final void testEncodeDecodeNoIcon() throws MalformedURLException { + URIMetadataNode metadataNode = new URIMetadataNode(new DigestURL("http://somehost.org")); + + String encoded = metadataNode.toString(); + URIMetadataNode decoded = URIMetadataNode.importEntry(encoded, "dht"); + Collection icons = decoded.getIcons(); + + Assert.assertEquals(0, icons.size()); + } + }