From 8682dfbd5ea8445c6bcfbb32b4f15c17fa62853a Mon Sep 17 00:00:00 2001 From: luc Date: Wed, 10 Feb 2016 09:02:21 +0100 Subject: [PATCH] Updated getpageinfo outputs to return page icons list. --- htroot/api/getpageinfo.java | 28 +++++++++++++++------------- htroot/api/getpageinfo.json | 4 +++- htroot/api/getpageinfo.xml | 6 +++++- htroot/api/getpageinfo_p.java | 19 +++++++++++-------- htroot/api/getpageinfo_p.xml | 6 +++++- 5 files changed, 39 insertions(+), 24 deletions(-) diff --git a/htroot/api/getpageinfo.java b/htroot/api/getpageinfo.java index ca926022e..f6e2941b8 100644 --- a/htroot/api/getpageinfo.java +++ b/htroot/api/getpageinfo.java @@ -35,11 +35,6 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; - import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -52,6 +47,11 @@ import net.yacy.search.Switchboard; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + public class getpageinfo { @@ -65,7 +65,7 @@ public class getpageinfo { prop.put("lang", ""); prop.put("robots-allowed", "3"); //unknown prop.put("robotsInfo", ""); //unknown - prop.put("favicon",""); + prop.put("icons","0"); prop.put("sitelist", ""); prop.put("filter", ".*"); prop.put("oai", 0); @@ -110,13 +110,15 @@ public class getpageinfo { // put the document title prop.putXML("title", removelinebreaks(scraper.dc_title())); - DigestURL favicon = null; - if (scraper.getIcons() != null && !scraper.getIcons().isEmpty()) { - favicon = scraper.getIcons().keySet().iterator().next(); - } - - // put the favicon that belongs to the document - prop.put("favicon", (favicon == null) ? "" : favicon.toString()); + Set iconURLs = scraper.getIcons().keySet(); + int i = 0; + for (DigestURL iconURL : iconURLs) { + prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false)); + prop.put("icons_" + i + "_eol", 1); + i++; + } + prop.put("icons_" + (i - 1) + "_eol", 0); + prop.put("icons", iconURLs.size()); // put keywords final Set list = scraper.dc_subject(); diff --git a/htroot/api/getpageinfo.json b/htroot/api/getpageinfo.json index cfc70dc5f..abb138222 100644 --- a/htroot/api/getpageinfo.json +++ b/htroot/api/getpageinfo.json @@ -6,7 +6,9 @@ "robots": "#(robots-allowed)#0::1::#(/robots-allowed)#", "robotsInfo": "#[robotsInfo]#", - "favicon": "#[favicon]#", + "icons": [#{icons}# + "#[icon]#"#(eol)#::,#(/eol)# + #{/icons}#], "filter": "#[filter]#", "tags": "#{tags}##[tag]#,#{/tags}#", diff --git a/htroot/api/getpageinfo.xml b/htroot/api/getpageinfo.xml index 0758b2333..9ccd8e3d9 100644 --- a/htroot/api/getpageinfo.xml +++ b/htroot/api/getpageinfo.xml @@ -8,7 +8,11 @@ #{sitemaps}# #[sitemap]# #{/sitemaps}# - #[favicon]# + + #{icons}# + #[icon]# + #{/icons}# + #[sitelist]# #[filter]# diff --git a/htroot/api/getpageinfo_p.java b/htroot/api/getpageinfo_p.java index 15bf2ae87..f0286c027 100644 --- a/htroot/api/getpageinfo_p.java +++ b/htroot/api/getpageinfo_p.java @@ -66,7 +66,7 @@ public class getpageinfo_p { prop.put("robots-allowed", "3"); //unknown prop.put("robotsInfo", ""); //unknown prop.put("sitemap", ""); - prop.put("favicon",""); + prop.put("icons","0"); prop.put("sitelist", ""); prop.put("filter", ".*"); prop.put("oai", 0); @@ -109,14 +109,17 @@ public class getpageinfo_p { if (scraper != null) { // put the document title prop.putXML("title", scraper.dc_title()); - - DigestURL favicon = null; - if (scraper.getIcons() != null && !scraper.getIcons().isEmpty()) { - favicon = scraper.getIcons().keySet().iterator().next(); - } - // put the favicon that belongs to the document - prop.put("favicon", (favicon == null) ? "" : favicon.toString()); + // put the icons that belongs to the document + Set iconURLs = scraper.getIcons().keySet(); + int i = 0; + for (DigestURL iconURL : iconURLs) { + prop.putXML("icons_" + i + "_icon", iconURL.toNormalform(false)); + prop.put("icons_" + i + "_eol", 1); + i++; + } + prop.put("icons_" + (i - 1) + "_eol", 0); + prop.put("icons", iconURLs.size()); // put keywords final Set list = scraper.dc_subject(); diff --git a/htroot/api/getpageinfo_p.xml b/htroot/api/getpageinfo_p.xml index 0758b2333..9ccd8e3d9 100644 --- a/htroot/api/getpageinfo_p.xml +++ b/htroot/api/getpageinfo_p.xml @@ -8,7 +8,11 @@ #{sitemaps}# #[sitemap]# #{/sitemaps}# - #[favicon]# + + #{icons}# + #[icon]# + #{/icons}# + #[sitelist]# #[filter]#