From 17b7c92009befed5ae4928d1c965f113e7cea0e5 Mon Sep 17 00:00:00 2001 From: luccioman Date: Tue, 17 Jan 2017 15:59:55 +0100 Subject: [PATCH] Made sure webstructure.xml API produces valid XML. Host names should not contain XML special characters such as quotation marks, but at this stage the WebGraph may have mistakenly recorded a host name with such characters. What's more, the DigestURL constructor does not prevent this. By using serverObjects.putXML to encode host names, we ensure here that the rendered XML is well formed and can be parsed by external tools even if a structure entry is incorrect. --- htroot/api/webstructure.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java index cbf5367c8..fae5e46a2 100644 --- a/htroot/api/webstructure.java +++ b/htroot/api/webstructure.java @@ -263,7 +263,7 @@ public class webstructure { public static void reference(serverObjects prop, String prefix, int c, WebStructureGraph.StructureEntry sentry, WebStructureGraph ws) { prop.put(prefix + "_domains_" + c + "_hash", sentry.hosthash); - prop.put(prefix + "_domains_" + c + "_domain", sentry.hostname); + prop.putXML(prefix + "_domains_" + c + "_domain", sentry.hostname); prop.put(prefix + "_domains_" + c + "_date", sentry.date); Iterator> k = sentry.references.entrySet().iterator(); Map.Entry refentry; @@ -276,7 +276,7 @@ public class webstructure { refdom = ws.hostHash2hostName(refhash); if (refdom == null) continue refloop; prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refhash", refhash); - prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refdom", refdom); + prop.putXML(prefix + "_domains_" + c + "_citations_" + d + "_refdom", refdom); refcount = refentry.getValue(); prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refcount", refcount.intValue()); d++;