Permalink
Browse files

Made sure webstructure.xml API produces valid XML.

Host names should not contain XML special characters such as quotation
marks, but at this stage the WebGraph may have mistakenly recorded a host
name with such characters. What's more, the DigestURL constructor does
not prevent this.
By using serverObjects.putXML to encode host names, we ensure here that
the rendered XML is well formed and can be parsed by external tools
even if a structure entry is incorrect.
  • Loading branch information...
luccioman committed Jan 17, 2017
1 parent d9766ca commit 17b7c92009befed5ae4928d1c965f113e7cea0e5
Showing with 2 additions and 2 deletions.
  1. +2 −2 htroot/api/webstructure.java
@@ -263,7 +263,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
public static void reference(serverObjects prop, String prefix, int c, WebStructureGraph.StructureEntry sentry, WebStructureGraph ws) {
prop.put(prefix + "_domains_" + c + "_hash", sentry.hosthash);
prop.put(prefix + "_domains_" + c + "_domain", sentry.hostname);
prop.putXML(prefix + "_domains_" + c + "_domain", sentry.hostname);
prop.put(prefix + "_domains_" + c + "_date", sentry.date);
Iterator<Map.Entry<String, Integer>> k = sentry.references.entrySet().iterator();
Map.Entry<String, Integer> refentry;
@@ -276,7 +276,7 @@ public static void reference(serverObjects prop, String prefix, int c, WebStruct
refdom = ws.hostHash2hostName(refhash);
if (refdom == null) continue refloop;
prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refhash", refhash);
prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refdom", refdom);
prop.putXML(prefix + "_domains_" + c + "_citations_" + d + "_refdom", refdom);
refcount = refentry.getValue();
prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refcount", refcount.intValue());
d++;

0 comments on commit 17b7c92

Please sign in to comment.