diff --git a/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java b/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java
index 68d8a4d5..f5c71e5d 100755
--- a/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java
+++ b/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java
@@ -22,8 +22,10 @@
package com.adobe.epubcheck.ops;
+import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.net.URL;
import java.util.Locale;
import java.util.Stack;
@@ -226,15 +228,22 @@ else if (".".equals(href))
{
report.info(path, FeatureEnum.REFERENCE, href);
- /*
- * #708 report invalid HTTP/HTTPS URLs
- * uri.scheme may be correct, but missing a : or a / from the //
- * leads to uri.getHost() == null
- */
+ // Report if the host part couldn't be parsed correctly
+ // (either due to missing slashes (issue #708) or invalid characters (issue #1034))
if (uri.getHost() == null)
{
- int missingSlashes = uri.getSchemeSpecificPart().startsWith("/") ? 1 : 2;
- report.message(MessageId.RSC_023, parser.getLocation(), uri, missingSlashes, uri.getScheme());
+ try
+ {
+ // if the URL contains underscore characters, try reparsing it with them replaced,
+ // as browsers accept underscores in the host part (even though they are disallowed)
+ // see issue #1079
+ if (!href.contains("_") || new URI(href.replace('_', 'x')).getHost() == null) {
+ report.message(MessageId.RSC_023, parser.getLocation(), uri);
+ }
+ } catch (URISyntaxException ignored)
+ {
+ // ignored (well-formedness errors are caught earlier)
+ }
}
}
diff --git a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties
index 6c6bfa53..316c39cb 100644
--- a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties
+++ b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties
@@ -326,7 +326,7 @@ RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml fi
RSC_020='%1$s' is not a valid URI.
RSC_021=A Search Key Map Document must point to Content Documents ('%1$s' was not found in the spine).
RSC_022=Cannot check image details (requires Java version 7 or higher).
-RSC_023=The URL '%1$s' is missing %2$d slash(es) '/' after the protocol '%3$s:'
+RSC_023=Couldn't parse host of URL '%1$s' (probably due to disallowed characters or missing slashes after the protocol)
#Scripting
SCP_001=Use of Javascript eval() function in EPUB scripts is a security risk.
diff --git a/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java b/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
index 5a2798e8..75ba890a 100644
--- a/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
+++ b/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
@@ -230,7 +230,7 @@ public void testValidateXHTMLLINKInvalid()
public void testValidateXHTMLUrlChecksInvalid()
{
Collections.addAll(expectedErrors, MessageId.RSC_020);
- Collections.addAll(expectedWarnings, MessageId.HTM_025, MessageId.RSC_023, MessageId.RSC_023);
+ Collections.addAll(expectedWarnings, MessageId.HTM_025, MessageId.RSC_023, MessageId.RSC_023, MessageId.RSC_023);
testValidateDocument("xhtml/invalid/url-checks_issue-708.xhtml", "application/xhtml+xml",
EPUBVersion.VERSION_3);
}
diff --git a/src/test/resources/30/single/xhtml/invalid/url-checks_issue-708.xhtml b/src/test/resources/30/single/xhtml/invalid/url-checks_issue-708.xhtml
index 8e672dde..16e62c06 100644
--- a/src/test/resources/30/single/xhtml/invalid/url-checks_issue-708.xhtml
+++ b/src/test/resources/30/single/xhtml/invalid/url-checks_issue-708.xhtml
@@ -11,7 +11,9 @@
Unsupported URI scheme (HTM-025)
URL is missing slashes after protocol (RSC-023)
URL is missing slashes after protocol (RSC-023)
-
+ Host contains an invalid character (RSC-023)
+
+ Underscores in hosts are accepted in most browsers
Valid URI
Valid URI
Valid URI