Skip to content

Commit

Permalink
fix: improve reporting of invalid URL host parts
Browse files Browse the repository at this point in the history
- fix #1034: make the message more generic ("Couldn't parse host…")
- fix #1079: don't report underscores used in the URL host part
- expand test `testValidateXHTMLUrlChecksInvalid`
  • Loading branch information
rdeltour committed Apr 27, 2020
1 parent 6af3b98 commit d2728ee
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 10 deletions.
23 changes: 16 additions & 7 deletions src/main/java/com/adobe/epubcheck/ops/OPSHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@

package com.adobe.epubcheck.ops;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Locale;
import java.util.Stack;

Expand Down Expand Up @@ -226,15 +228,22 @@ else if (".".equals(href))
{
report.info(path, FeatureEnum.REFERENCE, href);

/*
* #708 report invalid HTTP/HTTPS URLs
* uri.scheme may be correct, but missing a : or a / from the //
* leads to uri.getHost() == null
*/
// Report if the host part couldn't be parsed correctly
// (either due to missing slashes (issue #708) or invalid characters (issue #1034))
if (uri.getHost() == null)
{
int missingSlashes = uri.getSchemeSpecificPart().startsWith("/") ? 1 : 2;
report.message(MessageId.RSC_023, parser.getLocation(), uri, missingSlashes, uri.getScheme());
try
{
// if the URL contains underscore characters, try reparsing it without them,
// as underscores are accepted by browsers in the host part (even if it's disallowed)
// see issue #1079
if (!href.contains("_") || new URI(href.replace('_', 'x')).getHost() == null) {
report.message(MessageId.RSC_023, parser.getLocation(), uri);
}
} catch (URISyntaxException ignored)
{
// ignored (well-formedness errors are caught earlier)
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml fi
RSC_020='%1$s' is not a valid URI.
RSC_021=A Search Key Map Document must point to Content Documents ('%1$s' was not found in the spine).
RSC_022=Cannot check image details (requires Java version 7 or higher).
RSC_023=The URL '%1$s' is missing %2$d slash(es) '/' after the protocol '%3$s:'
RSC_023=Couldn't parse host of URL '%1$s' (probably due to disallowed characters or missing slashes after the protocol)

#Scripting
SCP_001=Use of Javascript eval() function in EPUB scripts is a security risk.
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ public void testValidateXHTMLLINKInvalid()
public void testValidateXHTMLUrlChecksInvalid()
{
Collections.addAll(expectedErrors, MessageId.RSC_020);
Collections.addAll(expectedWarnings, MessageId.HTM_025, MessageId.RSC_023, MessageId.RSC_023);
Collections.addAll(expectedWarnings, MessageId.HTM_025, MessageId.RSC_023, MessageId.RSC_023, MessageId.RSC_023);
testValidateDocument("xhtml/invalid/url-checks_issue-708.xhtml", "application/xhtml+xml",
EPUBVersion.VERSION_3);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
<a href="httpf://www.youtube.com/watch?v=xxxxxxxxxxx">Unsupported URI scheme (HTM-025)</a>
<a href="https:/www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>
<a href="https:www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>

<a href="https://w,w.example.com/watch?v=xxxxxxxxxxx">Host contains an invalid character (RSC-023)</a>

<a href="https://w_w.example.com">Underscores in hosts are accepted by most browsers</a>
<a href="https://www.youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
<a href="https://youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
<a href="https://youtube.com/watch?v=xxxxxx%20xxxx">Valid URI</a>
Expand Down

0 comments on commit d2728ee

Please sign in to comment.