Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
fix: improve reporting of invalid URL host parts
- fix #1034 : make the message more generic ("Couldn't parse host…")
- fix #1079 : don't report underscores used in the URL host part
- expand test `testValidateXHTMLUrlChecksInvalid`
Loading branch information
@@ -22,8 +22,10 @@
package com.adobe.epubcheck.ops ;
import java.net.MalformedURLException ;
import java.net.URI ;
import java.net.URISyntaxException ;
import java.net.URL ;
import java.util.Locale ;
import java.util.Stack ;
@@ -226,15 +228,22 @@ else if (".".equals(href))
{
report. info(path, FeatureEnum . REFERENCE , href);
/*
* #708 report invalid HTTP/HTTPS URLs
* uri.scheme may be correct, but missing a : or a / from the //
* leads to uri.getHost() == null
*/
// Report if the host part couldn't be parsed correctly
// (either due to missing slashes (issue #708) or invalid characters (issue #1034))
if (uri. getHost() == null )
{
int missingSlashes = uri. getSchemeSpecificPart(). startsWith(" /" ) ? 1 : 2 ;
report. message(MessageId . RSC_023 , parser. getLocation(), uri, missingSlashes, uri. getScheme());
try
{
// if the URL contains underscore characters, try reparsing it without them,
// as underscores are accepted by browsers in the host part (even though they are disallowed there)
// see issue #1079
if (! href. contains(" _" ) || new URI (href. replace(' _' , ' x' )). getHost() == null ) {
report. message(MessageId . RSC_023 , parser. getLocation(), uri);
}
} catch (URISyntaxException ignored)
{
// ignored (well-formedness errors are caught earlier)
}
}
}
@@ -326,7 +326,7 @@ RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml fi
RSC_020 ='%1$s' is not a valid URI.
RSC_021 =A Search Key Map Document must point to Content Documents ('%1$s' was not found in the spine).
RSC_022 =Cannot check image details (requires Java version 7 or higher).
RSC_023 =The URL '%1$s' is missing %2$d slash(es) '/' after the protocol '%3$s:'
RSC_023 =Couldn't parse host of URL '%1$s' (probably due to disallowed characters or missing slashes after the protocol)
# Scripting
SCP_001 =Use of Javascript eval() function in EPUB scripts is a security risk.
@@ -230,7 +230,7 @@ public void testValidateXHTMLLINKInvalid()
public void testValidateXHTMLUrlChecksInvalid ()
{
Collections . addAll(expectedErrors, MessageId . RSC_020 );
Collections . addAll(expectedWarnings, MessageId . HTM_025 , MessageId . RSC_023 , MessageId . RSC_023 );
Collections . addAll(expectedWarnings, MessageId . HTM_025 , MessageId . RSC_023 , MessageId . RSC_023, MessageId . RSC_023 );
testValidateDocument(" xhtml/invalid/url-checks_issue-708.xhtml" , " application/xhtml+xml" ,
EPUBVersion . VERSION_3 );
}
@@ -11,7 +11,9 @@
< a href ="httpf://www.youtube.com/watch?v=xxxxxxxxxxx "> Unsupported URI scheme (HTM-025)</ a >
< a href ="https:/www.youtube.com/watch?v=xxxxxxxxxxx "> URL is missing slashes after protocol (RSC-023)</ a >
< a href ="https:www.youtube.com/watch?v=xxxxxxxxxxx "> URL is missing slashes after protocol (RSC-023)</ a >
< a href ="https://w,w.example.com/watch?v=xxxxxxxxxxx "> Host contains an invalid character (RSC-023)</ a >
< a href ="https://w_w.example.com "> Underscore in hosts are accepted in most browsers</ a >
< a href ="https://www.youtube.com/watch?v=xxxxxxxxxxx "> Valid URI</ a >
< a href ="https://youtube.com/watch?v=xxxxxxxxxxx "> Valid URI</ a >
< a href ="https://youtube.com/watch?v=xxxxxx%20xxxx "> Valid URI</ a >
Toggle all file notes
Toggle all file annotations