Skip to content

Commit

Permalink
feat: check that container-relative URLs have no query
Browse files Browse the repository at this point in the history
EPUB relative URLs must be valid-relative-ocf-URL-with-fragment strings,
which must not have query components.

This commit adds a new check that reports `RSC-033` (error) when a query
is found in a container-relative URL.

It also slightly improves the URL checks in single-file validation mode,
to ensure a URL is not always considered remote in that mode.
  • Loading branch information
rdeltour committed Nov 28, 2022
1 parent e3d3afe commit a4fed67
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 8 deletions.
Expand Up @@ -349,6 +349,7 @@ private void initialize()
severities.put(MessageId.RSC_030, Severity.ERROR);
severities.put(MessageId.RSC_031, Severity.WARNING);
severities.put(MessageId.RSC_032, Severity.ERROR);
severities.put(MessageId.RSC_033, Severity.ERROR);

// Scripting
severities.put(MessageId.SCP_001, Severity.SUPPRESSED); // checking scripts is out of scope
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Expand Up @@ -343,6 +343,7 @@ public enum MessageId implements Comparable<MessageId>
RSC_030("RSC-030"),
RSC_031("RSC-031"),
RSC_032("RSC-032"),
RSC_033("RSC-033"),

// Messages relating to scripting
SCP_001("SCP-001"),
Expand Down
Expand Up @@ -205,7 +205,7 @@ public String relativize(URL url)
}
else
{
return url.toString();
return this.url.relativize(url);
}
}

Expand Down
24 changes: 18 additions & 6 deletions src/main/java/org/w3c/epubcheck/core/references/URLChecker.java
Expand Up @@ -29,6 +29,7 @@ public class URLChecker
private URL baseURLTestB;
private boolean isRemoteBase;
private final Report report;
private final ValidationContext context;

public URLChecker(ValidationContext context)
{
Expand All @@ -37,7 +38,8 @@ public URLChecker(ValidationContext context)

public URLChecker(ValidationContext context, URL baseURL)
{
this.report = Preconditions.checkNotNull(context).report;
this.context = Preconditions.checkNotNull(context);
this.report = context.report;
this.baseURL = Preconditions.checkNotNull(baseURL);
this.isRemoteBase = false;
try
Expand Down Expand Up @@ -117,12 +119,22 @@ private URL resolveURL(String string, boolean isBase, EPUBLocation location)
isRemoteBase = true;
return url;
}
// if relative URL "leaks" outside the container, report and continue
else if (!isBase && !testA.toString().startsWith(TEST_BASE_A_FULL)
|| !testB.toString().startsWith(TEST_BASE_B_FULL))
else
{
// FIXME !!! this is broken, base s/b taken into account
report.message(MessageId.RSC_026, location, string);
// if URL has a query string, report and continue
if (url.query() != null)
{
report.message(MessageId.RSC_033, location, string);
url = url.withQuery(null);
}
// if relative URL "leaks" outside the container, report and continue
// this check only makes sense when the container is present
if (context.container.isPresent() && !isBase
&& (!testA.toString().startsWith(TEST_BASE_A_FULL)
|| !testB.toString().startsWith(TEST_BASE_B_FULL)))
{
report.message(MessageId.RSC_026, location, string);
}
}
return url;
} catch (GalimatiasParseException e)
Expand Down
Expand Up @@ -260,7 +260,7 @@ RSC_015=A fragment identifier is required for svg use tag references.
RSC_016=Fatal Error while parsing file: %1$s
RSC_017=Warning while parsing file: %1$s
RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml file.
RSC_020="%1$s" is not a valid URI (%2$s)
RSC_020="%1$s" is not a valid URL (%2$s)
RSC_021=A Search Key Map Document must point to Content Documents ("%1$s" was not found in the spine).
RSC_022=Cannot check image details (requires Java version 7 or higher).
RSC_024=Informative parsing warning: %1$s
Expand All @@ -272,3 +272,4 @@ RSC_029=Data URL is not allowed in this context.
RSC_030=File URLs are not allowed in EPUB, but found "%1$s".
RSC_031=Remote resource references should use HTTPS, but found "%1$s".
RSC_032=Fallback must be provided for foreign resources, but found none for resource "%1$s" of type "%2$s".
RSC_033=Relative URL strings must not have a query component, but found one in "%1$s".
15 changes: 15 additions & 0 deletions src/test/resources/epub3/04-ocf/files/url-in-xhtml-valid.xhtml
@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal EPUB</title>
</head>
<body>
<h1>Loomings</h1>
<a href="https://example.org">absolute URL</a>
<a href="content_002.xhtml">relative URL</a>
<a href="../content_002.xhtml">relative URL, one directory up</a>
<a href="content_002.xhtml#id">fragment URL</a>
<a href="#id">fragment-only URL</a>
</body>
</html>
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
</metadata>
<manifest>
<item id="content_001" href="content_001.xhtml?value=value" media-type="application/xhtml+xml"/>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
</manifest>
<spine>
<itemref idref="content_001"/>
</spine>
</package>
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
<link rel="record" href="atom.xml?name=value" media-type="application/atom+xml"/>
</metadata>
<manifest>
<item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
</manifest>
<spine>
<itemref idref="content_001" />
</spine>
</package>
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal EPUB</title>
</head>
<body>
<h1>Loomings</h1>
<a href="content_002.xhtml?name=value">Call me Ishmael.</a>
</body>
</html>
31 changes: 31 additions & 0 deletions src/test/resources/epub3/04-ocf/ocf.feature
Expand Up @@ -105,6 +105,15 @@ Feature: EPUB 3 — Open Container Format

### 4.1.5 URLs in the OCF abstract container

#### Valid container URLs

@spec @xref:sec-container-iri
Scenario: Allow valid container URLs in XHTML
When checking EPUB 'url-in-xhtml-valid.xhtml'
Then no errors or warnings are reported

#### Invalid container URLs

@spec @xref:sec-container-iri
Scenario: Report leaking URLs in the package document
When checking EPUB 'ocf-url-leaking-in-opf-error'
Expand All @@ -117,6 +126,28 @@ Feature: EPUB 3 — Open Container Format
Then error RSC-026 is reported
And no other errors or warnings are reported

#### URL query checks:

@spec @xref:sec-container-iri
Scenario: Report a URL query string found in a manifest item
When checking EPUB 'url-query-in-package-item-error.opf'
Then error RSC-033 is reported
And no other errors or warnings are reported

@spec @xref:sec-container-iri
Scenario: Report a URL query string found in a package link
When checking EPUB 'url-query-in-package-link-error.opf'
Then error RSC-033 is reported
And no other errors or warnings are reported

@spec @xref:sec-container-iri
Scenario: Report a URL query string found in an XHTML hyperlink
When checking EPUB 'url-query-in-xhtml-a-error.xhtml'
Then error RSC-033 is reported
And no other errors or warnings are reported

#### resource existence checks:

@spec @xref:sec-container-iri
Scenario: Report a reference from an XHTML `cite` attribute not declared in the manifest
When checking EPUB 'url-xhtml-cite-missing-resource-error'
Expand Down

0 comments on commit a4fed67

Please sign in to comment.