Skip to content

Commit a4fed67

Browse files
committed
feat: check that container-relative URLs have no query
EPUB relative URLs must be valid-relative-ocf-URL-with-fragment strings, which must not have query components. This commit adds a new check that reports `RSC-033` (error) when a query is found in a container-relative URL. It also slightly improves the URL checks in single-file validation mode, to ensure a URL is not always considered remote in that mode.
1 parent e3d3afe commit a4fed67

File tree

10 files changed

+113
-8
lines changed

10 files changed

+113
-8
lines changed

src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ private void initialize()
349349
severities.put(MessageId.RSC_030, Severity.ERROR);
350350
severities.put(MessageId.RSC_031, Severity.WARNING);
351351
severities.put(MessageId.RSC_032, Severity.ERROR);
352+
severities.put(MessageId.RSC_033, Severity.ERROR);
352353

353354
// Scripting
354355
severities.put(MessageId.SCP_001, Severity.SUPPRESSED); // checking scripts is out of scope

src/main/java/com/adobe/epubcheck/messages/MessageId.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ public enum MessageId implements Comparable<MessageId>
343343
RSC_030("RSC-030"),
344344
RSC_031("RSC-031"),
345345
RSC_032("RSC-032"),
346+
RSC_033("RSC-033"),
346347

347348
// Messages relating to scripting
348349
SCP_001("SCP-001"),

src/main/java/com/adobe/epubcheck/opf/ValidationContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ public String relativize(URL url)
205205
}
206206
else
207207
{
208-
return url.toString();
208+
return this.url.relativize(url);
209209
}
210210
}
211211

src/main/java/org/w3c/epubcheck/core/references/URLChecker.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public class URLChecker
2929
private URL baseURLTestB;
3030
private boolean isRemoteBase;
3131
private final Report report;
32+
private final ValidationContext context;
3233

3334
public URLChecker(ValidationContext context)
3435
{
@@ -37,7 +38,8 @@ public URLChecker(ValidationContext context)
3738

3839
public URLChecker(ValidationContext context, URL baseURL)
3940
{
40-
this.report = Preconditions.checkNotNull(context).report;
41+
this.context = Preconditions.checkNotNull(context);
42+
this.report = context.report;
4143
this.baseURL = Preconditions.checkNotNull(baseURL);
4244
this.isRemoteBase = false;
4345
try
@@ -117,12 +119,22 @@ private URL resolveURL(String string, boolean isBase, EPUBLocation location)
117119
isRemoteBase = true;
118120
return url;
119121
}
120-
// if relative URL "leaks" outside the container, report and continue
121-
else if (!isBase && !testA.toString().startsWith(TEST_BASE_A_FULL)
122-
|| !testB.toString().startsWith(TEST_BASE_B_FULL))
122+
else
123123
{
124-
// FIXME !!! this is broken, base s/b taken into account
125-
report.message(MessageId.RSC_026, location, string);
124+
// if URL has a query string, report and continue
125+
if (url.query() != null)
126+
{
127+
report.message(MessageId.RSC_033, location, string);
128+
url = url.withQuery(null);
129+
}
130+
// if relative URL "leaks" outside the container, report and continue
131+
// this check only makes sense when the container is present
132+
if (context.container.isPresent() && !isBase
133+
&& (!testA.toString().startsWith(TEST_BASE_A_FULL)
134+
|| !testB.toString().startsWith(TEST_BASE_B_FULL)))
135+
{
136+
report.message(MessageId.RSC_026, location, string);
137+
}
126138
}
127139
return url;
128140
} catch (GalimatiasParseException e)

src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ RSC_015=A fragment identifier is required for svg use tag references.
260260
RSC_016=Fatal Error while parsing file: %1$s
261261
RSC_017=Warning while parsing file: %1$s
262262
RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml file.
263-
RSC_020="%1$s" is not a valid URI (%2$s)
263+
RSC_020="%1$s" is not a valid URL (%2$s)
264264
RSC_021=A Search Key Map Document must point to Content Documents ("%1$s" was not found in the spine).
265265
RSC_022=Cannot check image details (requires Java version 7 or higher).
266266
RSC_024=Informative parsing warning: %1$s
@@ -272,3 +272,4 @@ RSC_029=Data URL is not allowed in this context.
272272
RSC_030=File URLs are not allowed in EPUB, but found "%1$s".
273273
RSC_031=Remote resource references should use HTTPS, but found "%1$s".
274274
RSC_032=Fallback must be provided for foreign resources, but found none for resource "%1$s" of type "%2$s".
275+
RSC_033=Relative URL strings must not have a query component, but found one in "%1$s".
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<!DOCTYPE html>
2+
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3+
<head>
4+
<meta charset="utf-8"/>
5+
<title>Minimal EPUB</title>
6+
</head>
7+
<body>
8+
<h1>Loomings</h1>
9+
<a href="https://example.org">absolute URL</a>
10+
<a href="content_002.xhtml">relative URL</a>
11+
<a href="../content_002.xhtml">relative URL, one directory up</a>
12+
<a href="content_002.xhtml#id">fragment URL</a>
13+
<a href="#id">fragment-only URL</a>
14+
</body>
15+
</html>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
3+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
4+
<dc:title id="title">Minimal EPUB 3.0</dc:title>
5+
<dc:language>en</dc:language>
6+
<dc:identifier id="q">NOID</dc:identifier>
7+
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
8+
</metadata>
9+
<manifest>
10+
<item id="content_001" href="content_001.xhtml?value=value" media-type="application/xhtml+xml"/>
11+
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
12+
</manifest>
13+
<spine>
14+
<itemref idref="content_001"/>
15+
</spine>
16+
</package>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
3+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
4+
<dc:title id="title">Minimal EPUB 3.0</dc:title>
5+
<dc:language>en</dc:language>
6+
<dc:identifier id="q">NOID</dc:identifier>
7+
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
8+
<link rel="record" href="atom.xml?name=value" media-type="application/atom+xml"/>
9+
</metadata>
10+
<manifest>
11+
<item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
12+
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
13+
</manifest>
14+
<spine>
15+
<itemref idref="content_001" />
16+
</spine>
17+
</package>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<!DOCTYPE html>
2+
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3+
<head>
4+
<meta charset="utf-8"/>
5+
<title>Minimal EPUB</title>
6+
</head>
7+
<body>
8+
<h1>Loomings</h1>
9+
<a href="content_002.xhtml?name=value">Call me Ishmael.</a>
10+
</body>
11+
</html>

src/test/resources/epub3/04-ocf/ocf.feature

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ Feature: EPUB 3 — Open Container Format
105105

106106
### 4.1.5 URLs in the OCF abstract container
107107

108+
#### Valid container URLs
109+
110+
@spec @xref:sec-container-iri
111+
Scenario: Allow valid container URLs in XHTML
112+
When checking EPUB 'url-in-xhtml-valid.xhtml'
113+
And no errors or warnings are reported
114+
115+
#### Invalid container URLs
116+
108117
@spec @xref:sec-container-iri
109118
Scenario: Report leaking URLs in the package document
110119
When checking EPUB 'ocf-url-leaking-in-opf-error'
@@ -117,6 +126,28 @@ Feature: EPUB 3 — Open Container Format
117126
Then error RSC-026 is reported
118127
And no other errors or warnings are reported
119128

129+
#### URL query checks:
130+
131+
@spec @xref:sec-container-iri
132+
Scenario: Report a URL query string found in a manifest item
133+
When checking EPUB 'url-query-in-package-item-error.opf'
134+
Then error RSC-033 is reported
135+
And no other errors or warnings are reported
136+
137+
@spec @xref:sec-container-iri
138+
Scenario: Report a URL query string found in a package link
139+
When checking EPUB 'url-query-in-package-link-error.opf'
140+
Then error RSC-033 is reported
141+
And no other errors or warnings are reported
142+
143+
@spec @xref:sec-container-iri
144+
Scenario: Report a URL query string found in an XHTML `a` element
145+
When checking EPUB 'url-query-in-xhtml-a-error.xhtml'
146+
Then error RSC-033 is reported
147+
And no other errors or warnings are reported
148+
149+
#### resource existence checks:
150+
120151
@spec @xref:sec-container-iri
121152
Scenario: Report a reference from an XHTML `cite` attribute not declared in the manifest
122153
When checking EPUB 'url-xhtml-cite-missing-resource-error'

0 commit comments

Comments
 (0)