diff --git a/src/main/java/com/adobe/epubcheck/ocf/OCFContainer.java b/src/main/java/com/adobe/epubcheck/ocf/OCFContainer.java index 6ebf1710c..ebaf4d087 100644 --- a/src/main/java/com/adobe/epubcheck/ocf/OCFContainer.java +++ b/src/main/java/com/adobe/epubcheck/ocf/OCFContainer.java @@ -79,7 +79,14 @@ public OCFContainer(Builder builder) public boolean contains(URL resource) { - return resources.containsKey(resource); + if (resources.containsKey(resource)) + { + return true; + } + else + { + return resources.containsKey(URLUtils.normalize(resource)); + } } @Override @@ -134,5 +141,4 @@ public boolean isRemote(URL url) } } - } diff --git a/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java b/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java index 080e6f03d..f7ba681a7 100644 --- a/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java +++ b/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java @@ -2,6 +2,8 @@ import java.net.URI; +import org.w3c.epubcheck.util.url.URLUtils; + import com.adobe.epubcheck.api.EPUBLocation; import com.adobe.epubcheck.api.Report; import com.adobe.epubcheck.messages.MessageId; @@ -64,7 +66,7 @@ public URL setBase(String newBase, EPUBLocation location) public URL checkURL(String string, EPUBLocation location) { - URL url = resolveURL(string, false, location); + URL url = URLUtils.normalize(resolveURL(string, false, location)); return url; } diff --git a/src/main/java/org/w3c/epubcheck/util/url/URLUtils.java b/src/main/java/org/w3c/epubcheck/util/url/URLUtils.java index 526e869d5..e58146ed4 100644 --- a/src/main/java/org/w3c/epubcheck/util/url/URLUtils.java +++ b/src/main/java/org/w3c/epubcheck/util/url/URLUtils.java @@ -14,6 +14,7 @@ import io.mola.galimatias.GalimatiasParseException; import io.mola.galimatias.ParseIssue; import io.mola.galimatias.URL; +import io.mola.galimatias.canonicalize.DecodeUnreservedCanonicalizer; //FIXME 2022 add unit tests public final class URLUtils @@ -79,9 +80,9 @@ else if (urlA.equals(urlB)) * in EPUB (to test for remote resources compared to container URLs). * * @param test - * the URL to test + * the URL to test * @param local - * the URL it is tested against + * the URL it is tested against * @return `true` if and only if `test` is remote compared to `local`. */ public static boolean isRemote(URL test, URL local) @@ -151,13 +152,33 @@ public static String decode(String string) return percentDecode(string); } + public static URL normalize(URL url) + { + URL normalized = url; + if (url != null) + { + try + { + if (url.isHierarchical() && url.path() != null) + { + normalized = url.withPath(URLUtils.encodePath(URLUtils.decode(url.path()))); + } + normalized = new DecodeUnreservedCanonicalizer().canonicalize(normalized); + } catch (GalimatiasParseException unexpected) + { + throw new AssertionError(unexpected); + } + } + return normalized; + } + /** * Returns the MIME type of a `data:` URL. * * @param url - * a URL, can be `null`. + * a URL, can be `null`. * @return the MIME type declared in the data URL (can be an empty string), or - * `null` if `url` is not a data URL. + * `null` if `url` is not a data URL. */ public static String getDataURLType(URL url) { diff --git a/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/content&001.xhtml b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/content&001.xhtml new file mode 100644 index 000000000..43a520ea2 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/content&001.xhtml @@ -0,0 +1,11 @@ + + + + + Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/nav.xhtml b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/nav.xhtml new file mode 100644 index 000000000..7f34f3024 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/package.opf b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/package.opf new file mode 100644 index 000000000..552655bbd --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/EPUB/package.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/META-INF/container.xml b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/META-INF/container.xml new file mode 100644 index 000000000..318782179 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/mimetype b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-percent-encoded-valid/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/src/test/resources/epub3/04-ocf/ocf.feature b/src/test/resources/epub3/04-ocf/ocf.feature index 7a0dba39f..b5c11175c 100644 --- a/src/test/resources/epub3/04-ocf/ocf.feature +++ b/src/test/resources/epub3/04-ocf/ocf.feature @@ -114,6 +114,12 @@ Feature: EPUB 3 — Open Container Format When checking EPUB 'url-in-xhtml-valid.xhtml' And no errors or warnings are reported + @spec @xref:sec-container-iri + Scenario: Allow percent-encoded URLs + When checking EPUB 'url-percent-encoded-valid' + And no errors or warnings are reported + + #### Invalid container URLs @spec @xref:sec-container-iri