From c15e4ea56a37e745cac29c369d11299f348e75b1 Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Wed, 5 Jul 2023 14:13:15 +0200 Subject: [PATCH] fix: `dc:source` element caused a NullPointerException The checks related to the HTML `source` element were run regardless of the element namespace, and without defensive coding. This caused an NPE when the markup contained `dc:source` elements. This commit fixes the situation by: - only applying `OPSHandler30` checks for the elements in the target namespace - returning early when the `source` element preconditions aren't met, as these preconditions are checked by the schema Fix #1514 --- .../com/adobe/epubcheck/ops/OPSHandler30.java | 206 ++++++++++-------- .../content-document-xhtml.feature | 6 + .../dc-source-valid/EPUB/content_001.xhtml | 22 ++ .../files/dc-source-valid/EPUB/nav.xhtml | 14 ++ .../files/dc-source-valid/EPUB/package.opf | 16 ++ .../dc-source-valid/META-INF/container.xml | 6 + .../files/dc-source-valid/mimetype | 1 + 7 files changed, 178 insertions(+), 93 deletions(-) create mode 100644 src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/nav.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/package.opf create mode 100644 src/test/resources/epub3/06-content-document/files/dc-source-valid/META-INF/container.xml create mode 100644 src/test/resources/epub3/06-content-document/files/dc-source-valid/mimetype diff --git a/src/main/java/com/adobe/epubcheck/ops/OPSHandler30.java b/src/main/java/com/adobe/epubcheck/ops/OPSHandler30.java index 9f63ab7bb..1a2894fee 100644 --- a/src/main/java/com/adobe/epubcheck/ops/OPSHandler30.java +++ b/src/main/java/com/adobe/epubcheck/ops/OPSHandler30.java @@ -334,109 +334,127 @@ public void startElement() processSectioning(); String name = e.getName(); - if (name.equals("html")) - { - vocabs = 
VocabUtil.parsePrefixDeclaration( - e.getAttributeNS(EpubConstants.EpubTypeNamespaceUri, "prefix"), RESERVED_VOCABS, - KNOWN_VOCAB_URIS, DEFAULT_VOCAB_URIS, report, location()); - } - else if (EpubConstants.HtmlNamespaceUri.equals(e.getNamespace()) && name.equals("meta")) - { - processMeta(); - } - else if (name.equals("form")) - { - requiredProperties.add(ITEM_PROPERTIES.SCRIPTED); - } - else if (name.equals("link")) - { - processLink(); - } - else if (name.equals("math")) + if (EpubConstants.HtmlNamespaceUri.equals(e.getNamespace())) { - requiredProperties.add(ITEM_PROPERTIES.MATHML); - inMathML = true; - hasAltorAnnotation = (null != e.getAttribute("alttext")); - String altimg = e.getAttribute("altimg"); - if (altimg != null) + if (name.equals("html")) + { + vocabs = VocabUtil.parsePrefixDeclaration( + e.getAttributeNS(EpubConstants.EpubTypeNamespaceUri, "prefix"), RESERVED_VOCABS, + KNOWN_VOCAB_URIS, DEFAULT_VOCAB_URIS, report, location()); + } + else if (name.equals("meta")) + { + processMeta(); + } + else if (name.equals("form")) { - super.checkImage(null, "altimg"); + requiredProperties.add(ITEM_PROPERTIES.SCRIPTED); + } + else if (name.equals("link")) + { + processLink(); } - } - else if (name.equals("svg")) - { - processSVG(); - } - else if (EpubConstants.EpubTypeNamespaceUri.equals(e.getNamespace()) && name.equals("switch")) - { - requiredProperties.add(ITEM_PROPERTIES.SWITCH); - } - else if (name.equals("audio")) - { - startMediaElement(); - } - else if (name.equals("video")) - { - processVideo(); - startMediaElement(); - } - else if (name.equals("figure")) - { - processFigure(); - } - else if (name.equals("table")) - { - processTable(); - } - else if (name.equals("track")) - { - startTrack(); - } - else if (name.equals("a")) - { - anchorNeedsText = true; - processAnchor(e); - } - else if (name.equals("annotation-xml")) - { - hasAltorAnnotation = true; - } - else if (name.equals("input")) - { - startInput(); - } - else if (name.equals("picture")) - { - 
inPicture = true; - } - else if (name.equals("source")) - { - if ("picture".equals(e.getParent().getName())) + else if (name.equals("audio")) { - checkImage(null, null); + startMediaElement(); } - else // audio or video source + else if (name.equals("video")) { - startMediaSource(); + processVideo(); + startMediaElement(); + } + else if (name.equals("figure")) + { + processFigure(); + } + else if (name.equals("table")) + { + processTable(); + } + else if (name.equals("track")) + { + startTrack(); + } + else if (name.equals("a")) + { + anchorNeedsText = true; + processAnchor(e); + } + else if (name.equals("input")) + { + startInput(); + } + else if (name.equals("picture")) + { + inPicture = true; + } + else if (name.equals("source")) + { + if ("picture".equals(e.getParent().getName())) + { + checkImage(null, null); + } + else // audio or video source + { + startMediaSource(); + } + } + else if (name.equals("embed")) + { + startEmbed(); + } + else if (name.equals("blockquote") || name.equals("q") || name.equals("ins") + || name.equals("del")) + { + checkCiteAttribute(); } } - else if ("http://www.w3.org/2000/svg".equals(e.getNamespace()) && name.equals("title")) - { - hasLabel = true; - } - else if ("http://www.w3.org/2000/svg".equals(e.getNamespace()) && name.equals("text")) + else if ("http://www.w3.org/1998/Math/MathML".equals(e.getNamespace())) { - hasLabel = true; + if (name.equals("math")) + { + requiredProperties.add(ITEM_PROPERTIES.MATHML); + inMathML = true; + hasAltorAnnotation = (null != e.getAttribute("alttext")); + String altimg = e.getAttribute("altimg"); + if (altimg != null) + { + super.checkImage(null, "altimg"); + } + + } + else if (name.equals("annotation-xml")) + { + hasAltorAnnotation = true; + } } - else if (name.equals("embed")) + else if ("http://www.w3.org/2000/svg".equals(e.getNamespace())) { - startEmbed(); + if (name.equals("svg")) + { + processSVG(); + } + else if (name.equals("a")) + { + anchorNeedsText = true; + processAnchor(e); + } + 
else if (name.equals("title")) + { + hasLabel = true; + } + else if (name.equals("text")) + { + hasLabel = true; + } } - else if (name.equals("blockquote") || name.equals("q") || name.equals("ins") - || name.equals("del")) + else if (EpubConstants.EpubTypeNamespaceUri.equals(e.getNamespace())) { - checkCiteAttribute(); + if (name.equals("switch")) + { + requiredProperties.add(ITEM_PROPERTIES.SWITCH); + } } processInlineScripts(); @@ -595,10 +613,12 @@ protected void endMediaElement() protected void startMediaSource() { XMLElement elem = currentElement(); - assert "source".equals(elem.getName()) - && ("audio".equals(elem.getParent().getName()) - || "video".equals(elem.getParent().getName())) - && elem.getParent().getAttribute("src") == null; + assert "source".equals(elem.getName()); + if (!("audio".equals(elem.getParent().getName()) + || "video".equals(elem.getParent().getName()))) + { + return; // schema error was reported + } // check the `src` attribute URL url = checkResourceURL(elem.getAttribute("src")); diff --git a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature index 2d5bdc80a..3ca0c29b5 100644 --- a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature +++ b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature @@ -551,6 +551,12 @@ Feature: EPUB 3 — Content Documents — XHTML #//TODO verify script core media types + #### source + + Scenario: Verify non-HTML `source` elements are skipped + See https://github.com/w3c/epubcheck/issues/1514 + When checking EPUB 'dc-source-valid' + Then no errors or warnings are reported #### Style diff --git a/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/content_001.xhtml b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/content_001.xhtml new file mode 100644 index 000000000..329bf4a0d --- /dev/null +++ 
b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/content_001.xhtml @@ -0,0 +1,22 @@ + + + + + Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + Example + + + https://example.com + + + + + diff --git a/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/nav.xhtml b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/package.opf b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/package.opf new file mode 100644 index 000000000..7ce8b2f1a --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/dc-source-valid/EPUB/package.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/06-content-document/files/dc-source-valid/META-INF/container.xml b/src/test/resources/epub3/06-content-document/files/dc-source-valid/META-INF/container.xml new file mode 100644 index 000000000..318782179 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/dc-source-valid/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/test/resources/epub3/06-content-document/files/dc-source-valid/mimetype b/src/test/resources/epub3/06-content-document/files/dc-source-valid/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/dc-source-valid/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file