Skip to content

Commit

Permalink
feat: allow SVG/MathML doctype declarations
Browse files Browse the repository at this point in the history
EPUB 3.3 now allows a reserved set of external identifiers in doctype
declarations of documents with select media types.

See: https://www.w3.org/TR/epub-33/#app-identifiers-allowed

This commit:
- adds those as special cases to the XML parser code
- totally removes entity fetching for EPUB 3.3
- keeps forbidding external entities in the internal subset

Fix #1192, Fix #1114
  • Loading branch information
rdeltour committed Jan 23, 2022
1 parent ab99f1d commit 6e44b39
Show file tree
Hide file tree
Showing 23 changed files with 215 additions and 10 deletions.
31 changes: 23 additions & 8 deletions src/main/java/com/adobe/epubcheck/xml/XMLParser.java
Expand Up @@ -313,20 +313,18 @@ public InputSource resolveEntity(String publicId, String systemId)

String resourcePath = systemIdMap.get(systemId);

if (resourcePath != null)
// external entities are not resolved in EPUB 3
if (context.version == EPUBVersion.VERSION_3 || systemId.equals("about:legacy-compat")) {
return new InputSource(new StringReader(""));
}
else if (resourcePath != null)
{
InputStream resourceStream = ResourceUtil.getResourceStream(resourcePath);
InputSource source = new InputSource(resourceStream);
source.setPublicId(publicId);
source.setSystemId(systemId);
return source;
}
else if (systemId.equals("about:legacy-compat"))
{
// special case
return new InputSource(new StringReader(""));

}
else
{
// check for a system prop that turns off online fetching
Expand Down Expand Up @@ -797,7 +795,24 @@ else if (context.version == EPUBVersion.VERSION_3)
}
else if (publicId != null || systemId != null)
{
report.message(MessageId.OPF_073, getLocation());
// check if the declaration is allowed for the current media type
boolean isAllowed;
switch (mimeType)
{
case "image/svg+xml":
isAllowed = "-//W3C//DTD SVG 1.1//EN".equals(publicId) && "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd".equals(systemId);
break;
case "application/mathml+xml":
case "application/mathml-content+xml":
case "application/mathml-presentation+xml":
isAllowed = "-//W3C//DTD MathML 3.0//EN".equals(publicId) && "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd".equals(systemId);
break;
default:
isAllowed= false;
}
if (!isAllowed) {
report.message(MessageId.OPF_073, getLocation());
}
}
}

Expand Down
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<!-- Minimal XHTML content document: one heading and one paragraph. -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta charset="utf-8"/>
    <title>Minimal EPUB</title>
  </head>
  <body>
    <h1>Loomings</h1>
    <p>Call me Ishmael.</p>
  </body>
</html>
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<!--
  Presentation MathML for the expression given in alttext (2x+y-z).
  The DOCTYPE carries the MathML 3.0 public and system identifiers.
-->
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
  <mrow>
    <mn>2</mn>
    <!-- U+2062 is INVISIBLE TIMES (implied product of 2 and x); U+2061 would be FUNCTION APPLICATION. -->
    <mo> &#x2062;<!--INVISIBLE TIMES--></mo>
    <mi>x</mi>
  </mrow>
  <mrow>
    <mo>+</mo>
    <mi>y</mi>
    <mo>-</mo>
    <mi>z</mi>
  </mrow>
</math>
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<!--
  Presentation MathML for the expression given in alttext (2x+y-z).
  The DOCTYPE carries the MathML 3.0 public and system identifiers.
-->
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
  <mrow>
    <mn>2</mn>
    <!-- U+2062 is INVISIBLE TIMES (implied product of 2 and x); U+2061 would be FUNCTION APPLICATION. -->
    <mo> &#x2062;<!--INVISIBLE TIMES--></mo>
    <mi>x</mi>
  </mrow>
  <mrow>
    <mo>+</mo>
    <mi>y</mi>
    <mo>-</mo>
    <mi>z</mi>
  </mrow>
</math>
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<!--
  Presentation MathML for the expression given in alttext (2x+y-z).
  The DOCTYPE carries the MathML 3.0 public and system identifiers.
-->
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
  <mrow>
    <mn>2</mn>
    <!-- U+2062 is INVISIBLE TIMES (implied product of 2 and x); U+2061 would be FUNCTION APPLICATION. -->
    <mo> &#x2062;<!--INVISIBLE TIMES--></mo>
    <mi>x</mi>
  </mrow>
  <mrow>
    <mo>+</mo>
    <mi>y</mi>
    <mo>-</mo>
    <mi>z</mi>
  </mrow>
</math>
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Package document declaring XHTML, MathML (three media types) and SVG items.
  Each MathML item names content_001 as its fallback; all listed items except
  the nav document are referenced from the spine.
-->
<package xmlns="http://www.idpf.org/2007/opf"
         version="3.0"
         xml:lang="en"
         unique-identifier="q">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:title id="title">Minimal EPUB 3.0</dc:title>
    <dc:language>en</dc:language>
    <dc:identifier id="q">NOID</dc:identifier>
    <meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
  </metadata>
  <manifest>
    <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <!-- One manifest item per MathML media type. -->
    <item id="mathml-1" href="mathml-mediatype-1.xml" media-type="application/mathml+xml" fallback="content_001"/>
    <item id="mathml-2" href="mathml-mediatype-2.xml" media-type="application/mathml-presentation+xml" fallback="content_001"/>
    <item id="mathml-3" href="mathml-mediatype-3.xml" media-type="application/mathml-content+xml" fallback="content_001"/>
    <item id="svg" href="svg.svg" media-type="image/svg+xml"/>
  </manifest>
  <spine>
    <itemref idref="content_001"/>
    <itemref idref="svg"/>
    <itemref idref="mathml-1"/>
    <itemref idref="mathml-2"/>
    <itemref idref="mathml-3"/>
  </spine>
</package>
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- OCF container file: points at the package document of the publication. -->
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
  <rootfiles>
    <rootfile full-path="EPUB/package.opf"
              media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<!-- Minimal XHTML content document; the heading carries id "ch1" so it can be addressed by fragment. -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta charset="utf-8"/>
    <title>Minimal EPUB</title>
  </head>
  <body>
    <h1 id="ch1">Loomings</h1>
    <p>Call me Ishmael.</p>
  </body>
</html>
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<!-- Minimal navigation document: a toc nav with a single entry. -->
<html xmlns="http://www.w3.org/1999/xhtml"
      xmlns:epub="http://www.idpf.org/2007/ops"
      xml:lang="en"
      lang="en">
  <head>
    <meta charset="utf-8"/>
    <title>Minimal Nav</title>
  </head>
  <body>
    <nav epub:type="toc">
      <ol>
        <li><a href="content_001.xhtml">content 001</a></li>
      </ol>
    </nav>
  </body>
</html>
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!--
  NCX navigation document with a single navPoint.
  NOTE(review): the DOCTYPE above names "svg" and carries the SVG 1.1 identifiers
  while the root element is ncx. This looks like a deliberate negative-test input;
  confirm before changing it.
-->
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/"
     xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/"
     version="2005-1"
     xml:lang="en">
  <head>
    <meta name="dtb:uid" content="NOID"/>
    <meta name="dtb:depth" content="1"/>
    <meta name="dtb:totalPageCount" content="0"/>
    <meta name="dtb:maxPageNumber" content="0"/>
  </head>
  <docTitle>
    <text>NCX</text>
  </docTitle>
  <navMap>
    <navPoint id="ch1" playOrder="1">
      <navLabel>
        <text>Chapter 1</text>
      </navLabel>
      <content src="content_001.xhtml#ch1"/>
    </navPoint>
  </navMap>
</ncx>
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- OCF container file: points at the package document of the publication. -->
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
  <rootfiles>
    <rootfile full-path="EPUB/package.opf"
              media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
@@ -0,0 +1 @@
application/epub+zip
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<!-- Minimal navigation document: a toc nav with a single entry. -->
<html xmlns="http://www.w3.org/1999/xhtml"
      xmlns:epub="http://www.idpf.org/2007/ops"
      xml:lang="en"
      lang="en">
  <head>
    <meta charset="utf-8"/>
    <title>Minimal Nav</title>
  </head>
  <body>
    <nav epub:type="toc">
      <ol>
        <li><a href="content_001.xhtml">content 001</a></li>
      </ol>
    </nav>
  </body>
</html>
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Package document with one content document, a nav document and an NCX;
  the spine references the NCX through its toc attribute.
-->
<package xmlns="http://www.idpf.org/2007/opf"
         version="3.0"
         xml:lang="en"
         unique-identifier="q">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:title id="title">Minimal EPUB 3.0</dc:title>
    <dc:language>en</dc:language>
    <dc:identifier id="q">NOID</dc:identifier>
    <meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
  </metadata>
  <manifest>
    <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
  </manifest>
  <spine toc="ncx">
    <itemref idref="content_001"/>
  </spine>
</package>
@@ -0,0 +1 @@
application/epub+zip
13 changes: 11 additions & 2 deletions src/test/resources/epub3/resources-publication.feature
Expand Up @@ -254,8 +254,17 @@ Feature: EPUB 3 ▸ Publication Resources ▸ Full Publication Checks

## 3.3 XML Conformance

Scenario: Report an NCX file with a DOCTYPE declaration including the external identifier (issue 305)
When checking EPUB 'xml-ncx-doctype-external-identifier-error'
Scenario: Verify DOCTYPE declarations with allowed external identifiers
When checking EPUB 'xml-external-identifier-allowed-valid'
Then no errors or warnings are reported

Scenario: Report a DOCTYPE declaration with an allowed external identifier but not on the expected media type
When checking EPUB 'xml-external-identifier-bad-mediatype-error'
Then error OPF-073 is reported
And no other errors or warnings are reported

Scenario: Report a DOCTYPE declaration with an external identifier that is not allowed
When checking EPUB 'xml-external-identifier-disallowed-error'
Then error OPF-073 is reported
And no other errors or warnings are reported

Expand Down

0 comments on commit 6e44b39

Please sign in to comment.