Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: new check (OPF-092) for language tags well-formedness #1363

Merged
merged 1 commit into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ private void initialize()
severities.put(MessageId.OPF_089, Severity.ERROR);
severities.put(MessageId.OPF_090, Severity.USAGE);
severities.put(MessageId.OPF_091, Severity.ERROR);
severities.put(MessageId.OPF_092, Severity.ERROR);

// PKG
severities.put(MessageId.PKG_001, Severity.WARNING);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ public enum MessageId implements Comparable<MessageId>
OPF_089("OPF-089"),
OPF_090("OPF-090"),
OPF_091("OPF-091"),
OPF_092("OPF-092"),

// Messages relating to the entire package
PKG_001("PKG-001"),
Expand Down
37 changes: 37 additions & 0 deletions src/main/java/com/adobe/epubcheck/opf/OPFHandler30.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Deque;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

Expand Down Expand Up @@ -166,6 +168,12 @@ public void startElement()

XMLElement e = parser.getCurrentElement();
String name = e.getName();

// Check global attributes
String xmllang = e.getAttributeNS(EpubConstants.XmlNamespaceUri, "lang");
if (xmllang != null && !xmllang.isEmpty()) {
checkLanguageTag(xmllang);
}

if (EpubConstants.OpfNamespaceUri.equals(e.getNamespace()))
{
Expand Down Expand Up @@ -469,6 +477,11 @@ private void processLink(XMLElement e)
.refines(e.getAttribute("refines")).build();
linkedResourcesBuilders.peekFirst().add(resource);
}

String hreflang = e.getAttribute("hreflang");
if (hreflang != null && !hreflang.isEmpty()) {
checkLanguageTag(hreflang);
}
}

private void processItemrefProperties(OPFItem.Builder builder, String property)
Expand Down Expand Up @@ -566,11 +579,22 @@ private void processDCElem(XMLElement e)
{
// get the property
Optional<Property> prop = DCMESVocab.VOCAB.lookup(e.getName());
// Add to the metadata model builder
if (prop.isPresent() && !metadataBuilders.isEmpty())
{
metadataBuilders.peekFirst().meta(e.getAttribute("id"), prop.get(),
(String) e.getPrivateData(), null);
}
// Check that dc:language is well-formed
if ("language".equals(e.getName()))
{
String language = (String) e.getPrivateData();
// Empty dc:language is checked by the schema
if (language != null && !language.trim().isEmpty())
{
checkLanguageTag(language.trim());
}
}
}

private void processItemsInIndexCollection(ResourceCollection collection)
Expand All @@ -592,6 +616,19 @@ private void processItemsInIndexCollection(ResourceCollection collection)
}
}
}

/**
 * Verifies that a language tag is well-formed and reports
 * {@code OPF-092} when it is not.
 * <p>
 * Well-formedness is delegated to {@link Locale.Builder#setLanguageTag(String)};
 * the tag is handed over exactly as received (no trimming or normalization is
 * done here). When the builder rejects the tag, the message is reported at the
 * parser's current line and column, with the offending tag and the builder's
 * own explanation as message arguments.
 *
 * @param language
 *          the language tag to check
 */
private void checkLanguageTag(String language)
{
  // The builder is used purely as a validator; the resulting state is discarded.
  Locale.Builder tagValidator = new Locale.Builder();
  try
  {
    tagValidator.setLanguageTag(language);
  }
  catch (IllformedLocaleException illFormed)
  {
    EPUBLocation location = EPUBLocation.create(path, parser.getLineNumber(),
        parser.getColumnNumber());
    report.message(MessageId.OPF_092, location, language, illFormed.getMessage());
  }
}

protected void reportMetadata()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ OPF_088=Unrecognized epub:type value "%1$s".
OPF_089=The "alternate" link rel keyword cannot be paired with other keywords.
OPF_090=It is encouraged to use MIME media type "%1$s" instead of "%2$s".
OPF_091=The item href URL must not have a fragment identifier.
OPF_092=Language tag "%1$s" is not well-formed: %2$s

#Package
PKG_001=Validating the EPUB against version %1$s but detected version %2$s.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

opf.dc.identifier = element dc:identifier { opf.id.attr? & datatype.string.nonempty }
opf.dc.title = element dc:title { opf.dc.attlist & datatype.string.nonempty }
opf.dc.language = element dc:language { opf.id.attr? & datatype.languagecode }
opf.dc.language = element dc:language { opf.id.attr? & datatype.string.nonempty }
opf.dc.date = element dc:date { opf.id.attr? & datatype.string.nonempty }
opf.dc.source = element dc:source { opf.dc.attlist & datatype.string.nonempty }
opf.dc.type = element dc:type { opf.id.attr? & datatype.string.nonempty }
Expand Down Expand Up @@ -129,5 +129,5 @@
opf.href.attr = attribute href { datatype.URI }
opf.id.attr = attribute id { datatype.ID }
opf.i18n.attrs = opf.xml.lang.attr? & opf.dir.attr?
opf.xml.lang.attr = attribute xml:lang { "" | datatype.languagecode }
opf.xml.lang.attr = attribute xml:lang { "" | datatype.string.nonempty }
opf.dir.attr = attribute dir { 'ltr' | 'rtl' }
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
<dc:creator xml:lang="a-value">Jane Doe</dc:creator>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
<dc:creator xml:lang=" en ">Jane Doe</dc:creator>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
<link hreflang="a-value" rel="alternate" href="https://example.org/package.json" media-type="application/json-ld"/>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
<link hreflang=" en " rel="alternate" href="https://example.org/package.json" media-type="application/json-ld"/>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language> </dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<metadata>
<dc:title>Title</dc:title>
<dc:language>a-value</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>
<meta property="dcterms:modified">2019-01-01T12:00:00Z</meta>
</metadata>
<manifest>
<item id="t001" href="contents.xhtml" properties="nav" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="t001"/>
</spine>
</package>
33 changes: 32 additions & 1 deletion src/test/resources/epub3/package-document.feature
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ Feature: EPUB 3 ▸ Packages ▸ Package Document Checks
Scenario: the 'xml:lang' attribute can be empty
When checking file 'attr-lang-empty-valid.opf'
Then no other errors or warnings are reported

Scenario: the 'xml:lang' language tag must not have leading/trailing whitespace
When checking file 'attr-lang-whitespace-error.opf'
Then error OPF-092 is reported
And no other errors or warnings are reported

Scenario: the 'xml:lang' language tag must be well-formed
When checking file 'attr-lang-not-well-formed-error.opf'
Then error OPF-092 is reported
And no other errors or warnings are reported

## 3.4.3 Metadata
### 3.4.3 The metadata element
Expand All @@ -106,7 +116,18 @@ Feature: EPUB 3 ▸ Packages ▸ Package Document Checks
When checking file 'metadata-identifier-uuid-invalid-warning.opf'
Then warning OPF-085 is reported
And no other errors or warnings are reported


Scenario: 'dc:language' must not be empty
When checking file 'metadata-language-empty-error.opf'
Then error RSC-005 is reported
And the message contains "must be a string with length at least 1"
And no other errors or warnings are reported

Scenario: 'dc:language' must be well-formed
When checking file 'metadata-language-not-well-formed-error.opf'
Then error OPF-092 is reported
And no other errors or warnings are reported

Scenario: 'dcterms:modified' must be defined
When checking file 'metadata-modified-missing-error.opf'
Then error RSC-005 is reported
Expand Down Expand Up @@ -245,6 +266,16 @@ Feature: EPUB 3 ▸ Packages ▸ Package Document Checks
Scenario: the 'link' 'hreflang' attribute can be empty
When checking file 'link-hreflang-empty-valid.opf'
Then no other errors or warnings are reported

Scenario: the 'link' 'hreflang' language tag must not have leading/trailing whitespace
When checking file 'link-hreflang-whitespace-error.opf'
Then error OPF-092 is reported
And no other errors or warnings are reported

Scenario: the 'link' 'hreflang' language tag must be well-formed
When checking file 'link-hreflang-not-well-formed-error.opf'
Then error OPF-092 is reported
And no other errors or warnings are reported

### 3.4.4 Manifest

Expand Down