diff --git a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java index aa896790a..89c5344e1 100644 --- a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java +++ b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java @@ -145,6 +145,7 @@ private void initialize() severities.put(MessageId.HTM_059, Severity.ERROR); severities.put(MessageId.HTM_060a, Severity.USAGE); severities.put(MessageId.HTM_060b, Severity.USAGE); + severities.put(MessageId.HTM_061, Severity.ERROR); // Media severities.put(MessageId.MED_001, Severity.SUPPRESSED); diff --git a/src/main/java/com/adobe/epubcheck/messages/MessageId.java b/src/main/java/com/adobe/epubcheck/messages/MessageId.java index c2b2ab7b8..41befa2ba 100644 --- a/src/main/java/com/adobe/epubcheck/messages/MessageId.java +++ b/src/main/java/com/adobe/epubcheck/messages/MessageId.java @@ -139,6 +139,7 @@ public enum MessageId implements Comparable HTM_059("HTM_059"), HTM_060a("HTM_060a"), HTM_060b("HTM_060b"), + HTM_061("HTM_061"), // Messages associated with media (images, audio and video) MED_001("MED-001"), diff --git a/src/main/java/com/adobe/epubcheck/xml/HTMLUtils.java b/src/main/java/com/adobe/epubcheck/xml/HTMLUtils.java index ee0cd543c..5beda2b6f 100644 --- a/src/main/java/com/adobe/epubcheck/xml/HTMLUtils.java +++ b/src/main/java/com/adobe/epubcheck/xml/HTMLUtils.java @@ -5,6 +5,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; +import net.sf.saxon.om.NameChecker; + /** * Utilities for HTML-specific logic. * @@ -44,7 +46,7 @@ public static boolean isCustomNamespace(String namespace) * attributes. 
* * @param name - * the name of an attribute defined in the HTML specification + * the name of an attribute defined in the HTML specification * @return true iff the attribute value is case-insensitive */ public static boolean isCaseInsensitiveAttribute(String namespace, String name) @@ -57,6 +59,25 @@ public static boolean isDataAttribute(String namespace, String name) return namespace.isEmpty() && name.startsWith("data-"); } + /** + * Tells if a string is a valid + * custom data attribute, as + * defined in HTML. + * + * @param name + * the data attribute to test + * @return true if {@code name} is a valid custom data attribute + */ + public static boolean isValidDataAttribute(String name) + { + Preconditions.checkArgument(name != null && name.startsWith("data-")); + name = name.substring(5); + return !name.isEmpty() + && NameChecker.isValidNCName(name) + && !name.matches(".*[A-Z].*"); + } + private HTMLUtils() { // Not instanciable. diff --git a/src/main/java/com/adobe/epubcheck/xml/handlers/PreprocessingDefaultHandler.java b/src/main/java/com/adobe/epubcheck/xml/handlers/PreprocessingDefaultHandler.java index 0fdc8f96d..d739315be 100644 --- a/src/main/java/com/adobe/epubcheck/xml/handlers/PreprocessingDefaultHandler.java +++ b/src/main/java/com/adobe/epubcheck/xml/handlers/PreprocessingDefaultHandler.java @@ -86,6 +86,11 @@ private Attributes preprocessAttributes(String elemNamespace, Attributes atts) // Remove data-* attributes in both XHTML and SVG if (HTMLUtils.isDataAttribute(namespace, name)) { + if (!HTMLUtils.isValidDataAttribute(name)) + { + context.report.message(MessageId.HTM_061, LocationHandler.location(context, locator), + name); + } attributes.removeAttribute(i); } // Remove custom namespace attributes in XHTML diff --git a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties index 973446a74..7e1b5df02 100644 --- 
a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties +++ b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties @@ -64,6 +64,7 @@ HTM_058=HTML documents must be encoded in UTF-8, but UTF-16 was detected. HTM_059=Viewport "%1$s" property must not be defined more than once, but found values [%2$s]. HTM_060a=EPUB reading systems must ignore secondary viewport meta elements in fixed-layout documents; viewport declaration "%1$s" will be ignored. HTM_060b=EPUB reading systems must ignore viewport meta elements in reflowable documents; viewport declaration "%1$s" will be ignored. +HTM_061="%1$s" is not a valid custom data attribute (it must have at least one character after the hyphen, be XML-compatible, and not contain ASCII uppercase letters). #media MED_003=Picture "img" elements must reference core media type resources, but found resource "%1$s" of type "%2$s". diff --git a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature index 535ca7e9b..e9456c732 100644 --- a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature +++ b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature @@ -227,6 +227,11 @@ Feature: EPUB 3 — Content Documents — XHTML When checking document 'data-attr-valid.xhtml' Then no errors or warnings are reported + Scenario: Report invalid `data-*` attributes + When checking document 'data-attr-invalid-error.xhtml' + Then error HTM-061 is reported 3 times + And no other errors or warnings are reported + Scenario: Report invalid elements after a `data-*` attribute See issue 189 - was allowed by stripping of `data-*` attributes When checking EPUB 'content-xhtml-data-attr-removal-markup-error' diff --git a/src/test/resources/epub3/06-content-document/files/data-attr-invalid-error.xhtml b/src/test/resources/epub3/06-content-document/files/data-attr-invalid-error.xhtml new 
file mode 100644 index 000000000..98191537f --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/data-attr-invalid-error.xhtml @@ -0,0 +1,12 @@ + + + + + data-* attributes + + +
invalid (no character after the hyphen)
+
invalid (not an XML name)
+
invalid (contains upper alphas)
+ +