Skip to content

Commit

Permalink
feat: improve fallback detection check
Browse files Browse the repository at this point in the history
This commit updates fallback-related checks and refactors the code used
for checking references to publication resources.

The following checks are introduced or updated:

- `RSC-032` (new): reports foreign resources that are used in content
  documents without a fallback. Replaces `MED-001`, `MED-002`, and `CSS-010`.
- `MED-003` is now reported when an `img` element child of a `picture` element
  references a resource that is not a core media type.
- `MED-001` is suppressed. It was used to report that a video `poster` attribute
  did not reference a core image media type. It is now reported as `RSC-032`.
- `MED-002` is suppressed. It was used to report HTML elements referencing
  foreign resources without fallback. It is now reported as `RSC-032`.
- `CSS-010` is suppressed. It was used to report references to foreign
  stylesheets with no fallback. It is now reported as `RSC-032`.
- `OPF-040` is now reported when the `fallback` attribute of a package
  document `item` element does not point to an existing ID. This was
  previously implemented as a Schematron check (`RSC-005`) in EPUB 3.x.
- `OPF-013` is now a warning. It is reported when a MIME type declared
  inline in content (for instance with an HTML `type` attribute) does
  not match the MIME type declared in the package document.

The code is refactored as follows:
- the reference/resource registry functionality of the `XRefChecker`
  class is extracted to new top-level classes in the (new) package
  `org.w3c.epubcheck.references`.
  - `Resource` represents a publication resource
  - `ResourceRegistry` is a registry of `Resource` instances
  - `Reference` represents a reference (URL) used anywhere in content
  - `ReferenceRegistry` is a registry of `Reference` instances
  - `XRefChecker` is renamed to `ResourceReferencesChecker`.
  - `ValidationContext` contains optional references to `ResourceRegistry`
    and `ReferenceRegistry`.
  - fallback chain resolution is now done in a new `FallbackChainResolver`
    class, when building `OPFItem` instances from the builders created
    when parsing the package document.
  - `XMLHandler` has convenience methods used to register references
    to the `ReferenceRegistry`.

Fix #1304, Fix #1298.
  • Loading branch information
rdeltour committed Nov 28, 2022
1 parent fdf9b22 commit 545b7f7
Show file tree
Hide file tree
Showing 351 changed files with 3,065 additions and 2,960 deletions.
566 changes: 0 additions & 566 deletions OCFCheckerCopy.java

This file was deleted.

608 changes: 0 additions & 608 deletions XRefChecker_copy.java

This file was deleted.

2 changes: 1 addition & 1 deletion src/main/java/com/adobe/epubcheck/api/EpubCheck.java
Expand Up @@ -32,7 +32,7 @@

import org.w3c.epubcheck.constants.MIMEType;
import org.w3c.epubcheck.core.Checker;
import org.w3c.epubcheck.url.URLUtils;
import org.w3c.epubcheck.util.url.URLUtils;

import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.ocf.OCFChecker;
Expand Down
22 changes: 10 additions & 12 deletions src/main/java/com/adobe/epubcheck/css/CSSHandler.java
Expand Up @@ -19,16 +19,15 @@
import org.idpf.epubcheck.util.css.CssGrammar.CssSelector;
import org.idpf.epubcheck.util.css.CssGrammar.CssURI;
import org.idpf.epubcheck.util.css.CssLocation;
import org.w3c.epubcheck.url.URLChecker;
import org.w3c.epubcheck.core.references.URLChecker;
import org.w3c.epubcheck.core.references.Reference;

import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.opf.OPFChecker;
import com.adobe.epubcheck.opf.OPFChecker30;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.opf.XRefChecker.Type;
import com.adobe.epubcheck.util.EPUBVersion;
import com.adobe.epubcheck.util.FeatureEnum;
import com.adobe.epubcheck.vocab.PackageVocabs;
Expand All @@ -42,7 +41,6 @@
public class CSSHandler implements CssContentHandler, CssErrorHandler
{
final ValidationContext context;
final XRefChecker xrefChecker;
final Report report;
final EPUBVersion version;
int startingLineNumber = 0; // append to line info from css parser
Expand All @@ -69,7 +67,6 @@ public class CSSHandler implements CssContentHandler, CssErrorHandler
public CSSHandler(ValidationContext context)
{
this.context = context;
this.xrefChecker = context.xrefChecker.orNull();
this.report = context.report;
this.version = context.version;
this.urlChecker = new URLChecker(context);
Expand Down Expand Up @@ -159,7 +156,7 @@ else if (uriOrString.getType() == CssConstruct.Type.STRING)
}
if (uri != null)
{
resolveAndRegister(uri, line, col, atRule.toCssString(), Type.GENERIC);
resolveAndRegister(uri, line, col, atRule.toCssString(), Reference.Type.GENERIC);
}
}
}
Expand Down Expand Up @@ -322,10 +319,10 @@ else if (propertyName.equals("src"))
if (construct.getType() == CssConstruct.Type.URI)
{
URL fontURL = parsedURLs.get(((CssURI) construct).toUriString());
if (fontURL != null)
if (fontURL != null && context.resourceRegistry.isPresent())
{
// check font mimetypes
String fontMimeType = xrefChecker.getMimeType(fontURL);
String fontMimeType = context.getMimeType(fontURL);
if (fontMimeType != null)
{
boolean blessed = true;
Expand Down Expand Up @@ -367,12 +364,13 @@ private void registerURIs(List<CssConstruct> constructs, int line, int col)
if (construct.getType() == CssConstruct.Type.URI)
{
resolveAndRegister(((CssURI) construct).toUriString(), line, col, construct.toCssString(),
inFontFace ? Type.FONT : Type.GENERIC);
inFontFace ? Reference.Type.FONT : Reference.Type.GENERIC);
}
}
}

private void resolveAndRegister(String uriString, int line, int col, String cssContext, Type type)
private void resolveAndRegister(String uriString, int line, int col, String cssContext,
Reference.Type type)
{
if (uriString != null && uriString.trim().length() > 0)
{
Expand All @@ -386,9 +384,9 @@ private void resolveAndRegister(String uriString, int line, int col, String cssC
URL url = urlChecker.checkURL(uriString, getCorrectedEPUBLocation(line, col, cssContext));
parsedURLs.put(uriString, url);

if (url != null)
if (url != null && context.referenceRegistry.isPresent())
{
xrefChecker.registerReference(url, type, getCorrectedEPUBLocation(line, col, cssContext));
context.referenceRegistry.get().registerReference(url, type, getCorrectedEPUBLocation(line, col, cssContext));
if (context.isRemote(url))
{
detectedProperties.add(ITEM_PROPERTIES.REMOTE_RESOURCES);
Expand Down
@@ -1,7 +1,8 @@
package com.adobe.epubcheck.dict;

import org.w3c.epubcheck.core.references.Reference;

import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker.Type;
import com.adobe.epubcheck.xml.handlers.XMLHandler;
import com.adobe.epubcheck.xml.model.XMLElement;

Expand Down Expand Up @@ -39,10 +40,7 @@ else if ("match".equals(name))
private void processRef()
{
URL ref = checkURL(currentElement().getAttribute("href"));
if (ref != null && context.xrefChecker.isPresent())
{
context.xrefChecker.get().registerReference(ref, Type.SEARCH_KEY, location());
}
registerReference(ref, Reference.Type.SEARCH_KEY);
}

}
22 changes: 13 additions & 9 deletions src/main/java/com/adobe/epubcheck/dtbook/DTBookHandler.java
Expand Up @@ -22,9 +22,10 @@

package com.adobe.epubcheck.dtbook;

import org.w3c.epubcheck.core.references.Reference;

import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.util.FeatureEnum;
import com.adobe.epubcheck.util.URISchemes;
import com.adobe.epubcheck.xml.handlers.XMLHandler;
Expand All @@ -34,12 +35,10 @@

public class DTBookHandler extends XMLHandler
{
private final XRefChecker xrefChecker;

public DTBookHandler(ValidationContext context)
{
super(context);
this.xrefChecker = context.xrefChecker.get();
}

@Override
Expand All @@ -51,11 +50,15 @@ public void startElement()
if (ns.equals("http://www.daisy.org/z3986/2005/dtbook/"))
{
// Register IDs
xrefChecker.registerID(e.getAttribute("id"), XRefChecker.Type.GENERIC, location());
if (context.resourceRegistry.isPresent())
{
context.resourceRegistry.get().registerID(e.getAttribute("id"), Reference.Type.GENERIC,
location().url);
}

// Check cross-references (link@href | a@href | img@src)
URL url = null;
XRefChecker.Type type = XRefChecker.Type.GENERIC;
Reference.Type type = Reference.Type.GENERIC;
/*
* This section checks to see if the references used are registered
* schema-types and whether they point to external resources. The
Expand All @@ -68,8 +71,9 @@ public void startElement()

if (url != null && "true".equals(e.getAttribute("external")))
{
//FIXME 2022 check that external attribute is set for remote URLs
if (context.isRemote(url)) {
// FIXME 2022 check that external attribute is set for remote URLs
if (context.isRemote(url))
{
report.info(path, FeatureEnum.REFERENCE, url.toString());
if (!URISchemes.contains(url.scheme()))
{
Expand All @@ -86,12 +90,12 @@ else if (name.equals("link"))
else if (name.equals("img"))
{
url = checkURL(e.getAttribute("src"));
type = XRefChecker.Type.IMAGE;
type = Reference.Type.IMAGE;
}

if (url != null)
{
xrefChecker.registerReference(url, type, location());
registerReference(url, type);
if (context.isRemote(url))
{
report.info(path, FeatureEnum.REFERENCE, url.toString());
Expand Down
Expand Up @@ -144,8 +144,8 @@ private void initialize()
severities.put(MessageId.HTM_058, Severity.ERROR);

// Media
severities.put(MessageId.MED_001, Severity.ERROR);
severities.put(MessageId.MED_002, Severity.ERROR);
severities.put(MessageId.MED_001, Severity.SUPPRESSED);
severities.put(MessageId.MED_002, Severity.SUPPRESSED);
severities.put(MessageId.MED_003, Severity.ERROR);
severities.put(MessageId.MED_004, Severity.ERROR);
severities.put(MessageId.MED_005, Severity.ERROR);
Expand Down Expand Up @@ -205,7 +205,7 @@ private void initialize()
severities.put(MessageId.OPF_010, Severity.ERROR);
severities.put(MessageId.OPF_011, Severity.ERROR);
severities.put(MessageId.OPF_012, Severity.ERROR);
severities.put(MessageId.OPF_013, Severity.ERROR);
severities.put(MessageId.OPF_013, Severity.WARNING);
severities.put(MessageId.OPF_014, Severity.ERROR);
severities.put(MessageId.OPF_015, Severity.ERROR);
severities.put(MessageId.OPF_016, Severity.ERROR);
Expand Down Expand Up @@ -348,6 +348,7 @@ private void initialize()
severities.put(MessageId.RSC_029, Severity.ERROR);
severities.put(MessageId.RSC_030, Severity.ERROR);
severities.put(MessageId.RSC_031, Severity.WARNING);
severities.put(MessageId.RSC_032, Severity.ERROR);

// Scripting
severities.put(MessageId.SCP_001, Severity.SUPPRESSED); // checking scripts is out of scope
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Expand Up @@ -342,6 +342,7 @@ public enum MessageId implements Comparable<MessageId>
RSC_029("RSC-029"),
RSC_030("RSC-030"),
RSC_031("RSC-031"),
RSC_032("RSC-032"),

// Messages relating to scripting
SCP_001("SCP-001"),
Expand Down
12 changes: 6 additions & 6 deletions src/main/java/com/adobe/epubcheck/nav/NavHandler.java
Expand Up @@ -3,9 +3,10 @@
import java.util.EnumSet;
import java.util.Set;

import org.w3c.epubcheck.core.references.Reference;

import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.ops.OPSHandler30;
import com.adobe.epubcheck.util.EpubConstants;
import com.adobe.epubcheck.util.FeatureEnum;
Expand Down Expand Up @@ -78,12 +79,11 @@ public void startElement()
// cross-reference checker, to be able to check that they are in reading
// order
// after all the Content Documents have been parsed
else if ((NavType.TOC__PAGE_LIST.contains(currentNavType)) && xrefChecker.isPresent())
else if (NavType.TOC__PAGE_LIST.contains(currentNavType))
{
xrefChecker.get().registerReference(url,
(currentNavType == NavType.TOC) ? XRefChecker.Type.NAV_TOC_LINK
: XRefChecker.Type.NAV_PAGELIST_LINK,
location());
registerReference(url,
(currentNavType == NavType.TOC) ? Reference.Type.NAV_TOC_LINK
: Reference.Type.NAV_PAGELIST_LINK);
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/main/java/com/adobe/epubcheck/ncx/NCXHandler.java
Expand Up @@ -22,9 +22,10 @@

package com.adobe.epubcheck.ncx;

import org.w3c.epubcheck.core.references.Reference;

import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.util.FeatureEnum;
import com.adobe.epubcheck.xml.handlers.XMLHandler;
import com.adobe.epubcheck.xml.model.XMLElement;
Expand All @@ -33,14 +34,12 @@

public class NCXHandler extends XMLHandler
{
private final XRefChecker xrefChecker;
private static final String TEXT = "text";
String uid;

public NCXHandler(ValidationContext context)
{
super(context);
this.xrefChecker = context.xrefChecker.get();
}

@Override
Expand Down Expand Up @@ -76,7 +75,7 @@ public void startElement()
{
report.info(path, FeatureEnum.REFERENCE, srcURL.toString());
}
xrefChecker.registerReference(srcURL, XRefChecker.Type.HYPERLINK, location());
registerReference(srcURL, Reference.Type.HYPERLINK);
}
}
else if ("meta".equals(name))
Expand Down
4 changes: 0 additions & 4 deletions src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java
Expand Up @@ -48,8 +48,6 @@
import com.adobe.epubcheck.opf.OPFItem;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.ValidationContext.ValidationContextBuilder;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.overlay.OverlayTextChecker;
import com.adobe.epubcheck.util.CheckUtil;
import com.adobe.epubcheck.util.EPUBVersion;
import com.adobe.epubcheck.util.FeatureEnum;
Expand Down Expand Up @@ -170,8 +168,6 @@ public void check()

opfContext.container(container);
opfContext.pubTypes(state.getPublicationTypes(packageDoc));
opfContext.xrefChecker(new XRefChecker(state.context().build()));
opfContext.overlayTextChecker(new OverlayTextChecker());

Checker opfChecker = CheckerFactory.newChecker(opfContext.build());
assert opfChecker instanceof OPFChecker;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/adobe/epubcheck/ocf/OCFContainer.java
Expand Up @@ -7,7 +7,7 @@
import java.util.Set;
import java.util.UUID;

import org.w3c.epubcheck.url.URLUtils;
import org.w3c.epubcheck.util.url.URLUtils;

import com.adobe.epubcheck.ocf.encryption.EncryptionFilter;
import com.adobe.epubcheck.util.GenericResourceProvider;
Expand Down

0 comments on commit 545b7f7

Please sign in to comment.