w3c
diff --git a/‎src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java
Lines changed: 0 additions & 1 deletion b/‎src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/main/java/com/adobe/epubcheck/messages/MessageId.java
Lines changed: 0 additions & 1 deletion b/‎src/main/java/com/adobe/epubcheck/messages/MessageId.java
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java
Lines changed: 13 additions & 16 deletions b/‎src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java
Lines changed: 13 additions & 16 deletions
diff --git a/‎src/main/java/org/w3c/epubcheck/util/text/UnicodeUtils.java
Lines changed: 39 additions & 0 deletions b/‎src/main/java/org/w3c/epubcheck/util/text/UnicodeUtils.java
Lines changed: 39 additions & 0 deletions
diff --git a/‎src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties
Lines changed: 1 addition & 2 deletions b/‎src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties
Lines changed: 1 addition & 2 deletions
diff --git a/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-unicode-normalization-warning.epub renamed to ‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-canonical-normalization-error.epub b/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-unicode-normalization-warning.epub renamed to ‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-canonical-normalization-error.epub
diff --git a/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-case-normalization-error.epub renamed to ‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-common-case-folding-error.epub b/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-case-normalization-error.epub renamed to ‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-common-case-folding-error.epub
diff --git a/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-compatibility-normalization-valid.epub
1.78 KB b/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-compatibility-normalization-valid.epub
1.78 KB
diff --git a/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-full-case-folding-error.epub
1.81 KB b/‎src/test/resources/epub3/04-ocf/files/ocf-filename-duplicate-after-full-case-folding-error.epub
1.81 KB
diff --git a/‎src/test/resources/epub3/04-ocf/ocf.feature
Lines changed: 16 additions & 5 deletions b/‎src/test/resources/epub3/04-ocf/ocf.feature
Lines changed: 16 additions & 5 deletions
@@ -248,7 +248,6 @@ private void initialize()
     severities.put(MessageId.OPF_058, Severity.SUPPRESSED);
     severities.put(MessageId.OPF_059, Severity.SUPPRESSED);
     severities.put(MessageId.OPF_060, Severity.ERROR);
-    severities.put(MessageId.OPF_061, Severity.WARNING);
     severities.put(MessageId.OPF_062, Severity.USAGE);
     severities.put(MessageId.OPF_063, Severity.WARNING);
     severities.put(MessageId.OPF_064, Severity.INFO);
 
@@ -242,7 +242,6 @@ public enum MessageId implements Comparable<MessageId>
   OPF_058("OPF-058"),
   OPF_059("OPF-059"),
   OPF_060("OPF-060"),
-  OPF_061("OPF-061"),
   OPF_062("OPF-062"),
   OPF_063("OPF-063"),
   OPF_064("OPF-064"),
 
@@ -25,18 +25,17 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.text.Normalizer;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 
 import org.w3c.epubcheck.constants.MIMEType;
 import org.w3c.epubcheck.core.AbstractChecker;
 import org.w3c.epubcheck.core.Checker;
 import org.w3c.epubcheck.core.CheckerFactory;
+import org.w3c.epubcheck.util.text.UnicodeUtils;
 
 import com.adobe.epubcheck.api.EPUBLocation;
 import com.adobe.epubcheck.api.EPUBProfile;
@@ -296,18 +295,6 @@ private boolean checkContainerStructure(OCFCheckerState state)
 
         // FIXME 2022 report symbolic links and continue
 
-        // Check duplicate entries
-        if (normalizedPaths.contains(resource.getPath().toLowerCase(Locale.ROOT)))
-        {
-          context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath());
-        }
-        // Check duplicate entries after NFC normalization
-        else if (normalizedPaths.contains(
-            Normalizer.normalize(resource.getPath().toLowerCase(Locale.ROOT), Normalizer.Form.NFC)))
-        {
-          context.report.message(MessageId.OPF_061, EPUBLocation.of(context), resource.getPath());
-        }
-
         // Store the resource in the data structure
         if (resource.isDirectory())
         {
@@ -318,9 +305,19 @@ else if (normalizedPaths.contains(
         else
         {
           // The container resource is a file,
-          // sStore its path for later checking of empty directories
+          // store its path for later checking of empty directories
           filePaths.add(resource.getPath());
-          normalizedPaths.add(resource.getPath().toLowerCase(Locale.ROOT));
+
+          // Check duplicate entries
+          String normalizedPath = UnicodeUtils.canonicalCaseFold(resource.getPath());
+          if (normalizedPaths.contains(normalizedPath))
+          {
+            context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath());
+          }
+          else
+          {
+            normalizedPaths.add(normalizedPath);
+          }
 
           // Check file name requirements
           new OCFFilenameChecker(resource.getPath(), state.context().build()).check();
 
@@ -0,0 +1,39 @@
+package org.w3c.epubcheck.util.text;
+
+import com.google.common.base.Preconditions;
+import com.ibm.icu.text.CaseMap;
+import com.ibm.icu.text.Normalizer2;
+
+public final class UnicodeUtils
+{
+
+  private static final Normalizer2 NFD_NORMALIZER = Normalizer2.getNFDInstance();
+  private static final CaseMap.Fold CASE_FOLDER = CaseMap.fold();
+
+  private UnicodeUtils()
+  {
+    // static utility class
+  }
+
+  /**
+   * Applies Unicode Canonical Case Fold Normalization as defined in
+   * https://www.w3.org/TR/charmod-norm/#CanonicalFoldNormalizationStep
+   * 
+   * This applies, in sequence: canonical decomposition (NFD), then case folding.
+   * 
+   * Note that the result is **not** recomposed (NFC), i.e. the optional
+   * post-folding NFC normalization is not applied.
+   * 
+   * In other words, the result is suitable for case-insensitive string
+   * comparison, but not for display.
+   * 
+   * @param string
+   *          the string to normalize (must not be null)
+   * @return the string normalized by applying NFD then case folding
+   */
+  public static String canonicalCaseFold(String string)
+  {
+    Preconditions.checkArgument(string != null);
+    return CASE_FOLDER.apply(NFD_NORMALIZER.normalize(string));
+  }
+}
@@ -238,8 +238,7 @@ OPF_058=Spine item "%1$s" is not referenced from the TOC in the Nav Doc.
 OPF_058_SUG=Every spine item in the manifest should be referenced by at least one TOC entry in the Nav Doc.
 OPF_059=Spine item "%1$s" is not referenced from the TOC in the NCX.
 OPF_059_SUG=Every spine item in the manifest should be referenced by at least one TOC entry in the NCX file.
-OPF_060=Duplicate entry in the ZIP file: "%1$s".
-OPF_061=Duplicate entry in the ZIP file (after Unicode NFC normalization) "%1$s".
+OPF_060=Duplicate entry in the ZIP file: "%1$s" (file names must be unique after Unicode canonical normalization and full case folding).
 OPF_062=Found Adobe page-map attribute on spine element in opf file.
 OPF_063=Referenced Adobe page-map item "%1$s" was not found in the manifest.
 OPF_064=OPF declares type "%1$s", validating using profile "%2$s".
 
@@ -29,17 +29,28 @@ Feature: EPUB 3 — Open Container Format
     Then no errors or warnings are reported
 
   @spec @xref:sec-container-filenames
-  Scenario: Report a duplicate filename if two files only differ by case
-    When checking EPUB 'ocf-filename-duplicate-after-case-normalization-error.epub'
+  Scenario: Report a duplicate filename after common case folding
+    When checking EPUB 'ocf-filename-duplicate-after-common-case-folding-error.epub'
     Then error OPF-060 is reported
     And no other errors or warnings are reported
 
   @spec @xref:sec-container-filenames
-  Scenario: Report a duplicate filename if two files have the same name after Unicode normalization
-    When checking EPUB 'ocf-filename-duplicate-after-unicode-normalization-warning.epub'
-    Then warning OPF-061 is reported
+  Scenario: Report a duplicate filename after full case folding
+    When checking EPUB 'ocf-filename-duplicate-after-full-case-folding-error.epub'
+    Then error OPF-060 is reported
+    And no other errors or warnings are reported
+
+  @spec @xref:sec-container-filenames
+  Scenario: Report a duplicate filename after Unicode canonical normalization (NFC)
+    When checking EPUB 'ocf-filename-duplicate-after-canonical-normalization-error.epub'
+    Then error OPF-060 is reported
     And no other errors or warnings are reported
 
+  @spec @xref:sec-container-filenames
+  Scenario: Allow a duplicate filename after Unicode compatibility normalization (NFKC)
+    When checking EPUB 'ocf-filename-duplicate-after-compatibility-normalization-valid.epub'
+    Then no other errors or warnings are reported
+
   @spec @xref:sec-container-filenames
   Scenario: Allow Unicode emoji tag set in file name
     When checking EPUB 'ocf-filename-character-emoji-tag-sequence-valid'