CLDR-14824 Generate new charts, and make any necessary fixes. (#1315)

* CLDR-14824 Generate new charts, and make any necessary fixes.
* CLDR-14824 Fix problems in build (also puts the odd-ball untranslated units in one place).
unicode-org · Jun 12, 2021 · 4c02213 · 4c02213
1 parent fecc3de
commit 4c02213
Show file tree

Hide file tree

Showing 9 changed files with 198 additions and 28 deletions.
diff --git a/common/main/en.xml b/common/main/en.xml
@@ -6236,6 +6236,11 @@ annotations.
 				<unitPattern count="one">{0} mole</unitPattern>
 				<unitPattern count="other">{0} moles</unitPattern>
 			</unit>
+			<unit type="concentr-item">
+				<displayName>items</displayName>
+				<unitPattern count="one">{0} item</unitPattern>
+				<unitPattern count="other">{0} items</unitPattern>
+			</unit>
 			<unit type="consumption-liter-per-kilometer">
 				<displayName>liters per kilometer</displayName>
 				<unitPattern count="one">{0} liter per kilometer</unitPattern>
@@ -7178,6 +7183,11 @@ annotations.
 				<unitPattern count="one">{0} mol</unitPattern>
 				<unitPattern count="other">{0} mol</unitPattern>
 			</unit>
+			<unit type="concentr-item">
+				<displayName>item</displayName>
+				<unitPattern count="one">{0} item</unitPattern>
+				<unitPattern count="other">{0} item</unitPattern>
+			</unit>
 			<unit type="consumption-liter-per-kilometer">
 				<displayName>liters/km</displayName>
 				<unitPattern count="one">{0} L/km</unitPattern>
@@ -8047,6 +8057,11 @@ annotations.
 				<unitPattern count="one">{0}mol</unitPattern>
 				<unitPattern count="other">{0}mol</unitPattern>
 			</unit>
+			<unit type="concentr-item">
+				<displayName>item</displayName>
+				<unitPattern count="one">{0}item</unitPattern>
+				<unitPattern count="other">{0}item</unitPattern>
+			</unit>
 			<unit type="consumption-liter-per-kilometer">
 				<displayName>L/km</displayName>
 				<unitPattern count="one">{0}L/km</unitPattern>

diff --git a/common/main/root.xml b/common/main/root.xml
@@ -4475,6 +4475,10 @@ for derived annotations.
 				<displayName>mol</displayName>
 				<unitPattern count="other">{0} mol</unitPattern>
 			</unit>
+			<unit type="concentr-item">
+				<displayName>item</displayName>
+				<unitPattern count="other">{0} item</unitPattern>
+			</unit>
 			<unit type="consumption-liter-per-kilometer">
 				<displayName>L/km</displayName>
 				<unitPattern count="other">{0} L/km</unitPattern>

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartGrammaticalForms.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartGrammaticalForms.java
@@ -318,9 +318,9 @@ public void writeSubcharts(Anchors anchors) throws IOException {
         }
         unitToBestUnit = ImmutableMap.copyOf(unitToBestUnit);
         // quick check
-        final BestUnitForGender u1 = unitToBestUnit.get("meter");
-        final BestUnitForGender u2 = unitToBestUnit.get("square-centimeter");
-        int comp = u1.compareTo(u2); // should be less
+//        final BestUnitForGender u1 = unitToBestUnit.get("meter");
+//        final BestUnitForGender u2 = unitToBestUnit.get("square-centimeter");
+//        int comp = u1.compareTo(u2); // should be less
 
         Set<BestUnitForGender> sorted2 = new TreeSet<>(unitToBestUnit.values());
         System.out.println(sorted2);
@@ -400,8 +400,13 @@ public void writeSubcharts(Anchors anchors) throws IOException {
                     String quantity = shortUnit.contentEquals("generic") ? "temperature" : uc.getQuantityFromUnit(shortUnit, false);
 
                     String gender = UnitPathType.gender.getTrans(cldrFile, "long", shortUnit, null, null, null, null);
+                    gender = gender == null ? "n/a" : gender;
                     Set<String> systems = uc.getSystems(shortUnit);
 
+                    if (unitCell == null || quantity == null || gender == null || sizeInBaseUnits.value == null) {
+                        throw new IllegalArgumentException("No best base unit for: " + shortUnit);
+                    }
+
                     for (String case1 : sortedCases) { //
                         // start a row, then add the cells in the row.
                         caseTablePrinter
@@ -455,7 +460,7 @@ public void writeSubcharts(Anchors anchors) throws IOException {
                     final String shortUnit = uc.getShortId(longUnit);
                     String gender = UnitPathType.gender.getTrans(cldrFile, "long", shortUnit, null, null, null, null);
                     final BestUnitForGender bestUnit = unitToBestUnit.get(shortUnit);
-                    if (bestUnit != null) {
+                    if (gender != null && bestUnit != null) {
                         bestUnitForGender.put(gender, bestUnit);
                     }
                 }
@@ -615,7 +620,7 @@ public static String getBestBaseUnit(UnitConverter uc, final String shortUnit, O
                     final UnitId unitId = uc.createUnitId(bestUnit);
                     unitPattern = unitId.toString(ENGLISH, "long", pluralCategory, null, null, false);
                     if (unitPattern == null) {
-                        int debug = 0;
+                        return null;
                     }
                 }
                 String unitMeasure = MessageFormat.format(unitPattern, string.contains("/") ? "~" + bestDoubleFactor : string);

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateAllCharts.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateAllCharts.java
@@ -21,12 +21,12 @@ public static void main(String[] args) throws Exception {
         FileCopier.copy(GenerateAllCharts.class, "main-index.html", CLDRPaths.CHART_DIRECTORY, "index.html");
         FormattedFileWriter.copyIncludeHtmls(CLDRPaths.CHART_DIRECTORY);
 
-        ShowLanguages.main(args);
-
         if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) {
             new ChartGrammaticalForms().writeChart(null);
         }
 
+        ShowLanguages.main(args);
+
         new ChartAnnotations().writeChart(null);
         new ChartSubdivisionNames().writeChart(null);
         GenerateBcp47Text.main(args);

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ListGrammarInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ListGrammarInfo.java
@@ -0,0 +1,73 @@
+package org.unicode.cldr.tool;
+
+import java.util.Collection;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.unicode.cldr.util.CLDRConfig;
+import org.unicode.cldr.util.CLDRFile;
+import org.unicode.cldr.util.GrammarInfo;
+import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
+import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
+import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
+import org.unicode.cldr.util.LanguageTagParser;
+import org.unicode.cldr.util.SupplementalDataInfo;
+
+import com.google.common.base.Joiner;
+
+/**
+ * Command-line tool that prints three tab-separated lines to standard output: the grammar
+ * locales whose unit grammar has more than one gender value only, more than one case value
+ * only, or both. The root locale and any locale with a region subtag are skipped.
+ */
+public class ListGrammarInfo {
+    public static final CLDRConfig CONFIG = CLDRConfig.getInstance();
+    public static final SupplementalDataInfo SDI = CONFIG.getSupplementalDataInfo();
+    public static final CLDRFile english = CONFIG.getEnglish();
+
+    /**
+     * Classifies every locale returned by {@code GrammarInfo.getGrammarLocales()} and prints
+     * the sorted results.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        Set<String> grammarLocales = GrammarInfo.getGrammarLocales();
+        LanguageTagParser parser = new LanguageTagParser();
+        Set<String> genderOnly = new TreeSet<>();
+        Set<String> caseOnly = new TreeSet<>();
+        Set<String> genderAndCase = new TreeSet<>();
+
+        for (String locale : grammarLocales) {
+            if (locale.equals("root")) {
+                continue;
+            }
+            // Only consider locales without a region subtag.
+            parser.set(locale);
+            String region = parser.getRegion();
+            if (!region.isEmpty()) {
+                continue;
+            }
+            GrammarInfo info = SDI.getGrammarInfo(locale, true);
+            if (info == null || !info.hasInfo(GrammaticalTarget.nominal)) {
+                continue;
+            }
+
+            Collection<String> genders = info.get(
+                GrammaticalTarget.nominal, GrammaticalFeature.grammaticalGender, GrammaticalScope.units);
+            Collection<String> rawCases = info.get(
+                GrammaticalTarget.nominal, GrammaticalFeature.grammaticalCase, GrammaticalScope.units);
+
+            // A feature only counts when it distinguishes at least two values.
+            boolean multipleGenders = genders != null && genders.size() > 1;
+            boolean multipleCases = rawCases != null && rawCases.size() > 1;
+
+            if (multipleGenders && multipleCases) {
+                genderAndCase.add(format(locale, genders, rawCases));
+            } else if (multipleGenders) {
+                genderOnly.add(format(locale, genders));
+            } else if (multipleCases) {
+                caseOnly.add(format(locale, rawCases));
+            }
+        }
+
+        Joiner commaJoiner = Joiner.on(", ");
+        System.out.println("Gender\t" + commaJoiner.join(genderOnly));
+        System.out.println("Case\t" + commaJoiner.join(caseOnly));
+        System.out.println("Gender & Case\t" + commaJoiner.join(genderAndCase));
+    }
+
+    /**
+     * Formats a locale that has both features as
+     * {@code name (locale/genderCount×caseCount)}, where the name comes from {@link #english}.
+     */
+    private static String format(String locale, Collection<String> genders, Collection<String> rawCases) {
+        String counts = genders.size() + "×" + rawCases.size();
+        return english.getName(locale) + " (" + locale + "/" + counts + ")";
+    }
+
+    /**
+     * Formats a locale that has a single feature as {@code name (locale/count)}, where the
+     * name comes from {@link #english}. Despite the parameter name, {@code main} passes
+     * either the gender values or the case values here.
+     *
+     * @param locale locale ID
+     * @param genders the values of the one feature being reported; only its size is used
+     * @return the formatted entry
+     */
+    public static String format(String locale, Collection<String> genders) {
+        int count = genders.size();
+        return english.getName(locale) + " (" + locale + "/" + count + ")";
+    }
+}
+
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java
@@ -36,16 +36,17 @@ public class GrammarInfo implements Freezable<GrammarInfo>{
     public enum GrammaticalTarget {nominal}
 
     /**
-     * The ordering of these values is intended to put the default values first, and to group values together that tend to have similar forms.
+     * The ordering of these values is intended to put the default values first, and to group values together that tend to have similar forms for the most common cases,
+     * then have the rest in alphabetical order.
      */
-    public enum CaseValues {nominative, vocative, accusative, oblique, genitive, dative, locative, instrumental, prepositional,
-        ablative, adessive, allative, causal, delative, elative, essive, illative, inessive, sublative, superessive, terminative, translative;
+    public enum CaseValues {nominative, vocative, accusative, oblique, genitive, dative, locative, instrumental, prepositional, ablative,
+        abessive, adessive, allative, causal, comitative, delative, elative, ergative, essive, illative, inessive, locativecopulative, partitive, sociative, sublative, superessive, terminative, translative;
         public static Comparator<String> COMPARATOR = EnumComparator.create(CaseValues.class);
     }
     public enum GenderValues {neuter, masculine, inanimate, animate, common, personal, feminine;
         public static Comparator<String> COMPARATOR = EnumComparator.create(GenderValues.class);
     }
-    public enum DefinitenessValues {indefinite, definite, construct;
+    public enum DefinitenessValues {unspecified, indefinite, definite, construct;
         public static Comparator<String> COMPARATOR = EnumComparator.create(DefinitenessValues.class);
     }
     public enum PluralValues {zero, one, two, few, many, other;
@@ -143,11 +144,27 @@ public void add(GrammaticalTarget target, GrammaticalFeature feature, Grammatica
             if (values == null) {
                 usageToValues.put(usage, values = new TreeSet<>());
             }
+            validate(feature, valueSet);
             values.addAll(valueSet);
         }
     }
 
 
+    /**
+     * Validates each value in the collection against the given feature by delegating to
+     * {@link #validate(GrammaticalFeature, String)}.
+     *
+     * @param feature the grammatical feature the values belong to
+     * @param valueSet the values to check
+     */
+    private void validate(GrammaticalFeature feature, Collection<String> valueSet) {
+        for (String candidate : valueSet) {
+            validate(feature, candidate);
+        }
+    }
+
+    /**
+     * Validates a single value by looking it up with {@code valueOf} in the enum that
+     * corresponds to the feature. The lookup result is discarded; the call exists only so
+     * that an unknown name makes {@code valueOf} throw.
+     *
+     * @param feature the grammatical feature the value belongs to
+     * @param value the value name to check
+     * @throws IllegalArgumentException (from {@code valueOf}) if the enum has no such constant
+     */
+    private void validate(GrammaticalFeature feature, String value) {
+        switch (feature) {
+        case grammaticalCase:
+            CaseValues.valueOf(value);
+            break;
+        case grammaticalDefiniteness:
+            DefinitenessValues.valueOf(value);
+            break;
+        case grammaticalGender:
+            GenderValues.valueOf(value);
+            break;
+        case grammaticalNumber:
+            PluralValues.valueOf(value);
+            break;
+        }
+    }
+
     /**
      * Note: when there is known to be no features, the featureRaw will be null
      * Only internal */
@@ -484,12 +501,12 @@ private void getSourceCaseAndPluralPolish(String gender, String value,
     static class GrammarLocales {
         static final Set<String> data = ImmutableSortedSet.copyOf(ImmutableSet.<String>builder()
             .addAll(
-            CLDRConfig.getInstance().getSupplementalDataInfo()
-            .getLocalesWithFeatures(GrammaticalTarget.nominal, GrammaticalScope.units, GrammaticalFeature.grammaticalCase))
+                CLDRConfig.getInstance().getSupplementalDataInfo()
+                .getLocalesWithFeatures(GrammaticalTarget.nominal, GrammaticalScope.units, GrammaticalFeature.grammaticalCase))
             .addAll(
-            CLDRConfig.getInstance().getSupplementalDataInfo()
-            .getLocalesWithFeatures(GrammaticalTarget.nominal, GrammaticalScope.units, GrammaticalFeature.grammaticalGender)
-            ).build());
+                CLDRConfig.getInstance().getSupplementalDataInfo()
+                .getLocalesWithFeatures(GrammaticalTarget.nominal, GrammaticalScope.units, GrammaticalFeature.grammaticalGender)
+                ).build());
     }
 
     /**

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
@@ -10,6 +10,7 @@
 import java.util.EnumSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -53,7 +54,20 @@ public class UnitConverter implements Freezable<UnitConverter> {
     static final Splitter BAR_SPLITTER = Splitter.on('-');
     static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings();
 
-    public static final Set<String> HACK_SKIP_UNIT_NAMES = ImmutableSet.of("dot-per-centimeter", "dot-per-inch", "liter-per-100-kilometer", "millimeter-ofhg", "inch-ofhg");
+    public static final Set<String> UNTRANSLATED_UNIT_NAMES = ImmutableSet.of(
+        "portion",
+        "ofglucose",
+        "100-kilometer",
+        "ofhg");
+
+    public static final Set<String> HACK_SKIP_UNIT_NAMES = ImmutableSet.of(
+        // skip dot because pixel is preferred
+        "dot-per-centimeter",
+        "dot-per-inch",
+        // skip because a component is not translated
+        "liter-per-100-kilometer",
+        "millimeter-ofhg",
+        "inch-ofhg");
 
 
     final RationalParser rationalParser;
@@ -1543,10 +1557,32 @@ public String getLongId(String shortUnitId) {
         return CldrUtility.ifNull(SHORT_TO_LONG_ID.get(shortUnitId), shortUnitId);
     }
 
+    /**
+     * Maps each short unit ID to its long unit ID using {@code SHORT_TO_LONG_ID}.
+     * IDs with no mapping are omitted from the result rather than passed through.
+     *
+     * @param shortUnitIds the short unit IDs to convert
+     * @return an immutable set of the long IDs that were found, in the order their short IDs
+     *         were first encountered
+     */
+    public Set<String> getLongIds(Iterable<String> shortUnitIds) {
+        LinkedHashSet<String> result = new LinkedHashSet<>();
+        for (String shortUnitId : shortUnitIds) {
+            String longId = SHORT_TO_LONG_ID.get(shortUnitId);
+            if (longId != null) {
+                result.add(longId);
+            }
+        }
+        return ImmutableSet.copyOf(result);
+    }
+
     public String getShortId(String longUnitId) {
         return CldrUtility.ifNull(SHORT_TO_LONG_ID.inverse().get(longUnitId), longUnitId);
     }
 
+    /**
+     * Maps each long unit ID to its short unit ID using the inverse of
+     * {@code SHORT_TO_LONG_ID}. IDs with no mapping are omitted from the result.
+     *
+     * @param longUnitIds the long unit IDs to convert
+     * @return an immutable set of the short IDs that were found, in the order their long IDs
+     *         were first encountered
+     */
+    public Set<String> getShortIds(Iterable<String> longUnitIds) {
+        final LinkedHashSet<String> shortIds = new LinkedHashSet<>();
+        for (final String candidate : longUnitIds) {
+            final String mapped = SHORT_TO_LONG_ID.inverse().get(candidate);
+            if (mapped == null) {
+                continue; // no short form is registered for this ID
+            }
+            shortIds.add(mapped);
+        }
+        return ImmutableSet.copyOf(shortIds);
+    }
+
     public Multimap<String, Continuation> getContinuations() {
         return continuations;
     }

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/ValuePathStatus.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/ValuePathStatus.java
@@ -45,6 +45,8 @@ static int countZeros(String otherValue) {
         return result;
     }
 
+    static final UnicodeSet ASCII_DIGITS = new UnicodeSet("[0-9]");
+
     public static boolean isMissingOk(CLDRFile sourceFile, String path, boolean latin, boolean aliased) {
         if (sourceFile.getLocaleID().equals("en")) {
             return true;
@@ -68,12 +70,16 @@ public static boolean isMissingOk(CLDRFile sourceFile, String path, boolean lati
             return aliased;
         case compact:
             // special processing for compact numbers
+            // //ldml/numbers/decimalFormats[@numberSystem="%A"]/decimalFormatLength[@type="%A"]/decimalFormat[@type="standard"]/pattern[@type="%A"][@count="%A"] ; compact
             if (path.contains("[@count=\"other\"]")) {
                 return false; // the 'other' class always counts as missing
             }
-            String otherPath = "//ldml/numbers/decimalFormats[@numberSystem=\"" + arguments.value[1]
-                + "\"]/decimalFormatLength[@type=\"" + arguments.value[2]
-                + "\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"" + arguments.value[3]
+            final String numberSystem = arguments.value[1];
+            final String formatLength = arguments.value[2];
+            final String patternType = arguments.value[3];
+            String otherPath = "//ldml/numbers/decimalFormats[@numberSystem=\"" + numberSystem
+                + "\"]/decimalFormatLength[@type=\"" + formatLength
+                + "\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"" + patternType
                 + "\"][@count=\"other\"]";
             String otherValue = sourceFile.getWinningValue(otherPath);
             if (otherValue == null) {
@@ -83,8 +89,13 @@ public static boolean isMissingOk(CLDRFile sourceFile, String path, boolean lati
             if (digits > 4) { // we can only handle to 4 digits
                 return false;
             }
-            // if there are no possible Count values for this many digits, then it is ok to be missing.
-            Count c = Count.valueOf(arguments.value[4]);
+            // If the count is numeric or if there are no possible Count values for this many digits, then it is ok to be missing.
+            final String count = arguments.value[4];
+            if (ASCII_DIGITS.containsAll(count)) {
+                return true; // ok to be missing
+            }
+            Count c = Count.valueOf(count);
+
             SupplementalDataInfo supplementalDataInfo2 = CLDRConfig.getInstance().getSupplementalDataInfo();
             // SupplementalDataInfo.getInstance(sourceFile.getSupplementalDirectory());
             PluralInfo plurals = supplementalDataInfo2.getPlurals(sourceFile.getLocaleID());