Remove MATCH_GRISU3_OUTPUT; always round even

- add some tests to check cases that previously did not match Grisu3 - there might be a tiny performance loss, but I couldn't confirm with the benchmark on my MacBook (because it's so close) - update the README to remove reference to the mode
ulfjack · May 30, 2018 · ed05761 · ed05761
1 parent ca7d50a
commit ed05761
Show file tree

Hide file tree

Showing 5 changed files with 74 additions and 64 deletions.
diff --git a/README.md b/README.md
@@ -141,13 +141,10 @@ $ java -cp out info.adams.ryu.benchmark.BenchmarkMain
 
 ### Grisu3
 
-We have added a mode to more closely match Grisu3 output, which can be
-enabled by setting the `MATCH_GRISU3_OUTPUT` preprocessor symbol. This only
-applies to values that are exactly halfway between two shortest decimal numbers;
-the generated strings for all other numbers are unaffected. In this mode, the
-benchmark also verfies that the generated strings are identical.
+Ryu's output should exactly match Grisu3's output. Our benchmark verifies that
+the generated numbers are identical.
 ```
-$ bazel run -c opt --copt=-DMATCH_GRISU3_OUTPUT //ryu/benchmark -- -64
+$ bazel run -c opt //ryu/benchmark -- -64
     Average & Stddev Ryu  Average & Stddev Grisu3
 64:   29.806    3.182      103.060   98.717
 ```

diff --git a/ryu/benchmark/benchmark.cc b/ryu/benchmark/benchmark.cc
@@ -137,8 +137,7 @@ static int bench32(int samples, int iterations, bool verbose) {
     if (throwaway == 12345) {
       printf("Argh!\n");
     }
-    if (strlen(own) != strlen(theirs)) {
-//    if (strcmp(own, theirs) != 0) {
+    if (strcmp(own, theirs) != 0) {
       printf("For %x %20s %20s\n", r, own, theirs);
     }
   }
@@ -184,11 +183,7 @@ static int bench64(int samples, int iterations, bool verbose) {
 
     char* own = bufferown;
     char* theirs = dcv(f);
-#ifdef MATCH_GRISU3_OUTPUT
     if (strcmp(own, theirs) != 0) {
-#else
-    if (strlen(own) != strlen(theirs)) {
-#endif
       printf("For %16" PRIX64 " %28s %28s\n", r, own, theirs);
     }
   }

diff --git a/ryu/d2s.c b/ryu/d2s.c
@@ -15,7 +15,6 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.
 
-// Compile with -DMATCH_GRISU3_OUTPUT to match Grisu3 output perfectly.
 // Compile with -DDEBUG to get very verbose debugging output to stdout.
 
 #include "ryu/ryu.h"
@@ -329,9 +328,7 @@ void d2s_buffered(double f, char* result) {
   uint64_t vr, vp, vm;
   int32_t e10;
   bool vmIsTrailingZeros = false;
-#ifdef MATCH_GRISU3_OUTPUT
   bool vrIsTrailingZeros = false;
-#endif
   if (e2 >= 0) {
     // I tried special-casing q == 0, but there was no effect on performance.
     int32_t q = max_uint32(0, ((int32_t) (((uint64_t) e2 * LOG10_2_NUMERATOR) / LOG10_2_DENOMINATOR)) - 1);
@@ -348,9 +345,7 @@ void d2s_buffered(double f, char* result) {
     if (q <= 21) {
       // Only one of mp, mv, and mm can be a multiple of 5, if any.
       if (mv % 5 == 0) {
-#ifdef MATCH_GRISU3_OUTPUT
         vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
-#endif
       } else {
         if (acceptBounds) {
           // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
@@ -378,36 +373,29 @@ void d2s_buffered(double f, char* result) {
     printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
 #endif
     if (q <= 1) {
-#ifdef MATCH_GRISU3_OUTPUT
-      vrIsTrailingZeros = (mv & ((1ull << q) - 1)) == 0;
-#endif
+      vrIsTrailingZeros = (~mv & 1) >= q;
       if (acceptBounds) {
         vmIsTrailingZeros = (~mm & 1) >= q;
       } else {
         vp -= 1 >= q;
       }
-    } else {
-#ifdef MATCH_GRISU3_OUTPUT
-      // We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q
-      // <=> ntz(mv) >= q  &&  pow5Factor(mv) - e2 >= q
-      // <=> ntz(mv) >= q
-      // <=> mv & ((1 << q) - 1) == 0
+    } else if (q < mantissaBits) {
+      // We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q-1
+      // <=> ntz(mv) >= q-1  &&  pow5Factor(mv) - e2 >= q-1
+      // <=> ntz(mv) >= q-1
+      // <=> mv & ((1 << (q-1)) - 1) == 0
       // We also need to make sure that the left shift does not overflow.
-      vrIsTrailingZeros =
-          ((q < mantissaBits) && ((mv & ((1ull << q) - 1)) == 0));
+      vrIsTrailingZeros = (mv & ((1ull << (q - 1)) - 1)) == 0;
 #if DEBUG
       printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
-#endif
 #endif
     }
   }
 #ifdef DEBUG
   printf("e10=%d\n", e10);
   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
-  printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
-#ifdef MATCH_GRISU3_OUTPUT
+  printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
-#endif
 #endif
 
   // Step 4: Find the shortest decimal representation in the interval of legal representations.
@@ -418,19 +406,13 @@ void d2s_buffered(double f, char* result) {
   int32_t lastRemovedDigit = 0;
   uint64_t output;
   // On average, we remove ~2 digits.
-  if (vmIsTrailingZeros
-#ifdef MATCH_GRISU3_OUTPUT
-      || vrIsTrailingZeros
-#endif
-      ) {
+  if (vmIsTrailingZeros || vrIsTrailingZeros) {
     // General case, which happens rarely (<1%).
     while (vp / 10 > vm / 10) {
       // The compiler does not realize that vm % 10 can be computed from vm / 10
       // as vm - (vm / 10) * 10.
       vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0; // vm % 10 == 0;
-#ifdef MATCH_GRISU3_OUTPUT
       vrIsTrailingZeros &= lastRemovedDigit == 0;
-#endif
       uint64_t nvr = vr / 10;
       lastRemovedDigit = vr - 10 * nvr;
       vr = nvr;
@@ -445,9 +427,7 @@ void d2s_buffered(double f, char* result) {
     // Same as above; use vm % 10 == vm - (vm / 10) * 10.
     if (vmIsTrailingZeros) {
       while (vm - (vm / 10) * 10 == 0) {
-#ifdef MATCH_GRISU3_OUTPUT
         vrIsTrailingZeros &= lastRemovedDigit == 0;
-#endif
         uint64_t nvr = vr / 10;
         lastRemovedDigit = vr - 10 * nvr;
         vr = nvr;
@@ -458,17 +438,12 @@ void d2s_buffered(double f, char* result) {
     }
 #ifdef DEBUG
     printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
-#ifdef MATCH_GRISU3_OUTPUT
     printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
 #endif
-#endif
-#ifdef MATCH_GRISU3_OUTPUT
-    if (vrIsTrailingZeros && (lastRemovedDigit == 5)) {
-      if (vr % 10 != 7) {
-        lastRemovedDigit = 4;
-      }
+    if (vrIsTrailingZeros && (lastRemovedDigit == 5) && (vr % 2 == 0)) {
+      // Round down not up if the number ends in X50000.
+      lastRemovedDigit = 4;
     }
-#endif
     // We need to take vr+1 if vr is outside bounds or we need to round up.
     output = vr +
         ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
@@ -484,9 +459,7 @@ void d2s_buffered(double f, char* result) {
     }
 #ifdef DEBUG
     printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
-#ifdef MATCH_GRISU3_OUTPUT
     printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
-#endif
 #endif
     // We need to take vr+1 if vr is outside bounds or we need to round up.
     output = vr + ((vr == vm) || (lastRemovedDigit >= 5));

diff --git a/ryu/f2s.c b/ryu/f2s.c
@@ -90,6 +90,10 @@ static uint32_t pow5Factor(uint32_t value) {
   return 0;
 }
 
+static inline bool multipleOfPowerOf5(uint32_t value, int32_t p) {
+  return pow5Factor(value) >= p;
+}
+
 static inline uint32_t pow5bits(int32_t e) {
   return e == 0 ? 1 : (uint32_t) ((e * LOG2_5_NUMERATOR + LOG2_5_DENOMINATOR - 1) / LOG2_5_DENOMINATOR);
 }
@@ -180,6 +184,7 @@ void f2s_buffered(float f, char* result) {
   uint32_t vp, vm;
   int32_t e10;
   bool vmIsTrailingZeros = false;
+  bool vrIsTrailingZeros = false;
   uint32_t lastRemovedDigit = 0;
   if (e2 >= 0) {
     int32_t q = (int32_t) ((e2 * LOG10_2_NUMERATOR) / LOG10_2_DENOMINATOR);
@@ -189,6 +194,10 @@ void f2s_buffered(float f, char* result) {
     vr = (uint32_t) mulPow5InvDivPow2(mv, q, i);
     vp = (uint32_t) mulPow5InvDivPow2(mp, q, i);
     vm = (uint32_t) mulPow5InvDivPow2(mm, q, i);
+#ifdef DEBUG
+    printf("%d * 2^%d / 10^%d\n", mv, e2, q);
+    printf("V+=%d\nV =%d\nV-=%d\n", vp, vr, vm);
+#endif
     if (q != 0 && ((vp - 1) / 10 <= vm / 10)) {
       // We need to know one removed digit even if we are not going to loop below. We could use
       // q = X - 1 above, except that would require 33 bits for the result, and we've found that
@@ -197,10 +206,15 @@ void f2s_buffered(float f, char* result) {
       lastRemovedDigit = (uint32_t) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
     }
     if (q <= 9) {
-      if (acceptBounds) {
-        vmIsTrailingZeros = pow5Factor(mm) >= q;
+      // Only one of mp, mv, and mm can be a multiple of 5, if any.
+      if (mv % 5 == 0) {
+        vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
       } else {
-        vp -= pow5Factor(mp) >= q;
+        if (acceptBounds) {
+          vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
+        } else {
+          vp -= multipleOfPowerOf5(mp, q);
+        }
       }
     }
   } else {
@@ -212,40 +226,54 @@ void f2s_buffered(float f, char* result) {
     vr = (uint32_t) mulPow5divPow2(mv, i, j);
     vp = (uint32_t) mulPow5divPow2(mp, i, j);
     vm = (uint32_t) mulPow5divPow2(mm, i, j);
+#ifdef DEBUG
+    printf("%d * 5^%d / 10^%d\n", mv, -e2, q);
+    printf("%d %d %d %d\n", q, i, k, j);
+    printf("V+=%d\nV =%d\nV-=%d\n", vp, vr, vm);
+#endif
     if (q != 0 && ((vp - 1) / 10 <= vm / 10)) {
       j = q - 1 - (pow5bits(i + 1) - POW5_BITCOUNT);
       lastRemovedDigit = (uint32_t) (mulPow5divPow2(mv, i + 1, j) % 10);
     }
-    if (acceptBounds) {
-      vmIsTrailingZeros = (~mm & 1) >= q;
-    } else {
-      vp -= 1 >= q;
+    if (q <= 1) {
+      vrIsTrailingZeros = (~mv & 1) >= q;
+      if (acceptBounds) {
+        vmIsTrailingZeros = (~mm & 1) >= q;
+      } else {
+        vp -= 1 >= q;
+      }
+    } else if (q < mantissaBits) {
+      vrIsTrailingZeros = (mv & ((1 << (q - 1)) - 1)) == 0;
     }
   }
 #ifdef DEBUG
   printf("e10=%d\n", e10);
   printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
-  printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
+  printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
+  printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
 #endif
 
   // Step 4: Find the shortest decimal representation in the interval of legal representations.
   uint32_t vplength = decimalLength(vp);
   int32_t exp = e10 + vplength - 1;
 
   uint32_t removed = 0;
-  if (vmIsTrailingZeros) {
+  uint32_t output;
+  if (vmIsTrailingZeros || vrIsTrailingZeros) {
     // General case, which happens rarely.
     while (vp / 10 > vm / 10) {
       vmIsTrailingZeros &= vm % 10 == 0;
+      vrIsTrailingZeros &= lastRemovedDigit == 0;
       uint64_t nvr = vr / 10;
       lastRemovedDigit = vr - 10 * nvr;
       vr = nvr;
       vp /= 10;
       vm /= 10;
       removed++;
     }
-    if (vmIsTrailingZeros && acceptBounds) {
+    if (vmIsTrailingZeros) {
       while (vm % 10 == 0) {
+        vrIsTrailingZeros &= lastRemovedDigit == 0;
         uint64_t nvr = vr / 10;
         lastRemovedDigit = vr - 10 * nvr;
         vr = nvr;
@@ -254,6 +282,13 @@ void f2s_buffered(float f, char* result) {
         removed++;
       }
     }
+    if (vrIsTrailingZeros && (lastRemovedDigit == 5) && (vr % 2 == 0)) {
+      // Round down not up if the number ends in X50000.
+      lastRemovedDigit = 4;
+    }
+    // We need to take vr+1 if vr is outside bounds or we need to round up.
+    output = vr +
+        ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
   } else {
     // Common case.
     while (vp / 10 > vm / 10) {
@@ -264,9 +299,9 @@ void f2s_buffered(float f, char* result) {
       vm /= 10;
       removed++;
     }
+    // We need to take vr+1 if vr is outside bounds or we need to round up.
+    output = vr + ((vr == vm) || (lastRemovedDigit >= 5));
   }
-  uint32_t output = vr +
-      ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
   uint32_t olength = vplength - removed;
 
   // Step 5: Print the decimal representation.

diff --git a/ryu/tests/f2s_test.cc b/ryu/tests/f2s_test.cc
@@ -45,12 +45,22 @@ TEST(F2sTest, MinAndMax) {
   ASSERT_STREQ("1E-45", f2s(int32Bits2Float(1)));
 }
 
-TEST(F2sTest, RoundingModeEven) {
+// Check that we return the exact boundary if it is the shortest
+// representation, but only if the original floating point number is even.
+TEST(F2sTest, BoundaryRoundEven) {
   ASSERT_STREQ("3.355445E7", f2s(3.355445E7f));
   ASSERT_STREQ("9E9", f2s(8.999999E9f));
   ASSERT_STREQ("3.436672E10", f2s(3.4366717E10f));
 }
 
+// If the exact value is exactly halfway between two shortest representations,
+// then we round to even. It seems like this only makes a difference if the
+// last two digits are ...2|5 or ...7|5, and we cut off the 5.
+TEST(F2sTest, ExactValueRoundEven) {
+  ASSERT_STREQ("3.0540412E5", f2s(3.0540412E5f));
+  ASSERT_STREQ("8.0990312E3", f2s(8.0990312E3f));
+}
+
 TEST(F2sTest, Regression) {
   ASSERT_STREQ("4.7223665E21", f2s(4.7223665E21f));
   ASSERT_STREQ("8.388608E6", f2s(8388608.0f));