Skip to content

Commit

Permalink
Remove MATCH_GRISU3_OUTPUT; always round even
Browse files Browse the repository at this point in the history
- add some tests to check cases that previously did not match Grisu3
- there might be a tiny performance loss, but I couldn't confirm with the
  benchmark on my MacBook (because it's so close)
- update the README to remove reference to the mode
  • Loading branch information
ulfjack committed May 30, 2018
1 parent ca7d50a commit ed05761
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 64 deletions.
9 changes: 3 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,10 @@ $ java -cp out info.adams.ryu.benchmark.BenchmarkMain

### Grisu3

We have added a mode to more closely match Grisu3 output, which can be
enabled by setting the `MATCH_GRISU3_OUTPUT` preprocessor symbol. This only
applies to values that are exactly halfway between two shortest decimal numbers;
the generated strings for all other numbers are unaffected. In this mode, the
benchmark also verfies that the generated strings are identical.
Ryu's output should exactly match Grisu3's output. Our benchmark verifies that
the generated numbers are identical.
```
$ bazel run -c opt --copt=-DMATCH_GRISU3_OUTPUT //ryu/benchmark -- -64
$ bazel run -c opt //ryu/benchmark -- -64
Average & Stddev Ryu Average & Stddev Grisu3
64: 29.806 3.182 103.060 98.717
```
Expand Down
7 changes: 1 addition & 6 deletions ryu/benchmark/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ static int bench32(int samples, int iterations, bool verbose) {
if (throwaway == 12345) {
printf("Argh!\n");
}
if (strlen(own) != strlen(theirs)) {
// if (strcmp(own, theirs) != 0) {
if (strcmp(own, theirs) != 0) {
printf("For %x %20s %20s\n", r, own, theirs);
}
}
Expand Down Expand Up @@ -184,11 +183,7 @@ static int bench64(int samples, int iterations, bool verbose) {

char* own = bufferown;
char* theirs = dcv(f);
#ifdef MATCH_GRISU3_OUTPUT
if (strcmp(own, theirs) != 0) {
#else
if (strlen(own) != strlen(theirs)) {
#endif
printf("For %16" PRIX64 " %28s %28s\n", r, own, theirs);
}
}
Expand Down
51 changes: 12 additions & 39 deletions ryu/d2s.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.

// Compile with -DMATCH_GRISU3_OUTPUT to match Grisu3 output perfectly.
// Compile with -DDEBUG to get very verbose debugging output to stdout.

#include "ryu/ryu.h"
Expand Down Expand Up @@ -329,9 +328,7 @@ void d2s_buffered(double f, char* result) {
uint64_t vr, vp, vm;
int32_t e10;
bool vmIsTrailingZeros = false;
#ifdef MATCH_GRISU3_OUTPUT
bool vrIsTrailingZeros = false;
#endif
if (e2 >= 0) {
// I tried special-casing q == 0, but there was no effect on performance.
int32_t q = max_uint32(0, ((int32_t) (((uint64_t) e2 * LOG10_2_NUMERATOR) / LOG10_2_DENOMINATOR)) - 1);
Expand All @@ -348,9 +345,7 @@ void d2s_buffered(double f, char* result) {
if (q <= 21) {
// Only one of mp, mv, and mm can be a multiple of 5, if any.
if (mv % 5 == 0) {
#ifdef MATCH_GRISU3_OUTPUT
vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
#endif
} else {
if (acceptBounds) {
// Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
Expand Down Expand Up @@ -378,36 +373,29 @@ void d2s_buffered(double f, char* result) {
printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
#endif
if (q <= 1) {
#ifdef MATCH_GRISU3_OUTPUT
vrIsTrailingZeros = (mv & ((1ull << q) - 1)) == 0;
#endif
vrIsTrailingZeros = (~mv & 1) >= q;
if (acceptBounds) {
vmIsTrailingZeros = (~mm & 1) >= q;
} else {
vp -= 1 >= q;
}
} else {
#ifdef MATCH_GRISU3_OUTPUT
// We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q
// <=> ntz(mv) >= q && pow5Factor(mv) - e2 >= q
// <=> ntz(mv) >= q
// <=> mv & ((1 << q) - 1) == 0
} else if (q < mantissaBits) {
// We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q-1
// <=> ntz(mv) >= q-1 && pow5Factor(mv) - e2 >= q-1
// <=> ntz(mv) >= q-1
// <=> mv & ((1 << (q-1)) - 1) == 0
// We also need to make sure that the left shift does not overflow.
vrIsTrailingZeros =
((q < mantissaBits) && ((mv & ((1ull << q) - 1)) == 0));
vrIsTrailingZeros = (mv & ((1ull << (q - 1)) - 1)) == 0;
#if DEBUG
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
#endif
}
}
#ifdef DEBUG
printf("e10=%d\n", e10);
printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
#ifdef MATCH_GRISU3_OUTPUT
printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
#endif

// Step 4: Find the shortest decimal representation in the interval of legal representations.
Expand All @@ -418,19 +406,13 @@ void d2s_buffered(double f, char* result) {
int32_t lastRemovedDigit = 0;
uint64_t output;
// On average, we remove ~2 digits.
if (vmIsTrailingZeros
#ifdef MATCH_GRISU3_OUTPUT
|| vrIsTrailingZeros
#endif
) {
if (vmIsTrailingZeros || vrIsTrailingZeros) {
// General case, which happens rarely (<1%).
while (vp / 10 > vm / 10) {
// The compiler does not realize that vm % 10 can be computed from vm / 10
// as vm - (vm / 10) * 10.
vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0; // vm % 10 == 0;
#ifdef MATCH_GRISU3_OUTPUT
vrIsTrailingZeros &= lastRemovedDigit == 0;
#endif
uint64_t nvr = vr / 10;
lastRemovedDigit = vr - 10 * nvr;
vr = nvr;
Expand All @@ -445,9 +427,7 @@ void d2s_buffered(double f, char* result) {
// Same as above; use vm % 10 == vm - (vm / 10) * 10.
if (vmIsTrailingZeros) {
while (vm - (vm / 10) * 10 == 0) {
#ifdef MATCH_GRISU3_OUTPUT
vrIsTrailingZeros &= lastRemovedDigit == 0;
#endif
uint64_t nvr = vr / 10;
lastRemovedDigit = vr - 10 * nvr;
vr = nvr;
Expand All @@ -458,17 +438,12 @@ void d2s_buffered(double f, char* result) {
}
#ifdef DEBUG
printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
#ifdef MATCH_GRISU3_OUTPUT
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
#endif
#ifdef MATCH_GRISU3_OUTPUT
if (vrIsTrailingZeros && (lastRemovedDigit == 5)) {
if (vr % 10 != 7) {
lastRemovedDigit = 4;
}
if (vrIsTrailingZeros && (lastRemovedDigit == 5) && (vr % 2 == 0)) {
// Round down not up if the number ends in X50000.
lastRemovedDigit = 4;
}
#endif
// We need to take vr+1 if vr is outside bounds or we need to round up.
output = vr +
((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
Expand All @@ -484,9 +459,7 @@ void d2s_buffered(double f, char* result) {
}
#ifdef DEBUG
printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
#ifdef MATCH_GRISU3_OUTPUT
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
#endif
// We need to take vr+1 if vr is outside bounds or we need to round up.
output = vr + ((vr == vm) || (lastRemovedDigit >= 5));
Expand Down
59 changes: 47 additions & 12 deletions ryu/f2s.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ static uint32_t pow5Factor(uint32_t value) {
return 0;
}

static inline bool multipleOfPowerOf5(uint32_t value, int32_t p) {
return pow5Factor(value) >= p;
}

static inline uint32_t pow5bits(int32_t e) {
return e == 0 ? 1 : (uint32_t) ((e * LOG2_5_NUMERATOR + LOG2_5_DENOMINATOR - 1) / LOG2_5_DENOMINATOR);
}
Expand Down Expand Up @@ -180,6 +184,7 @@ void f2s_buffered(float f, char* result) {
uint32_t vp, vm;
int32_t e10;
bool vmIsTrailingZeros = false;
bool vrIsTrailingZeros = false;
uint32_t lastRemovedDigit = 0;
if (e2 >= 0) {
int32_t q = (int32_t) ((e2 * LOG10_2_NUMERATOR) / LOG10_2_DENOMINATOR);
Expand All @@ -189,6 +194,10 @@ void f2s_buffered(float f, char* result) {
vr = (uint32_t) mulPow5InvDivPow2(mv, q, i);
vp = (uint32_t) mulPow5InvDivPow2(mp, q, i);
vm = (uint32_t) mulPow5InvDivPow2(mm, q, i);
#ifdef DEBUG
printf("%d * 2^%d / 10^%d\n", mv, e2, q);
printf("V+=%d\nV =%d\nV-=%d\n", vp, vr, vm);
#endif
if (q != 0 && ((vp - 1) / 10 <= vm / 10)) {
// We need to know one removed digit even if we are not going to loop below. We could use
// q = X - 1 above, except that would require 33 bits for the result, and we've found that
Expand All @@ -197,10 +206,15 @@ void f2s_buffered(float f, char* result) {
lastRemovedDigit = (uint32_t) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
}
if (q <= 9) {
if (acceptBounds) {
vmIsTrailingZeros = pow5Factor(mm) >= q;
// Only one of mp, mv, and mm can be a multiple of 5, if any.
if (mv % 5 == 0) {
vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
} else {
vp -= pow5Factor(mp) >= q;
if (acceptBounds) {
vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
} else {
vp -= multipleOfPowerOf5(mp, q);
}
}
}
} else {
Expand All @@ -212,40 +226,54 @@ void f2s_buffered(float f, char* result) {
vr = (uint32_t) mulPow5divPow2(mv, i, j);
vp = (uint32_t) mulPow5divPow2(mp, i, j);
vm = (uint32_t) mulPow5divPow2(mm, i, j);
#ifdef DEBUG
printf("%d * 5^%d / 10^%d\n", mv, -e2, q);
printf("%d %d %d %d\n", q, i, k, j);
printf("V+=%d\nV =%d\nV-=%d\n", vp, vr, vm);
#endif
if (q != 0 && ((vp - 1) / 10 <= vm / 10)) {
j = q - 1 - (pow5bits(i + 1) - POW5_BITCOUNT);
lastRemovedDigit = (uint32_t) (mulPow5divPow2(mv, i + 1, j) % 10);
}
if (acceptBounds) {
vmIsTrailingZeros = (~mm & 1) >= q;
} else {
vp -= 1 >= q;
if (q <= 1) {
vrIsTrailingZeros = (~mv & 1) >= q;
if (acceptBounds) {
vmIsTrailingZeros = (~mm & 1) >= q;
} else {
vp -= 1 >= q;
}
} else if (q < mantissaBits) {
vrIsTrailingZeros = (mv & ((1 << (q - 1)) - 1)) == 0;
}
}
#ifdef DEBUG
printf("e10=%d\n", e10);
printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif

// Step 4: Find the shortest decimal representation in the interval of legal representations.
uint32_t vplength = decimalLength(vp);
int32_t exp = e10 + vplength - 1;

uint32_t removed = 0;
if (vmIsTrailingZeros) {
uint32_t output;
if (vmIsTrailingZeros || vrIsTrailingZeros) {
// General case, which happens rarely.
while (vp / 10 > vm / 10) {
vmIsTrailingZeros &= vm % 10 == 0;
vrIsTrailingZeros &= lastRemovedDigit == 0;
uint64_t nvr = vr / 10;
lastRemovedDigit = vr - 10 * nvr;
vr = nvr;
vp /= 10;
vm /= 10;
removed++;
}
if (vmIsTrailingZeros && acceptBounds) {
if (vmIsTrailingZeros) {
while (vm % 10 == 0) {
vrIsTrailingZeros &= lastRemovedDigit == 0;
uint64_t nvr = vr / 10;
lastRemovedDigit = vr - 10 * nvr;
vr = nvr;
Expand All @@ -254,6 +282,13 @@ void f2s_buffered(float f, char* result) {
removed++;
}
}
if (vrIsTrailingZeros && (lastRemovedDigit == 5) && (vr % 2 == 0)) {
// Round down not up if the number ends in X50000.
lastRemovedDigit = 4;
}
// We need to take vr+1 if vr is outside bounds or we need to round up.
output = vr +
((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
} else {
// Common case.
while (vp / 10 > vm / 10) {
Expand All @@ -264,9 +299,9 @@ void f2s_buffered(float f, char* result) {
vm /= 10;
removed++;
}
// We need to take vr+1 if vr is outside bounds or we need to round up.
output = vr + ((vr == vm) || (lastRemovedDigit >= 5));
}
uint32_t output = vr +
((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || (lastRemovedDigit >= 5));
uint32_t olength = vplength - removed;

// Step 5: Print the decimal representation.
Expand Down
12 changes: 11 additions & 1 deletion ryu/tests/f2s_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,22 @@ TEST(F2sTest, MinAndMax) {
ASSERT_STREQ("1E-45", f2s(int32Bits2Float(1)));
}

TEST(F2sTest, RoundingModeEven) {
// Check that we return the exact boundary if it is the shortest
// representation, but only if the original floating point number is even.
TEST(F2sTest, BoundaryRoundEven) {
ASSERT_STREQ("3.355445E7", f2s(3.355445E7f));
ASSERT_STREQ("9E9", f2s(8.999999E9f));
ASSERT_STREQ("3.436672E10", f2s(3.4366717E10f));
}

// If the exact value is exactly halfway between two shortest representations,
// then we round to even. It seems like this only makes a difference if the
// last two digits are ...2|5 or ...7|5, and we cut off the 5.
TEST(F2sTest, ExactValueRoundEven) {
ASSERT_STREQ("3.0540412E5", f2s(3.0540412E5f));
ASSERT_STREQ("8.0990312E3", f2s(8.0990312E3f));
}

TEST(F2sTest, Regression) {
ASSERT_STREQ("4.7223665E21", f2s(4.7223665E21f));
ASSERT_STREQ("8.388608E6", f2s(8388608.0f));
Expand Down

0 comments on commit ed05761

Please sign in to comment.