Skip to content

Commit

Permalink
Reland^2 "[bigint] Karatsuba multiplication"
Browse files Browse the repository at this point in the history
This is a reland of 81dd3f4,
which was a reland of 59eff3b

Original change's description:
> [bigint] Karatsuba multiplication
>
> The Karatsuba algorithm is used for BigInts with 34 or more internal
> digits, and thanks to better asymptotic complexity provides greater
> speedups the bigger the inputs.
>
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2782283
> Reviewed-by: Michael Achenbach <machenbach@chromium.org>
> Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#74916}

Bug: v8:11515
Change-Id: I08f7d59dfa39fb3b532684685afd9fa750e0e84e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2933666
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74969}
  • Loading branch information
jakobkummerow authored and V8 LUCI CQ committed Jun 7, 2021
1 parent d1a0896 commit df7f886
Show file tree
Hide file tree
Showing 16 changed files with 716 additions and 6 deletions.
2 changes: 2 additions & 0 deletions BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -4899,7 +4899,9 @@ v8_source_set("v8_bigint") {
"src/bigint/bigint-internal.h",
"src/bigint/bigint.h",
"src/bigint/digit-arithmetic.h",
"src/bigint/mul-karatsuba.cc",
"src/bigint/mul-schoolbook.cc",
"src/bigint/util.h",
"src/bigint/vector-arithmetic.cc",
"src/bigint/vector-arithmetic.h",
]
Expand Down
3 changes: 2 additions & 1 deletion src/bigint/bigint-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ void ProcessorImpl::Multiply(RWDigits Z, Digits X, Digits Y) {
if (X.len() == 0 || Y.len() == 0) return Z.Clear();
if (X.len() < Y.len()) std::swap(X, Y);
if (Y.len() == 1) return MultiplySingle(Z, X, Y[0]);
return MultiplySchoolbook(Z, X, Y);
if (Y.len() < kKaratsubaThreshold) return MultiplySchoolbook(Z, X, Y);
return MultiplyKaratsuba(Z, X, Y);
}

Status Processor::Multiply(RWDigits Z, Digits X, Digits Y) {
Expand Down
29 changes: 29 additions & 0 deletions src/bigint/bigint-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
namespace v8 {
namespace bigint {

constexpr int kKaratsubaThreshold = 34;

class ProcessorImpl : public Processor {
public:
explicit ProcessorImpl(Platform* platform);
Expand All @@ -21,6 +23,11 @@ class ProcessorImpl : public Processor {
void MultiplySingle(RWDigits Z, Digits X, digit_t y);
void MultiplySchoolbook(RWDigits Z, Digits X, Digits Y);

void MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y);
void KaratsubaStart(RWDigits Z, Digits X, Digits Y, RWDigits scratch, int k);
void KaratsubaChunk(RWDigits Z, Digits X, Digits Y, RWDigits scratch);
void KaratsubaMain(RWDigits Z, Digits X, Digits Y, RWDigits scratch, int n);

private:
// Each unit is supposed to represent approximately one CPU {mul} instruction.
// Doesn't need to be accurate; we just want to make sure to check for
Expand Down Expand Up @@ -59,6 +66,28 @@ class ProcessorImpl : public Processor {
#define DCHECK(cond) (void(0))
#endif

// RAII memory for a Digits array.
class Storage {
public:
explicit Storage(int count) : ptr_(new digit_t[count]) {}

digit_t* get() { return ptr_.get(); }

private:
std::unique_ptr<digit_t[]> ptr_;
};

// A writable Digits array with attached storage.
class ScratchDigits : public RWDigits {
public:
explicit ScratchDigits(int len) : RWDigits(nullptr, len), storage_(len) {
digits_ = storage_.get();
}

private:
Storage storage_;
};

} // namespace bigint
} // namespace v8

Expand Down
30 changes: 30 additions & 0 deletions src/bigint/digit-arithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,36 @@ inline digit_t digit_add3(digit_t a, digit_t b, digit_t c, digit_t* carry) {
#endif
}

// {borrow} will be set to 0 or 1.
inline digit_t digit_sub(digit_t a, digit_t b, digit_t* borrow) {
#if HAVE_TWODIGIT_T
twodigit_t result = twodigit_t{a} - b;
*borrow = (result >> kDigitBits) & 1;
return static_cast<digit_t>(result);
#else
digit_t result = a - b;
*borrow = (result > a) ? 1 : 0;
return result;
#endif
}

// {borrow_out} will be set to 0 or 1.
inline digit_t digit_sub2(digit_t a, digit_t b, digit_t borrow_in,
digit_t* borrow_out) {
#if HAVE_TWODIGIT_T
twodigit_t subtrahend = twodigit_t{b} + borrow_in;
twodigit_t result = twodigit_t{a} - subtrahend;
*borrow_out = (result >> kDigitBits) & 1;
return static_cast<digit_t>(result);
#else
digit_t result = a - b;
*borrow_out = (result > a) ? 1 : 0;
if (result < borrow_in) *borrow_out += 1;
result -= borrow_in;
return result;
#endif
}

// Returns the low half of the result. High half is in {high}.
inline digit_t digit_mul(digit_t a, digit_t b, digit_t* high) {
#if HAVE_TWODIGIT_T
Expand Down
189 changes: 189 additions & 0 deletions src/bigint/mul-karatsuba.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Karatsuba multiplication. This is loosely based on Go's implementation
// found at https://golang.org/src/math/big/nat.go, licensed as follows:
//
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file [1].
//
// [1] https://golang.org/LICENSE

#include <algorithm>
#include <utility>

#include "src/bigint/bigint-internal.h"
#include "src/bigint/digit-arithmetic.h"
#include "src/bigint/util.h"
#include "src/bigint/vector-arithmetic.h"

namespace v8 {
namespace bigint {

namespace {

// The Karatsuba algorithm sometimes finishes more quickly when the
// input length is rounded up a bit. This method encodes some heuristics
// to accomplish this. The details have been determined experimentally.
int RoundUpLen(int len) {
if (len <= 36) return RoundUp(len, 2);
// Keep the 4 or 5 most significant non-zero bits.
int shift = BitLength(len) - 5;
if ((len >> shift) >= 0x18) {
shift++;
}
// Round up, unless we're only just above the threshold. This smoothes
// the steps by which time goes up as input size increases.
int additive = ((1 << shift) - 1);
if (shift >= 2 && (len & additive) < (1 << (shift - 2))) {
return len;
}
return ((len + additive) >> shift) << shift;
}

// This method makes the final decision how much to bump up the input size.
int KaratsubaLength(int n) {
n = RoundUpLen(n);
int i = 0;
while (n > kKaratsubaThreshold) {
n >>= 1;
i++;
}
return n << i;
}

// Performs the specific subtraction required by {KaratsubaMain} below.
void KaratsubaSubtractionHelper(RWDigits result, Digits X, Digits Y,
int* sign) {
X.Normalize();
Y.Normalize();
digit_t borrow = 0;
int i = 0;
if (!GreaterThanOrEqual(X, Y)) {
*sign = -(*sign);
std::swap(X, Y);
}
for (; i < Y.len(); i++) {
result[i] = digit_sub2(X[i], Y[i], borrow, &borrow);
}
for (; i < X.len(); i++) {
result[i] = digit_sub(X[i], borrow, &borrow);
}
DCHECK(borrow == 0); // NOLINT(readability/check)
for (; i < result.len(); i++) result[i] = 0;
}

} // namespace

void ProcessorImpl::MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y) {
DCHECK(X.len() >= Y.len());
DCHECK(Y.len() >= kKaratsubaThreshold);
DCHECK(Z.len() >= X.len() + Y.len());
int k = KaratsubaLength(Y.len());
int scratch_len = 4 * k;
ScratchDigits scratch(scratch_len);
KaratsubaStart(Z, X, Y, scratch, k);
}

// Entry point for Karatsuba-based multiplication, takes care of inputs
// with unequal lengths by chopping the larger into chunks.
void ProcessorImpl::KaratsubaStart(RWDigits Z, Digits X, Digits Y,
RWDigits scratch, int k) {
KaratsubaMain(Z, X, Y, scratch, k);
if (should_terminate()) return;
for (int i = 2 * k; i < Z.len(); i++) Z[i] = 0;
if (k < Y.len() || X.len() != Y.len()) {
ScratchDigits T(2 * k);
// Add X0 * Y1 * b.
Digits X0(X, 0, k);
Digits Y1 = Y + std::min(k, Y.len());
if (Y1.len() > 0) {
KaratsubaChunk(T, X0, Y1, scratch);
if (should_terminate()) return;
AddAt(Z + k, T);
}

// Add Xi * Y0 << i and Xi * Y1 * b << (i + k).
Digits Y0(Y, 0, k);
for (int i = k; i < X.len(); i += k) {
Digits Xi(X, i, k);
KaratsubaChunk(T, Xi, Y0, scratch);
if (should_terminate()) return;
AddAt(Z + i, T);
if (Y1.len() > 0) {
KaratsubaChunk(T, Xi, Y1, scratch);
if (should_terminate()) return;
AddAt(Z + (i + k), T);
}
}
}
}

// Entry point for chunk-wise multiplications, selects an appropriate
// algorithm for the inputs based on their sizes.
void ProcessorImpl::KaratsubaChunk(RWDigits Z, Digits X, Digits Y,
RWDigits scratch) {
X.Normalize();
Y.Normalize();
if (X.len() == 0 || Y.len() == 0) return Z.Clear();
if (X.len() < Y.len()) std::swap(X, Y);
if (Y.len() == 1) return MultiplySingle(Z, X, Y[0]);
if (Y.len() < kKaratsubaThreshold) return MultiplySchoolbook(Z, X, Y);
int k = KaratsubaLength(Y.len());
DCHECK(scratch.len() >= 4 * k);
return KaratsubaStart(Z, X, Y, scratch, k);
}

// The main recursive Karatsuba method.
void ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y,
RWDigits scratch, int n) {
if (n < kKaratsubaThreshold) {
X.Normalize();
Y.Normalize();
if (X.len() >= Y.len()) {
return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), X, Y);
} else {
return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), Y, X);
}
}
DCHECK(scratch.len() >= 4 * n);
DCHECK((n & 1) == 0); // NOLINT(readability/check)
int n2 = n >> 1;
Digits X0(X, 0, n2);
Digits X1(X, n2, n2);
Digits Y0(Y, 0, n2);
Digits Y1(Y, n2, n2);
RWDigits scratch_for_recursion(scratch, 2 * n, 2 * n);
RWDigits P0(scratch, 0, n);
KaratsubaMain(P0, X0, Y0, scratch_for_recursion, n2);
if (should_terminate()) return;
for (int i = 0; i < n; i++) Z[i] = P0[i];
RWDigits P2(scratch, n, n);
KaratsubaMain(P2, X1, Y1, scratch_for_recursion, n2);
if (should_terminate()) return;
RWDigits Z1 = Z + n;
int end = std::min(Z1.len(), P2.len());
for (int i = 0; i < end; i++) Z1[i] = P2[i];
for (int i = end; i < n; i++) {
DCHECK(P2[i] == 0); // NOLINT(readability/check)
}
AddAt(Z + n2, P0);
AddAt(Z + n2, P2);
RWDigits X_diff(scratch, 0, n2);
RWDigits Y_diff(scratch, n2, n2);
int sign = 1;
KaratsubaSubtractionHelper(X_diff, X1, X0, &sign);
KaratsubaSubtractionHelper(Y_diff, Y0, Y1, &sign);
RWDigits P1(scratch, n, n);
KaratsubaMain(P1, X_diff, Y_diff, scratch_for_recursion, n2);
if (sign > 0) {
AddAt(Z + n2, P1);
} else {
SubAt(Z + n2, P1);
}
}

} // namespace bigint
} // namespace v8
2 changes: 0 additions & 2 deletions src/bigint/mul-schoolbook.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ void ProcessorImpl::MultiplySchoolbook(RWDigits Z, Digits X, Digits Y) {
next_carry = 0;
BODY(0, i);
AddWorkEstimate(i);
if (should_terminate()) return;
}
// Last part: i exceeds Y now, we have to be careful about bounds.
int loop_end = X.len() + Y.len() - 2;
Expand All @@ -85,7 +84,6 @@ void ProcessorImpl::MultiplySchoolbook(RWDigits Z, Digits X, Digits Y) {
next_carry = 0;
BODY(min_x_index, max_x_index);
AddWorkEstimate(max_x_index - min_x_index);
if (should_terminate()) return;
}
// Write the last digit, and zero out any extra space in Z.
Z[i++] = digit_add2(next, carry, &carry);
Expand Down
60 changes: 60 additions & 0 deletions src/bigint/util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// "Generic" helper functions (not specific to BigInts).

#include <stdint.h>

#include <type_traits>

#ifdef _MSC_VER
#include <intrin.h> // For _BitScanReverse.
#endif

#ifndef V8_BIGINT_UTIL_H_
#define V8_BIGINT_UTIL_H_

// Integer division, rounding up.
#define DIV_CEIL(x, y) (((x)-1) / (y) + 1)

namespace v8 {
namespace bigint {

// Rounds up x to a multiple of y.
inline constexpr int RoundUp(int x, int y) { return (x + y - 1) & -y; }

// Different environments disagree on how 64-bit uintptr_t and uint64_t are
// defined, so we have to use templates to be generic.
template <typename T, typename = typename std::enable_if<
std::is_unsigned<T>::value && sizeof(T) == 8>::type>
constexpr int CountLeadingZeros(T value) {
#if __GNUC__ || __clang__
return value == 0 ? 64 : __builtin_clzll(value);
#elif _MSC_VER
unsigned long index = 0; // NOLINT(runtime/int). MSVC insists.
return _BitScanReverse64(&index, value) ? 63 - index : 64;
#else
#error Unsupported compiler.
#endif
}

constexpr int CountLeadingZeros(uint32_t value) {
#if __GNUC__ || __clang__
return value == 0 ? 32 : __builtin_clz(value);
#elif _MSC_VER
unsigned long index = 0; // NOLINT(runtime/int). MSVC insists.
return _BitScanReverse(&index, value) ? 31 - index : 32;
#else
#error Unsupported compiler.
#endif
}

inline constexpr int BitLength(int n) {
return 32 - CountLeadingZeros(static_cast<uint32_t>(n));
}

} // namespace bigint
} // namespace v8

#endif // V8_BIGINT_UTIL_H_
Loading

0 comments on commit df7f886

Please sign in to comment.