Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ members = [
"block-multiplier",
"block-multiplier-sys",
"block-multiplier-codegen",
"skyscraper",
]

[workspace.package]
Expand Down
55 changes: 38 additions & 17 deletions block-multiplier/benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use {
criterion::{Criterion, black_box, criterion_group, criterion_main},
fp_rounding::{Zero, with_rounding_mode},
rand::{Rng, SeedableRng, prelude::StdRng},
};

Expand Down Expand Up @@ -48,8 +49,6 @@ fn bench_block_multiplier(c: &mut Criterion) {
rng.random::<u64>(),
];

let rtz = rtz::RTZ::set().unwrap();

group.bench_function("scalar_mul", |bencher| {
bencher.iter(|| block_multiplier::scalar_mul(black_box(s0_a), black_box(s0_b)))
});
Expand All @@ -73,23 +72,45 @@ fn bench_block_multiplier(c: &mut Criterion) {
})
});

group.bench_function("block_mul", |bencher| {
bencher.iter(|| {
block_multiplier::block_mul(
&rtz,
black_box(s0_a),
black_box(s0_b),
black_box(v0_a),
black_box(v0_b),
black_box(v1_a),
black_box(v1_b),
)
group.bench_function("block_mul", |bencher| unsafe {
with_rounding_mode((), |guard, _| {
bencher.iter(|| {
block_multiplier::block_mul(
guard,
black_box(s0_a),
black_box(s0_b),
black_box(v0_a),
black_box(v0_b),
black_box(v1_a),
black_box(v1_b),
)
})
})
});

group.bench_function("block_sqr", |bencher| {
bencher.iter(|| {
block_multiplier::block_sqr(&rtz, black_box(s0_a), black_box(v0_a), black_box(v1_a))
group.bench_function("block_sqr", |bencher| unsafe {
with_rounding_mode((), |guard, _| {
bencher.iter(|| {
block_multiplier::block_sqr(
guard,
black_box(s0_a),
black_box(v0_a),
black_box(v1_a),
)
})
});
});

group.finish();
}

fn bench_rtz(c: &mut Criterion) {
let mut group = c.benchmark_group("with_rounding_mode");
group.bench_function("with_rounding_mode", |bencher| {
bencher.iter(|| unsafe {
with_rounding_mode::<Zero, _, _>((), |guard, _| {
black_box(guard);
})
})
});

Expand All @@ -103,6 +124,6 @@ criterion_group!(
// Warm up is warm because it literally warms up the pi
.warm_up_time(std::time::Duration::new(1,0))
.measurement_time(std::time::Duration::new(10,0));
targets = bench_block_multiplier
targets = bench_block_multiplier, bench_rtz
);
criterion_main!(benches);
81 changes: 45 additions & 36 deletions block-multiplier/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ pub const U64_R: [u64; 4] = [

// R^2 mod P
pub const U64_R2: [u64; 4] = [
0x1BB8E645AE216DA7,
0x53FE3AB1E35C59E3,
0x8C49833D53BB8085,
0x0216D0B17F4E44A5,
0x1bb8e645ae216da7,
0x53fe3ab1e35c59e3,
0x8c49833d53bb8085,
0x0216d0b17f4e44a5,
];

// R^-1 mod P
Expand All @@ -38,29 +38,37 @@ pub const U64_R_INV: [u64; 4] = [
0x15ebf95182c5551c,
];

pub const U52_NP0: u64 = 0x1F593EFFFFFFF;
pub const U52_NP0: u64 = 0x1f593efffffff;
pub const U52_R2: [u64; 5] = [
0x0B852D16DA6F5,
0xC621620CDDCE3,
0xAF1B95343FFB6,
0xC3C15E103E7C2,
0x00281528FA122,
0x0b852d16da6f5,
0xc621620cddce3,
0xaf1b95343ffb6,
0xc3c15e103e7c2,
0x00281528fa122,
];

pub const U52_P: [u64; 5] = [
0x1F593F0000001,
0x4879B9709143E,
0x181585D2833E8,
0xA029B85045B68,
0x030644E72E131,
0x1f593f0000001,
0x4879b9709143e,
0x181585d2833e8,
0xa029b85045b68,
0x030644e72e131,
];

pub const U52_2P: [u64; 5] = [
0x3EB27E0000002,
0x90F372E12287C,
0x302B0BA5067D0,
0x405370A08B6D0,
0x060C89CE5C263,
0x3eb27e0000002,
0x90f372e12287c,
0x302b0ba5067d0,
0x405370a08b6d0,
0x060c89ce5c263,
];

/// Floating-point counterpart of `U52_P`: the same five limb literals, each
/// written as a `u64` and cast to `f64`.
///
/// Every literal has at most 13 hex digits (52 bits), so it fits in the 53-bit
/// significand of an `f64` and the cast is exact.
///
/// NOTE(review): presumably the prime modulus P in radix-2^52 limbs, least
/// significant limb first — confirm against `U52_P` and its users.
pub const F52_P: [f64; 5] = [
0x1f593f0000001_u64 as f64,
0x4879b9709143e_u64 as f64,
0x181585d2833e8_u64 as f64,
0xa029b85045b68_u64 as f64,
0x030644e72e131_u64 as f64,
];

pub const MASK52: u64 = 2_u64.pow(52) - 1;
Expand All @@ -80,14 +88,15 @@ pub const U64_I2: [u64; 4] = [
];

pub const U64_I3: [u64; 4] = [
0x9BACB016127CBE4E,
0x0B2051FA31944124,
0xB064EEA46091C76C,
0x2B062AAA49F80C7D,
0x9bacb016127cbe4e,
0x0b2051fa31944124,
0xb064eea46091c76c,
0x2b062aaa49f80c7d,
];
pub const U64_MU0: u64 = 0xc2e1f593efffffff;

// -- [FP SIMD CONSTANTS] --------------------------------------------------------------------------
// -- [FP SIMD CONSTANTS]
// --------------------------------------------------------------------------
pub const RHO_1: [u64; 5] = [
0x82e644ee4c3d2,
0xf93893c98b1de,
Expand All @@ -105,19 +114,19 @@ pub const RHO_2: [u64; 5] = [
];

pub const RHO_3: [u64; 5] = [
0x0E8C656567D77,
0x430D05713AE61,
0xEA3BA6B167128,
0xA7DAE55C5A296,
0x01B4AFD513572,
0x0e8c656567d77,
0x430d05713ae61,
0xea3ba6b167128,
0xa7dae55c5a296,
0x01b4afd513572,
];

pub const RHO_4: [u64; 5] = [
0x22E2400E2F27D,
0x323B46EA19686,
0xE6C43F0DF672D,
0x7824014C39E8B,
0x00C6B48AFE1B8,
0x22e2400e2f27d,
0x323b46ea19686,
0xe6c43f0df672d,
0x7824014c39e8b,
0x00c6b48afe1b8,
];

pub const C1: f64 = pow_2(104); // 2.0^104
Expand All @@ -128,6 +137,6 @@ pub const C2: f64 = pow_2(104) + pow_2(52); // 2.0^104 + 2.0^52
/// Returns `2.0^n` as an `f64`, usable in `const` contexts.
///
/// `f64::powi` cannot be called from a `const fn` yet, so the value is built
/// directly from its IEEE-754 bit pattern: sign bit clear, mantissa zero, and
/// a biased exponent field of `n + 1023` placed above the 52 mantissa bits.
///
/// Any `n >= 1024` has no finite `f64` representation and yields
/// `f64::INFINITY` instead of wrapping around in the 11-bit exponent field.
const fn pow_2(n: u32) -> f64 {
    // Exponent bias of the binary64 format.
    const BIAS: u64 = 1023;
    // All-ones exponent field; with a zero mantissa it encodes infinity.
    const EXP_FIELD_MAX: u64 = 0x7ff;

    let biased = n as u64 + BIAS;
    if biased >= EXP_FIELD_MAX {
        // Masking here would silently alias onto a small exponent (or zero).
        return f64::INFINITY;
    }
    f64::from_bits(biased << 52)
}
Loading
Loading