Skip to content

Commit

Permalink
ci: improve bench by data size
Browse files Browse the repository at this point in the history
  • Loading branch information
vincent-herlemont committed Jan 4, 2024
1 parent 40022c5 commit 7240432
Show file tree
Hide file tree
Showing 10 changed files with 307 additions and 22 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/build_test_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@ env:
RUST_BACKTRACE: full

jobs:
build_bench:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: true
matrix:
os: [ubuntu-latest]
toolchain: [stable]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
- name: Setup Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.toolchain }}
override: true
- uses: extractions/setup-just@v1
- uses: hustcer/setup-nu@v3.8
with:
version: '0.85'
env:
GITHUB_TOKEN: ${{ secrets.PAT_GLOBAL }}
- name: Just version
run: just --version
- name: Build
run: just bench_build
build_test:
runs-on: ${{ matrix.os }}
strategy:
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ criterion = { version = "0.5.1" }
doc-comment = "0.3.3"
uuid = { version = "1", features = ["serde", "v4"] }
chrono = { version = "0.4", features = ["serde"] }

rand = "0.8"
once_cell = "1.19"

[features]
default = []
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Here's a drop-in, fast, embedded database for multi-platform apps (server, deskt

- Simple API 🦀.
- Support for **multiple indexes** (primary, secondary, unique, non-unique, optional).
- Minimal boilerplate.
- Minimal boilerplate — see the benchmarks [here](./benches).
- Transparent serialization/deserialization using [native_model](https://github.com/vincent-herlemont/native_model).
- **Automatic model migration** 🌟.
- **Thread-safe** and fully **ACID-compliant** transactions provided by [redb](https://github.com/cberner/redb).
Expand Down
24 changes: 24 additions & 0 deletions benches/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Benchmarks

Highlight the Native DB overhead compared to direct access to the underlying database ([redb](https://github.com/cberner/redb)).

The benchmarks ignore:
- [`native_model`](https://github.com/vincent-herlemont/native_model) overhead.
- Serialization overhead used by `native_model`, such as `bincode`, `postcard`, etc.
- The fact that `redb` can access data without copying it (zero-copy).

## Run benchmarks

If you want to run the benchmarks, you need to install [just](https://github.com/casey/just), [nushell](https://www.nushell.sh/) and run:
```bash
just bench
```

## Results

We can see that the overhead is very low. These results were obtained with version `0.5.3` of `native_db`.

![](./results/insert_random.png)
![](./results/get_random.png)
![](./results/remove_random.png)
![](./results/scan_random.png)
269 changes: 249 additions & 20 deletions benches/overhead_data_size.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use native_db::*;
use native_model::{native_model, Model};
use redb::TableDefinition;
use redb::{ReadableTable, TableDefinition};
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
use rand::prelude::SliceRandom;

// Payload sizes paired with iteration counts: larger payloads run fewer
// iterations to keep the total benchmark time bounded.
// 1 byte * 10000, 10 bytes * 10000, 100 bytes * 5000, 1KB * 1000, 1MB * 100, 10MB * 10
// NOTE: `&'static` is redundant on a `const` (clippy: redundant_static_lifetimes).
const ITERATIONS: &[(usize, usize)] = &[
    (1, 10000),
    (10, 10000),
    (100, 5000),
    (1024, 1000),
    (1024 * 1024, 100),
    (10 * 1024 * 1024, 10),
];

// Shared, lazily-initialized builder with the `Data` model registered once,
// so every in-memory database created by the benchmarks uses the same schema.
// `Lazy` (once_cell) makes this safe to reference from multiple bench functions.
static DATABASE_BUILDER: Lazy<DatabaseBuilder> = Lazy::new(|| {
let mut builder = DatabaseBuilder::new();
builder.define::<Data>().unwrap();
builder
});

/// Create a fresh pair of empty in-memory databases for one benchmark run:
/// a raw `redb` database and a `native_db` database built from the shared builder.
fn init_database() -> (redb::Database, Database<'static>) {
    let native_db = DATABASE_BUILDER.create_in_memory().unwrap();

    let redb_db = redb::Database::builder()
        .create_with_backend(redb::backends::InMemoryBackend::new())
        .unwrap();

    (redb_db, native_db)
}

/// Pre-fill both databases with `nb_items` rows, each carrying a payload of
/// `nb_bytes` bytes and a freshly randomized primary key, and return the
/// inserted values so callers can look them up later.
fn generate_random_data(redb_db: &redb::Database, native_db: &Database, nb_bytes: &usize, nb_items: &usize) -> Vec<Data> {
    // Template payload cloned for every row; only the key changes.
    let template = Data {
        x: 1,
        data: vec![1u8; *nb_bytes],
    };

    (0..*nb_items)
        .map(|_| {
            let mut item = template.clone();
            item.random_x();
            use_redb_insert(redb_db, item.clone());
            use_native_db_insert(native_db, item.clone());
            item
        })
        .collect()
}

#[derive(Serialize, Deserialize, Clone)]
#[native_model(id = 1, version = 1)]
Expand All @@ -13,51 +53,240 @@ struct Data {
data: Vec<u8>,
}

impl Data {
// Assign a fresh random value to the primary key `x` so benchmark rows are
// spread across the keyspace and repeated inserts do not collide on one key.
fn random_x(&mut self) {
self.x = rand::random();
}
}

// Raw redb table keyed by the `Data.x` primary key (u32), storing the
// native_model-encoded bytes of each row.
const TABLE_REDB: TableDefinition<u32, &'static [u8]> = TableDefinition::new("my_data");
fn use_redb(db: &redb::Database, data: Data) {

/// Insert one `Data` row into the raw redb table inside its own write transaction.
fn use_redb_insert(db: &redb::Database, data: Data) {
    let txn = db.begin_write().unwrap();
    {
        // native_db encodes values through native_model; encode the same way here
        // so the comparison measures database overhead, not serialization.
        let encoded = native_model::encode(&data).unwrap();
        let mut table = txn.open_table(TABLE_REDB).unwrap();
        table.insert(data.x, encoded.as_slice()).unwrap();
    }
    txn.commit().unwrap();
}

fn use_native_db(db: &native_db::Database, data: Data) {
/// Fetch and decode the row with primary key `x` from the raw redb table.
/// Panics with "Data not found" if the key is absent.
fn use_redb_get(db: &redb::Database, x: u32) -> Data {
    let txn = db.begin_read().unwrap();
    let table = txn.open_table(TABLE_REDB).unwrap();
    let guard = table.get(x).unwrap().expect("Data not found");
    native_model::decode(guard.value().to_vec()).unwrap().0
}

/// Read every row from the raw redb table, decoding each value into a `Data`.
fn use_redb_scan(db: &redb::Database) -> Vec<Data> {
    let txn = db.begin_read().unwrap();
    let table = txn.open_table(TABLE_REDB).unwrap();
    table
        .iter()
        .unwrap()
        .map(|entry| {
            let (_, value) = entry.unwrap();
            native_model::decode(value.value().to_vec()).unwrap().0
        })
        .collect()
}

/// Delete the row with primary key `x` from the raw redb table inside its own
/// write transaction. Panics with "Data not found" if the key is absent.
fn redb_remove(db: &redb::Database, x: u32) {
    let txn = db.begin_write().unwrap();
    let mut table = txn.open_table(TABLE_REDB).unwrap();
    let removed = table.remove(x).unwrap();
    removed.expect("Data not found");
    // Release the table borrow before committing the transaction.
    drop(table);
    txn.commit().unwrap();
}

/// Insert one `Data` value through native_db in its own read-write transaction.
fn use_native_db_insert(db: &Database, data: Data) {
    let txn = db.rw_transaction().unwrap();
    txn.insert(data).unwrap();
    txn.commit().unwrap();
}

fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("insert");
/// Read every `Data` value via native_db's primary-key scan.
fn use_native_db_scan(db: &Database) -> Vec<Data> {
    let txn = db.r_transaction().unwrap();
    txn.scan().primary().unwrap().all().collect()
}

/// Fetch the `Data` value with primary key `x` through native_db.
/// Panics if the key is absent.
fn use_native_db_get(db: &Database, x: u32) -> Data {
    let txn = db.r_transaction().unwrap();
    txn.get().primary(x).unwrap().unwrap()
}

/// Remove a `Data` value through native_db in its own read-write transaction.
fn native_db_remove(db: &Database, data: Data) {
    let txn = db.rw_transaction().unwrap();
    txn.remove(data).unwrap();
    txn.commit().unwrap();
}

// Benchmarks

/// Benchmark point lookups by a random existing primary key, comparing raw
/// redb against native_db across the payload sizes in `ITERATIONS`.
///
/// Fixes: the group was never `finish()`ed (inconsistent with
/// `bench_insert_random`); `plot_config` was cloned although used only once.
fn bench_get_random(c: &mut Criterion) {
    let mut group = c.benchmark_group("get_random");
    let plot_config = criterion::PlotConfiguration::default()
        .summary_scale(criterion::AxisScale::Logarithmic);
    group.plot_config(plot_config);
    group.sampling_mode(criterion::SamplingMode::Flat);

    for (nb_bytes, nb_items) in ITERATIONS {
        group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64));

        // Fresh databases per payload size, pre-filled so every lookup hits.
        let (redb_db, native_db) = init_database();
        let data = generate_random_data(&redb_db, &native_db, nb_bytes, nb_items);

        group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| {
            b.iter_batched(
                // Setup (untimed): pick a random existing key.
                || data.choose(&mut rand::thread_rng()).unwrap().x,
                |x| use_redb_get(&redb_db, x),
                criterion::BatchSize::SmallInput,
            );
        });
        group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| {
            b.iter_batched(
                || data.choose(&mut rand::thread_rng()).unwrap().x,
                |x| use_native_db_get(&native_db, x),
                criterion::BatchSize::SmallInput,
            );
        });
    }
    group.finish();
}


// 1 byte, 1KB, 1MB, 10MB, 100MB
for nb_bytes in [1, 1024, 1024 * 1024, 10 * 1024 * 1024, 100 * 1024 * 1024] {
group.throughput(criterion::Throughput::Bytes(nb_bytes as u64));
/// Benchmark full-table scans, comparing raw redb against native_db across the
/// payload sizes in `ITERATIONS`.
///
/// Fixes: the group was never `finish()`ed (inconsistent with
/// `bench_insert_random`); `plot_config` was cloned although used only once.
fn bench_scan_random(c: &mut Criterion) {
    let mut group = c.benchmark_group("scan_random");
    let plot_config = criterion::PlotConfiguration::default()
        .summary_scale(criterion::AxisScale::Logarithmic);
    group.plot_config(plot_config);
    group.sampling_mode(criterion::SamplingMode::Flat);

    for (nb_bytes, nb_items) in ITERATIONS {
        group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64));

        // Fresh databases per payload size, pre-filled with `nb_items` rows.
        let (redb_db, native_db) = init_database();
        generate_random_data(&redb_db, &native_db, nb_bytes, nb_items);

        group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| {
            // Drop the collected Vec outside the timed section.
            b.iter_with_large_drop(|| use_redb_scan(&redb_db));
        });
        group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| {
            b.iter_with_large_drop(|| use_native_db_scan(&native_db));
        });
    }
    group.finish();
}


/// Benchmark removal by primary key. Each timed removal is paired with an
/// untimed insert in the batch setup so there is always a row to delete.
///
/// Fixes: the group was never `finish()`ed; `nb_items` was bound but unused;
/// `*nb_bytes as usize` was a redundant cast (`nb_bytes` is already `&usize`);
/// `plot_config` was cloned although used only once.
fn bench_remove_random(c: &mut Criterion) {
    let mut group = c.benchmark_group("remove_random");
    let plot_config = criterion::PlotConfiguration::default()
        .summary_scale(criterion::AxisScale::Logarithmic);
    group.plot_config(plot_config);
    group.sampling_mode(criterion::SamplingMode::Flat);

    for (nb_bytes, _nb_items) in ITERATIONS {
        group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64));

        let (redb_db, native_db) = init_database();

        group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| {
            b.iter_batched(
                // Setup (untimed): insert a fresh random-keyed row to remove.
                || {
                    let mut data = Data {
                        x: 1,
                        data: vec![1u8; *nb_bytes],
                    };
                    data.random_x();
                    use_redb_insert(&redb_db, data.clone());
                    data
                },
                |data| redb_remove(&redb_db, data.x),
                criterion::BatchSize::SmallInput,
            );
        });

        group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| {
            b.iter_batched(
                || {
                    let mut data = Data {
                        x: 1,
                        data: vec![1u8; *nb_bytes],
                    };
                    data.random_x();
                    use_native_db_insert(&native_db, data.clone());
                    data
                },
                |data| native_db_remove(&native_db, data),
                criterion::BatchSize::SmallInput,
            );
        });
    }
    group.finish();
}


fn bench_insert_random(c: &mut Criterion) {
let mut insert_random_group = c.benchmark_group("insert_random");
let plot_config = criterion::PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic);
insert_random_group.plot_config(plot_config.clone());
insert_random_group.sampling_mode(criterion::SamplingMode::Flat);

// 1 byte, 10 bytes, 100 bytes, 1KB, 1MB, 10MB
for (nb_bytes,_) in ITERATIONS {
insert_random_group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64));

let data = Data {
x: 1,
data: vec![1u8; nb_bytes as usize],
data: vec![1u8; *nb_bytes as usize],
};

let redb_backend = redb::backends::InMemoryBackend::new();
let redb_db = redb::Database::builder()
.create_with_backend(redb_backend)
.unwrap();
let (redb_db, native_db) = init_database();

group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| {
b.iter(|| use_redb(&redb_db, data.clone()))
let batch_size = match nb_bytes {
nb_bytes if *nb_bytes < 1024 => criterion::BatchSize::SmallInput,
nb_bytes if *nb_bytes < 1024 * 1024 => criterion::BatchSize::LargeInput,
_ => criterion::BatchSize::PerIteration,
};

insert_random_group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| {
b.iter_batched(
|| {
let mut data = data.clone();
data.random_x();
data
},
|data| use_redb_insert(&redb_db, data),
batch_size
);
});

let mut native_db = native_db::Database::create_in_memory().unwrap();
native_db.define::<Data>().unwrap();
group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| {
b.iter(|| use_native_db(&native_db, data.clone()))
insert_random_group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| {
b.iter_batched(
|| {
let mut data = data.clone();
data.random_x();
data
},
|data| use_native_db_insert(&native_db, data),
batch_size
);
});
}
insert_random_group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_group!(benches, bench_insert_random, bench_scan_random, bench_get_random, bench_remove_random);
criterion_main!(benches);
Binary file added benches/results/get_random.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added benches/results/insert_random.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added benches/results/remove_random.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added benches/results/scan_random.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 7240432

Please sign in to comment.