diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 180339a..aa411dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,12 @@ name: CI -on: [push, pull_request] +on: + pull_request: + # Run on open/update too, not only on the draft-to-ready transition + types: [opened, synchronize, reopened, ready_for_review] + push: + branches: + - main jobs: test: diff --git a/.github/workflows/pypi-publish.yml.disabled b/.github/workflows/pypi-publish.yml.disabled deleted file mode 100644 index 1779007..0000000 --- a/.github/workflows/pypi-publish.yml.disabled +++ /dev/null @@ -1,77 +0,0 @@ -name: Build and Publish Python Package - -on: - push: - tags: - - 'v*' # Trigger on version tags like v0.2.1 - workflow_dispatch: # Allow manual triggering - -permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing - -jobs: - build-wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest, macos-14] # macos-14 is ARM64 - - steps: - - uses: actions/checkout@v4 - - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.target }} - args: --release --out dist --features python --find-interpreter - sccache: 'true' - manylinux: auto - - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }} - path: dist - - build-sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Build sdist - uses: PyO3/maturin-action@v1 - with: - command: sdist - args: --out dist - - - name: Upload sdist - uses: actions/upload-artifact@v4 - with: - name: wheels-sdist - path: dist - - publish: - name: Publish to PyPI - runs-on: ubuntu-latest - needs: [build-wheels, build-sdist] - environment: - name: pypi - url: https://pypi.org/p/prollytree - - steps: - - name: Download all artifacts - uses: actions/download-artifact@v4 - with: - pattern: wheels-* - path: dist - merge-multiple: true - - - name: Publish to PyPI - uses: 
PyO3/maturin-action@v1 - with: - command: upload - args: --non-interactive --skip-existing dist/* - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..0a0f6ab --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,64 @@ +name: Build Python Package + +on: + pull_request: + # Run on open/update too, not only on the draft-to-ready transition + types: [opened, synchronize, reopened, ready_for_review] + push: + branches: + - main + +jobs: + build-wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + # Linux + - os: ubuntu-latest + target: x86_64 + manylinux: auto +# - os: ubuntu-latest +# target: aarch64 +# manylinux: auto +# # Windows +# - os: windows-latest +# target: x64 +# manylinux: false +# - os: windows-latest +# target: x86 +# manylinux: false +# # macOS +# - os: macos-latest +# target: x86_64 +# manylinux: false +# - os: macos-14 +# target: aarch64 +# manylinux: false + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Build wheels (CPython only) + uses: PyO3/maturin-action@v1 + env: + # Ensure PyO3 doesn't try to use PyPy + PYO3_CROSS_PYTHON_VERSION: "3.11" + with: + target: ${{ matrix.target }} + args: --release --out dist --features python --interpreter python3.11 + sccache: 'true' + manylinux: ${{ matrix.manylinux }} + rust-toolchain: stable + + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }}-${{ matrix.target }} + path: dist diff --git a/Cargo.toml b/Cargo.toml index d524f0f..fcdfa10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,3 +67,13 @@ required-features = ["git", "sql"] [[bench]] name = "prollytree_bench" harness = false + +[[bench]] +name = "sql_bench" +harness = false +required-features = ["sql"] + +[[bench]] +name = "git_prolly_bench" +harness = false +required-features = ["git", "sql"] diff --git 
a/benches/README.md b/benches/README.md new file mode 100644 index 0000000..b3ae732 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,111 @@ +# ProllyTree Benchmarks + +This directory contains comprehensive benchmarks for ProllyTree, including core operations, SQL functionality, and git-prolly integration. + +## Available Benchmarks + +### 1. Core ProllyTree Operations (`prollytree_bench.rs`) +Basic tree operations benchmarks: +- **Insert**: Single and batch insertions +- **Delete**: Single and batch deletions +- **Get**: Key lookups +- **Mixed operations**: Combined insert/get/delete operations + +### 2. SQL Operations (`sql_bench.rs`) +GlueSQL integration benchmarks: +- **Insert**: SQL INSERT operations +- **Select**: Basic SELECT queries +- **Join**: JOIN operations between tables +- **Aggregation**: GROUP BY with aggregate functions +- **Update**: UPDATE operations +- **Delete**: DELETE operations +- **Index**: CREATE INDEX and indexed queries +- **Transaction**: Transaction performance +- **Complex queries**: Subqueries and complex SQL + +### 3. 
Git-Prolly Integration (`git_prolly_bench.rs`) +Git versioning and SQL integration: +- **Versioned commits**: Multiple version commits +- **Git-SQL integration**: Combined git versioning with SQL queries +- **Git operations**: Tree inserts on Git-backed node storage (commits are not measured) +- **Branch operations**: Creating and switching branches +- **Time travel queries**: Time-range aggregation queries over timestamped rows +- **Concurrent operations**: Sequential creation and population of several tables + +## Running Benchmarks + +### Run all benchmarks: +```bash +cargo bench --features git,sql +``` + +### Run specific benchmark suite: +```bash +# Core benchmarks only +cargo bench --bench prollytree_bench + +# SQL benchmarks only (requires sql feature) +cargo bench --bench sql_bench --features sql + +# Git-Prolly benchmarks (requires both git and sql features) +cargo bench --bench git_prolly_bench --features git,sql +``` + +### Run specific benchmark within a suite: +```bash +# Run only insert benchmarks +cargo bench insert + +# Run only SQL join benchmarks +cargo bench --features sql sql_join + +# Run only git versioning benchmarks +cargo bench --features git,sql git_versioned +``` + +### Generate HTML reports: +```bash +# Results will be in target/criterion/report/index.html +cargo bench -- --verbose +``` + +### Compare with baseline: +```bash +# Save current results as baseline +cargo bench -- --save-baseline my_baseline + +# Compare against baseline +cargo bench -- --baseline my_baseline +``` + +## Benchmark Configuration + +Benchmarks use different data sizes to test scalability: +- Small: 100 records +- Medium: 500-1000 records +- Large: 10,000 records + +Sample sizes and iterations are configured per benchmark group for optimal runtime. + +## Interpreting Results + +Results show: +- **Time**: Average time per operation +- **Throughput**: Operations per second +- **Variance**: Consistency of performance + +Lower times and higher throughput indicate better performance. + +## Adding New Benchmarks + +1. 
Add benchmark function following the pattern: +```rust +fn bench_my_operation(c: &mut Criterion) { + let mut group = c.benchmark_group("my_operation"); + // ... benchmark code +} +``` + +2. Add to appropriate criterion group at the bottom of the file + +3. Update this README with the new benchmark description \ No newline at end of file diff --git a/benches/git_prolly_bench.rs b/benches/git_prolly_bench.rs new file mode 100644 index 0000000..e5e708a --- /dev/null +++ b/benches/git_prolly_bench.rs @@ -0,0 +1,371 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#[cfg(all(feature = "git", feature = "sql"))] +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +#[cfg(all(feature = "git", feature = "sql"))] +use gluesql_core::prelude::Glue; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::config::TreeConfig; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::git::GitNodeStorage; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::git::VersionedKvStore; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::sql::ProllyStorage; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::tree::{ProllyTree, Tree}; +#[cfg(all(feature = "git", feature = "sql"))] +use tempfile::TempDir; + +#[cfg(all(feature = "git", feature = "sql"))] +fn generate_versioned_data( + versions: usize, + records_per_version: usize, +) -> Vec<Vec<(String, String)>> { + let mut data = Vec::new(); + + for v in 0..versions { + let mut version_data = Vec::new(); + for i in 0..records_per_version { + version_data.push(( + format!("key_{:04}_v{}", i, v), + format!("value_{:04}_version_{}", i, v), + )); + } + data.push(version_data); + } + + data +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_versioned_commits(c: &mut Criterion) { + let mut group = c.benchmark_group("git_versioned_commits"); + group.sample_size(10); + + for size in &[10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + (store, temp_dir, generate_versioned_data(5, size)) + }, + |(mut store, _temp_dir, data)| { + // Create multiple versions with commits + for (version, records) in data.iter().enumerate() { + for (key, value) in records { + store + .insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()) + .unwrap(); + } + store.commit(&format!("Version {}", version)).unwrap(); + } + }, + criterion::BatchSize::SmallInput, + ); + 
}); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_sql_integration(c: &mut Criterion) { + let mut group = c.benchmark_group("git_sql_integration"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table with versioning in mind + glue.execute( + "CREATE TABLE versioned_data ( + id INTEGER PRIMARY KEY, + key TEXT NOT NULL, + value TEXT NOT NULL, + version INTEGER NOT NULL, + timestamp TIMESTAMP + )" + ).await.unwrap(); + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Insert versioned data + for v in 0..3 { + for i in 0..size { + let sql = format!( + "INSERT INTO versioned_data (id, key, value, version, timestamp) + VALUES ({}, 'key_{}', 'value_{}_v{}', {}, TIMESTAMP '2024-01-{:02} 12:00:00')", + v * size + i, i, i, v, v, (i % 28) + 1 + ); + glue.execute(&sql).await.unwrap(); + } + } + + // Query latest version + let result = glue.execute( + "SELECT key, value, MAX(version) as latest_version + FROM versioned_data + GROUP BY key" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("git_operations"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path().to_path_buf(); + let repo = gix::init(&repo_path).unwrap(); + let dataset_dir = 
repo_path.join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let storage = GitNodeStorage::<32>::new(repo, dataset_dir).unwrap(); + let tree = ProllyTree::new(storage, TreeConfig::<32>::default()); + (tree, temp_dir) + }, + |(mut tree, _temp_dir)| { + // Insert data + for i in 0..size { + let key = format!("git_key_{:06}", i).into_bytes(); + let value = format!("git_value_{:06}", i).into_bytes(); + tree.insert(key, value); + } + + // For benchmarking, we'll just measure the tree operations + // Git commit operations would require more complex setup + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_branch_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("git_branch_operations"); + group.sample_size(10); + + for size in &[50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let mut store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + + // Initialize with some data + for i in 0..size { + store + .insert( + format!("key_{:04}", i).into_bytes(), + format!("value_{:04}", i).into_bytes(), + ) + .unwrap(); + } + store.commit("Initial commit").unwrap(); + + (store, temp_dir) + }, + |(mut store, _temp_dir)| { + // Create and switch branches + for branch_num in 0..3 { + let branch_name = format!("feature-{}", branch_num); + store.create_branch(&branch_name).unwrap(); + store.checkout(&branch_name).unwrap(); + + // Make changes on branch + for i in 0..10 { + store + .insert( + format!("branch_{}_key_{}", branch_num, i).into_bytes(), + format!("branch_{}_value_{}", branch_num, i).into_bytes(), + ) + .unwrap(); + } + + store + .commit(&format!("Branch {} changes", branch_num)) + .unwrap(); + } + + // Switch back to main + store.checkout("main").unwrap(); + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + 
+#[cfg(all(feature = "git", feature = "sql"))] +fn bench_sql_time_travel_queries(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_time_travel"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + glue.execute( + "CREATE TABLE time_series ( + id INTEGER PRIMARY KEY, + metric TEXT, + value DECIMAL, + timestamp TIMESTAMP + )" + ).await.unwrap(); + + // Insert time series data + for i in 0..size { + for hour in 0..24 { + let sql = format!( + "INSERT INTO time_series (id, metric, value, timestamp) + VALUES ({}, 'metric_{}', {}, TIMESTAMP '2024-01-01 {:02}:00:00')", + i * 24 + hour, i % 10, 100.0 + (i as f64) + (hour as f64 * 0.1), hour + ); + glue.execute(&sql).await.unwrap(); + } + } + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Time-based aggregation query + let result = glue.execute( + "SELECT + metric, + DATE_TRUNC('hour', timestamp) as hour, + AVG(value) as avg_value, + MIN(value) as min_value, + MAX(value) as max_value + FROM time_series + WHERE timestamp >= TIMESTAMP '2024-01-01 06:00:00' + AND timestamp <= TIMESTAMP '2024-01-01 18:00:00' + GROUP BY metric, DATE_TRUNC('hour', timestamp) + ORDER BY metric, hour" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_concurrent_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_operations"); + group.sample_size(10); + + for size in &[50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = 
tokio::runtime::Runtime::new().unwrap(); + + b.iter(|| { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create tables sequentially (GlueSQL doesn't support concurrent operations well) + for table_num in 0..4 { + // Create table + let create_sql = format!( + "CREATE TABLE table_{} ( + id INTEGER PRIMARY KEY, + data TEXT + )", + table_num + ); + glue.execute(&create_sql).await.unwrap(); + + // Insert data + for i in 0..size { + let insert_sql = format!( + "INSERT INTO table_{} (id, data) VALUES ({}, 'data_{}')", + table_num, i, i + ); + glue.execute(&insert_sql).await.unwrap(); + } + } + }) + }); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +criterion_group!( + git_prolly_benches, + bench_git_versioned_commits, + bench_git_sql_integration, + bench_git_operations, + bench_git_branch_operations, + bench_sql_time_travel_queries, + bench_concurrent_operations +); + +#[cfg(all(feature = "git", feature = "sql"))] +criterion_main!(git_prolly_benches); + +#[cfg(not(all(feature = "git", feature = "sql")))] +fn main() { + println!("Git-Prolly benchmarks require both 'git' and 'sql' features to be enabled."); + println!("Run with: cargo bench --features git,sql"); +} diff --git a/benches/sql_bench.rs b/benches/sql_bench.rs new file mode 100644 index 0000000..62b3b8d --- /dev/null +++ b/benches/sql_bench.rs @@ -0,0 +1,420 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#[cfg(feature = "sql")] +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +#[cfg(feature = "sql")] +use gluesql_core::prelude::Glue; +#[cfg(feature = "sql")] +use prollytree::sql::ProllyStorage; +#[cfg(feature = "sql")] +use tempfile::TempDir; + +#[cfg(feature = "sql")] +async fn setup_database(record_count: usize) -> (Glue<ProllyStorage<32>>, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + let create_sql = r#" + CREATE TABLE users ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + age INTEGER, + city TEXT, + created_at TIMESTAMP + ) + "#; + glue.execute(create_sql).await.unwrap(); + + // Insert test data + for i in 0..record_count { + let insert_sql = format!( + "INSERT INTO users (id, name, email, age, city, created_at) + VALUES ({}, 'User{}', 'user{}@example.com', {}, 'City{}', TIMESTAMP '2024-01-{:02} 12:00:00')", + i, i, i, 20 + (i % 50), i % 10, (i % 28) + 1 + ); + glue.execute(&insert_sql).await.unwrap(); + } + + (glue, temp_dir) +} + +#[cfg(feature = "sql")] +fn bench_sql_insert(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_insert"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter(|| { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + glue.execute("CREATE TABLE bench_table (id INTEGER PRIMARY KEY, data TEXT)") + .await + .unwrap(); + + // Insert records + for i in 0..size { + let sql = format!( + "INSERT INTO bench_table (id, data) VALUES ({}, 'data_{}')", + i, i 
+ ); + glue.execute(&sql).await.unwrap(); + } + }) + }); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_select(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_select"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Simple SELECT + let result = glue + .execute("SELECT * FROM users WHERE age > 30") + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_join(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_join"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let (mut glue, temp_dir) = setup_database(size).await; + + // Create orders table + glue.execute( + "CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + user_id INTEGER, + amount DECIMAL, + status TEXT + )", + ) + .await + .unwrap(); + + // Insert orders + for i in 0..size * 2 { + let sql = format!( + "INSERT INTO orders (id, user_id, amount, status) + VALUES ({}, {}, {}, '{}')", + i, + i % size, + 100.0 + (i as f64), + if i % 2 == 0 { "completed" } else { "pending" } + ); + glue.execute(&sql).await.unwrap(); + } + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + let result = glue + .execute( + "SELECT u.name, COUNT(o.id) as order_count, SUM(o.amount) as total + FROM users u + JOIN orders o ON u.id = o.user_id + WHERE o.status = 'completed' + GROUP BY u.name", + ) + .await + .unwrap(); + black_box(result); + }) + }, + 
criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_aggregation(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_aggregation"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + let result = glue + .execute( + "SELECT city, + COUNT(*) as user_count, + AVG(age) as avg_age, + MIN(age) as min_age, + MAX(age) as max_age + FROM users + GROUP BY city + HAVING COUNT(*) > 5 + ORDER BY user_count DESC", + ) + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_update(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_update"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Update multiple records + let result = glue + .execute( + "UPDATE users + SET age = age + 1, + city = 'UpdatedCity' + WHERE age < 30", + ) + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_delete(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_delete"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + 
runtime.block_on(async { + // Delete records + let result = glue + .execute("DELETE FROM users WHERE age > 50") + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_index_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_index"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Create index + glue.execute("CREATE INDEX idx_users_age ON users(age)") + .await + .unwrap(); + + // Query using index + let result = glue + .execute("SELECT * FROM users WHERE age = 25") + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_transaction(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_transaction"); + group.sample_size(10); + + for size in &[10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(100)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Begin transaction + glue.execute("BEGIN").await.unwrap(); + + // Multiple operations in transaction + for i in 0..size { + let sql = format!("UPDATE users SET age = age + 1 WHERE id = {}", i); + glue.execute(&sql).await.unwrap(); + } + + // Commit transaction + glue.execute("COMMIT").await.unwrap(); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_complex_query(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_complex"); + group.sample_size(10); + + for size 
in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Complex query with subqueries + let result = glue + .execute( + "SELECT + u.city, + COUNT(DISTINCT u.id) as user_count, + (SELECT COUNT(*) + FROM users u2 + WHERE u2.city = u.city AND u2.age > 40) as senior_count, + AVG(u.age) as avg_age + FROM users u + WHERE u.id IN ( + SELECT id FROM users + WHERE age BETWEEN 25 AND 45 + ) + GROUP BY u.city + ORDER BY user_count DESC, avg_age ASC", + ) + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +criterion_group!( + sql_benches, + bench_sql_insert, + bench_sql_select, + bench_sql_join, + bench_sql_aggregation, + bench_sql_update, + bench_sql_delete, + bench_sql_index_operations, + bench_sql_transaction, + bench_sql_complex_query +); + +#[cfg(feature = "sql")] +criterion_main!(sql_benches); + +#[cfg(not(feature = "sql"))] +fn main() { + println!("SQL benchmarks require the 'sql' feature to be enabled."); + println!("Run with: cargo bench --features sql"); +} diff --git a/examples/git_diff_example.rs b/examples/git_diff.rs similarity index 100% rename from examples/git_diff_example.rs rename to examples/git_diff.rs diff --git a/examples/git_merge_example.rs b/examples/git_merge.rs similarity index 100% rename from examples/git_merge_example.rs rename to examples/git_merge.rs diff --git a/examples/sql_example.rs b/examples/git_sql.rs similarity index 100% rename from examples/sql_example.rs rename to examples/git_sql.rs diff --git a/run_benchmarks.sh b/run_benchmarks.sh new file mode 100755 index 0000000..e19ecf3 --- /dev/null +++ b/run_benchmarks.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Script to run ProllyTree benchmarks; abort on the first failing suite. +set -euo pipefail +echo "🚀 Running ProllyTree Benchmarks" +echo "================================" + +echo "" +echo "📊 1. Running Core Tree Benchmarks..." +cargo bench --bench prollytree_bench --quiet -- --quick + +echo "" +echo "📊 2. Running SQL Benchmarks..." +cargo bench --bench sql_bench --features sql --quiet -- --quick + +echo "" +echo "📊 3. Running Git-Prolly Integration Benchmarks..." +cargo bench --bench git_prolly_bench --features "git sql" --quiet -- --quick + +echo "" +echo "✅ All benchmarks completed!" +echo "" +echo "📈 To view detailed results, run:" +echo " cargo bench --bench <name>" +echo "" +echo "📊 Available benchmarks:" +echo " - prollytree_bench: Core tree operations" +echo " - sql_bench: SQL operations (requires --features sql)" +echo " - git_prolly_bench: Git integration (requires --features git,sql)" \ No newline at end of file diff --git a/tests/sql_integration_test.rs b/tests/sql_integration_test.rs deleted file mode 100644 index ba2de99..0000000 --- a/tests/sql_integration_test.rs +++ /dev/null @@ -1,351 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -#[cfg(feature = "sql")] -mod integration_tests { - use gluesql_core::{error::Result, executor::Payload, prelude::Glue}; - use prollytree::sql::ProllyStorage; - use tempfile::TempDir; - - async fn setup_test_db() -> Result<(TempDir, Glue<ProllyStorage<32>>)> { - let temp_dir = TempDir::new().map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to create temp dir: {}", e)) - })?; - - // Initialize git repository - std::process::Command::new("git") - .arg("init") - .current_dir(temp_dir.path()) - .output() - .map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to init git: {}", e)) - })?; - - // Create dataset subdirectory - let dataset_path = temp_dir.path().join("dataset"); - std::fs::create_dir(&dataset_path).map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to create dataset dir: {}", e)) - })?; - - let storage = ProllyStorage::<32>::init(&dataset_path)?; - let glue = Glue::new(storage); - - Ok((temp_dir, glue)) - } - - #[tokio::test] - async fn test_create_table_and_insert() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Create table - let create_sql = r#" - CREATE TABLE test_table ( - id INTEGER, - name TEXT, - value INTEGER - ) - "#; - - let result = glue.execute(create_sql).await?; - assert_eq!(result.len(), 1); - assert!(matches!(result[0], Payload::Create)); - - // Insert data - let insert_sql = r#" - INSERT INTO test_table (id, name, value) VALUES - (1, 'first', 100), - (2, 'second', 200), - (3, 'third', 300) - "#; - - let result = glue.execute(insert_sql).await?; - assert_eq!(result.len(), 1); - assert!(matches!(result[0], Payload::Insert(3))); - - Ok(()) - } - - #[tokio::test] - async fn test_select_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE products ( - id INTEGER, - name TEXT, - price 
INTEGER, - category TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO products (id, name, price, category) VALUES - (1, 'Laptop', 1000, 'Electronics'), - (2, 'Book', 20, 'Education'), - (3, 'Phone', 800, 'Electronics'), - (4, 'Notebook', 5, 'Education') - "#, - ) - .await?; - - // Test SELECT * - let result = glue.execute("SELECT * FROM products").await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels.len(), 4); - assert_eq!(rows.len(), 4); - assert_eq!(labels, &vec!["id", "name", "price", "category"]); - } else { - panic!("Expected Select payload"); - } - - // Test SELECT with WHERE - let result = glue - .execute("SELECT name, price FROM products WHERE category = 'Electronics'") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name", "price"]); - assert_eq!(rows.len(), 2); - } else { - panic!("Expected Select payload"); - } - - // Test ORDER BY - let result = glue - .execute("SELECT name FROM products ORDER BY price") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name"]); - assert_eq!(rows.len(), 4); - } else { - panic!("Expected Select payload"); - } - - Ok(()) - } - - #[tokio::test] - async fn test_join_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup tables - glue.execute( - r#" - CREATE TABLE customers ( - id INTEGER, - name TEXT, - email TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - CREATE TABLE orders ( - id INTEGER, - customer_id INTEGER, - product TEXT, - amount INTEGER - ) - "#, - ) - .await?; - - // Insert data - glue.execute( - r#" - INSERT INTO customers (id, name, email) VALUES - (1, 'Alice', 'alice@example.com'), - (2, 'Bob', 'bob@example.com') - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO orders (id, customer_id, product, amount) VALUES - (1, 1, 'Laptop', 1000), - (2, 1, 'Mouse', 50), - (3, 2, 'Keyboard', 100) - "#, - ) - .await?; - - // 
Test JOIN - let result = glue - .execute( - r#" - SELECT c.name, o.product, o.amount - FROM customers c - JOIN orders o ON c.id = o.customer_id - ORDER BY c.name, o.product - "#, - ) - .await?; - - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name", "product", "amount"]); - assert_eq!(rows.len(), 3); - } else { - panic!("Expected Select payload"); - } - - Ok(()) - } - - #[tokio::test] - async fn test_update_delete_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE items ( - id INTEGER, - name TEXT, - quantity INTEGER - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO items (id, name, quantity) VALUES - (1, 'Item1', 10), - (2, 'Item2', 20), - (3, 'Item3', 30) - "#, - ) - .await?; - - // Test UPDATE - let result = glue - .execute("UPDATE items SET quantity = 15 WHERE id = 1") - .await?; - assert!(matches!(result[0], Payload::Update(1))); - - // Verify update - let result = glue - .execute("SELECT quantity FROM items WHERE id = 1") - .await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - // Test DELETE - let result = glue.execute("DELETE FROM items WHERE id = 3").await?; - assert!(matches!(result[0], Payload::Delete(1))); - - // Verify delete - let result = glue.execute("SELECT * FROM items").await?; - if let Payload::Select { rows, .. 
} = &result[0] { - assert_eq!(rows.len(), 2); - } - - Ok(()) - } - - #[tokio::test] - async fn test_aggregation_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE sales ( - id INTEGER, - product TEXT, - quantity INTEGER, - price INTEGER - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO sales (id, product, quantity, price) VALUES - (1, 'A', 2, 100), - (2, 'B', 1, 200), - (3, 'A', 3, 150), - (4, 'C', 1, 300) - "#, - ) - .await?; - - // Test COUNT - let result = glue.execute("SELECT COUNT(id) FROM sales").await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - // Test GROUP BY with COUNT - let result = glue - .execute("SELECT product, COUNT(id) FROM sales GROUP BY product ORDER BY product") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["product", "COUNT(id)"]); - assert_eq!(rows.len(), 3); // A, B, C - } - - // Test AVG - let result = glue.execute("SELECT AVG(price) FROM sales").await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - Ok(()) - } - - #[tokio::test] - async fn test_schema_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Create multiple tables - glue.execute( - r#" - CREATE TABLE table1 ( - id INTEGER, - name TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - CREATE TABLE table2 ( - id INTEGER, - value INTEGER - ) - "#, - ) - .await?; - - // Test that we can query both tables - let result = glue.execute("SELECT * FROM table1").await?; - assert!(matches!(result[0], Payload::Select { .. })); - - let result = glue.execute("SELECT * FROM table2").await?; - assert!(matches!(result[0], Payload::Select { .. })); - - Ok(()) - } -}