From 6e779ae5d251b25630180b6a523013f27be3a1f6 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 16:24:28 -0700 Subject: [PATCH 1/8] Clean up code structure and add pypi to ci build --- .../{pypi-publish.yml.disabled => pypi.yml} | 36 +- examples/{git_diff_example.rs => git_diff.rs} | 0 .../{git_merge_example.rs => git_merge.rs} | 0 examples/{sql_example.rs => git_sql.rs} | 0 tests/sql_integration_test.rs | 351 ------------------ 5 files changed, 16 insertions(+), 371 deletions(-) rename .github/workflows/{pypi-publish.yml.disabled => pypi.yml} (68%) rename examples/{git_diff_example.rs => git_diff.rs} (100%) rename examples/{git_merge_example.rs => git_merge.rs} (100%) rename examples/{sql_example.rs => git_sql.rs} (100%) delete mode 100644 tests/sql_integration_test.rs diff --git a/.github/workflows/pypi-publish.yml.disabled b/.github/workflows/pypi.yml similarity index 68% rename from .github/workflows/pypi-publish.yml.disabled rename to .github/workflows/pypi.yml index 1779007..2c83b39 100644 --- a/.github/workflows/pypi-publish.yml.disabled +++ b/.github/workflows/pypi.yml @@ -1,13 +1,6 @@ -name: Build and Publish Python Package +name: Build Python Package -on: - push: - tags: - - 'v*' # Trigger on version tags like v0.2.1 - workflow_dispatch: # Allow manual triggering - -permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing +on: [push, pull_request] jobs: build-wheels: @@ -52,13 +45,10 @@ jobs: name: wheels-sdist path: dist - publish: - name: Publish to PyPI + collect-artifacts: + name: Collect all artifacts runs-on: ubuntu-latest needs: [build-wheels, build-sdist] - environment: - name: pypi - url: https://pypi.org/p/prollytree steps: - name: Download all artifacts @@ -68,10 +58,16 @@ jobs: path: dist merge-multiple: true - - name: Publish to PyPI - uses: PyO3/maturin-action@v1 + - name: List all built artifacts + run: | + echo "📦 Built artifacts:" + ls -la dist/ + echo "" + echo "Total artifacts: $(ls -1 dist/ | wc -l)" + + - name: Upload combined artifacts + uses: actions/upload-artifact@v4 with: - command: upload - args: --non-interactive --skip-existing dist/* - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + name: all-python-artifacts + path: dist/ + retention-days: 90 \ No newline at end of file diff --git a/examples/git_diff_example.rs b/examples/git_diff.rs similarity index 100% rename from examples/git_diff_example.rs rename to examples/git_diff.rs diff --git a/examples/git_merge_example.rs b/examples/git_merge.rs similarity index 100% rename from examples/git_merge_example.rs rename to examples/git_merge.rs diff --git a/examples/sql_example.rs b/examples/git_sql.rs similarity index 100% rename from examples/sql_example.rs rename to examples/git_sql.rs diff --git a/tests/sql_integration_test.rs b/tests/sql_integration_test.rs deleted file mode 100644 index ba2de99..0000000 --- a/tests/sql_integration_test.rs +++ /dev/null @@ -1,351 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#[cfg(feature = "sql")] -mod integration_tests { - use gluesql_core::{error::Result, executor::Payload, prelude::Glue}; - use prollytree::sql::ProllyStorage; - use tempfile::TempDir; - - async fn setup_test_db() -> Result<(TempDir, Glue>)> { - let temp_dir = TempDir::new().map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to create temp dir: {}", e)) - })?; - - // Initialize git repository - std::process::Command::new("git") - .arg("init") - .current_dir(temp_dir.path()) - .output() - .map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to init git: {}", e)) - })?; - - // Create dataset subdirectory - let dataset_path = temp_dir.path().join("dataset"); - std::fs::create_dir(&dataset_path).map_err(|e| { - gluesql_core::error::Error::StorageMsg(format!("Failed to create dataset dir: {}", e)) - })?; - - let storage = ProllyStorage::<32>::init(&dataset_path)?; - let glue = Glue::new(storage); - - Ok((temp_dir, glue)) - } - - #[tokio::test] - async fn test_create_table_and_insert() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Create table - let create_sql = r#" - CREATE TABLE test_table ( - id INTEGER, - name TEXT, - value INTEGER - ) - "#; - - let result = glue.execute(create_sql).await?; - assert_eq!(result.len(), 1); - assert!(matches!(result[0], Payload::Create)); - - // Insert data - let insert_sql = r#" - INSERT INTO test_table (id, name, value) VALUES - (1, 'first', 100), - (2, 'second', 200), - (3, 'third', 300) - "#; - - let result = glue.execute(insert_sql).await?; - assert_eq!(result.len(), 1); - assert!(matches!(result[0], Payload::Insert(3))); - - Ok(()) - } - - #[tokio::test] - async fn test_select_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE products ( - id INTEGER, - name TEXT, - price INTEGER, - category TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO products (id, name, price, category) VALUES - (1, 'Laptop', 1000, 'Electronics'), - (2, 'Book', 20, 'Education'), - (3, 'Phone', 800, 'Electronics'), - (4, 'Notebook', 5, 'Education') - "#, - ) - .await?; - - // Test SELECT * - let result = glue.execute("SELECT * FROM products").await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels.len(), 4); - assert_eq!(rows.len(), 4); - assert_eq!(labels, &vec!["id", "name", "price", "category"]); - } else { - panic!("Expected Select payload"); - } - - // Test SELECT with WHERE - let result = glue - .execute("SELECT name, price FROM products WHERE category = 'Electronics'") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name", "price"]); - assert_eq!(rows.len(), 2); - } else { - panic!("Expected Select payload"); - } - - // Test ORDER BY - let result = glue - .execute("SELECT name FROM products ORDER BY price") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name"]); - assert_eq!(rows.len(), 4); - } else { - panic!("Expected Select payload"); - } - - Ok(()) - } - - #[tokio::test] - async fn test_join_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup tables - glue.execute( - r#" - CREATE TABLE customers ( - id INTEGER, - name TEXT, - email TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - CREATE TABLE orders ( - id INTEGER, - customer_id INTEGER, - product TEXT, - amount INTEGER - ) - "#, - ) - .await?; - - // Insert data - glue.execute( - r#" - INSERT INTO customers (id, name, email) VALUES - (1, 'Alice', 'alice@example.com'), - (2, 'Bob', 'bob@example.com') - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO orders (id, customer_id, product, amount) VALUES - (1, 1, 'Laptop', 1000), - (2, 1, 'Mouse', 50), - (3, 2, 'Keyboard', 100) - "#, - ) - .await?; - - // Test JOIN - let result = glue - .execute( - r#" - SELECT c.name, o.product, o.amount - FROM customers c - JOIN orders o ON c.id = o.customer_id - ORDER BY c.name, o.product - "#, - ) - .await?; - - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["name", "product", "amount"]); - assert_eq!(rows.len(), 3); - } else { - panic!("Expected Select payload"); - } - - Ok(()) - } - - #[tokio::test] - async fn test_update_delete_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE items ( - id INTEGER, - name TEXT, - quantity INTEGER - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO items (id, name, quantity) VALUES - (1, 'Item1', 10), - (2, 'Item2', 20), - (3, 'Item3', 30) - "#, - ) - .await?; - - // Test UPDATE - let result = glue - .execute("UPDATE items SET quantity = 15 WHERE id = 1") - .await?; - assert!(matches!(result[0], Payload::Update(1))); - - // Verify update - let result = glue - .execute("SELECT quantity FROM items WHERE id = 1") - .await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - // Test DELETE - let result = glue.execute("DELETE FROM items WHERE id = 3").await?; - assert!(matches!(result[0], Payload::Delete(1))); - - // Verify delete - let result = glue.execute("SELECT * FROM items").await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 2); - } - - Ok(()) - } - - #[tokio::test] - async fn test_aggregation_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Setup data - glue.execute( - r#" - CREATE TABLE sales ( - id INTEGER, - product TEXT, - quantity INTEGER, - price INTEGER - ) - "#, - ) - .await?; - - glue.execute( - r#" - INSERT INTO sales (id, product, quantity, price) VALUES - (1, 'A', 2, 100), - (2, 'B', 1, 200), - (3, 'A', 3, 150), - (4, 'C', 1, 300) - "#, - ) - .await?; - - // Test COUNT - let result = glue.execute("SELECT COUNT(id) FROM sales").await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - // Test GROUP BY with COUNT - let result = glue - .execute("SELECT product, COUNT(id) FROM sales GROUP BY product ORDER BY product") - .await?; - if let Payload::Select { labels, rows } = &result[0] { - assert_eq!(labels, &vec!["product", "COUNT(id)"]); - assert_eq!(rows.len(), 3); // A, B, C - } - - // Test AVG - let result = glue.execute("SELECT AVG(price) FROM sales").await?; - if let Payload::Select { rows, .. } = &result[0] { - assert_eq!(rows.len(), 1); - } - - Ok(()) - } - - #[tokio::test] - async fn test_schema_operations() -> Result<()> { - let (_temp_dir, mut glue) = setup_test_db().await?; - - // Create multiple tables - glue.execute( - r#" - CREATE TABLE table1 ( - id INTEGER, - name TEXT - ) - "#, - ) - .await?; - - glue.execute( - r#" - CREATE TABLE table2 ( - id INTEGER, - value INTEGER - ) - "#, - ) - .await?; - - // Test that we can query both tables - let result = glue.execute("SELECT * FROM table1").await?; - assert!(matches!(result[0], Payload::Select { .. })); - - let result = glue.execute("SELECT * FROM table2").await?; - assert!(matches!(result[0], Payload::Select { .. })); - - Ok(()) - } -} From 367a1a2e333d4f356aaad75e5fb36a9d948197d1 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 16:48:48 -0700 Subject: [PATCH 2/8] fix pypi build --- .github/workflows/pypi.yml | 75 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 2c83b39..c9dbd66 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -8,23 +8,56 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest, macos-14] # macos-14 is ARM64 + include: + # Linux + - os: ubuntu-latest + target: x86_64 + manylinux: auto +# - os: ubuntu-latest +# target: aarch64 +# manylinux: auto +# # Windows +# - os: windows-latest +# target: x64 +# manylinux: false +# - os: windows-latest +# target: x86 +# manylinux: false + # macOS + - os: macos-latest + target: x86_64 + manylinux: false +# - os: macos-14 +# target: aarch64 +# manylinux: false steps: - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: | + 3.11 + - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} args: --release --out dist --features python --find-interpreter sccache: 'true' - manylinux: auto + manylinux: ${{ matrix.manylinux }} + rust-toolchain: stable + before-script-linux: | + # Install any system dependencies if needed + if [[ "${{ matrix.target }}" == "aarch64" ]]; then + echo "Setting up cross-compilation for aarch64" + fi - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-${{ matrix.os }} + name: wheels-${{ matrix.os }}-${{ matrix.target }} path: dist build-sdist: @@ -33,11 +66,17 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Build sdist uses: PyO3/maturin-action@v1 with: command: sdist args: --out dist + rust-toolchain: stable - name: Upload sdist uses: actions/upload-artifact@v4 @@ -45,10 +84,35 @@ jobs: name: wheels-sdist path: dist + test-wheel: + name: Test built wheel + runs-on: ubuntu-latest + needs: [build-wheels] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Download Linux wheel + uses: actions/download-artifact@v4 + with: + name: wheels-ubuntu-latest-x86_64 + path: dist + + - name: Install and test wheel + run: | + pip install dist/*.whl + python -c "from prollytree import ProllyTree; tree = ProllyTree(); tree.insert(b'test', b'value'); assert tree.find(b'test') == b'value'" + echo "✅ Wheel test passed!" + collect-artifacts: name: Collect all artifacts runs-on: ubuntu-latest - needs: [build-wheels, build-sdist] + needs: [build-wheels, build-sdist, test-wheel] steps: - name: Download all artifacts @@ -64,6 +128,9 @@ jobs: ls -la dist/ echo "" echo "Total artifacts: $(ls -1 dist/ | wc -l)" + echo "" + echo "Artifact details:" + du -h dist/* - name: Upload combined artifacts uses: actions/upload-artifact@v4 From e1cf143c7086aae4dfa6cbd67649f3ca13dd5539 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 16:49:50 -0700 Subject: [PATCH 3/8] only triger at pull request --- .github/workflows/pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index c9dbd66..39a598b 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,6 +1,6 @@ name: Build Python Package -on: [push, pull_request] +on: [pull_request] jobs: build-wheels: From 1f0f84038b93189a1818713358b3f695fb4e7a9a Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 16:51:26 -0700 Subject: [PATCH 4/8] more changes --- .github/workflows/ci.yml | 6 +++++- .github/workflows/pypi.yml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 180339a..e99fae5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,10 @@ name: CI -on: [push, pull_request] +on: + pull_request: + push: + branches: + - main jobs: test: diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 39a598b..ffeabc8 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,6 +1,10 @@ name: Build Python Package -on: [pull_request] +on: + pull_request: + push: + branches: + - main jobs: build-wheels: From 713730b7f0d3b9e7f106dddec7de200a676da7ec Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 16:53:25 -0700 Subject: [PATCH 5/8] pr fix --- .github/workflows/ci.yml | 1 + .github/workflows/pypi.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e99fae5..0ac4af2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,7 @@ name: CI on: pull_request: + types: [opened, synchronize, reopened, ready_for_review] push: branches: - main diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index ffeabc8..e3468e0 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -2,6 +2,7 @@ name: Build Python Package on: pull_request: + types: [opened, synchronize, reopened, ready_for_review] push: branches: - main From 82b9daee6c7c4c35d770b6803c57a34830392aba Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 17:12:10 -0700 Subject: [PATCH 6/8] add benchmark --- .github/workflows/{pypi.yml => python.yml} | 15 +- Cargo.toml | 10 + benches/README.md | 111 ++++++ benches/git_prolly_bench.rs | 360 ++++++++++++++++++ benches/sql_bench.rs | 403 +++++++++++++++++++++ run_benchmarks.sh | 41 +++ 6 files changed, 931 insertions(+), 9 deletions(-) rename .github/workflows/{pypi.yml => python.yml} (90%) create mode 100644 benches/README.md create mode 100644 benches/git_prolly_bench.rs create mode 100644 benches/sql_bench.rs create mode 100755 run_benchmarks.sh diff --git a/.github/workflows/pypi.yml b/.github/workflows/python.yml similarity index 90% rename from .github/workflows/pypi.yml rename to .github/workflows/python.yml index e3468e0..d9e3316 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/python.yml @@ -42,22 +42,19 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: | - 3.11 + python-version: '3.11' - - name: Build wheels + - name: Build wheels (CPython only) uses: PyO3/maturin-action@v1 + env: + # Ensure PyO3 doesn't try to use PyPy + PYO3_CROSS_PYTHON_VERSION: "3.11" with: target: ${{ matrix.target }} - args: --release --out dist --features python --find-interpreter + args: --release --out dist --features python --interpreter python3.11 sccache: 'true' manylinux: ${{ matrix.manylinux }} rust-toolchain: stable - before-script-linux: | - # Install any system dependencies if needed - if [[ "${{ matrix.target }}" == "aarch64" ]]; then - echo "Setting up cross-compilation for aarch64" - fi - name: Upload wheels uses: actions/upload-artifact@v4 diff --git a/Cargo.toml b/Cargo.toml index d524f0f..fcdfa10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,3 +67,13 @@ required-features = ["git", "sql"] [[bench]] name = "prollytree_bench" harness = false + +[[bench]] +name = "sql_bench" +harness = false +required-features = ["sql"] + +[[bench]] +name = "git_prolly_bench" +harness = false +required-features = ["git", "sql"] diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 0000000..b3ae732 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,111 @@ +# ProllyTree Benchmarks + +This directory contains comprehensive benchmarks for ProllyTree, including core operations, SQL functionality, and git-prolly integration. + +## Available Benchmarks + +### 1. Core ProllyTree Operations (`prollytree_bench.rs`) +Basic tree operations benchmarks: +- **Insert**: Single and batch insertions +- **Delete**: Single and batch deletions +- **Get**: Key lookups +- **Mixed operations**: Combined insert/get/delete operations + +### 2. SQL Operations (`sql_bench.rs`) +GlueSQL integration benchmarks: +- **Insert**: SQL INSERT operations +- **Select**: Basic SELECT queries +- **Join**: JOIN operations between tables +- **Aggregation**: GROUP BY with aggregate functions +- **Update**: UPDATE operations +- **Delete**: DELETE operations +- **Index**: CREATE INDEX and indexed queries +- **Transaction**: Transaction performance +- **Complex queries**: Subqueries and complex SQL + +### 3. Git-Prolly Integration (`git_prolly_bench.rs`) +Git versioning and SQL integration: +- **Versioned commits**: Multiple version commits +- **Git-SQL integration**: Combined git versioning with SQL queries +- **Git operations**: Basic git operations (commit, branch) +- **Branch operations**: Creating and switching branches +- **Time travel queries**: Historical data queries +- **Concurrent operations**: Parallel table operations + +## Running Benchmarks + +### Run all benchmarks: +```bash +cargo bench +``` + +### Run specific benchmark suite: +```bash +# Core benchmarks only +cargo bench --bench prollytree_bench + +# SQL benchmarks only (requires sql feature) +cargo bench --bench sql_bench --features sql + +# Git-Prolly benchmarks (requires both git and sql features) +cargo bench --bench git_prolly_bench --features git,sql +``` + +### Run specific benchmark within a suite: +```bash +# Run only insert benchmarks +cargo bench insert + +# Run only SQL join benchmarks +cargo bench sql_join + +# Run only git versioning benchmarks +cargo bench git_versioned +``` + +### Generate HTML reports: +```bash +# Results will be in target/criterion/report/index.html +cargo bench -- --verbose +``` + +### Compare with baseline: +```bash +# Save current results as baseline +cargo bench -- --save-baseline my_baseline + +# Compare against baseline +cargo bench -- --baseline my_baseline +``` + +## Benchmark Configuration + +Benchmarks use different data sizes to test scalability: +- Small: 100 records +- Medium: 500-1000 records +- Large: 10,000 records + +Sample sizes and iterations are configured per benchmark group for optimal runtime. + +## Interpreting Results + +Results show: +- **Time**: Average time per operation +- **Throughput**: Operations per second +- **Variance**: Consistency of performance + +Lower times and higher throughput indicate better performance. + +## Adding New Benchmarks + +1. Add benchmark function following the pattern: +```rust +fn bench_my_operation(c: &mut Criterion) { + let mut group = c.benchmark_group("my_operation"); + // ... benchmark code +} +``` + +2. Add to appropriate criterion group at the bottom of the file + +3. Update this README with the new benchmark description \ No newline at end of file diff --git a/benches/git_prolly_bench.rs b/benches/git_prolly_bench.rs new file mode 100644 index 0000000..3161ede --- /dev/null +++ b/benches/git_prolly_bench.rs @@ -0,0 +1,360 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#[cfg(all(feature = "git", feature = "sql"))] +use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::git::GitNodeStorage; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::git::VersionedKvStore; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::config::TreeConfig; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::tree::{ProllyTree, Tree}; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::sql::ProllyStorage; +#[cfg(all(feature = "git", feature = "sql"))] +use gluesql_core::prelude::Glue; +#[cfg(all(feature = "git", feature = "sql"))] +use tempfile::TempDir; + +#[cfg(all(feature = "git", feature = "sql"))] +fn generate_versioned_data(versions: usize, records_per_version: usize) -> Vec> { + let mut data = Vec::new(); + + for v in 0..versions { + let mut version_data = Vec::new(); + for i in 0..records_per_version { + version_data.push(( + format!("key_{:04}_v{}", i, v), + format!("value_{:04}_version_{}", i, v) + )); + } + data.push(version_data); + } + + data +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_versioned_commits(c: &mut Criterion) { + let mut group = c.benchmark_group("git_versioned_commits"); + group.sample_size(10); + + for size in &[10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + (store, temp_dir, generate_versioned_data(5, size)) + }, + |(mut store, _temp_dir, data)| { + // Create multiple versions with commits + for (version, records) in data.iter().enumerate() { + for (key, value) in records { + store.insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()).unwrap(); + } + store.commit(&format!("Version {}", version)).unwrap(); + } + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_sql_integration(c: &mut Criterion) { + let mut group = c.benchmark_group("git_sql_integration"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table with versioning in mind + glue.execute( + "CREATE TABLE versioned_data ( + id INTEGER PRIMARY KEY, + key TEXT NOT NULL, + value TEXT NOT NULL, + version INTEGER NOT NULL, + timestamp TIMESTAMP + )" + ).await.unwrap(); + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Insert versioned data + for v in 0..3 { + for i in 0..size { + let sql = format!( + "INSERT INTO versioned_data (id, key, value, version, timestamp) + VALUES ({}, 'key_{}', 'value_{}_v{}', {}, TIMESTAMP '2024-01-{:02} 12:00:00')", + v * size + i, i, i, v, v, (i % 28) + 1 + ); + glue.execute(&sql).await.unwrap(); + } + } + + // Query latest version + let result = glue.execute( + "SELECT key, value, MAX(version) as latest_version + FROM versioned_data + GROUP BY key" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("git_operations"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path().to_path_buf(); + let repo = gix::init(&repo_path).unwrap(); + let dataset_dir = repo_path.join("dataset"); + std::fs::create_dir_all(&dataset_dir).unwrap(); + let storage = GitNodeStorage::<32>::new(repo, dataset_dir).unwrap(); + let tree = ProllyTree::new(storage, TreeConfig::<32>::default()); + (tree, temp_dir) + }, + |(mut tree, _temp_dir)| { + // Insert data + for i in 0..size { + let key = format!("git_key_{:06}", i).into_bytes(); + let value = format!("git_value_{:06}", i).into_bytes(); + tree.insert(key, value); + } + + // For benchmarking, we'll just measure the tree operations + // Git commit operations would require more complex setup + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_git_branch_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("git_branch_operations"); + group.sample_size(10); + + for size in &[50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + let mut store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); + + // Initialize with some data + for i in 0..size { + store.insert( + format!("key_{:04}", i).into_bytes(), + format!("value_{:04}", i).into_bytes() + ).unwrap(); + } + store.commit("Initial commit").unwrap(); + + (store, temp_dir) + }, + |(mut store, _temp_dir)| { + // Create and switch branches + for branch_num in 0..3 { + let branch_name = format!("feature-{}", branch_num); + store.create_branch(&branch_name).unwrap(); + store.checkout(&branch_name).unwrap(); + + // Make changes on branch + for i in 0..10 { + store.insert( + format!("branch_{}_key_{}", branch_num, i).into_bytes(), + format!("branch_{}_value_{}", branch_num, i).into_bytes() + ).unwrap(); + } + + store.commit(&format!("Branch {} changes", branch_num)).unwrap(); + } + + // Switch back to main + store.checkout("main").unwrap(); + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_sql_time_travel_queries(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_time_travel"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + glue.execute( + "CREATE TABLE time_series ( + id INTEGER PRIMARY KEY, + metric TEXT, + value DECIMAL, + timestamp TIMESTAMP + )" + ).await.unwrap(); + + // Insert time series data + for i in 0..size { + for hour in 0..24 { + let sql = format!( + "INSERT INTO time_series (id, metric, value, timestamp) + VALUES ({}, 'metric_{}', {}, TIMESTAMP '2024-01-01 {:02}:00:00')", + i * 24 + hour, i % 10, 100.0 + (i as f64) + (hour as f64 * 0.1), hour + ); + glue.execute(&sql).await.unwrap(); + } + } + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Time-based aggregation query + let result = glue.execute( + "SELECT + metric, + DATE_TRUNC('hour', timestamp) as hour, + AVG(value) as avg_value, + MIN(value) as min_value, + MAX(value) as max_value + FROM time_series + WHERE timestamp >= TIMESTAMP '2024-01-01 06:00:00' + AND timestamp <= TIMESTAMP '2024-01-01 18:00:00' + GROUP BY metric, DATE_TRUNC('hour', timestamp) + ORDER BY metric, hour" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +fn bench_concurrent_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_operations"); + group.sample_size(10); + + for size in &[50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter(|| { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create tables sequentially (GlueSQL doesn't support concurrent operations well) + for table_num in 0..4 { + // Create table + let create_sql = format!( + "CREATE TABLE table_{} ( + id INTEGER PRIMARY KEY, + data TEXT + )", + table_num + ); + glue.execute(&create_sql).await.unwrap(); + + // Insert data + for i in 0..size { + let insert_sql = format!( + "INSERT INTO table_{} (id, data) VALUES ({}, 'data_{}')", + table_num, i, i + ); + glue.execute(&insert_sql).await.unwrap(); + } + } + }) + }); + }); + } + + group.finish(); +} + +#[cfg(all(feature = "git", feature = "sql"))] +criterion_group!( + git_prolly_benches, + bench_git_versioned_commits, + bench_git_sql_integration, + bench_git_operations, + bench_git_branch_operations, + bench_sql_time_travel_queries, + bench_concurrent_operations +); + +#[cfg(all(feature = "git", feature = "sql"))] +criterion_main!(git_prolly_benches); + +#[cfg(not(all(feature = "git", feature = "sql")))] +fn main() { + println!("Git-Prolly benchmarks require both 'git' and 'sql' features to be enabled."); + println!("Run with: cargo bench --features git,sql"); +} \ No newline at end of file diff --git a/benches/sql_bench.rs b/benches/sql_bench.rs new file mode 100644 index 0000000..1d43e2b --- /dev/null +++ b/benches/sql_bench.rs @@ -0,0 +1,403 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#[cfg(feature = "sql")] +use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; +#[cfg(feature = "sql")] +use gluesql_core::prelude::Glue; +#[cfg(feature = "sql")] +use prollytree::sql::ProllyStorage; +#[cfg(feature = "sql")] +use tempfile::TempDir; + +#[cfg(feature = "sql")] +async fn setup_database(record_count: usize) -> (Glue>, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + let create_sql = r#" + CREATE TABLE users ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + age INTEGER, + city TEXT, + created_at TIMESTAMP + ) + "#; + glue.execute(create_sql).await.unwrap(); + + // Insert test data + for i in 0..record_count { + let insert_sql = format!( + "INSERT INTO users (id, name, email, age, city, created_at) + VALUES ({}, 'User{}', 'user{}@example.com', {}, 'City{}', TIMESTAMP '2024-01-{:02} 12:00:00')", + i, i, i, 20 + (i % 50), i % 10, (i % 28) + 1 + ); + glue.execute(&insert_sql).await.unwrap(); + } + + (glue, temp_dir) +} + +#[cfg(feature = "sql")] +fn bench_sql_insert(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_insert"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter(|| { + runtime.block_on(async { + let temp_dir = TempDir::new().unwrap(); + let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); + let mut glue = Glue::new(storage); + + // Create table + glue.execute( + "CREATE TABLE bench_table (id INTEGER PRIMARY KEY, data TEXT)" + ).await.unwrap(); + + // Insert records + for i in 0..size { + let sql = format!( + "INSERT INTO bench_table (id, data) VALUES ({}, 'data_{}')", + i, i + ); + glue.execute(&sql).await.unwrap(); + } + }) + }); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_select(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_select"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Simple SELECT + let result = glue.execute("SELECT * FROM users WHERE age > 30") + .await + .unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_join(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_join"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || { + runtime.block_on(async { + let (mut glue, temp_dir) = setup_database(size).await; + + // Create orders table + glue.execute( + "CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + user_id INTEGER, + amount DECIMAL, + status TEXT + )" + ).await.unwrap(); + + // Insert orders + for i in 0..size*2 { + let sql = format!( + "INSERT INTO orders (id, user_id, amount, status) + VALUES ({}, {}, {}, '{}')", + i, i % size, 100.0 + (i as f64), + if i % 2 == 0 { "completed" } else { "pending" } + ); + glue.execute(&sql).await.unwrap(); + } + + (glue, temp_dir) + }) + }, + |(mut glue, _temp_dir)| { + runtime.block_on(async { + let result = glue.execute( + "SELECT u.name, COUNT(o.id) as order_count, SUM(o.amount) as total + FROM users u + JOIN orders o ON u.id = o.user_id + WHERE o.status = 'completed' + GROUP BY u.name" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_aggregation(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_aggregation"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + let result = glue.execute( + "SELECT city, + COUNT(*) as user_count, + AVG(age) as avg_age, + MIN(age) as min_age, + MAX(age) as max_age + FROM users + GROUP BY city + HAVING COUNT(*) > 5 + ORDER BY user_count DESC" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_update(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_update"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Update multiple records + let result = glue.execute( + "UPDATE users + SET age = age + 1, + city = 'UpdatedCity' + WHERE age < 30" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_delete(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_delete"); + group.sample_size(10); + + for size in &[100, 500, 1000] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Delete records + let result = glue.execute( + "DELETE FROM users WHERE age > 50" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_index_operations(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_index"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Create index + glue.execute("CREATE INDEX idx_users_age ON users(age)") + .await.unwrap(); + + // Query using index + let result = glue.execute( + "SELECT * FROM users WHERE age = 25" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_transaction(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_transaction"); + group.sample_size(10); + + for size in &[10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(100)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Begin transaction + glue.execute("BEGIN").await.unwrap(); + + // Multiple operations in transaction + for i in 0..size { + let sql = format!( + "UPDATE users SET age = age + 1 WHERE id = {}", + i + ); + glue.execute(&sql).await.unwrap(); + } + + // Commit transaction + glue.execute("COMMIT").await.unwrap(); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +fn bench_sql_complex_query(c: &mut Criterion) { + let mut group = c.benchmark_group("sql_complex"); + group.sample_size(10); + + for size in &[100, 500] { + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let runtime = tokio::runtime::Runtime::new().unwrap(); + + b.iter_batched( + || runtime.block_on(setup_database(size)), + |(mut glue, _temp_dir)| { + runtime.block_on(async { + // Complex query with subqueries + let result = glue.execute( + "SELECT + u.city, + COUNT(DISTINCT u.id) as user_count, + (SELECT COUNT(*) + FROM users u2 + WHERE u2.city = u.city AND u2.age > 40) as senior_count, + AVG(u.age) as avg_age + FROM users u + WHERE u.id IN ( + SELECT id FROM users + WHERE age BETWEEN 25 AND 45 + ) + GROUP BY u.city + ORDER BY user_count DESC, avg_age ASC" + ).await.unwrap(); + black_box(result); + }) + }, + criterion::BatchSize::SmallInput, + ); + }); + } + + group.finish(); +} + +#[cfg(feature = "sql")] +criterion_group!( + sql_benches, + bench_sql_insert, + bench_sql_select, + bench_sql_join, + bench_sql_aggregation, + bench_sql_update, + bench_sql_delete, + bench_sql_index_operations, + bench_sql_transaction, + bench_sql_complex_query +); + +#[cfg(feature = "sql")] +criterion_main!(sql_benches); + +#[cfg(not(feature = "sql"))] +fn main() { + println!("SQL benchmarks require the 'sql' feature to be enabled."); + println!("Run with: cargo bench --features sql"); +} \ No newline at end of file diff --git a/run_benchmarks.sh b/run_benchmarks.sh new file mode 100755 index 0000000..e19ecf3 --- /dev/null +++ b/run_benchmarks.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Script to run ProllyTree benchmarks + +echo "🚀 Running ProllyTree Benchmarks" +echo "================================" + +echo "" +echo "📊 1. Running Core Tree Benchmarks..." +cargo bench --bench prollytree_bench --quiet -- --quick + +echo "" +echo "📊 2. Running SQL Benchmarks..." +cargo bench --bench sql_bench --features sql --quiet -- --quick + +echo "" +echo "📊 3. Running Git-Prolly Integration Benchmarks..." +cargo bench --bench git_prolly_bench --features "git sql" --quiet -- --quick + +echo "" +echo "✅ All benchmarks completed!" +echo "" +echo "📈 To view detailed results, run:" +echo " cargo bench --bench " +echo "" +echo "📊 Available benchmarks:" +echo " - prollytree_bench: Core tree operations" +echo " - sql_bench: SQL operations (requires --features sql)" +echo " - git_prolly_bench: Git integration (requires --features git,sql)" \ No newline at end of file From b98b1c1798c0467943859a3241464437d0077ac7 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 17:14:51 -0700 Subject: [PATCH 7/8] fix fmt --- benches/git_prolly_bench.rs | 97 ++++++++++++++++------------- benches/sql_bench.rs | 119 ++++++++++++++++++++---------------- 2 files changed, 122 insertions(+), 94 deletions(-) diff --git a/benches/git_prolly_bench.rs b/benches/git_prolly_bench.rs index 3161ede..e5e708a 100644 --- a/benches/git_prolly_bench.rs +++ b/benches/git_prolly_bench.rs @@ -13,37 +13,40 @@ limitations under the License. */ #[cfg(all(feature = "git", feature = "sql"))] -use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; #[cfg(all(feature = "git", feature = "sql"))] -use prollytree::git::GitNodeStorage; -#[cfg(all(feature = "git", feature = "sql"))] -use prollytree::git::VersionedKvStore; +use gluesql_core::prelude::Glue; #[cfg(all(feature = "git", feature = "sql"))] use prollytree::config::TreeConfig; #[cfg(all(feature = "git", feature = "sql"))] -use prollytree::tree::{ProllyTree, Tree}; +use prollytree::git::GitNodeStorage; +#[cfg(all(feature = "git", feature = "sql"))] +use prollytree::git::VersionedKvStore; #[cfg(all(feature = "git", feature = "sql"))] use prollytree::sql::ProllyStorage; #[cfg(all(feature = "git", feature = "sql"))] -use gluesql_core::prelude::Glue; +use prollytree::tree::{ProllyTree, Tree}; #[cfg(all(feature = "git", feature = "sql"))] use tempfile::TempDir; #[cfg(all(feature = "git", feature = "sql"))] -fn generate_versioned_data(versions: usize, records_per_version: usize) -> Vec> { +fn generate_versioned_data( + versions: usize, + records_per_version: usize, +) -> Vec> { let mut data = Vec::new(); - + for v in 0..versions { let mut version_data = Vec::new(); for i in 0..records_per_version { version_data.push(( format!("key_{:04}_v{}", i, v), - format!("value_{:04}_version_{}", i, v) + format!("value_{:04}_version_{}", i, v), )); } data.push(version_data); } - + data } @@ -64,7 +67,9 @@ fn bench_git_versioned_commits(c: &mut Criterion) { // Create multiple versions with commits for (version, records) in data.iter().enumerate() { for (key, value) in records { - store.insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()).unwrap(); + store + .insert(key.as_bytes().to_vec(), value.as_bytes().to_vec()) + .unwrap(); } store.commit(&format!("Version {}", version)).unwrap(); } @@ -85,14 +90,14 @@ fn bench_git_sql_integration(c: &mut Criterion) { for size in &[100, 500] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || { runtime.block_on(async { let temp_dir = TempDir::new().unwrap(); let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); let mut glue = Glue::new(storage); - + // Create table with versioning in mind glue.execute( "CREATE TABLE versioned_data ( @@ -103,7 +108,7 @@ fn bench_git_sql_integration(c: &mut Criterion) { timestamp TIMESTAMP )" ).await.unwrap(); - + (glue, temp_dir) }) }, @@ -113,18 +118,18 @@ fn bench_git_sql_integration(c: &mut Criterion) { for v in 0..3 { for i in 0..size { let sql = format!( - "INSERT INTO versioned_data (id, key, value, version, timestamp) + "INSERT INTO versioned_data (id, key, value, version, timestamp) VALUES ({}, 'key_{}', 'value_{}_v{}', {}, TIMESTAMP '2024-01-{:02} 12:00:00')", v * size + i, i, i, v, v, (i % 28) + 1 ); glue.execute(&sql).await.unwrap(); } } - + // Query latest version let result = glue.execute( - "SELECT key, value, MAX(version) as latest_version - FROM versioned_data + "SELECT key, value, MAX(version) as latest_version + FROM versioned_data GROUP BY key" ).await.unwrap(); black_box(result); @@ -163,7 +168,7 @@ fn bench_git_operations(c: &mut Criterion) { let value = format!("git_value_{:06}", i).into_bytes(); tree.insert(key, value); } - + // For benchmarking, we'll just measure the tree operations // Git commit operations would require more complex setup }, @@ -186,16 +191,18 @@ fn bench_git_branch_operations(c: &mut Criterion) { || { let temp_dir = TempDir::new().unwrap(); let mut store = VersionedKvStore::<32>::init(temp_dir.path()).unwrap(); - + // Initialize with some data for i in 0..size { - store.insert( - format!("key_{:04}", i).into_bytes(), - format!("value_{:04}", i).into_bytes() - ).unwrap(); + store + .insert( + format!("key_{:04}", i).into_bytes(), + format!("value_{:04}", i).into_bytes(), + ) + .unwrap(); } store.commit("Initial commit").unwrap(); - + (store, temp_dir) }, |(mut store, _temp_dir)| { @@ -204,18 +211,22 @@ fn bench_git_branch_operations(c: &mut Criterion) { let branch_name = format!("feature-{}", branch_num); store.create_branch(&branch_name).unwrap(); store.checkout(&branch_name).unwrap(); - + // Make changes on branch for i in 0..10 { - store.insert( - format!("branch_{}_key_{}", branch_num, i).into_bytes(), - format!("branch_{}_value_{}", branch_num, i).into_bytes() - ).unwrap(); + store + .insert( + format!("branch_{}_key_{}", branch_num, i).into_bytes(), + format!("branch_{}_value_{}", branch_num, i).into_bytes(), + ) + .unwrap(); } - - store.commit(&format!("Branch {} changes", branch_num)).unwrap(); + + store + .commit(&format!("Branch {} changes", branch_num)) + .unwrap(); } - + // Switch back to main store.checkout("main").unwrap(); }, @@ -235,14 +246,14 @@ fn bench_sql_time_travel_queries(c: &mut Criterion) { for size in &[100, 500] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || { runtime.block_on(async { let temp_dir = TempDir::new().unwrap(); let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); let mut glue = Glue::new(storage); - + // Create table glue.execute( "CREATE TABLE time_series ( @@ -252,19 +263,19 @@ fn bench_sql_time_travel_queries(c: &mut Criterion) { timestamp TIMESTAMP )" ).await.unwrap(); - + // Insert time series data for i in 0..size { for hour in 0..24 { let sql = format!( - "INSERT INTO time_series (id, metric, value, timestamp) + "INSERT INTO time_series (id, metric, value, timestamp) VALUES ({}, 'metric_{}', {}, TIMESTAMP '2024-01-01 {:02}:00:00')", i * 24 + hour, i % 10, 100.0 + (i as f64) + (hour as f64 * 0.1), hour ); glue.execute(&sql).await.unwrap(); } } - + (glue, temp_dir) }) }, @@ -272,7 +283,7 @@ fn bench_sql_time_travel_queries(c: &mut Criterion) { runtime.block_on(async { // Time-based aggregation query let result = glue.execute( - "SELECT + "SELECT metric, DATE_TRUNC('hour', timestamp) as hour, AVG(value) as avg_value, @@ -303,13 +314,13 @@ fn bench_concurrent_operations(c: &mut Criterion) { for size in &[50, 100] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter(|| { runtime.block_on(async { let temp_dir = TempDir::new().unwrap(); let storage = ProllyStorage::<32>::init(temp_dir.path()).unwrap(); let mut glue = Glue::new(storage); - + // Create tables sequentially (GlueSQL doesn't support concurrent operations well) for table_num in 0..4 { // Create table @@ -321,7 +332,7 @@ fn bench_concurrent_operations(c: &mut Criterion) { table_num ); glue.execute(&create_sql).await.unwrap(); - + // Insert data for i in 0..size { let insert_sql = format!( @@ -357,4 +368,4 @@ criterion_main!(git_prolly_benches); fn main() { println!("Git-Prolly benchmarks require both 'git' and 'sql' features to be enabled."); println!("Run with: cargo bench --features git,sql"); -} \ No newline at end of file +} diff --git a/benches/sql_bench.rs b/benches/sql_bench.rs index 1d43e2b..62b3b8d 100644 --- a/benches/sql_bench.rs +++ b/benches/sql_bench.rs @@ -13,7 +13,7 @@ limitations under the License. */ #[cfg(feature = "sql")] -use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; #[cfg(feature = "sql")] use gluesql_core::prelude::Glue; #[cfg(feature = "sql")] @@ -61,7 +61,7 @@ fn bench_sql_insert(c: &mut Criterion) { for size in &[100, 500, 1000] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter(|| { runtime.block_on(async { let temp_dir = TempDir::new().unwrap(); @@ -69,9 +69,9 @@ fn bench_sql_insert(c: &mut Criterion) { let mut glue = Glue::new(storage); // Create table - glue.execute( - "CREATE TABLE bench_table (id INTEGER PRIMARY KEY, data TEXT)" - ).await.unwrap(); + glue.execute("CREATE TABLE bench_table (id INTEGER PRIMARY KEY, data TEXT)") + .await + .unwrap(); // Insert records for i in 0..size { @@ -97,13 +97,14 @@ fn bench_sql_select(c: &mut Criterion) { for size in &[100, 500, 1000] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Simple SELECT - let result = glue.execute("SELECT * FROM users WHERE age > 30") + let result = glue + .execute("SELECT * FROM users WHERE age > 30") .await .unwrap(); black_box(result); @@ -125,12 +126,12 @@ fn bench_sql_join(c: &mut Criterion) { for size in &[100, 500] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || { runtime.block_on(async { let (mut glue, temp_dir) = setup_database(size).await; - + // Create orders table glue.execute( "CREATE TABLE orders ( @@ -138,32 +139,39 @@ fn bench_sql_join(c: &mut Criterion) { user_id INTEGER, amount DECIMAL, status TEXT - )" - ).await.unwrap(); + )", + ) + .await + .unwrap(); // Insert orders - for i in 0..size*2 { + for i in 0..size * 2 { let sql = format!( "INSERT INTO orders (id, user_id, amount, status) VALUES ({}, {}, {}, '{}')", - i, i % size, 100.0 + (i as f64), + i, + i % size, + 100.0 + (i as f64), if i % 2 == 0 { "completed" } else { "pending" } ); glue.execute(&sql).await.unwrap(); } - + (glue, temp_dir) }) }, |(mut glue, _temp_dir)| { runtime.block_on(async { - let result = glue.execute( - "SELECT u.name, COUNT(o.id) as order_count, SUM(o.amount) as total + let result = glue + .execute( + "SELECT u.name, COUNT(o.id) as order_count, SUM(o.amount) as total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.status = 'completed' - GROUP BY u.name" - ).await.unwrap(); + GROUP BY u.name", + ) + .await + .unwrap(); black_box(result); }) }, @@ -183,13 +191,14 @@ fn bench_sql_aggregation(c: &mut Criterion) { for size in &[100, 500, 1000] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { - let result = glue.execute( - "SELECT city, + let result = glue + .execute( + "SELECT city, COUNT(*) as user_count, AVG(age) as avg_age, MIN(age) as min_age, @@ -197,8 +206,10 @@ fn bench_sql_aggregation(c: &mut Criterion) { FROM users GROUP BY city HAVING COUNT(*) > 5 - ORDER BY user_count DESC" - ).await.unwrap(); + ORDER BY user_count DESC", + ) + .await + .unwrap(); black_box(result); }) }, @@ -218,18 +229,21 @@ fn bench_sql_update(c: &mut Criterion) { for size in &[100, 500, 1000] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Update multiple records - let result = glue.execute( - "UPDATE users + let result = glue + .execute( + "UPDATE users SET age = age + 1, city = 'UpdatedCity' - WHERE age < 30" - ).await.unwrap(); + WHERE age < 30", + ) + .await + .unwrap(); black_box(result); }) }, @@ -249,15 +263,16 @@ fn bench_sql_delete(c: &mut Criterion) { for size in &[100, 500, 1000] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Delete records - let result = glue.execute( - "DELETE FROM users WHERE age > 50" - ).await.unwrap(); + let result = glue + .execute("DELETE FROM users WHERE age > 50") + .await + .unwrap(); black_box(result); }) }, @@ -277,19 +292,21 @@ fn bench_sql_index_operations(c: &mut Criterion) { for size in &[100, 500] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Create index glue.execute("CREATE INDEX idx_users_age ON users(age)") - .await.unwrap(); - + .await + .unwrap(); + // Query using index - let result = glue.execute( - "SELECT * FROM users WHERE age = 25" - ).await.unwrap(); + let result = glue + .execute("SELECT * FROM users WHERE age = 25") + .await + .unwrap(); black_box(result); }) }, @@ -309,23 +326,20 @@ fn bench_sql_transaction(c: &mut Criterion) { for size in &[10, 50, 100] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(100)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Begin transaction glue.execute("BEGIN").await.unwrap(); - + // Multiple operations in transaction for i in 0..size { - let sql = format!( - "UPDATE users SET age = age + 1 WHERE id = {}", - i - ); + let sql = format!("UPDATE users SET age = age + 1 WHERE id = {}", i); glue.execute(&sql).await.unwrap(); } - + // Commit transaction glue.execute("COMMIT").await.unwrap(); }) @@ -346,14 +360,15 @@ fn bench_sql_complex_query(c: &mut Criterion) { for size in &[100, 500] { group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { let runtime = tokio::runtime::Runtime::new().unwrap(); - + b.iter_batched( || runtime.block_on(setup_database(size)), |(mut glue, _temp_dir)| { runtime.block_on(async { // Complex query with subqueries - let result = glue.execute( - "SELECT + let result = glue + .execute( + "SELECT u.city, COUNT(DISTINCT u.id) as user_count, (SELECT COUNT(*) @@ -366,8 +381,10 @@ fn bench_sql_complex_query(c: &mut Criterion) { WHERE age BETWEEN 25 AND 45 ) GROUP BY u.city - ORDER BY user_count DESC, avg_age ASC" - ).await.unwrap(); + ORDER BY user_count DESC, avg_age ASC", + ) + .await + .unwrap(); black_box(result); }) }, @@ -400,4 +417,4 @@ criterion_main!(sql_benches); fn main() { println!("SQL benchmarks require the 'sql' feature to be enabled."); println!("Run with: cargo bench --features sql"); -} \ No newline at end of file +} From c084538cb7d79cfdddcb0dad16ad9a5d3256784e Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Sat, 19 Jul 2025 17:21:26 -0700 Subject: [PATCH 8/8] limit run --- .github/workflows/ci.yml | 2 +- .github/workflows/python.yml | 89 ++---------------------------------- 2 files changed, 6 insertions(+), 85 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ac4af2..aa411dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: pull_request: - types: [opened, synchronize, reopened, ready_for_review] + types: [ready_for_review] push: branches: - main diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index d9e3316..0a0f6ab 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -2,7 +2,7 @@ name: Build Python Package on: pull_request: - types: [opened, synchronize, reopened, ready_for_review] + types: [ready_for_review] push: branches: - main @@ -28,10 +28,10 @@ jobs: # - os: windows-latest # target: x86 # manylinux: false - # macOS - - os: macos-latest - target: x86_64 - manylinux: false +# # macOS +# - os: macos-latest +# target: x86_64 +# manylinux: false # - os: macos-14 # target: aarch64 # manylinux: false @@ -61,82 +61,3 @@ jobs: with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: dist - - build-sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Build sdist - uses: PyO3/maturin-action@v1 - with: - command: sdist - args: --out dist - rust-toolchain: stable - - - name: Upload sdist - uses: actions/upload-artifact@v4 - with: - name: wheels-sdist - path: dist - - test-wheel: - name: Test built wheel - runs-on: ubuntu-latest - needs: [build-wheels] - - steps: - - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Download Linux wheel - uses: actions/download-artifact@v4 - with: - name: wheels-ubuntu-latest-x86_64 - path: dist - - - name: Install and test wheel - run: | - pip install dist/*.whl - python -c "from prollytree import ProllyTree; tree = ProllyTree(); tree.insert(b'test', b'value'); assert tree.find(b'test') == b'value'" - echo "✅ Wheel test passed!" - - collect-artifacts: - name: Collect all artifacts - runs-on: ubuntu-latest - needs: [build-wheels, build-sdist, test-wheel] - - steps: - - name: Download all artifacts - uses: actions/download-artifact@v4 - with: - pattern: wheels-* - path: dist - merge-multiple: true - - - name: List all built artifacts - run: | - echo "📦 Built artifacts:" - ls -la dist/ - echo "" - echo "Total artifacts: $(ls -1 dist/ | wc -l)" - echo "" - echo "Artifact details:" - du -h dist/* - - - name: Upload combined artifacts - uses: actions/upload-artifact@v4 - with: - name: all-python-artifacts - path: dist/ - retention-days: 90 \ No newline at end of file