diff --git a/.github/workflows/publish-bench-server.yml b/.github/workflows/publish-bench-server.yml
new file mode 100644
index 00000000000..0bfcb6d3293
--- /dev/null
+++ b/.github/workflows/publish-bench-server.yml
@@ -0,0 +1,49 @@
+name: Publish Bench Server
+
+# Builds the vortex-bench-server image and pushes it to GHCR on pushes to
+# develop that touch the paths below, or on manual dispatch.
+on:
+  push:
+    branches: [develop]
+    paths:
+      - "benchmarks-website/server/**"
+      - "vortex-bench/**"
+      - "Cargo.lock"
+      - ".github/workflows/publish-bench-server.yml"
+  workflow_dispatch:
+
+jobs:
+  publish:
+    # Native arm64 runner: the image is built for linux/arm64 only, and
+    # compiling the Rust workspace under QEMU emulation on an x86 runner is
+    # far too slow for the timeout below.
+    runs-on: ubuntu-24.04-arm
+    timeout-minutes: 30
+    permissions:
+      contents: read
+      # Needed to push the image to ghcr.io with GITHUB_TOKEN.
+      packages: write
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v4
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Build and push
+        uses: docker/build-push-action@v7
+        with:
+          # Repository root: the server is a Cargo workspace member.
+          context: .
+          file: ./benchmarks-website/server/Dockerfile
+          platforms: linux/arm64
+          push: true
+          tags: |
+            ghcr.io/${{ github.repository }}/vortex-bench-server:latest
+            ghcr.io/${{ github.repository }}/vortex-bench-server:${{ github.sha }}
diff --git a/Cargo.lock b/Cargo.lock
index 697368980de..df22be4de9b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -197,6 +197,9 @@ name = "arbitrary"
 version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+dependencies = [
+ "derive_arbitrary",
+]
 
 [[package]]
 name = "arc-swap"
@@ -225,24 +228,6 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
-[[package]]
-name = "arrow"
-version = "56.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc"
-dependencies
= [ - "arrow-arith 56.2.0", - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-cast 56.2.0", - "arrow-data 56.2.0", - "arrow-ord 56.2.0", - "arrow-row 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "arrow-string 56.2.0", -] - [[package]] name = "arrow" version = "57.3.0" @@ -285,20 +270,6 @@ dependencies = [ "arrow-string 58.1.0", ] -[[package]] -name = "arrow-arith" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "chrono", - "num", -] - [[package]] name = "arrow-arith" version = "57.3.0" @@ -327,22 +298,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-array" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" -dependencies = [ - "ahash 0.8.12", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "chrono", - "half", - "hashbrown 0.16.1", - "num", -] - [[package]] name = "arrow-array" version = "57.3.0" @@ -381,17 +336,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-buffer" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" -dependencies = [ - "bytes", - "half", - "num", -] - [[package]] name = "arrow-buffer" version = "57.3.0" @@ -416,27 +360,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-cast" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "atoi", - "base64", - 
"chrono", - "comfy-table", - "half", - "lexical-core", - "num", - "ryu", -] - [[package]] name = "arrow-cast" version = "57.3.0" @@ -511,18 +434,6 @@ dependencies = [ "regex", ] -[[package]] -name = "arrow-data" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" -dependencies = [ - "arrow-buffer 56.2.0", - "arrow-schema 56.2.0", - "half", - "num", -] - [[package]] name = "arrow-data" version = "57.3.0" @@ -629,19 +540,6 @@ dependencies = [ "simdutf8", ] -[[package]] -name = "arrow-ord" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", -] - [[package]] name = "arrow-ord" version = "57.3.0" @@ -668,19 +566,6 @@ dependencies = [ "arrow-select 58.1.0", ] -[[package]] -name = "arrow-row" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "half", -] - [[package]] name = "arrow-row" version = "57.3.0" @@ -707,15 +592,6 @@ dependencies = [ "half", ] -[[package]] -name = "arrow-schema" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" -dependencies = [ - "bitflags", -] - [[package]] name = "arrow-schema" version = "57.3.0" @@ -738,20 +614,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow-select" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" 
-dependencies = [ - "ahash 0.8.12", - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "num", -] - [[package]] name = "arrow-select" version = "57.3.0" @@ -780,23 +642,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-string" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "memchr", - "num", - "regex", - "regex-syntax", -] - [[package]] name = "arrow-string" version = "57.3.0" @@ -1813,6 +1658,8 @@ version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ + "crossterm 0.27.0", + "crossterm 0.28.1", "strum 0.26.3", "strum_macros 0.26.4", "unicode-width 0.2.2", @@ -2108,6 +1955,30 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "parking_lot", + "rustix 0.38.44", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -3739,6 +3610,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -3801,7 +3683,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3849,12 +3731,13 @@ checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" [[package]] name = "duckdb" -version = "1.4.1" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a093eed1c714143b257b95fa323e38527fabf05fbf02bb0d5d2045275ffdaef" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" dependencies = [ - "arrow 56.2.0", + "arrow 58.1.0", "cast", + "comfy-table", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -4000,7 +3883,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4719,6 +4602,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -5022,7 +4906,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5094,7 +4978,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5872,17 +5756,19 @@ checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" [[package]] name = "libduckdb-sys" -version = "1.4.1" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b93c3ff279601516f01531cadf2ccba50394fbb5f7bf685c6e6b9b07c8dca6f" +checksum = "8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" 
dependencies = [ "cc", "flate2", "pkg-config", + "reqwest 0.12.28", "serde", "serde_json", "tar", "vcpkg", + "zip 6.0.0", ] [[package]] @@ -6485,21 +6371,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "windows-sys 0.61.2", ] [[package]] @@ -6537,28 +6409,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -7185,9 +7035,9 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "044b1fa4f259f4df9ad5078e587b208f5d288a25407575fcddb9face30c7c692" dependencies = [ - "rand 0.8.6", + "rand 0.9.4", "socket2", - "thiserror 1.0.69", + "thiserror 2.0.18", ] [[package]] @@ -7910,7 +7760,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" dependencies = [ "cfg-if", - "crossterm", + "crossterm 0.29.0", "instability", "ratatui-core", ] @@ -8149,6 +7999,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 
0.4.2", "web-sys", + "webpki-roots", ] [[package]] @@ -8369,7 +8220,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8427,7 +8278,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -9517,7 +9368,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -9536,7 +9387,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -10793,7 +10644,7 @@ dependencies = [ "vortex-runend", "vortex-sequence", "vortex-utils", - "zip", + "zip 8.5.1", ] [[package]] @@ -11297,7 +11148,7 @@ dependencies = [ "arrow-schema 58.1.0", "clap", "console_error_panic_hook", - "crossterm", + "crossterm 0.29.0", "datafusion 53.1.0", "env_logger", "flatbuffers", @@ -11566,6 +11417,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "8.0.2" @@ -11597,7 +11457,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -12169,6 +12029,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + 
"crc32fast",
+ "flate2",
+ "indexmap",
+ "memchr",
+ "zopfli",
+]
+
 [[package]]
 name = "zip"
 version = "8.5.1"
diff --git a/benchmarks-website/docker-compose.yml b/benchmarks-website/docker-compose.yml
index 4c2e9682329..b97482a230a 100644
--- a/benchmarks-website/docker-compose.yml
+++ b/benchmarks-website/docker-compose.yml
@@ -5,6 +5,23 @@ services:
       - "80:3000"
     restart: unless-stopped
 
+  vortex-bench-server:
+    image: ghcr.io/vortex-data/vortex/vortex-bench-server:latest
+    # Fixed name: server/scripts/backup.sh runs `docker exec vortex-bench-server`,
+    # which would not match Compose's default "<project>-<service>-<n>" name.
+    container_name: vortex-bench-server
+    ports:
+      - "3001:3000"
+    environment:
+      VORTEX_BENCH_DB: "/app/data/bench.duckdb"
+      VORTEX_BENCH_BIND: "0.0.0.0:3000"
+      VORTEX_BENCH_LOG: "info,vortex_bench_server=debug"
+    env_file:
+      - /etc/vortex-bench/secrets.env
+    volumes:
+      - /opt/benchmarks-website/data:/app/data
+    restart: unless-stopped
+
   watchtower:
     image: containrrr/watchtower
     volumes:
diff --git a/benchmarks-website/ec2-init.txt b/benchmarks-website/ec2-init.txt
index 1c2459b3bee..4e1377cc014 100644
--- a/benchmarks-website/ec2-init.txt
+++ b/benchmarks-website/ec2-init.txt
@@ -14,4 +14,58 @@
     sudo mkdir -p /opt/benchmarks-website
     sudo cp docker-compose.yml /opt/benchmarks-website/
     cd /opt/benchmarks-website
-    docker compose up -d
\ No newline at end of file
+    docker compose up -d
+
+ ====================================================================
+ v3 (vortex-bench-server) — additive setup, runs alongside v2
+ ====================================================================
+
+ v2 stays on port 80 until DNS is flipped. v3 runs on port 3001 from
+ the same docker-compose.yml on this host.
+
+ 4. Create the bearer-token env file (root:root, mode 600)
+    sudo mkdir -p /etc/vortex-bench
+    sudo install -m 600 -o root -g root /dev/null /etc/vortex-bench/secrets.env
+    # Edit and set INGEST_BEARER_TOKEN=<token>:
+    sudo vi /etc/vortex-bench/secrets.env
+    # File contents:
+    #   INGEST_BEARER_TOKEN=<token>
+
+ 5. Create the EBS-backed DuckDB data directory
+    # Assumes an EBS volume is already mounted at /opt/benchmarks-website/data.
+    sudo mkdir -p /opt/benchmarks-website/data
+    sudo chown root:root /opt/benchmarks-website/data
+    sudo chmod 755 /opt/benchmarks-website/data
+
+ 6. Pull and start v3 (watchtower already polls ghcr.io for refreshes)
+    cd /opt/benchmarks-website
+    docker compose pull vortex-bench-server
+    docker compose up -d vortex-bench-server
+    # Smoke-check on the host:
+    curl -sf http://127.0.0.1:3001/health || echo "v3 not responding"
+
+ 7. Install the daily DuckDB backup cron
+    # Copy the backup script from the repo checkout to a stable location.
+    # Run this from the root of the repo checkout, not /opt/benchmarks-website.
+    sudo install -m 755 -o root -g root \
+      benchmarks-website/server/scripts/backup.sh \
+      /usr/local/bin/vortex-bench-backup.sh
+    # Cron entry: 06:00 UTC daily, after the nightly bench finishes.
+    sudo tee /etc/cron.d/vortex-bench-backup >/dev/null <<'CRON'
+    0 6 * * * root /usr/local/bin/vortex-bench-backup.sh >> /var/log/vortex-bench-backup.log 2>&1
+    CRON
+    sudo chmod 644 /etc/cron.d/vortex-bench-backup
+    # The instance IAM role already permits writes to
+    # s3://vortex-ci-benchmark-results/ (same role v2's cat-s3.sh uses).
+
+ 8. Bearer-token rotation procedure
+    # When rotating INGEST_BEARER_TOKEN:
+    #   a. Generate a new token (e.g. `openssl rand -hex 32`).
+    #   b. Update the GitHub Actions Environment secret INGEST_BEARER_TOKEN
+    #      so CI dual-writes use the new value.
+    #   c. On this EC2 host, edit the env file and restart only the v3
+    #      container so v2 traffic on port 80 is unaffected:
+    #        sudo vi /etc/vortex-bench/secrets.env
+    #        cd /opt/benchmarks-website
+    #        docker compose up -d --force-recreate vortex-bench-server
+    #   d. Verify with `curl` against /health and a token-gated endpoint.
\ No newline at end of file
diff --git a/benchmarks-website/server/Cargo.toml b/benchmarks-website/server/Cargo.toml
index 5b501adf0cc..07d1746a5e5 100644
--- a/benchmarks-website/server/Cargo.toml
+++ b/benchmarks-website/server/Cargo.toml
@@ -26,7 +26,8 @@ path = "src/main.rs"
 anyhow = { workspace = true }
 axum = "0.8"
 base64 = "0.22"
-duckdb = { version = "1.4", features = ["bundled"] }
+# track vortex-duckdb's bundled engine version (build.rs)
+duckdb = { version = "1.10502", features = ["bundled"] }
 maud = { version = "0.27", features = ["axum"] }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
diff --git a/benchmarks-website/server/Dockerfile b/benchmarks-website/server/Dockerfile
new file mode 100644
index 00000000000..81c2c4860b9
--- /dev/null
+++ b/benchmarks-website/server/Dockerfile
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+#
+# Build context: repository root (the server is a workspace member).
+# Build: docker build -f benchmarks-website/server/Dockerfile .
+# Toolchain pinned to match rust-toolchain.toml.
+
+FROM rust:1.91-bookworm AS build
+
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    pkg-config \
+  && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+COPY . .
+
+RUN cargo build --release -p vortex-bench-server --bin vortex-bench-server
+
+FROM debian:bookworm-slim
+
+# Keep this in lockstep with libduckdb-sys in Cargo.lock.
+ARG DUCKDB_VERSION=1.5.2
+
+# NOTE(review): the download below hard-codes the aarch64 CLI asset, so this
+# stage only produces a usable duckdb binary for linux/arm64 builds; also
+# confirm that asset name still exists for the pinned DuckDB release.
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libstdc++6 \
+    unzip \
+    wget \
+  && wget -q "https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-aarch64.zip" -O /tmp/duckdb.zip \
+  && unzip -q /tmp/duckdb.zip -d /usr/local/bin/ \
+  && chmod +x /usr/local/bin/duckdb \
+  && rm /tmp/duckdb.zip \
+  && apt-get purge -y --auto-remove unzip wget \
+  && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /build/target/release/vortex-bench-server /usr/local/bin/vortex-bench-server
+
+WORKDIR /app/data
+
+EXPOSE 3000
+
+CMD ["/usr/local/bin/vortex-bench-server"]
diff --git a/benchmarks-website/server/scripts/backup.sh b/benchmarks-website/server/scripts/backup.sh
new file mode 100755
index 00000000000..ca4a35f891f
--- /dev/null
+++ b/benchmarks-website/server/scripts/backup.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+#
+# Daily DuckDB backup for the vortex-bench-server v3 instance.
+# Runs on the EC2 host via cron (see benchmarks-website/ec2-init.txt).
+#
+# Exports the running container's DuckDB to a local directory and uploads
+# it to s3://vortex-ci-benchmark-results/v3-backups/<YYYYMMDD>/. The instance
+# IAM role already grants write access to that bucket (it is the same
+# bucket cat-s3.sh uses for v2).
+#
+# At alpha this is a convenience backup: the data is also reproducible
+# from CI dual-writes to the v3 ingest endpoint, so RPO is bounded by
+# what CI has posted, not by this script's cadence.
+#
+# Optional environment overrides:
+#   CONTAINER           container name; falls back to a compose service name
+#   COMPOSE_DIR         compose project directory used for that fallback
+#   DB_PATH             DuckDB file path inside the container
+#   CONTAINER_DATA_DIR  export parent directory inside the container
+#   DATA_DIR            host directory mounted at CONTAINER_DATA_DIR
+#   S3_PREFIX           S3 prefix that receives one folder per day
+
+set -euo pipefail
+
+CONTAINER="${CONTAINER:-vortex-bench-server}"
+COMPOSE_DIR="${COMPOSE_DIR:-/opt/benchmarks-website}"
+DB_PATH="${DB_PATH:-/app/data/bench.duckdb}"
+CONTAINER_DATA_DIR="${CONTAINER_DATA_DIR:-$(dirname "${DB_PATH}")}"
+DATA_DIR="${DATA_DIR:-/opt/benchmarks-website/data}"
+S3_PREFIX="${S3_PREFIX:-s3://vortex-ci-benchmark-results/v3-backups}"
+
+date_stamp="$(date -u +%Y%m%d)"
+export_dir="backup-${date_stamp}"
+host_export_dir="${DATA_DIR}/${export_dir}"
+
+# Compose names containers "<project>-<service>-<n>" unless the service
+# sets container_name, so when no container is literally called
+# ${CONTAINER}, fall back to resolving it as a compose service.
+if ! docker container inspect "${CONTAINER}" >/dev/null 2>&1; then
+  resolved="$(
+    docker compose --project-directory "${COMPOSE_DIR}" ps -q "${CONTAINER}" \
+      | head -n 1 || true
+  )"
+  if [[ -z "${resolved}" ]]; then
+    echo "backup: no running container or compose service '${CONTAINER}'" >&2
+    exit 1
+  fi
+  CONTAINER="${resolved}"
+fi
+
+# Run EXPORT DATABASE inside the container with the DuckDB CLI shipped in
+# the image. CONTAINER_DATA_DIR is where DATA_DIR is mounted, so the export
+# lands on the host volume.
+# NOTE(review): DuckDB normally rejects a second process while another has
+# the file open read-write — confirm this works against the live server.
+docker exec "${CONTAINER}" \
+  duckdb "${DB_PATH}" \
+  -c "EXPORT DATABASE '${CONTAINER_DATA_DIR}/${export_dir}'"
+
+aws s3 cp \
+  --recursive \
+  "${host_export_dir}" \
+  "${S3_PREFIX}/${date_stamp}/"
+
+# Keep the latest local export, drop older ones to bound disk use.
+# -mindepth 1 keeps DATA_DIR itself out of the match; excluding by -name
+# (not -path) still spares today's export if DATA_DIR has a trailing slash.
+find "${DATA_DIR}" \
+  -mindepth 1 \
+  -maxdepth 1 \
+  -type d \
+  -name "backup-*" \
+  ! -name "${export_dir}" \
+  -exec rm -rf {} +