From 2c57ebbdeac059e5c863c5f951dd34f4b233f403 Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 14:55:21 +0800 Subject: [PATCH 01/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- Cargo.lock | 2376 ++++++++++++++++++++- Cargo.toml | 2 +- README.md | 2 +- crates/{ov_cli => }/LICENSE | 0 crates/ragfs-python/Cargo.toml | 16 + crates/ragfs-python/pyproject.toml | 11 + crates/ragfs-python/src/lib.rs | 453 ++++ crates/ragfs/Cargo.toml | 95 + crates/ragfs/MIGRATION_PLAN.md | 965 +++++++++ crates/ragfs/src/core/errors.rs | 142 ++ crates/ragfs/src/core/filesystem.rs | 220 ++ crates/ragfs/src/core/mod.rs | 21 + crates/ragfs/src/core/mountable.rs | 625 ++++++ crates/ragfs/src/core/plugin.rs | 276 +++ crates/ragfs/src/core/types.rs | 246 +++ crates/ragfs/src/lib.rs | 60 + crates/ragfs/src/plugins/kvfs/mod.rs | 521 +++++ crates/ragfs/src/plugins/memfs/mod.rs | 620 ++++++ crates/ragfs/src/plugins/mod.rs | 17 + crates/ragfs/src/plugins/queuefs/mod.rs | 519 +++++ crates/ragfs/src/plugins/s3fs/cache.rs | 300 +++ crates/ragfs/src/plugins/s3fs/client.rs | 523 +++++ crates/ragfs/src/plugins/s3fs/mod.rs | 776 +++++++ crates/ragfs/src/plugins/sqlfs/backend.rs | 492 +++++ crates/ragfs/src/plugins/sqlfs/cache.rs | 350 +++ crates/ragfs/src/plugins/sqlfs/mod.rs | 865 ++++++++ crates/ragfs/src/server/config.rs | 125 ++ crates/ragfs/src/server/handlers.rs | 359 ++++ crates/ragfs/src/server/main.rs | 88 + crates/ragfs/src/server/mod.rs | 9 + crates/ragfs/src/server/router.rs | 73 + crates/ragfs/src/shell/main.rs | 8 + openviking/pyagfs/__init__.py | 20 +- 33 files changed, 11099 insertions(+), 76 deletions(-) rename crates/{ov_cli => }/LICENSE (100%) create mode 100644 crates/ragfs-python/Cargo.toml create mode 100644 crates/ragfs-python/pyproject.toml create mode 100644 crates/ragfs-python/src/lib.rs create mode 100644 crates/ragfs/Cargo.toml create mode 100644 crates/ragfs/MIGRATION_PLAN.md create mode 100644 crates/ragfs/src/core/errors.rs create mode 
100644 crates/ragfs/src/core/filesystem.rs create mode 100644 crates/ragfs/src/core/mod.rs create mode 100644 crates/ragfs/src/core/mountable.rs create mode 100644 crates/ragfs/src/core/plugin.rs create mode 100644 crates/ragfs/src/core/types.rs create mode 100644 crates/ragfs/src/lib.rs create mode 100644 crates/ragfs/src/plugins/kvfs/mod.rs create mode 100644 crates/ragfs/src/plugins/memfs/mod.rs create mode 100644 crates/ragfs/src/plugins/mod.rs create mode 100644 crates/ragfs/src/plugins/queuefs/mod.rs create mode 100644 crates/ragfs/src/plugins/s3fs/cache.rs create mode 100644 crates/ragfs/src/plugins/s3fs/client.rs create mode 100644 crates/ragfs/src/plugins/s3fs/mod.rs create mode 100644 crates/ragfs/src/plugins/sqlfs/backend.rs create mode 100644 crates/ragfs/src/plugins/sqlfs/cache.rs create mode 100644 crates/ragfs/src/plugins/sqlfs/mod.rs create mode 100644 crates/ragfs/src/server/config.rs create mode 100644 crates/ragfs/src/server/handlers.rs create mode 100644 crates/ragfs/src/server/main.rs create mode 100644 crates/ragfs/src/server/mod.rs create mode 100644 crates/ragfs/src/server/router.rs create mode 100644 crates/ragfs/src/shell/main.rs diff --git a/Cargo.lock b/Cargo.lock index ae50a74b9..d4554e4d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,18 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -34,6 +46,21 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -99,23 +126,599 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-config" +version = "1.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http 0.63.6", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http 0.62.6", + "aws-smithy-json 0.61.9", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "lru", + 
"percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.99.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.101.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +dependencies = [ + "aws-credential-types", 
+ "aws-smithy-eventstream", + "aws-smithy-http 0.63.6", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint 0.5.5", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "ring", + "sha2", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.63.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87294a084b43d649d967efe58aa1f9e0adc260e13a6938eb904c0ae9b45824ae" +dependencies = [ + "aws-smithy-http 0.62.6", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + 
"aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +dependencies = [ + "aws-credential-types", + 
"aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +dependencies = [ + "serde_core", +] [[package]] name = "block-buffer" @@ -144,6 +747,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "bzip2" version = "0.5.2" @@ -169,6 +782,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.4" @@ -208,6 +827,47 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + 
+[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -267,6 +927,15 @@ dependencies = [ "error-code", ] +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -297,6 +966,21 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -321,6 +1005,22 @@ dependencies = [ "crossterm 0.29.0", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -345,13 +1045,62 @@ 
version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc-fast" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ddc2d09feefeee8bd78101665bd8645637828fa9317f9f292496dbbd8c65ff3" +dependencies = [ + "crc", + "digest", + "rand 0.9.2", + "regex", + "rustversion", +] + [[package]] name = "crc32fast" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ - "cfg-if", + "cast", + "itertools 0.10.5", ] [[package]] @@ -479,6 +1228,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + 
"rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -529,6 +1306,27 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "807800ff3288b621186fe0a8f3392c4652068257302709c24efd918c3dffcdc2" +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.8" @@ -578,6 +1376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -623,11 +1422,58 @@ dependencies = [ "litrs", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der 
0.6.1", + "elliptic-curve", + "rfc6979", + "signature 1.6.4", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der 0.6.1", + "digest", + "ff", + "generic-array", + "group", + "pkcs8 0.9.0", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] [[package]] name = "endian-type" @@ -657,6 +1503,40 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -674,6 +1554,16 @@ dependencies = [ "windows-sys 0.59.0", ] 
+[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -690,6 +1580,23 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -705,6 +1612,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.32" @@ -747,6 +1660,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -843,6 +1767,75 @@ dependencies = [ "wasip3", ] +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -860,18 +1853,51 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -890,6 +1916,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -900,6 +1937,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -907,7 +1955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -918,8 +1966,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -929,6 +1977,36 @@ version = "1.10.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" @@ -939,9 +2017,11 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "http", - "http-body", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -950,19 +2030,35 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.8.1", "hyper-util", - "rustls", + "rustls 0.23.37", + "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -977,19 +2073,43 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - 
"http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -1163,11 +2283,31 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -1232,6 +2372,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name 
= "leb128fmt" @@ -1245,13 +2388,33 @@ version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ + "bitflags", "libc", + "plain", + "redox_syscall 0.7.3", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", ] [[package]] @@ -1340,12 +2503,46 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1414,12 +2611,67 @@ dependencies = [ "libc", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-conv" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1432,12 +2684,30 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" 
+version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + [[package]] name = "option-ext" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "ov_cli" version = "0.2.6" @@ -1468,6 +2738,23 @@ dependencies = [ "zip", ] +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.5" @@ -1486,7 +2773,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -1497,6 +2784,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + [[package]] name = "pbkdf2" version = 
"0.12.2" @@ -1507,6 +2800,15 @@ dependencies = [ "hmac", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1525,12 +2827,83 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der 0.7.10", + "pkcs8 0.10.2", + "spki 0.7.3", +] + +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der 0.6.1", + "spki 0.6.0", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der 0.7.10", + "spki 0.7.3", +] + [[package]] name = "pkg-config" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + 
+[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1574,6 +2947,69 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pyo3" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = 
"0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + [[package]] name = "quinn" version = "0.11.9" @@ -1586,8 +3022,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.37", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -1603,10 +3039,10 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -1624,7 +3060,7 @@ dependencies = [ "cfg_aliases 0.2.1", "libc", "once_cell", - "socket2", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -1660,14 +3096,78 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "ragfs" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "aws-config", + "aws-sdk-s3", + "aws-types", + "axum", + "bytes", + "chrono", + "clap", + "criterion", + "hyper 1.8.1", + "lru", + "path-clean", + "radix_trie", + "rusqlite", + "serde", + "serde_json", + "serde_yaml", + "sqlx", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tower", + "tower-http 0.5.2", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "ragfs-python" +version = "0.1.0" +dependencies = [ + "pyo3", + "ragfs", + "serde_json", + "tokio", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 
0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -1677,7 +3177,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -1690,31 +3199,60 @@ dependencies = [ ] [[package]] -name = "ratatui" -version = "0.29.0" +name = "ratatui" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +dependencies = [ + "bitflags", + "cassowary", + "compact_str", + "crossterm 0.28.1", + "indoc", + "instability", + "itertools 0.13.0", + "lru", + "paste", + "strum", + "unicode-segmentation", + "unicode-truncate", + "unicode-width 0.2.0", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", - "cassowary", - "compact_str", - "crossterm 0.28.1", - "indoc", - "instability", - "itertools", - "lru", - "paste", - "strum", - "unicode-segmentation", - "unicode-truncate", - "unicode-width 0.2.0", ] [[package]] name = "redox_syscall" -version = "0.5.18" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" dependencies = [ "bitflags", ] @@ -1753,6 +3291,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.10" @@ -1769,11 +3313,11 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.8.1", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", @@ -1781,16 +3325,16 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", - "tower-http", + "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", @@ -1799,6 +3343,17 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.14" @@ 
-1813,6 +3368,40 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8 0.10.2", + "rand_core 0.6.4", + "signature 2.2.0", + "spki 0.7.3", + "subtle", + "zeroize", +] + +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.9.1", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-hash" version = "2.1.1" @@ -1854,20 +3443,45 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -1878,12 +3492,23 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version 
= "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1932,12 +3557,68 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der 0.6.1", + "generic-array", + "pkcs8 0.9.0", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -1988,6 +3669,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2000,6 +3692,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2011,6 +3716,26 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2048,6 +3773,26 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.8" @@ -2055,25 +3800,255 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] -name = "slab" -version = "0.4.12" +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der 0.6.1", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + 
"base64ct", + "der 0.7.10", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink 0.10.0", + "indexmap", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + 
"base64", + "bitflags", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] [[package]] -name = "smallvec" -version = "1.15.1" +name = "sqlx-postgres" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] [[package]] -name = "socket2" -version = "0.6.3" +name = "sqlx-sqlite" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" dependencies = [ - "libc", - "windows-sys 0.61.2", + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.18", + "tracing", + "url", ] [[package]] @@ -2094,6 +4069,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006" 
+[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" @@ -2159,6 +4145,12 @@ dependencies = [ "syn", ] +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "tempfile" version = "3.26.0" @@ -2228,6 +4220,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.47" @@ -2239,6 +4240,7 @@ dependencies = [ "powerfmt", "serde_core", "time-core", + "time-macros", ] [[package]] @@ -2247,6 +4249,16 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -2257,6 +4269,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -2284,7 +4306,7 @@ dependencies = [ "parking_lot", "pin-project-lite", 
"signal-hook-registry", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -2300,13 +4322,47 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", "tokio", ] @@ -2323,6 +4379,24 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags", + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -2334,8 +4408,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower", @@ -2361,10 +4435,23 @@ version = "0.1.44" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" @@ -2372,6 +4459,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", ] [[package]] @@ -2392,12 +4522,33 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -2410,7 +4561,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" dependencies = [ - "itertools", + "itertools 0.13.0", "unicode-segmentation", "unicode-width 0.1.14", ] @@ -2433,6 +4584,18 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -2451,6 +4614,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -2475,12 +4644,30 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -2524,6 +4711,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.114" @@ -2646,6 +4839,16 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2677,6 +4880,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -3037,6 +5275,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index c09add8cd..ce34f9e19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["crates/ov_cli"] +members = ["crates/ov_cli", "crates/ragfs", "crates/ragfs-python"] resolver = "2" [profile.release] diff --git a/README.md b/README.md index 3ea775d60..6ec0bb2c8 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ English / [中文](README_CN.md) / [日本語](README_JA.md) -Website · GitHub · Issues · Docs +Website · GitHub · Issues · Docs [![][release-shield]][release-link] [![][github-stars-shield]][github-stars-link] diff --git a/crates/ov_cli/LICENSE b/crates/LICENSE similarity index 100% rename from crates/ov_cli/LICENSE rename to crates/LICENSE diff --git a/crates/ragfs-python/Cargo.toml b/crates/ragfs-python/Cargo.toml 
new file mode 100644 index 000000000..c132835cf --- /dev/null +++ b/crates/ragfs-python/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "ragfs-python" +version = "0.1.0" +edition = "2021" +description = "Python bindings for RAGFS - Rust AGFS filesystem" +publish = false + +[lib] +name = "ragfs_python" +crate-type = ["cdylib"] + +[dependencies] +ragfs = { path = "../ragfs" } +pyo3 = { version = "0.23", features = ["extension-module"] } +tokio = { version = "1", features = ["full"] } +serde_json = "1.0" diff --git a/crates/ragfs-python/pyproject.toml b/crates/ragfs-python/pyproject.toml new file mode 100644 index 000000000..560397e40 --- /dev/null +++ b/crates/ragfs-python/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "ragfs-python" +version = "0.1.0" +requires-python = ">=3.10" + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/crates/ragfs-python/src/lib.rs b/crates/ragfs-python/src/lib.rs new file mode 100644 index 000000000..16b3b8736 --- /dev/null +++ b/crates/ragfs-python/src/lib.rs @@ -0,0 +1,453 @@ +//! Python bindings for RAGFS - Rust AGFS filesystem +//! +//! Provides `RAGFSBindingClient`, a PyO3 native class that is API-compatible +//! with the existing Go-based `AGFSBindingClient`. This embeds the ragfs +//! filesystem engine directly in the Python process (no HTTP server needed). 
+ +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict, PyList}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::UNIX_EPOCH; + +use ragfs::core::{ConfigValue, FileInfo, FileSystem, MountableFS, PluginConfig, WriteFlag}; +use ragfs::plugins::{KVFSPlugin, MemFSPlugin, QueueFSPlugin, SQLFSPlugin}; + +/// Convert a ragfs error into a Python RuntimeError +fn to_py_err(e: ragfs::core::Error) -> PyErr { + PyRuntimeError::new_err(e.to_string()) +} + +/// Convert FileInfo to a Python dict matching the Go binding JSON format: +/// {"name": str, "size": int, "mode": int, "modTime": str, "isDir": bool} +fn file_info_to_py_dict(py: Python<'_>, info: &FileInfo) -> PyResult> { + let dict = PyDict::new(py); + dict.set_item("name", &info.name)?; + dict.set_item("size", info.size)?; + dict.set_item("mode", info.mode)?; + + // modTime as RFC3339 string (Go binding format) + let secs = info + .mod_time + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let mod_time = format_rfc3339(secs); + dict.set_item("modTime", mod_time)?; + + dict.set_item("isDir", info.is_dir)?; + Ok(dict.into()) +} + +/// Format unix timestamp as RFC3339 string (simplified, UTC) +fn format_rfc3339(secs: u64) -> String { + let s = secs; + let days = s / 86400; + let time_of_day = s % 86400; + let h = time_of_day / 3600; + let m = (time_of_day % 3600) / 60; + let sec = time_of_day % 60; + + // Calculate date from days since epoch (simplified) + let (year, month, day) = days_to_ymd(days); + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + year, month, day, h, m, sec + ) +} + +/// Convert days since Unix epoch to (year, month, day) +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + // Algorithm from http://howardhinnant.github.io/date_algorithms.html + let z = days + 719468; + let era = z / 146097; + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy 
= doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} + +/// Convert a Python dict to HashMap +fn py_dict_to_config(dict: &Bound<'_, PyDict>) -> PyResult> { + let mut params = HashMap::new(); + for (k, v) in dict.iter() { + let key: String = k.extract()?; + let value = if let Ok(s) = v.extract::() { + ConfigValue::String(s) + } else if let Ok(b) = v.extract::() { + ConfigValue::Bool(b) + } else if let Ok(i) = v.extract::() { + ConfigValue::Int(i) + } else { + ConfigValue::String(v.str()?.to_string()) + }; + params.insert(key, value); + } + Ok(params) +} + +/// RAGFS Python Binding Client. +/// +/// Embeds the ragfs filesystem engine directly in the Python process. +/// API-compatible with the Go-based AGFSBindingClient. +#[pyclass] +struct RAGFSBindingClient { + fs: Arc, + rt: tokio::runtime::Runtime, +} + +#[pymethods] +impl RAGFSBindingClient { + /// Create a new RAGFS binding client. + /// + /// Initializes the filesystem engine with all built-in plugins registered. + #[new] + #[pyo3(signature = (config_path=None))] + fn new(config_path: Option<&str>) -> PyResult { + let _ = config_path; // reserved for future use + + let rt = tokio::runtime::Runtime::new() + .map_err(|e| PyRuntimeError::new_err(format!("Failed to create runtime: {}", e)))?; + + let fs = Arc::new(MountableFS::new()); + + // Register all built-in plugins + rt.block_on(async { + fs.register_plugin(MemFSPlugin).await; + fs.register_plugin(KVFSPlugin).await; + fs.register_plugin(QueueFSPlugin).await; + fs.register_plugin(SQLFSPlugin::new()).await; + }); + + Ok(Self { fs, rt }) + } + + /// Check client health. + fn health(&self) -> PyResult> { + let mut m = HashMap::new(); + m.insert("status".to_string(), "healthy".to_string()); + Ok(m) + } + + /// Get client capabilities. 
+ fn get_capabilities(&self) -> PyResult> { + Python::with_gil(|py| { + let mut m = HashMap::new(); + m.insert("version".to_string(), "ragfs-python".into_pyobject(py)?.into_any().unbind()); + let features = vec!["memfs", "kvfs", "queuefs", "sqlfs"]; + m.insert("features".to_string(), features.into_pyobject(py)?.into_any().unbind()); + Ok(m) + }) + } + + /// List directory contents. + /// + /// Returns a list of file info dicts with keys: + /// name, size, mode, modTime, isDir + fn ls(&self, path: String) -> PyResult { + let fs = self.fs.clone(); + let entries = self.rt.block_on(async move { + fs.read_dir(&path).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + let list = PyList::empty(py); + for entry in &entries { + let dict = file_info_to_py_dict(py, entry)?; + list.append(dict)?; + } + Ok(list.into()) + }) + } + + /// Read file content. + /// + /// Args: + /// path: File path + /// offset: Starting position (default: 0) + /// size: Number of bytes to read (default: -1, read all) + /// stream: Not supported in binding mode + #[pyo3(signature = (path, offset=0, size=-1, stream=false))] + fn read(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult { + if stream { + return Err(PyRuntimeError::new_err( + "Streaming not supported in binding mode", + )); + } + + let fs = self.fs.clone(); + let off = if offset < 0 { 0u64 } else { offset as u64 }; + let sz = if size < 0 { 0u64 } else { size as u64 }; + + let data = self.rt.block_on(async move { + fs.read(&path, off, sz).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + Ok(PyBytes::new(py, &data).into()) + }) + } + + /// Read file content (alias for read). + #[pyo3(signature = (path, offset=0, size=-1, stream=false))] + fn cat(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult { + self.read(path, offset, size, stream) + } + + /// Write data to file. 
+ /// + /// Args: + /// path: File path + /// data: File content as bytes + #[pyo3(signature = (path, data, max_retries=3))] + fn write(&self, path: String, data: Vec, max_retries: i32) -> PyResult { + let _ = max_retries; // not applicable for local binding + let fs = self.fs.clone(); + let len = data.len(); + self.rt.block_on(async move { + fs.write(&path, &data, 0, WriteFlag::Create).await + }).map_err(to_py_err)?; + + Ok(format!("Written {} bytes", len)) + } + + /// Create a new empty file. + fn create(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.create(&path).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "created".to_string()); + Ok(m) + } + + /// Create a directory. + #[pyo3(signature = (path, mode="755"))] + fn mkdir(&self, path: String, mode: &str) -> PyResult> { + let mode_int = u32::from_str_radix(mode, 8) + .map_err(|e| PyRuntimeError::new_err(format!("Invalid mode '{}': {}", mode, e)))?; + + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.mkdir(&path, mode_int).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "created".to_string()); + Ok(m) + } + + /// Remove a file or directory. + #[pyo3(signature = (path, recursive=false))] + fn rm(&self, path: String, recursive: bool) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + if recursive { + fs.remove_all(&path).await + } else { + fs.remove(&path).await + } + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "deleted".to_string()); + Ok(m) + } + + /// Get file/directory information. 
+ fn stat(&self, path: String) -> PyResult { + let fs = self.fs.clone(); + let info = self.rt.block_on(async move { + fs.stat(&path).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + let dict = file_info_to_py_dict(py, &info)?; + Ok(dict.into()) + }) + } + + /// Rename/move a file or directory. + fn mv(&self, old_path: String, new_path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.rename(&old_path, &new_path).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "renamed".to_string()); + Ok(m) + } + + /// Change file permissions. + fn chmod(&self, path: String, mode: u32) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.chmod(&path, mode).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "chmod ok".to_string()); + Ok(m) + } + + /// Touch a file (create if not exists, or update timestamp). + fn touch(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + // Try create; if already exists, write empty to update mtime + match fs.create(&path).await { + Ok(_) => Ok(()), + Err(_) => { + // File exists, write empty bytes to update timestamp + fs.write(&path, &[], 0, WriteFlag::None).await.map(|_| ()) + } + } + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "touched".to_string()); + Ok(m) + } + + /// List all mounted plugins. + fn mounts(&self) -> PyResult>> { + let fs = self.fs.clone(); + let mount_list = self.rt.block_on(async move { + fs.list_mounts().await + }); + + let result: Vec> = mount_list + .into_iter() + .map(|(path, fstype)| { + let mut m = HashMap::new(); + m.insert("path".to_string(), path); + m.insert("fstype".to_string(), fstype); + m + }) + .collect(); + + Ok(result) + } + + /// Mount a plugin dynamically. 
+ /// + /// Args: + /// fstype: Filesystem type (e.g., "memfs", "sqlfs", "kvfs", "queuefs") + /// path: Mount path + /// config: Plugin configuration as dict + #[pyo3(signature = (fstype, path, config=None))] + fn mount( + &self, + fstype: String, + path: String, + config: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let params = match config { + Some(dict) => py_dict_to_config(dict)?, + None => HashMap::new(), + }; + + let plugin_config = PluginConfig { + name: fstype.clone(), + mount_path: path.clone(), + params, + }; + + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.mount(plugin_config).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert( + "message".to_string(), + format!("mounted {} at {}", fstype, path), + ); + Ok(m) + } + + /// Unmount a plugin. + fn unmount(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + let path_clone = path.clone(); + self.rt.block_on(async move { + fs.unmount(&path_clone).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), format!("unmounted {}", path)); + Ok(m) + } + + /// List all registered plugin names. + fn list_plugins(&self) -> PyResult> { + // Return the names of built-in plugins + Ok(vec![ + "memfs".to_string(), + "kvfs".to_string(), + "queuefs".to_string(), + "sqlfs".to_string(), + ]) + } + + /// Get detailed plugin information. + fn get_plugins_info(&self) -> PyResult> { + self.list_plugins() + } + + /// Load an external plugin (not supported in Rust binding). + fn load_plugin(&self, _library_path: String) -> PyResult> { + Err(PyRuntimeError::new_err( + "External plugin loading not supported in ragfs-python binding", + )) + } + + /// Unload an external plugin (not supported in Rust binding). 
+ fn unload_plugin(&self, _library_path: String) -> PyResult> { + Err(PyRuntimeError::new_err( + "External plugin unloading not supported in ragfs-python binding", + )) + } + + /// Search for pattern in files (not yet implemented in ragfs). + #[pyo3(signature = (path, pattern, recursive=false, case_insensitive=false, stream=false, node_limit=None))] + fn grep( + &self, + path: String, + pattern: String, + recursive: bool, + case_insensitive: bool, + stream: bool, + node_limit: Option, + ) -> PyResult { + let _ = (path, pattern, recursive, case_insensitive, stream, node_limit); + Err(PyRuntimeError::new_err( + "grep not yet implemented in ragfs-python", + )) + } + + /// Calculate file digest (not yet implemented in ragfs). + #[pyo3(signature = (path, algorithm="xxh3"))] + fn digest(&self, path: String, algorithm: &str) -> PyResult> { + let _ = (path, algorithm); + Err(PyRuntimeError::new_err( + "digest not yet implemented in ragfs-python", + )) + } +} + +/// Python module definition +#[pymodule] +fn ragfs_python(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/crates/ragfs/Cargo.toml b/crates/ragfs/Cargo.toml new file mode 100644 index 000000000..4e2569c12 --- /dev/null +++ b/crates/ragfs/Cargo.toml @@ -0,0 +1,95 @@ +[package] +name = "ragfs" +version = "0.1.0" +edition = "2021" +authors = ["OpenViking Contributors"] +description = "Rust implementation of AGFS - Aggregated File System for AI Agents" +license = "Apache-2.0" +repository = "https://github.com/OpenViking/openviking" +keywords = ["filesystem", "agents", "rest-api", "plugin-system"] +categories = ["filesystem", "network-programming"] + +[lib] +name = "ragfs" +path = "src/lib.rs" + +[[bin]] +name = "ragfs-server" +path = "src/server/main.rs" + +[[bin]] +name = "ragfs-shell" +path = "src/shell/main.rs" + +[dependencies] +# Async runtime +tokio = { version = "1.38", features = ["full"] } +async-trait = "0.1" + +# HTTP server +axum = "0.7" +tower = "0.5" +tower-http = { 
version = "0.5", features = ["trace", "cors"] } +hyper = "1.0" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" + +# Configuration +clap = { version = "4.5", features = ["derive", "env"] } + +# Logging +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } + +# Path handling and filesystem +path-clean = "1.0" + +# Data structures +radix_trie = "0.2" + +# Error handling +anyhow = "1.0" +thiserror = "1.0" + +# UUIDs +uuid = { version = "1.0", features = ["v4", "serde"] } + +# Time +chrono = { version = "0.4", features = ["serde"] } + +# Bytes handling +bytes = "1.5" + +# Database +rusqlite = { version = "0.32", features = ["bundled"] } +sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite", "mysql"], optional = true } + +# AWS S3 +aws-config = { version = "1", features = ["behavior-version-latest"], optional = true } +aws-sdk-s3 = { version = "1", optional = true } +aws-types = { version = "1", optional = true } + +# Cache +lru = "0.12" + +# Development dependencies +[dev-dependencies] +tempfile = "3.12" +criterion = "0.5" + +[features] +default = [] +s3 = ["aws-sdk-s3", "aws-config", "aws-types"] +full = ["s3"] + +[profile.release] +opt-level = 3 +lto = true +strip = true +codegen-units = 1 + +[profile.dev] +opt-level = 0 diff --git a/crates/ragfs/MIGRATION_PLAN.md b/crates/ragfs/MIGRATION_PLAN.md new file mode 100644 index 000000000..ad7cab553 --- /dev/null +++ b/crates/ragfs/MIGRATION_PLAN.md @@ -0,0 +1,965 @@ +# RAGFS Migration Plan +# Rust 实现的 AGFS 重构计划 + +**文档版本**: v1.0 +**创建日期**: 2026-04-03 +**目标**: 将 third_party/agfs (Go 实现) 迁移到 crates/ragfs (Rust 实现),以支持后续扩展 +**策略**: 允许 Go/Rust 双实现并存,支持自由切换和回退 +**致谢**: 谢谢 c44pt0r 的 AGFS 项目贡献了开源的 Go 实现,为本计划提供基础,我们会持续保持对原项目的 License 引用。 + +--- + +## 目录 + +1. [概述](#概述) +2. [架构设计](#架构设计) +3. [迁移阶段](#迁移阶段) +4. [纯 Rust 插件系统设计](#纯-rust-插件系统设计) +5. [Go/Rust 切换机制](#go-rust-切换机制) +6. [技术选型](#技术选型) +7. [风险评估](#风险评估) +8.
[里程碑](#里程碑) + +--- + +## 概述 + +### 背景 + +当前 AGFS 完全使用 Go 实现,包含约 33,000 行代码、14 种内置插件。随着 OpenViking 项目引入 Rust 生态(ov_cli),使用 Rust 重写 AGFS 可以: + +- 提升性能(无 GC,编译时优化) +- 增强安全性(内存安全保证) +- 统一技术栈(与 ov_cli 一致,移除项目对 Golang 的依赖) +- 简化部署(无需编译 Go 代码) + +### 设计原则 + +1. **渐进式迁移**: Go 和 Rust 版本可同时存在,独立运行 +2. **API 兼容性**: HTTP API 和 Python SDK 保持完全兼容 +3. **纯 Rust 插件**: 使用 trait 定义插件系统,不依赖 C ABI +4. **可观测性**: 完善的日志、指标和追踪支持,文件目录结构尽量与 Go 实现保持一致 +5. **测试驱动**: 每个模块都有对应的测试用例,确保功能一致 + +### 项目结构 + +``` +crates/ragfs/ +├── Cargo.toml # Workspace 配置 +├── MIGRATION_PLAN.md # 本文档 +├── src/ +│ ├── lib.rs # 库入口,导出公共 API +│ ├── core/ # 核心文件系统抽象 +│ │ ├── mod.rs +│ │ ├── filesystem.rs # FileSystem trait +│ │ ├── mountable.rs # MountableFS 实现 +│ │ ├── handle.rs # 文件句柄管理 +│ │ └── errors.rs # 错误类型定义 +│ ├── server/ # HTTP 服务器 +│ │ ├── mod.rs +│ │ ├── main.rs # 服务器入口 +│ │ ├── config.rs # 配置管理 +│ │ ├── handlers.rs # HTTP 处理器 +│ │ └── middleware.rs # 中间件 +│ ├── shell/ # 交互式 shell +│ │ ├── mod.rs +│ │ └── main.rs +│ └── plugins/ # 内置插件 +│ ├── mod.rs +│ ├── memfs/ +│ ├── kvfs/ +│ ├── queuefs/ +│ ├── s3fs/ +│ ├── sqlfs/ +│ └── ... 
+└── tests/ # 集成测试 +``` + +--- + +## 架构设计 + +### 核心组件 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ HTTP Client / SDK │ +└────────────────────────┬────────────────────────────────────┘ + │ HTTP/JSON API + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ HTTP Server │ +│ ┌────────────────────────────────────────────────────┐ │ +│ │ Router (axum) │ │ +│ │ - /api/v1/files │ │ +│ │ - /api/v1/directories │ │ +│ │ - /api/v1/mounts │ │ +│ │ - /api/v1/plugins │ │ +│ └────────────────────────────────────────────────────┘ │ +└────────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ MountableFS │ +│ ┌────────────────────────────────────────────────────┐ │ +│ │ Radix Trie (路径路由) │ │ +│ │ /memfs -> MemFS Plugin │ │ +│ │ /kvfs -> KVFS Plugin │ │ +│ │ /queuefs -> QueueFS Plugin │ │ +│ └────────────────────────────────────────────────────┘ │ +└────────────────────────┬────────────────────────────────────┘ + │ + ┌──────────┼──────────┐ + ▼ ▼ ▼ + ┌──────┐ ┌──────┐ ┌──────┐ + │MemFS │ │KVFS │ │QueueFS│ + └──────┘ └──────┘ └──────┘ +``` + +### 核心数据结构 + +#### FileSystem Trait + +```rust +/// 文件系统抽象接口 +#[async_trait] +pub trait FileSystem: Send + Sync { + /// 创建空文件 + async fn create(&self, path: &str) -> Result<()>; + + /// 创建目录 + async fn mkdir(&self, path: &str, mode: u32) -> Result<()>; + + /// 删除文件 + async fn remove(&self, path: &str) -> Result<()>; + + /// 递归删除 + async fn remove_all(&self, path: &str) -> Result<()>; + + /// 读取文件内容 + async fn read(&self, path: &str, offset: u64, size: u64) -> Result>; + + /// 写入文件 + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result; + + /// 列出目录 + async fn read_dir(&self, path: &str) -> Result>; + + /// 获取文件信息 + async fn stat(&self, path: &str) -> Result; + + /// 重命名 + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()>; + + /// 修改权限 + async fn chmod(&self, path: 
&str, mode: u32) -> Result<()>; +} +``` + +#### ServicePlugin Trait + +```rust +/// 服务插件接口 +#[async_trait] +pub trait ServicePlugin: Send + Sync { + /// 插件名称 + fn name(&self) -> &str; + + /// 验证配置 + async fn validate(&self, config: &PluginConfig) -> Result<()>; + + /// 初始化插件 + async fn initialize(&self, config: PluginConfig) -> Result>; + + /// 获取文档 + fn get_readme(&self) -> &str; + + /// 获取配置参数定义 + fn get_config_params(&self) -> Vec; + + /// 关闭插件 + async fn shutdown(&self) -> Result<()>; +} +``` + +--- + +## 迁移阶段 + +### 阶段 1: 基础设施 (2-3 周) ✅ 已完成 + +**目标**: 建立项目骨架和核心抽象 + +#### 任务清单 + +- [x] 创建 Cargo.toml 和项目结构 +- [x] 定义 `FileSystem` trait (`src/core/filesystem.rs`) +- [x] 定义 `ServicePlugin` trait (`src/core/plugin.rs`) +- [x] 定义错误类型 (`src/core/errors.rs`) +- [x] 定义文件信息结构 (`src/core/types.rs`) +- [x] 创建配置模块 (`src/server/config.rs`) +- [x] 编写基础单元测试 + +#### 验收标准 + +- ✅ 可以编译 `ragfs` 库 +- ✅ 所有 trait 有清晰的文档和测试 +- ✅ 错误处理完善 + +--- + +### 阶段 2: MountableFS 核心实现 (2 周) ✅ 已完成 + +**目标**: 实现核心的挂载文件系统 + +#### 任务清单 + +- [x] 实现 Radix Trie 路由 (使用 `radix_trie` crate) +- [x] 实现 `MountableFS` 结构体 +- [x] 实现插件注册机制 +- [x] 实现路径解析和路由 (`find_mount`) +- [x] 实现所有 `FileSystem` 方法的委托 +- [x] 实现并发安全 (使用 `Arc>`) +- [x] 编写并发测试 + +#### 对应 Go 代码 + +- `third_party/agfs/agfs-server/pkg/mountablefs/mountablefs.go` (967 行) + +#### 验收标准 + +- ✅ 可以动态挂载/卸载插件 +- [x] 高并发场景下无数据竞争 +- [x] 所有文件系统操作正确委托到挂载点 + +--- + +### 阶段 3: HTTP 服务器 (2 周) ✅ 已完成 + +**目标**: 实现与 Go 版本兼容的 HTTP API + +#### 任务清单 + +- [x] 实现 axum 路由 +- [x] 实现文件操作处理器 (`/api/v1/files`) +- [x] 实现目录操作处理器 (`/api/v1/directories`) +- [x] 实现挂载管理处理器 (`/api/v1/mount`, `/api/v1/unmount`) +- [x] 实现健康检查 (`/api/v1/health`) +- [x] 实现日志中间件 (tower TraceLayer) +- [ ] 实现指标收集 +- [x] 添加 CORS 支持 + +#### API 兼容性 + +必须与 Go 版本保持完全兼容的端点: + +``` +GET /api/v1/files?path=... # 读取文件 +PUT /api/v1/files?path=... # 写入文件 +POST /api/v1/files?path=... # 创建文件 +DELETE /api/v1/files?path=... # 删除文件 +GET /api/v1/stat?path=... # 获取元数据 + +GET /api/v1/directories?path=... 
# 列出目录 +POST /api/v1/directories?path=... # 创建目录 + +GET /api/v1/mounts # 列出挂载点 +POST /api/v1/mount # 挂载插件 +POST /api/v1/unmount # 卸载插件 + +GET /api/v1/health # 健康检查 +``` + +#### 验收标准 + +- ✅ 所有 API 端点可访问 +- ✅ API 响应格式与 Go 版本一致 +- ✅ 可以使用现有的 Python SDK 和 agfs-shell 连接 + +--- + +### 阶段 4: 基础插件 - MemFS (1 周) ✅ 已完成 + +**目标**: 实现最简单的内存文件系统 + +#### 任务清单 + +- [x] 实现 `MemFSPlugin` 结构体 +- [x] 实现内存存储 (使用 `Arc>>`) +- [x] 实现所有文件系统操作 +- [x] 编写完整的测试用例 +- [ ] 添加性能基准测试 + +#### 对应 Go 代码 + +- `third_party/agfs/agfs-server/pkg/plugins/memfs/memfs.go` +- `third_party/agfs/agfs-server/pkg/plugins/memfs/memoryfs.go` + +#### 验收标准 + +- ✅ 可以创建/读取/写入/删除文件 +- ✅ 可以列出目录 +- ✅ 数据存储在内存中,重启后丢失 +- [ ] 性能与 Go 版本相当或更好 + +--- + +### 阶段 5: 基础插件 - KVFS (1 周) ✅ 已完成 + +**目标**: 实现键值存储文件系统 + +#### 任务清单 + +- [x] 实现 `KVFSPlugin` 结构体 +- [x] 实现键值存储后端 (内存) +- [x] 实现文件名到键的映射 +- [ ] 实现持久化(可选) + +#### 对应 Go 代码 + +- `third_party/agfs/agfs-server/pkg/plugins/kvfs/` + +#### 验收标准 + +- ✅ 写入操作将文件内容作为值存储 +- ✅ 读取操作返回对应键的值 +- ✅ 列出目录返回所有键 + +--- + +### 阶段 6: 基础插件 - QueueFS (1-2 周) ✅ 已完成 + +**目标**: 实现消息队列文件系统 + +#### 任务清单 + +- [x] 实现 `QueueFSPlugin` 结构体 +- [x] 实现队列后端 (内存 VecDeque) +- [x] 实现控制文件: + - `enqueue`: 写入消息 + - `dequeue`: 读取并移除消息 + - `peek`: 查看但不移除 + - `size`: 获取队列长度 + - `clear`: 清空队列 +- [x] 实现消息 ID 生成 (UUID) +- [x] 编写并发测试 (多生产者/多消费者) + +#### 对应 Go 代码 + +- `third_party/agfs/agfs-server/pkg/plugins/queuefs/` + +#### 验收标准 + +- ✅ 多个写入者可以并发入队 +- ✅ 多个读取者可以并发出队 +- ✅ 消息不丢失、不重复 +- ✅ 性能满足实际使用需求 + +--- + +### 阶段 7: 高级插件 🔄 进行中 + +**目标**: 实现生产环境必需的插件 + +#### S3FS ✅ 已完成 + +- [x] 集成 AWS SDK for Rust (`aws-sdk-s3`, feature-gated under `s3`) +- [x] 实现文件上传/下载 (get_object, put_object, get_object_range) +- [x] 实现目录列举 (list_objects with prefix/delimiter, pagination) +- [x] 支持大文件分片上传 (batch delete 1000 per call) +- [x] S3Client wrapper (client.rs): 全面支持 AWS S3/MinIO/LocalStack/TOS +- [x] Dual-layer LRU cache (cache.rs): ListDirCache (30s TTL) + StatCache (60s TTL, 5x capacity) +- [x] S3FileSystem: 完整 FileSystem trait 实现 +- [x] S3FSPlugin:
13 个配置参数, validate, readme +- [x] 3 种 Directory Marker Modes: none/empty/nonempty (TOS 兼容) +- [x] Feature-gated: `cargo build --features s3` (不影响无 S3 需求的构建) +- [x] 9 个单元测试 (cache + path normalization + plugin validate) + +#### SQLFS ✅ 已完成 + +- [x] 使用 `rusqlite` +- [x] 支持 SQLite (MySQL/TiDB 预留接口) +- [x] 实现文件元数据存储 +- [x] 实现文件数据存储 (数据库 BLOB) +- [x] LRU 缓存 (目录列表) +- [x] Mutex 线程安全 +- [x] 17 个单元测试 + +#### ProxyFS + +- [ ] 实现对远程 AGFS 的代理 +- [ ] 实现请求转发 +- [ ] 实现连接池 + +#### 其他插件 + +- [ ] HTTPFS (提供文件下载服务) +- [ ] StreamFS (流式数据) +- [ ] HeartbeatFS (心跳监控) +- [ ] LocalFS (本地文件系统挂载) + +--- + +### 阶段 8: 配置系统 ⏳ 部分完成 + +**目标**: 支持与 Go 版本兼容的 YAML 配置 + +#### 任务清单 + +- [x] 定义配置结构体 (使用 `serde`) +- [x] 实现配置文件加载 (`config.yaml`) +- [x] 支持环境变量覆盖 +- [x] 实现配置验证 (基础) +- [ ] 提供示例配置文件 +- [ ] 支持从 YAML 配置自动挂载插件 + +#### 配置格式 (兼容 Go 版本) + +```yaml +server: + address: ":8080" + log_level: "info" + +plugins: + memfs: + enabled: true + path: "/memfs" + + kvfs: + enabled: true + path: "/kvfs" + + queuefs: + enabled: true + path: "/queuefs" +``` + +--- + +### 阶段 9: Shell 客户端 (2 周) + +**目标**: 实现交互式 shell (可选) + +#### 任务清单 + +- [ ] 实现 REPL (使用 `rustyline`) +- [ ] 实现内置命令 (`ls`, `cat`, `echo`, `mkdir`, `rm`, 等) +- [ ] 实现命令补全 +- [ ] 支持脚本执行 +- [ ] 添加颜色输出 + +--- + +### 阶段 10: 测试与优化 (2-3 周) + +**目标**: 完善测试覆盖和性能优化 + +#### 任务清单 + +- [ ] 编写集成测试 (端到端) +- [ ] 性能基准测试 (与 Go 版本对比) +- [ ] 压力测试 (高并发场景) +- [ ] 内存泄漏检测 +- [ ] 代码覆盖率 > 80% +- [ ] 文档完善 + +--- + +## 纯 Rust 插件系统设计 + +### 设计理念 + +1. **类型安全**: 使用 trait 确保编译时类型检查 +2. **零抽象成本**: 没有虚函数调用开销 (通过 monomorphization) +3. **异步优先**: 所有操作都是异步的 +4. 
**易于测试**: 插件可以 mock 和单元测试 + +### 插件接口 + +```rust +/// 插件配置参数元数据 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConfigParameter { + pub name: String, + pub r#type: String, // "string", "int", "bool", "string_list" + pub required: bool, + pub default: Option, + pub description: String, +} + +/// 插件配置值 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PluginConfig { + pub name: String, + pub mount_path: String, + pub params: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ConfigValue { + String(String), + Int(i64), + Bool(bool), + StringList(Vec), +} + +/// 服务插件 trait +#[async_trait] +pub trait ServicePlugin: Send + Sync + 'static { + /// 插件唯一名称 + fn name(&self) -> &str; + + /// 插件版本 + fn version(&self) -> &str { + "0.1.0" + } + + /// 插件描述 + fn description(&self) -> &str { + "" + } + + /// 获取 README 文档 + fn readme(&self) -> &str; + + /// 验证配置 + async fn validate(&self, config: &PluginConfig) -> Result<()>; + + /// 初始化插件,返回文件系统实例 + async fn initialize( + &self, + config: PluginConfig + ) -> Result>; + + /// 关闭插件 + async fn shutdown(&self) -> Result<()>; + + /// 获取支持的配置参数 + fn config_params(&self) -> &[ConfigParameter]; +} + +/// 插件注册表 +pub struct PluginRegistry { + plugins: HashMap>, +} + +impl PluginRegistry { + pub fn new() -> Self { + Self { + plugins: HashMap::new(), + } + } + + /// 注册插件 + pub fn register(&mut self, plugin: P) { + let name = plugin.name().to_string(); + self.plugins.insert(name, Box::new(plugin)); + } + + /// 获取插件 + pub fn get(&self, name: &str) -> Option<&dyn ServicePlugin> { + self.plugins.get(name).map(|p| p.as_ref()) + } + + /// 列出所有插件 + pub fn list(&self) -> Vec<&str> { + self.plugins.keys().map(|s| s.as_str()).collect() + } +} +``` + +### 插件实现示例: MemFS + +```rust +use crate::core::{FileSystem, ServicePlugin, PluginConfig, ConfigParameter}; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +/// MemFS 插件 +pub struct MemFSPlugin; + +impl 
ServicePlugin for MemFSPlugin { + fn name(&self) -> &str { + "memfs" + } + + fn readme(&self) -> &str { + r#"MemFS - In-memory File System + +A simple file system that stores data in memory. All data is lost +when the server restarts. + +Usage: + echo "hello" > /memfs/test.txt + cat /memfs/test.txt +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // MemFS 没有必需的配置参数 + Ok(()) + } + + async fn initialize( + &self, + _config: PluginConfig + ) -> Result> { + Ok(Box::new(MemFileSystem::new())) + } + + async fn shutdown(&self) -> Result<()> { + Ok(()) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } +} + +/// MemFS 实现文件系统 +struct MemFileSystem { + files: Arc>>>, +} + +impl MemFileSystem { + fn new() -> Self { + Self { + files: Arc::new(RwLock::new(HashMap::new())), + } + } +} + +#[async_trait] +impl FileSystem for MemFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let mut files = self.files.write().await; + files.insert(path.to_string(), vec![]); + Ok(()) + } + + async fn read(&self, path: &str, _offset: u64, _size: u64) -> Result> { + let files = self.files.read().await; + files.get(path) + .cloned() + .ok_or_else(|| Error::NotFound(path.to_string())) + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { + let mut files = self.files.write().await; + let file = files.entry(path.to_string()).or_insert_with(Vec::new); + + let offset = offset as usize; + if offset >= file.len() { + file.resize(offset, 0); + } + + let write_end = offset + data.len(); + file[offset..write_end].copy_from_slice(data); + Ok(data.len() as u64) + } + + // ... 其他方法实现 +} +``` + +--- + +## Go/Rust 切换机制 + +### 策略 + +Go 和 Rust 版本作为独立进程运行,通过以下方式切换: + +1. **环境变量控制** + ```bash + export AGFS_IMPL=rust # 使用 Rust 版本 + export AGFS_IMPL=go # 使用 Go 版本 (默认) + ``` + +2. 
**统一入口脚本** + ```bash + # scripts/agfs-server + if [[ "$AGFS_IMPL" == "rust" ]]; then + cargo run --bin ragfs-server -- "$@" + else + go run ./third_party/agfs/agfs-server/cmd/server/main.go "$@" + fi + ``` + +3. **Makefile 目标** + ```makefile + # 运行 Go 版本 + run-agfs-go: + cd third_party/agfs/agfs-server && make run + + # 运行 Rust 版本 + run-agfs-rust: + cargo run --bin ragfs-server + + # 运行 (根据 AGFS_IMPL 环境变量) + run-agfs: + @echo "Running AGFS (impl=$(AGFS_IMPL))..." + @if [ "$(AGFS_IMPL)" = "rust" ]; then \ + cargo run --bin ragfs-server; \ + else \ + cd third_party/agfs/agfs-server && make run; \ + fi + ``` + +### 兼容性保证 + +#### 1. API 兼容 + +两个版本必须实现完全相同的 HTTP API,包括: +- 请求格式 +- 响应格式 +- 错误码 +- 超时行为 + +#### 2. 配置兼容 + +使用相同的 `config.yaml` 格式,Rust 版本可以读取 Go 版本的配置。 + +#### 3. SDK 兼容 + +Python SDK 应该能够无缝连接到任一版本。 + +### 回退机制 + +如果 Rust 版本出现问题,可以通过以下方式快速回退: + +```bash +# 立即切换回 Go 版本 +export AGFS_IMPL=go + +# 重启服务 +systemctl restart agfs +``` + +### A/B 测试 + +支持同时运行两个版本进行对比: + +```bash +# 在不同端口运行 +cargo run --bin ragfs-server -- --port 8081 +cd third_party/agfs/agfs-server && go run ./cmd/server --port 8080 + +# 运行对比测试 +./scripts/compare_servers.sh http://localhost:8080 http://localhost:8081 +``` + +--- + +## 技术选型 + +### 运行时与网络 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| HTTP 服务器 | net/http | axum | 基于 tower 生态,类型安全,性能优秀 | +| 异步运行时 | goroutines | tokio | 最成熟,生态完善 | +| HTTP 客户端 | net/http | reqwest/hyper | 与 tokio 集成良好 | + +### 数据结构 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| 并发控制 | sync.RWMutex | tokio::sync::RwLock | 异步安全 | +| 路径路由 | go-immutable-radix | radix_trie | 无锁读取,性能好 | +| UUID | google/uuid | uuid (serde) | 功能完整 | + +### 序列化与配置 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| JSON | encoding/json | serde_json | serde 生态,编译时检查 | +| YAML | gopkg.in/yaml.v3 | serde_yaml | 基于 serde | +| TOML | - | toml (可选) | 配置文件可选格式 | + +### 数据库 + +| 组件 | Go 实现 | Rust 选择 | 理由 | 
+|------|---------|-----------|------| +| SQLite | mattn/go-sqlite3 | rusqlite | SQLite 的 Rust 绑定 (可 bundled 静态编译),无需 CGO | +| SQL 通用 | - | sqlx | 编译时查询检查 | + +### 云服务 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| AWS SDK | aws-sdk-go-v2 | aws-sdk-rust | 官方支持,性能好 | +| S3 | aws-sdk-go-v2/service/s3 | aws-sdk-s3 | 官方 SDK 的 S3 客户端 crate | + +### 日志与追踪 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| 日志 | logrus | tracing | 结构化日志,与 tokio 集成 | +| 追踪 | - | tracing-opentelemetry | OpenTelemetry 支持 | + +### 开发工具 + +| 组件 | Go 实现 | Rust 选择 | 理由 | +|------|---------|-----------|------| +| CLI 解析 | flag | clap | 功能强大,derive 宏 | +| 测试 | testing | built-in + criterion | 内置测试 + 性能基准 | +| 格式化 | gofmt | rustfmt | 官方工具 | +| Linting | golangci-lint | clippy | 官方 lint 工具 (rustup 组件) | + +--- + +## 风险评估 + +### 高风险 + +1. **插件系统设计变更** + - 风险: 从 C ABI 改为 Rust trait,外部插件需要重写 + - 缓解: 提供 Go 版本插件作为参考,提供迁移指南 + +2. **性能回归** + - 风险: 初始实现可能性能不如 Go 版本 + - 缓解: 并行开发,进行性能对比和优化 + +3. **API 不兼容** + - 风险: 实现细节差异导致行为不同 + - 缓解: 使用相同的测试套件测试两个版本 + +### 中风险 + +1. **异步编程复杂度** + - 风险: Tokio 异步模型比 goroutine 更复杂 + - 缓解: 团队培训,使用成熟的模式 + +2. **学习曲线** + - 风险: 团队成员不熟悉 Rust + - 缓解: 提供培训,结对编程 + +3. **依赖更新** + - 风险: Rust 生态快速变化,API 可能变动 + - 缓解: 使用稳定版本,定期更新 + +### 低风险 + +1. **测试覆盖** + - 风险: 重写时遗漏测试 + - 缓解: 测试驱动开发,代码评审 + +2. 
**文档缺失** + - 风险: 代码复杂但文档不完善 + - 缓解: 要求所有公共 API 有文档 + +--- + +## 里程碑 + +### Milestone 0.1: MVP (4 周) ✅ 已完成 + +**目标**: 可以运行的最小可用版本 + +- [x] 基础设施和核心 trait +- [x] MountableFS 实现 +- [x] HTTP 服务器 +- [x] MemFS 插件 +- [x] API 兼容性验证 + +**验收**: 可以运行 Rust 版本服务器,与 Python SDK 交互 + +**完成情况**: +- `core/` 模块完整: filesystem.rs, mountable.rs, plugin.rs, errors.rs, types.rs +- MountableFS 支持动态 mount/unmount,路径路由,并发安全 +- HTTP 服务器 (axum): 完整 REST API (files/directories/mounts/health) +- MemFS 插件: 完整文件系统操作 + 6 个测试 +- 所有 62 个单元测试通过 + +--- + +### Milestone 0.2: 基础插件完整 (2 周) ✅ 已完成 + +**目标**: 完成所有基础插件 + +- [x] KVFS — 内存键值存储,支持嵌套 key,6 个测试 +- [x] QueueFS — 基于控制文件的消息队列 (enqueue/dequeue/peek/size/clear),UUID 消息 ID,并发安全,8 个测试 +- [x] 基础配置系统 — CLI args (clap) + YAML 配置文件加载 + 环境变量 + +**验收**: 可以使用所有基础插件功能 ✅ + +--- + +### Milestone 0.3: 生产就绪 (4 周) 🔄 进行中 + +**目标**: 可以在生产环境使用 + +- [x] SQLFS — SQLite 后端,Mutex 线程安全,LRU 缓存,5MB 文件限制,17 个测试 +- [x] S3FS — AWS SDK for Rust, S3/MinIO/TOS 兼容, dual-layer cache, feature-gated, 9 个测试 +- [ ] 完善的日志和指标 +- [ ] 完整的测试覆盖 +- [ ] 文档完善 + +**当前进展**: +- SQLFS 已完成并通过所有测试 (backend.rs + cache.rs + mod.rs) +- S3FS 已完成: client.rs + cache.rs + mod.rs, feature-gated under `s3` +- SQLFSPlugin 和 S3FSPlugin 已注册到 server/main.rs +- 全部 71 个单元测试通过 (含 s3 feature) +- 下一步: 完善日志/指标、测试覆盖、文档 + +**验收**: 可以在生产环境部署并切换 + +--- + +### Milestone 1.0: 功能完整 (8 周) + +**目标**: 功能与 Go 版本对等 + +- [ ] 提供 python wrapper,用于 OpenViking 内联集成 +- [ ] 支持切换和功能回滚,将默认实现切换为 Rust 版本 + +--- + +## 参考资源 + +### Go 源代码 + +- Server: `third_party/agfs/agfs-server/` +- SDK: `third_party/agfs/agfs-sdk/` +- Shell: `third_party/agfs/agfs-shell/` +- FUSE: `third_party/agfs/agfs-fuse/` + +### Rust 生态 + +- axum: https://docs.rs/axum/latest/axum/ +- tokio: https://tokio.rs/ +- sqlx: https://docs.rs/sqlx/latest/sqlx/ +- aws-sdk-rust: https://github.com/awslabs/aws-sdk-rust + +### 相关项目 + +- Riker: https://github.com/riker-rs/riker (Actor 模型) +- async-std: https://async.rs/ (替代 tokio 的选择) + +--- + +## 更新日志 + +| 日期 | 版本 | 变更内容 | 
+|------|------|---------|
+| 2026-04-03 | v1.0 | 初始计划创建 |
+| 2026-04-03 | v1.1 | 标注 Milestone 0.1/0.2 完成,阶段 1-6 完成;SQLFS 修复 18 个编译错误并通过所有测试;开始 Milestone 0.3 |
+
+---
+
+## 贡献
+
+本计划是动态文档,随着项目进展持续更新。更新时请:
+
+1. 在更新日志中记录变更
+2. 更新相关章节
+3. 同步到团队
+
+---
+
+## 联系方式
+
+如有问题或建议,请联系 OpenViking 团队。
diff --git a/crates/ragfs/src/core/errors.rs b/crates/ragfs/src/core/errors.rs
new file mode 100644
index 000000000..53c58ea8e
--- /dev/null
+++ b/crates/ragfs/src/core/errors.rs
@@ -0,0 +1,142 @@
+//! Error types for RAGFS
+//!
+//! This module defines all error types used throughout the RAGFS system.
+//! We use `thiserror` for structured error definitions to ensure type safety
+//! and clear error messages.
+
+use std::io;
+
+/// Result type alias for RAGFS operations
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Main error type for RAGFS operations
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// File or directory not found
+    #[error("not found: {0}")]
+    NotFound(String),
+
+    /// File or directory already exists
+    #[error("already exists: {0}")]
+    AlreadyExists(String),
+
+    /// Permission denied
+    #[error("permission denied: {0}")]
+    PermissionDenied(String),
+
+    /// Invalid path
+    #[error("invalid path: {0}")]
+    InvalidPath(String),
+
+    /// Not a directory
+    #[error("not a directory: {0}")]
+    NotADirectory(String),
+
+    /// Is a directory (when file operation expected)
+    #[error("is a directory: {0}")]
+    IsADirectory(String),
+
+    /// Directory not empty
+    #[error("directory not empty: {0}")]
+    DirectoryNotEmpty(String),
+
+    /// Invalid operation
+    #[error("invalid operation: {0}")]
+    InvalidOperation(String),
+
+    /// I/O error
+    #[error("I/O error: {0}")]
+    Io(#[from] io::Error),
+
+    /// Plugin error
+    #[error("plugin error: {0}")]
+    Plugin(String),
+
+    /// Configuration error
+    #[error("configuration error: {0}")]
+    Config(String),
+
+    /// Mount point not found
+    #[error("mount point not found: {0}")]
+    MountPointNotFound(String),
+
+    /// Mount point already exists
+    #[error("mount point already exists: {0}")]
+    MountPointExists(String),
+
+    /// Serialization error
+    #[error("serialization error: {0}")]
+    Serialization(String),
+
+    /// Network error
+    #[error("network error: {0}")]
+    Network(String),
+
+    /// Timeout error
+    #[error("operation timed out: {0}")]
+    Timeout(String),
+
+    /// Internal error
+    #[error("internal error: {0}")]
+    Internal(String),
+}
+
+impl Error {
+    /// Create a NotFound error
+    pub fn not_found(path: impl Into<String>) -> Self {
+        Self::NotFound(path.into())
+    }
+
+    /// Create an AlreadyExists error
+    pub fn already_exists(path: impl Into<String>) -> Self {
+        Self::AlreadyExists(path.into())
+    }
+
+    /// Create a PermissionDenied error
+    pub fn permission_denied(path: impl Into<String>) -> Self {
+        Self::PermissionDenied(path.into())
+    }
+
+    /// Create an InvalidPath error
+    pub fn invalid_path(path: impl Into<String>) -> Self {
+        Self::InvalidPath(path.into())
+    }
+
+    /// Create a Plugin error
+    pub fn plugin(msg: impl Into<String>) -> Self {
+        Self::Plugin(msg.into())
+    }
+
+    /// Create a Config error
+    pub fn config(msg: impl Into<String>) -> Self {
+        Self::Config(msg.into())
+    }
+
+    /// Create an Internal error
+    pub fn internal(msg: impl Into<String>) -> Self {
+        Self::Internal(msg.into())
+    }
+
+    /// Create an InvalidOperation error
+    pub fn invalid_operation(msg: impl Into<String>) -> Self {
+        Self::InvalidOperation(msg.into())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_creation() {
+        let err = Error::not_found("/test/path");
+        assert!(matches!(err, Error::NotFound(_)));
+        assert_eq!(err.to_string(), "not found: /test/path");
+    }
+
+    #[test]
+    fn test_error_display() {
+        let err = Error::permission_denied("/protected");
+        assert_eq!(err.to_string(), "permission denied: /protected");
+    }
+}
diff --git a/crates/ragfs/src/core/filesystem.rs b/crates/ragfs/src/core/filesystem.rs
new file mode 100644
index 000000000..de79ab329
--- /dev/null
+++ b/crates/ragfs/src/core/filesystem.rs
@@ -0,0 +1,220 @@
+//! 
FileSystem trait definition
+//!
+//! This module defines the core FileSystem trait that all filesystem implementations
+//! must implement. This provides a unified interface for file operations across
+//! different storage backends.
+
+use async_trait::async_trait;
+
+use super::errors::Result;
+use super::types::{FileInfo, WriteFlag};
+
+/// Core filesystem abstraction trait
+///
+/// All filesystem plugins must implement this trait to provide file operations.
+/// All methods are async to support I/O-bound operations efficiently.
+#[async_trait]
+pub trait FileSystem: Send + Sync {
+    /// Create an empty file at the specified path
+    ///
+    /// # Arguments
+    /// * `path` - The path where the file should be created
+    ///
+    /// # Errors
+    /// * `Error::AlreadyExists` - If a file already exists at the path
+    /// * `Error::NotFound` - If the parent directory doesn't exist
+    /// * `Error::PermissionDenied` - If permission is denied
+    async fn create(&self, path: &str) -> Result<()>;
+
+    /// Create a directory at the specified path
+    ///
+    /// # Arguments
+    /// * `path` - The path where the directory should be created
+    /// * `mode` - Unix-style permissions (e.g., 0o755)
+    ///
+    /// # Errors
+    /// * `Error::AlreadyExists` - If a directory already exists at the path
+    /// * `Error::NotFound` - If the parent directory doesn't exist
+    async fn mkdir(&self, path: &str, mode: u32) -> Result<()>;
+
+    /// Remove a file at the specified path
+    ///
+    /// # Arguments
+    /// * `path` - The path of the file to remove
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the file doesn't exist
+    /// * `Error::IsADirectory` - If the path points to a directory
+    async fn remove(&self, path: &str) -> Result<()>;
+
+    /// Recursively remove a file or directory
+    ///
+    /// # Arguments
+    /// * `path` - The path to remove
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the path doesn't exist
+    async fn remove_all(&self, path: &str) -> Result<()>;
+
+    /// Read file contents
+    ///
+    /// # Arguments
+    /// * `path` - The path of the file to read
+    /// * `offset` - Byte offset to start reading from
+    /// * `size` - Number of bytes to read (0 means read all)
+    ///
+    /// # Returns
+    /// The file contents as a byte vector
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the file doesn't exist
+    /// * `Error::IsADirectory` - If the path points to a directory
+    async fn read(&self, path: &str, offset: u64, size: u64) -> Result<Vec<u8>>;
+
+    /// Write data to a file
+    ///
+    /// # Arguments
+    /// * `path` - The path of the file to write
+    /// * `data` - The data to write
+    /// * `offset` - Byte offset to start writing at
+    /// * `flags` - Write flags (create, append, truncate, etc.)
+    ///
+    /// # Returns
+    /// The number of bytes written
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the file doesn't exist and Create flag not set
+    /// * `Error::IsADirectory` - If the path points to a directory
+    async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result<u64>;
+
+    /// List directory contents
+    ///
+    /// # Arguments
+    /// * `path` - The path of the directory to list
+    ///
+    /// # Returns
+    /// A vector of FileInfo for each entry in the directory
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the directory doesn't exist
+    /// * `Error::NotADirectory` - If the path is not a directory
+    async fn read_dir(&self, path: &str) -> Result<Vec<FileInfo>>;
+
+    /// Get file or directory metadata
+    ///
+    /// # Arguments
+    /// * `path` - The path to get metadata for
+    ///
+    /// # Returns
+    /// FileInfo containing metadata
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the path doesn't exist
+    async fn stat(&self, path: &str) -> Result<FileInfo>;
+
+    /// Rename/move a file or directory
+    ///
+    /// # Arguments
+    /// * `old_path` - The current path
+    /// * `new_path` - The new path
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If old_path doesn't exist
+    /// * `Error::AlreadyExists` - If new_path already exists
+    async fn rename(&self, old_path: &str, new_path: &str) -> Result<()>;
+
+    /// Change file permissions
+    ///
+    /// # Arguments
+    /// * `path` - The path of the file
+    /// * `mode` - New Unix-style permissions
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the path doesn't exist
+    async fn chmod(&self, path: &str, mode: u32) -> Result<()>;
+
+    /// Truncate a file to a specified size
+    ///
+    /// # Arguments
+    /// * `path` - The path of the file
+    /// * `size` - The new size in bytes
+    ///
+    /// # Errors
+    /// * `Error::NotFound` - If the file doesn't exist
+    /// * `Error::IsADirectory` - If the path points to a directory
+    async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+        // Default implementation: read, resize, write back
+        let mut data = self.read(path, 0, 0).await?;
+        data.resize(size as usize, 0);
+        self.write(path, &data, 0, WriteFlag::Truncate).await?;
+        Ok(())
+    }
+
+    /// Check if a path exists
+    ///
+    /// # Arguments
+    /// * `path` - The path to check
+    ///
+    /// # Returns
+    /// true if the path exists, false otherwise
+    async fn exists(&self, path: &str) -> bool {
+        self.stat(path).await.is_ok()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Mock filesystem for testing
+    struct MockFS;
+
+    #[async_trait]
+    impl FileSystem for MockFS {
+        async fn create(&self, _path: &str) -> Result<()> {
+            Ok(())
+        }
+
+        async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> {
+            Ok(())
+        }
+
+        async fn remove(&self, _path: &str) -> Result<()> {
+            Ok(())
+        }
+
+        async fn remove_all(&self, _path: &str) -> Result<()> {
+            Ok(())
+        }
+
+        async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result<Vec<u8>> {
+            Ok(vec![])
+        }
+
+        async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result<u64> {
+            Ok(_data.len() as u64)
+        }
+
+        async fn read_dir(&self, _path: &str) -> Result<Vec<FileInfo>> {
+            Ok(vec![])
+        }
+
+        async fn stat(&self, _path: &str) -> Result<FileInfo> {
+            Ok(FileInfo::new_file("test".to_string(), 0, 0o644))
+        }
+
+        async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> {
+            Ok(())
+        }
+
+        async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_filesystem_trait() {
+        let fs = MockFS;
+        assert!(fs.exists("/test").await);
+    }
+}
diff --git a/crates/ragfs/src/core/mod.rs b/crates/ragfs/src/core/mod.rs
new file mode 100644
index 000000000..9b1e1730e
--- /dev/null
+++ b/crates/ragfs/src/core/mod.rs
@@ -0,0 +1,21 @@
+//! Core module for RAGFS
+//!
+//! This module contains the fundamental abstractions and types used throughout RAGFS:
+//! - Error types and Result alias
+//! - FileSystem trait for filesystem implementations
+//! - ServicePlugin trait for plugin system
+//! - MountableFS for routing operations to mounted plugins
+//! - Core data types (FileInfo, ConfigParameter, etc.)
+
+pub mod errors;
+pub mod filesystem;
+pub mod mountable;
+pub mod plugin;
+pub mod types;
+
+// Re-export commonly used types
+pub use errors::{Error, Result};
+pub use filesystem::FileSystem;
+pub use mountable::MountableFS;
+pub use plugin::{HealthStatus, PluginRegistry, ServicePlugin};
+pub use types::{ConfigParameter, ConfigValue, FileInfo, PluginConfig, WriteFlag};
diff --git a/crates/ragfs/src/core/mountable.rs b/crates/ragfs/src/core/mountable.rs
new file mode 100644
index 000000000..29bdd25f0
--- /dev/null
+++ b/crates/ragfs/src/core/mountable.rs
@@ -0,0 +1,625 @@
+//! MountableFS - A filesystem that routes operations to mounted plugins
+//!
+//! This module implements the core MountableFS which acts as a router,
+//! directing filesystem operations to the appropriate mounted plugin based
+//! on the path prefix.
+
+use async_trait::async_trait;
+use radix_trie::{Trie, TrieCommon};
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+
+use super::errors::{Error, Result};
+use super::filesystem::FileSystem;
+use super::plugin::ServicePlugin;
+use super::types::{FileInfo, PluginConfig, WriteFlag};
+
+/// Information about a mounted filesystem
+#[derive(Clone)]
+struct MountInfo {
+    /// The mount path (e.g., "/memfs")
+    path: String,
+
+    /// The filesystem instance
+    fs: Arc<dyn FileSystem>,
+
+    /// The plugin that created this filesystem
+    plugin_name: String,
+}
+
+/// MountableFS routes filesystem operations to mounted plugins
+///
+/// This is the core component that allows multiple filesystem implementations
+/// to coexist at different mount points. It uses a radix trie for efficient
+/// path-based routing.
+pub struct MountableFS {
+    /// Radix trie for fast path lookup
+    mounts: Arc<RwLock<Trie<String, MountInfo>>>,
+
+    /// Plugin registry for creating new filesystem instances
+    registry: Arc<RwLock<HashMap<String, Arc<dyn ServicePlugin>>>>,
+}
+
+impl MountableFS {
+    /// Create a new MountableFS
+    pub fn new() -> Self {
+        Self {
+            mounts: Arc::new(RwLock::new(Trie::new())),
+            registry: Arc::new(RwLock::new(HashMap::new())),
+        }
+    }
+
+    /// Register a plugin
+    ///
+    /// # Arguments
+    /// * `plugin` - The plugin to register
+    pub async fn register_plugin<P: ServicePlugin + 'static>(&self, plugin: P) {
+        let name = plugin.name().to_string();
+        let mut registry = self.registry.write().await;
+        registry.insert(name, Arc::new(plugin));
+    }
+
+    /// Mount a filesystem at the specified path
+    ///
+    /// # Arguments
+    /// * `config` - Plugin configuration including mount path
+    ///
+    /// # Errors
+    /// * `Error::MountPointExists` - If a filesystem is already mounted at this path
+    /// * `Error::Plugin` - If the plugin is not registered or initialization fails
+    pub async fn mount(&self, config: PluginConfig) -> Result<()> {
+        let mount_path = config.mount_path.clone();
+
+        // Normalize path (ensure it starts with / and doesn't end with /)
+        let normalized_path = normalize_path(&mount_path);
+
+        // Check if already mounted
+        {
+            let mounts = self.mounts.read().await;
+            if mounts.get(&normalized_path).is_some() {
+                return Err(Error::MountPointExists(normalized_path));
+            }
+        }
+
+        // Get plugin from registry
+        let plugin = {
+            let registry = self.registry.read().await;
+            registry
+                .get(&config.name)
+                .cloned()
+                .ok_or_else(|| Error::plugin(format!("Plugin '{}' not registered", config.name)))?
+        };
+
+        // Validate configuration
+        plugin.validate(&config).await?;
+
+        // Initialize filesystem
+        let fs = plugin.initialize(config.clone()).await?;
+
+        // Add to mounts
+        let mount_info = MountInfo {
+            path: normalized_path.clone(),
+            fs: Arc::from(fs),
+            plugin_name: config.name.clone(),
+        };
+
+        let mut mounts = self.mounts.write().await;
+        mounts.insert(normalized_path, mount_info);
+
+        Ok(())
+    }
+
+    /// Unmount a filesystem at the specified path
+    ///
+    /// # Arguments
+    /// * `path` - The mount path to unmount
+    ///
+    /// # Errors
+    /// * `Error::MountPointNotFound` - If no filesystem is mounted at this path
+    pub async fn unmount(&self, path: &str) -> Result<()> {
+        let normalized_path = normalize_path(path);
+
+        let mut mounts = self.mounts.write().await;
+        if mounts.remove(&normalized_path).is_none() {
+            return Err(Error::MountPointNotFound(normalized_path));
+        }
+
+        Ok(())
+    }
+
+    /// List all mount points
+    ///
+    /// # Returns
+    /// A vector of tuples containing (mount_path, plugin_name)
+    pub async fn list_mounts(&self) -> Vec<(String, String)> {
+        let mounts = self.mounts.read().await;
+        mounts
+            .iter()
+            .map(|(path, info)| (path.clone(), info.plugin_name.clone()))
+            .collect()
+    }
+
+    /// Find the mount point for a given path
+    ///
+    /// # Arguments
+    /// * `path` - The path to look up
+    ///
+    /// # Returns
+    /// A tuple of (mount_info, relative_path) where relative_path is the path
+    /// relative to the mount point
+    ///
+    /// # Errors
+    /// * `Error::MountPointNotFound` - If no mount point matches the path
+    async fn find_mount(&self, path: &str) -> Result<(MountInfo, String)> {
+        let normalized_path = normalize_path(path);
+        let mounts = self.mounts.read().await;
+
+        // Find the longest mount path that is a whole-component prefix of the
+        // request. A bare `starts_with` would route "/memfs2/x" to "/memfs".
+        let mut best_match: Option<(&String, &MountInfo, String)> = None;
+
+        for (mount_path, mount_info) in mounts.iter() {
+            // Path handed to the plugin: always absolute within the mount.
+            let relative_path = match normalized_path.strip_prefix(mount_path.as_str()) {
+                // A root mount owns every path and passes it through unchanged.
+                Some(_) if mount_path.as_str() == "/" => normalized_path.clone(),
+                Some("") => "/".to_string(),
+                Some(rest) if rest.starts_with('/') => rest.to_string(),
+                // Not under this mount (or only a partial component match).
+                _ => continue,
+            };
+
+            let is_longer = match &best_match {
+                Some((best_path, _, _)) => mount_path.len() > best_path.len(),
+                None => true,
+            };
+            if is_longer {
+                best_match = Some((mount_path, mount_info, relative_path));
+            }
+        }
+
+        match best_match {
+            Some((_, mount_info, relative_path)) => Ok((mount_info.clone(), relative_path)),
+            None => Err(Error::MountPointNotFound(normalized_path)),
+        }
+    }
+}
+
+impl Default for MountableFS {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Normalize a path by ensuring it starts with / and doesn't end with /
+fn normalize_path(path: &str) -> String {
+    let mut normalized = path.trim().to_string();
+
+    // Ensure starts with /
+    if !normalized.starts_with('/') {
+        normalized.insert(0, '/');
+    }
+
+    // Remove trailing / (except for root)
+    if normalized.len() > 1 && normalized.ends_with('/') {
+        normalized.pop();
+    }
+
+    normalized
+}
+
+// Implement FileSystem trait for MountableFS by delegating to mounted filesystems
+#[async_trait]
+impl FileSystem for MountableFS {
+    async fn create(&self, path: &str) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.create(&rel_path).await
+    }
+
+    async fn mkdir(&self, path: &str, mode: u32) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.mkdir(&rel_path, mode).await
+    }
+
+    async fn remove(&self, path: &str) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.remove(&rel_path).await
+    }
+
+    async fn remove_all(&self, path: &str) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.remove_all(&rel_path).await
+    }
+
+    async fn read(&self, path: &str, offset: u64, size: u64) -> Result<Vec<u8>> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.read(&rel_path, offset, size).await
+    }
+
+    async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result<u64> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.write(&rel_path, data, offset, flags).await
+    }
+
+    async fn read_dir(&self, path: &str) -> Result<Vec<FileInfo>> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.read_dir(&rel_path).await
+    }
+
+    async fn stat(&self, path: &str) -> Result<FileInfo> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.stat(&rel_path).await
+    }
+
+    async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+        let (mount_info_old, rel_old) = self.find_mount(old_path).await?;
+        let (mount_info_new, rel_new) = self.find_mount(new_path).await?;
+
+        // Ensure both paths are on the same mount
+        if mount_info_old.path != mount_info_new.path {
+            return Err(Error::InvalidOperation(
+                "Cannot rename across different mount points".to_string(),
+            ));
+        }
+
+        mount_info_old.fs.rename(&rel_old, &rel_new).await
+    }
+
+    async fn chmod(&self, path: &str, mode: u32) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.chmod(&rel_path, mode).await
+    }
+
+    async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+        let (mount_info, rel_path) = self.find_mount(path).await?;
+        mount_info.fs.truncate(&rel_path, size).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+
+    // Mock filesystem for testing
+    struct MockFS {
+        name: String,
+    }
+
+    impl MockFS {
+        fn new(name: &str) -> Self { 
+ Self { + name: name.to_string(), + } + } + } + + #[async_trait] + impl FileSystem for MockFS { + async fn create(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> { + Ok(self.name.as_bytes().to_vec()) + } + + async fn write(&self, _path: &str, data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Ok(data.len() as u64) + } + + async fn read_dir(&self, _path: &str) -> Result> { + Ok(vec![]) + } + + async fn stat(&self, path: &str) -> Result { + Ok(FileInfo::new_file(path.to_string(), 0, 0o644)) + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Ok(()) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + } + + // Mock plugin for testing + struct MockPlugin { + name: String, + } + + impl MockPlugin { + fn new(name: &str) -> Self { + Self { + name: name.to_string(), + } + } + } + + #[async_trait] + impl ServicePlugin for MockPlugin { + fn name(&self) -> &str { + &self.name + } + + fn readme(&self) -> &str { + "Mock plugin for testing" + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(MockFS::new(&self.name))) + } + + fn config_params(&self) -> &[super::super::types::ConfigParameter] { + &[] + } + } + + #[test] + fn test_normalize_path() { + assert_eq!(normalize_path("/test"), "/test"); + assert_eq!(normalize_path("/test/"), "/test"); + assert_eq!(normalize_path("test"), "/test"); + assert_eq!(normalize_path("/"), "/"); + assert_eq!(normalize_path(""), "/"); + } + + #[tokio::test] + async fn test_mountable_fs_creation() { + let mfs = MountableFS::new(); + let mounts = mfs.list_mounts().await; + 
assert!(mounts.is_empty()); + } + + #[tokio::test] + async fn test_mount_and_unmount() { + let mfs = MountableFS::new(); + + // Register plugin + mfs.register_plugin(MockPlugin::new("mock")).await; + + // Mount filesystem + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + assert!(mfs.mount(config).await.is_ok()); + + // Check mount list + let mounts = mfs.list_mounts().await; + assert_eq!(mounts.len(), 1); + assert_eq!(mounts[0].0, "/mock"); + assert_eq!(mounts[0].1, "mock"); + + // Unmount + assert!(mfs.unmount("/mock").await.is_ok()); + + // Check mount list is empty + let mounts = mfs.list_mounts().await; + assert!(mounts.is_empty()); + } + + #[tokio::test] + async fn test_mount_duplicate_error() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + // First mount should succeed + assert!(mfs.mount(config.clone()).await.is_ok()); + + // Second mount at same path should fail + let result = mfs.mount(config).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::MountPointExists(_))); + } + + #[tokio::test] + async fn test_unmount_not_found() { + let mfs = MountableFS::new(); + + let result = mfs.unmount("/nonexistent").await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::MountPointNotFound(_))); + } + + #[tokio::test] + async fn test_filesystem_operations() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config).await.unwrap(); + + // Test read operation + let data = mfs.read("/mock/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"mock"); + + // Test write operation + let written = 
mfs.write("/mock/test.txt", b"hello", 0, WriteFlag::Create).await.unwrap(); + assert_eq!(written, 5); + + // Test stat operation + let info = mfs.stat("/mock/test.txt").await.unwrap(); + assert_eq!(info.name, "/test.txt"); + } + + #[tokio::test] + async fn test_path_routing() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock1")).await; + mfs.register_plugin(MockPlugin::new("mock2")).await; + + // Mount two filesystems + let config1 = PluginConfig { + name: "mock1".to_string(), + mount_path: "/fs1".to_string(), + params: HashMap::new(), + }; + + let config2 = PluginConfig { + name: "mock2".to_string(), + mount_path: "/fs2".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config1).await.unwrap(); + mfs.mount(config2).await.unwrap(); + + // Test routing to different filesystems + let data1 = mfs.read("/fs1/file.txt", 0, 0).await.unwrap(); + assert_eq!(data1, b"mock1"); + + let data2 = mfs.read("/fs2/file.txt", 0, 0).await.unwrap(); + assert_eq!(data2, b"mock2"); + } + + #[tokio::test] + async fn test_rename_across_mounts_error() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock1")).await; + mfs.register_plugin(MockPlugin::new("mock2")).await; + + let config1 = PluginConfig { + name: "mock1".to_string(), + mount_path: "/fs1".to_string(), + params: HashMap::new(), + }; + + let config2 = PluginConfig { + name: "mock2".to_string(), + mount_path: "/fs2".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config1).await.unwrap(); + mfs.mount(config2).await.unwrap(); + + // Try to rename across different mounts - should fail + let result = mfs.rename("/fs1/file.txt", "/fs2/file.txt").await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::InvalidOperation(_))); + } + + #[tokio::test] + async fn test_concurrent_operations() { + use tokio::task; + + let mfs = Arc::new(MountableFS::new()); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: 
"mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config).await.unwrap(); + + // Spawn multiple concurrent read operations + let mut handles = vec![]; + for i in 0..10 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + let path = format!("/mock/file{}.txt", i); + mfs_clone.read(&path, 0, 0).await + }); + handles.push(handle); + } + + // Wait for all operations to complete + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), b"mock"); + } + } + + #[tokio::test] + async fn test_concurrent_mount_unmount() { + use tokio::task; + + let mfs = Arc::new(MountableFS::new()); + + // Register multiple plugins + for i in 0..5 { + mfs.register_plugin(MockPlugin::new(&format!("mock{}", i))).await; + } + + // Spawn concurrent mount operations + let mut handles = vec![]; + for i in 0..5 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + let config = PluginConfig { + name: format!("mock{}", i), + mount_path: format!("/mock{}", i), + params: HashMap::new(), + }; + mfs_clone.mount(config).await + }); + handles.push(handle); + } + + // Wait for all mounts to complete + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + } + + // Verify all mounts + let mounts = mfs.list_mounts().await; + assert_eq!(mounts.len(), 5); + + // Concurrent unmount + let mut handles = vec![]; + for i in 0..5 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + mfs_clone.unmount(&format!("/mock{}", i)).await + }); + handles.push(handle); + } + + // Wait for all unmounts + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + } + + // Verify all unmounted + let mounts = mfs.list_mounts().await; + assert!(mounts.is_empty()); + } +} diff --git a/crates/ragfs/src/core/plugin.rs b/crates/ragfs/src/core/plugin.rs new file mode 100644 index 
000000000..2bbcaf1cc --- /dev/null +++ b/crates/ragfs/src/core/plugin.rs @@ -0,0 +1,276 @@ +//! Plugin system for RAGFS +//! +//! This module defines the ServicePlugin trait that all plugins must implement. +//! Plugins provide filesystem implementations that can be dynamically mounted +//! at different paths. + +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; + +use super::errors::Result; +use super::filesystem::FileSystem; +use super::types::{ConfigParameter, PluginConfig}; + +/// Service plugin trait +/// +/// All filesystem plugins must implement this trait to be registered +/// and used within RAGFS. The plugin is responsible for validating +/// configuration and creating filesystem instances. +#[async_trait] +pub trait ServicePlugin: Send + Sync { + /// Get the unique name of this plugin + /// + /// This name is used to identify the plugin in configuration + /// and mount operations. + fn name(&self) -> &str; + + /// Get the plugin version + fn version(&self) -> &str { + "0.1.0" + } + + /// Get a brief description of the plugin + fn description(&self) -> &str { + "" + } + + /// Get the README documentation for this plugin + /// + /// This should include usage examples, configuration parameters, + /// and any special considerations. + fn readme(&self) -> &str; + + /// Validate plugin configuration + /// + /// This is called before initialize() to ensure the configuration + /// is valid. Should check for required parameters, valid values, etc. + /// + /// # Arguments + /// * `config` - The configuration to validate + /// + /// # Errors + /// Returns an error if the configuration is invalid + async fn validate(&self, config: &PluginConfig) -> Result<()>; + + /// Initialize the plugin and return a filesystem instance + /// + /// This is called after validate() succeeds. The plugin should + /// create and return a new filesystem instance configured according + /// to the provided configuration. 
+ /// + /// # Arguments + /// * `config` - The validated configuration + /// + /// # Returns + /// A boxed FileSystem implementation + /// + /// # Errors + /// Returns an error if initialization fails + async fn initialize(&self, config: PluginConfig) -> Result>; + + /// Shutdown the plugin + /// + /// This is called when the plugin is being unmounted or the server + /// is shutting down. The plugin should clean up any resources. + async fn shutdown(&self) -> Result<()> { + Ok(()) + } + + /// Get the configuration parameters supported by this plugin + /// + /// Returns a list of parameter definitions that describe what + /// configuration this plugin accepts. + fn config_params(&self) -> &[ConfigParameter]; + + /// Health check for the plugin + /// + /// Returns whether the plugin is healthy and operational. + async fn health_check(&self) -> Result { + Ok(HealthStatus::Healthy) + } +} + +/// Health status of a plugin +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum HealthStatus { + /// Plugin is healthy and operational + Healthy, + + /// Plugin is degraded but still functional + Degraded(String), + + /// Plugin is unhealthy and not functional + Unhealthy(String), +} + +/// Plugin registry +/// +/// Manages all registered plugins and provides lookup functionality. 
+pub struct PluginRegistry { + plugins: HashMap>, +} + +impl PluginRegistry { + /// Create a new empty plugin registry + pub fn new() -> Self { + Self { + plugins: HashMap::new(), + } + } + + /// Register a plugin + /// + /// # Arguments + /// * `plugin` - The plugin to register + /// + /// # Panics + /// Panics if a plugin with the same name is already registered + pub fn register(&mut self, plugin: P) { + let name = plugin.name().to_string(); + if self.plugins.contains_key(&name) { + panic!("Plugin '{}' is already registered", name); + } + self.plugins.insert(name, Arc::new(plugin)); + } + + /// Get a plugin by name + /// + /// # Arguments + /// * `name` - The name of the plugin to retrieve + /// + /// # Returns + /// An Arc to the plugin, or None if not found + pub fn get(&self, name: &str) -> Option> { + self.plugins.get(name).cloned() + } + + /// List all registered plugin names + pub fn list(&self) -> Vec<&str> { + self.plugins.keys().map(|s| s.as_str()).collect() + } + + /// Get the number of registered plugins + pub fn len(&self) -> usize { + self.plugins.len() + } + + /// Check if the registry is empty + pub fn is_empty(&self) -> bool { + self.plugins.is_empty() + } +} + +impl Default for PluginRegistry { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Mock plugin for testing + struct MockPlugin; + + #[async_trait] + impl ServicePlugin for MockPlugin { + fn name(&self) -> &str { + "mock" + } + + fn readme(&self) -> &str { + "Mock plugin for testing" + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + use crate::core::filesystem::FileSystem; + use crate::core::types::{FileInfo, WriteFlag}; + + struct MockFS; + + #[async_trait] + impl FileSystem for MockFS { + async fn create(&self, _path: &str) -> Result<()> { + Ok(()) + } + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + async 
fn remove(&self, _path: &str) -> Result<()> { + Ok(()) + } + async fn remove_all(&self, _path: &str) -> Result<()> { + Ok(()) + } + async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> { + Ok(vec![]) + } + async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Ok(_data.len() as u64) + } + async fn read_dir(&self, _path: &str) -> Result> { + Ok(vec![]) + } + async fn stat(&self, _path: &str) -> Result { + Ok(FileInfo::new_file("test".to_string(), 0, 0o644)) + } + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Ok(()) + } + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + } + + Ok(Box::new(MockFS)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } + } + + #[test] + fn test_plugin_registry() { + let mut registry = PluginRegistry::new(); + assert!(registry.is_empty()); + + registry.register(MockPlugin); + assert_eq!(registry.len(), 1); + assert!(registry.get("mock").is_some()); + assert!(registry.get("nonexistent").is_none()); + + let names = registry.list(); + assert_eq!(names, vec!["mock"]); + } + + #[tokio::test] + async fn test_plugin_lifecycle() { + let plugin = MockPlugin; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + assert!(plugin.shutdown().await.is_ok()); + } + + #[test] + fn test_health_status() { + let healthy = HealthStatus::Healthy; + assert_eq!(healthy, HealthStatus::Healthy); + + let degraded = HealthStatus::Degraded("slow".to_string()); + assert!(matches!(degraded, HealthStatus::Degraded(_))); + } +} diff --git a/crates/ragfs/src/core/types.rs b/crates/ragfs/src/core/types.rs new file mode 100644 index 000000000..fa81de729 --- /dev/null +++ b/crates/ragfs/src/core/types.rs @@ -0,0 +1,246 @@ +//! Core types for RAGFS +//! +//! 
This module defines the fundamental data structures used throughout RAGFS, +//! including file metadata, write flags, and configuration types. + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::time::SystemTime; + +/// File metadata information +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileInfo { + /// File name (without path) + pub name: String, + + /// File size in bytes + pub size: u64, + + /// File mode/permissions (Unix-style) + pub mode: u32, + + /// Last modification time + #[serde(with = "systemtime_serde")] + pub mod_time: SystemTime, + + /// Whether this is a directory + pub is_dir: bool, +} + +impl FileInfo { + /// Create a new FileInfo for a file + pub fn new_file(name: String, size: u64, mode: u32) -> Self { + Self { + name, + size, + mode, + mod_time: SystemTime::now(), + is_dir: false, + } + } + + /// Create a new FileInfo for a directory + pub fn new_dir(name: String, mode: u32) -> Self { + Self { + name, + size: 0, + mode, + mod_time: SystemTime::now(), + is_dir: true, + } + } +} + +/// Write operation flags +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WriteFlag { + /// Create new file or truncate existing + Create, + + /// Append to existing file + Append, + + /// Truncate file before writing + Truncate, + + /// Write at specific offset (default) + None, +} + +impl Default for WriteFlag { + fn default() -> Self { + Self::None + } +} + +/// Plugin configuration parameter metadata +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConfigParameter { + /// Parameter name + pub name: String, + + /// Parameter type: "string", "int", "bool", "string_list" + #[serde(rename = "type")] + pub param_type: String, + + /// Whether this parameter is required + pub required: bool, + + /// Default value (if not required) + #[serde(skip_serializing_if = "Option::is_none")] + pub default: Option, + + /// Human-readable description + pub description: String, +} + +impl ConfigParameter { + /// 
Create a required string parameter + pub fn required_string(name: impl Into, description: impl Into) -> Self { + Self { + name: name.into(), + param_type: "string".to_string(), + required: true, + default: None, + description: description.into(), + } + } + + /// Create an optional parameter with default + pub fn optional( + name: impl Into, + param_type: impl Into, + default: impl Into, + description: impl Into, + ) -> Self { + Self { + name: name.into(), + param_type: param_type.into(), + required: false, + default: Some(default.into()), + description: description.into(), + } + } +} + +/// Plugin configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PluginConfig { + /// Plugin name + pub name: String, + + /// Mount path + pub mount_path: String, + + /// Configuration parameters + pub params: HashMap, +} + +/// Configuration value types +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(untagged)] +pub enum ConfigValue { + /// String value + String(String), + + /// Integer value + Int(i64), + + /// Boolean value + Bool(bool), + + /// List of strings + StringList(Vec), +} + +impl ConfigValue { + /// Try to get as string + pub fn as_string(&self) -> Option<&str> { + match self { + ConfigValue::String(s) => Some(s), + _ => None, + } + } + + /// Try to get as integer + pub fn as_int(&self) -> Option { + match self { + ConfigValue::Int(i) => Some(*i), + _ => None, + } + } + + /// Try to get as boolean + pub fn as_bool(&self) -> Option { + match self { + ConfigValue::Bool(b) => Some(*b), + _ => None, + } + } + + /// Try to get as string list + pub fn as_string_list(&self) -> Option<&[String]> { + match self { + ConfigValue::StringList(list) => Some(list), + _ => None, + } + } +} + +/// Custom serde module for SystemTime +mod systemtime_serde { + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use std::time::{SystemTime, UNIX_EPOCH}; + + pub fn serialize(time: &SystemTime, serializer: S) -> Result + where + S: 
Serializer, + { + let duration = time.duration_since(UNIX_EPOCH).map_err(serde::ser::Error::custom)?; + duration.as_secs().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let secs = u64::deserialize(deserializer)?; + Ok(UNIX_EPOCH + std::time::Duration::from_secs(secs)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_file_info_creation() { + let file = FileInfo::new_file("test.txt".to_string(), 1024, 0o644); + assert_eq!(file.name, "test.txt"); + assert_eq!(file.size, 1024); + assert!(!file.is_dir); + + let dir = FileInfo::new_dir("testdir".to_string(), 0o755); + assert_eq!(dir.name, "testdir"); + assert!(dir.is_dir); + } + + #[test] + fn test_config_value() { + let val = ConfigValue::String("test".to_string()); + assert_eq!(val.as_string(), Some("test")); + assert_eq!(val.as_int(), None); + + let val = ConfigValue::Int(42); + assert_eq!(val.as_int(), Some(42)); + assert_eq!(val.as_string(), None); + } + + #[test] + fn test_config_parameter() { + let param = ConfigParameter::required_string("host", "Database host"); + assert_eq!(param.name, "host"); + assert!(param.required); + assert_eq!(param.param_type, "string"); + } +} diff --git a/crates/ragfs/src/lib.rs b/crates/ragfs/src/lib.rs new file mode 100644 index 000000000..fa3464ad9 --- /dev/null +++ b/crates/ragfs/src/lib.rs @@ -0,0 +1,60 @@ +//! RAGFS - Rust implementation of AGFS (Aggregated File System) +//! +//! RAGFS provides a unified filesystem abstraction that allows multiple +//! filesystem implementations (plugins) to be mounted at different paths. +//! It exposes these filesystems through a REST API, making them accessible +//! to AI agents and other clients. +//! +//! # Architecture +//! +//! - **Core**: Fundamental traits and types (FileSystem, ServicePlugin, etc.) +//! - **Plugins**: Filesystem implementations (MemFS, KVFS, QueueFS, etc.) +//! - **Server**: HTTP API server for remote access +//! 
- **Shell**: Interactive command-line interface +//! +//! # Example +//! +//! ```rust,no_run +//! use ragfs::core::{PluginRegistry, FileSystem}; +//! +//! #[tokio::main] +//! async fn main() -> ragfs::core::Result<()> { +//! // Create a plugin registry +//! let mut registry = PluginRegistry::new(); +//! +//! // Register plugins +//! // registry.register(MemFSPlugin); +//! +//! Ok(()) +//! } +//! ``` + +#![warn(missing_docs)] +#![warn(clippy::all)] + +pub mod core; +pub mod plugins; +pub mod server; + +// Re-export core types for convenience +pub use core::{ + ConfigParameter, ConfigValue, Error, FileInfo, FileSystem, HealthStatus, MountableFS, + PluginConfig, PluginRegistry, Result, ServicePlugin, WriteFlag, +}; + +/// Version of RAGFS +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// Name of the package +pub const NAME: &str = env!("CARGO_PKG_NAME"); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_version() { + assert!(!VERSION.is_empty()); + assert_eq!(NAME, "ragfs"); + } +} diff --git a/crates/ragfs/src/plugins/kvfs/mod.rs b/crates/ragfs/src/plugins/kvfs/mod.rs new file mode 100644 index 000000000..6d981ee3f --- /dev/null +++ b/crates/ragfs/src/plugins/kvfs/mod.rs @@ -0,0 +1,521 @@ +//! KVFS - Key-Value File System +//! +//! A file system that treats files as key-value pairs. Each file's path +//! becomes a key, and the file content becomes the value. This is useful +//! for simple key-value storage scenarios. 
+ +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::RwLock; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// Key-value entry +#[derive(Clone)] +struct KVEntry { + /// Value (file content) + value: Vec, + /// Last modification time + mod_time: SystemTime, +} + +impl KVEntry { + fn new(value: Vec) -> Self { + Self { + value, + mod_time: SystemTime::now(), + } + } + + fn touch(&mut self) { + self.mod_time = SystemTime::now(); + } +} + +/// Key-Value file system implementation +pub struct KVFileSystem { + /// Storage for key-value pairs + store: Arc>>, +} + +impl KVFileSystem { + /// Create a new KVFileSystem + pub fn new() -> Self { + Self { + store: Arc::new(RwLock::new(HashMap::new())), + } + } + + /// Normalize path to key (remove leading /) + fn path_to_key(path: &str) -> String { + let normalized = if path.starts_with('/') { + &path[1..] 
+ } else { + path + }; + + if normalized.is_empty() { + "/".to_string() + } else { + normalized.to_string() + } + } + + /// Get parent directory path + fn parent_key(key: &str) -> Option { + if key == "/" || !key.contains('/') { + return Some("/".to_string()); + } + + let parts: Vec<&str> = key.split('/').collect(); + if parts.len() <= 1 { + return Some("/".to_string()); + } + + Some(parts[..parts.len() - 1].join("/")) + } + + /// List all keys with a given prefix + fn list_keys_with_prefix(&self, store: &HashMap, prefix: &str) -> Vec { + let search_prefix = if prefix == "/" { + "" + } else { + prefix + }; + + store + .keys() + .filter(|k| { + if search_prefix.is_empty() { + // Root: only keys without '/' + !k.contains('/') + } else { + // Keys that start with prefix/ and have no further / + k.starts_with(&format!("{}/", search_prefix)) + && !k[search_prefix.len() + 1..].contains('/') + } + }) + .cloned() + .collect() + } +} + +impl Default for KVFileSystem { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl FileSystem for KVFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + if store.contains_key(&key) { + return Err(Error::already_exists(path)); + } + + store.insert(key, KVEntry::new(Vec::new())); + Ok(()) + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + // KVFS doesn't have real directories, but we accept mkdir for compatibility + // We just create an empty entry to mark the "directory" + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + if store.contains_key(&key) { + return Err(Error::already_exists(path)); + } + + // Mark as directory by using empty value + store.insert(key, KVEntry::new(Vec::new())); + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + if store.remove(&key).is_none() { + 
return Err(Error::not_found(path));
+        }
+
+        Ok(())
+    }
+
+    async fn remove_all(&self, path: &str) -> Result<()> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        // The root is implicit (it needs no stored "/" entry to be listed),
+        // so removing it recursively means dropping every stored key. The
+        // generic prefix logic below cannot express this: keys never start
+        // with '/'.
+        if key == "/" {
+            store.clear();
+            return Ok(());
+        }
+
+        if !store.contains_key(&key) {
+            return Err(Error::not_found(path));
+        }
+
+        // Remove the key itself and everything nested below it ("key/...").
+        // The child prefix is built once rather than once per visited key.
+        let child_prefix = format!("{}/", key);
+        store.retain(|k, _| k != &key && !k.starts_with(&child_prefix));
+
+        Ok(())
+    }
+
+    async fn read(&self, path: &str, offset: u64, size: u64) -> Result<Vec<u8>> {
+        let key = Self::path_to_key(path);
+        let store = self.store.read().await;
+
+        match store.get(&key) {
+            Some(entry) => {
+                let data_len = entry.value.len();
+
+                // An offset that does not fit in usize is necessarily past the
+                // end of the value; clamp instead of truncating with `as`.
+                let offset = offset.min(usize::MAX as u64) as usize;
+
+                if offset >= data_len {
+                    return Ok(Vec::new());
+                }
+
+                // size == 0 means "read to the end of the value".
+                let end = if size == 0 {
+                    data_len
+                } else {
+                    let size = size.min(usize::MAX as u64) as usize;
+                    // saturating_add: offset + size must not wrap or panic for
+                    // very large requested sizes.
+                    std::cmp::min(offset.saturating_add(size), data_len)
+                };
+
+                Ok(entry.value[offset..end].to_vec())
+            }
+            None => Err(Error::not_found(path)),
+        }
+    }
+
+    async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result<u64> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        match store.get_mut(&key) {
+            Some(entry) => {
+                match flags {
+                    WriteFlag::Create | WriteFlag::Truncate => {
+                        entry.value = data.to_vec();
+                    }
+                    WriteFlag::Append => {
+                        entry.value.extend_from_slice(data);
+                    }
+                    WriteFlag::None => {
+                        // Reject offsets whose end position cannot be
+                        // represented instead of overflowing `offset + len`.
+                        let range = if offset <= usize::MAX as u64 {
+                            let start = offset as usize;
+                            start.checked_add(data.len()).map(|end| (start, end))
+                        } else {
+                            None
+                        };
+                        let (start, end) = range.ok_or_else(|| {
+                            Error::InvalidOperation("Write offset out of range".to_string())
+                        })?;
+
+                        // Writing past the current end zero-fills the gap.
+                        if end > entry.value.len() {
+                            entry.value.resize(end, 0);
+                        }
+
+                        entry.value[start..end].copy_from_slice(data);
+                    }
+                }
+
+                // Only bump the modification time once the write succeeded.
+                entry.touch();
+
+                Ok(data.len() as u64)
+            }
+            None => {
+                // Only an explicit Create may bring a missing key into existence.
+                if matches!(flags, WriteFlag::Create) {
+                    store.insert(key, KVEntry::new(data.to_vec()));
+                    Ok(data.len() as u64)
+                } else {
+                    Err(Error::not_found(path))
+                }
+            }
+        }
+    }
+
+    async fn read_dir(&self, path: &str) ->
Result> { + let key = Self::path_to_key(path); + let store = self.store.read().await; + + // Check if the directory exists (or root) + if key != "/" && !store.contains_key(&key) { + return Err(Error::not_found(path)); + } + + let keys = self.list_keys_with_prefix(&store, &key); + let mut result = Vec::new(); + + for k in keys { + if let Some(entry) = store.get(&k) { + let name = k.split('/').last().unwrap_or(&k).to_string(); + result.push(FileInfo { + name, + size: entry.value.len() as u64, + mode: 0o644, + mod_time: entry.mod_time, + is_dir: false, + }); + } + } + + Ok(result) + } + + async fn stat(&self, path: &str) -> Result { + let key = Self::path_to_key(path); + let store = self.store.read().await; + + match store.get(&key) { + Some(entry) => { + let name = key.split('/').last().unwrap_or(&key).to_string(); + Ok(FileInfo { + name, + size: entry.value.len() as u64, + mode: 0o644, + mod_time: entry.mod_time, + is_dir: false, + }) + } + None => Err(Error::not_found(path)), + } + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_key = Self::path_to_key(old_path); + let new_key = Self::path_to_key(new_path); + let mut store = self.store.write().await; + + let entry = store + .get(&old_key) + .ok_or_else(|| Error::not_found(old_path))? 
+ .clone(); + + if store.contains_key(&new_key) { + return Err(Error::already_exists(new_path)); + } + + store.remove(&old_key); + store.insert(new_key, entry); + + Ok(()) + } + + async fn chmod(&self, path: &str, _mode: u32) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + match store.get_mut(&key) { + Some(entry) => { + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(path)), + } + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + match store.get_mut(&key) { + Some(entry) => { + entry.value.resize(size as usize, 0); + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(path)), + } + } +} + +/// KVFS plugin +pub struct KVFSPlugin; + +#[async_trait] +impl ServicePlugin for KVFSPlugin { + fn name(&self) -> &str { + "kvfs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "Key-value file system for simple storage" + } + + fn readme(&self) -> &str { + r#"# KVFS - Key-Value File System + +A file system that treats files as key-value pairs. Each file's path +becomes a key, and the file content becomes the value. 
+ +## Features + +- Simple key-value storage +- File paths map to keys +- Fast lookups +- In-memory storage (no persistence) + +## Usage + +Mount the filesystem: +```bash +curl -X POST http://localhost:8080/api/v1/mount \ + -H "Content-Type: application/json" \ + -d '{"plugin": "kvfs", "path": "/kvfs"}' +``` + +Store a value: +```bash +echo "value123" | curl -X PUT \ + "http://localhost:8080/api/v1/files?path=/kvfs/mykey" \ + --data-binary @- +``` + +Retrieve a value: +```bash +curl "http://localhost:8080/api/v1/files?path=/kvfs/mykey" +``` + +List all keys: +```bash +curl "http://localhost:8080/api/v1/directories?path=/kvfs" +``` + +## Use Cases + +- Configuration storage +- Cache storage +- Session data +- Temporary key-value storage + +## Configuration + +KVFS has no configuration parameters. +"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(KVFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_kvfs_basic_operations() { + let fs = KVFileSystem::new(); + + // Create and write + fs.write("/key1", b"value1", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read + let data = fs.read("/key1", 0, 0).await.unwrap(); + assert_eq!(data, b"value1"); + + // Update + fs.write("/key1", b"value2", 0, WriteFlag::Truncate) + .await + .unwrap(); + + let data = fs.read("/key1", 0, 0).await.unwrap(); + assert_eq!(data, b"value2"); + } + + #[tokio::test] + async fn test_kvfs_list_keys() { + let fs = KVFileSystem::new(); + + fs.write("/key1", b"val1", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/key2", b"val2", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/key3", b"val3", 0, WriteFlag::Create) + .await + .unwrap(); + + let entries = fs.read_dir("/").await.unwrap(); + assert_eq!(entries.len(), 3); + } + + #[tokio::test] + 
async fn test_kvfs_nested_keys() { + let fs = KVFileSystem::new(); + + // Create parent "directory" first + fs.mkdir("/user", 0o755).await.unwrap(); + + fs.write("/user/123", b"alice", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/user/456", b"bob", 0, WriteFlag::Create) + .await + .unwrap(); + + let entries = fs.read_dir("/user").await.unwrap(); + assert_eq!(entries.len(), 2); + } + + #[tokio::test] + async fn test_kvfs_delete() { + let fs = KVFileSystem::new(); + + fs.write("/key1", b"value1", 0, WriteFlag::Create) + .await + .unwrap(); + fs.remove("/key1").await.unwrap(); + + assert!(fs.read("/key1", 0, 0).await.is_err()); + } + + #[tokio::test] + async fn test_kvfs_rename() { + let fs = KVFileSystem::new(); + + fs.write("/oldkey", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + fs.rename("/oldkey", "/newkey").await.unwrap(); + + assert!(fs.read("/oldkey", 0, 0).await.is_err()); + let data = fs.read("/newkey", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_kvfs_plugin() { + let plugin = KVFSPlugin; + assert_eq!(plugin.name(), "kvfs"); + + let config = PluginConfig { + name: "kvfs".to_string(), + mount_path: "/kvfs".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/memfs/mod.rs b/crates/ragfs/src/plugins/memfs/mod.rs new file mode 100644 index 000000000..301461269 --- /dev/null +++ b/crates/ragfs/src/plugins/memfs/mod.rs @@ -0,0 +1,620 @@ +//! MemFS - In-memory File System +//! +//! A simple file system that stores all data in memory. All data is lost +//! when the server restarts. This is useful for temporary storage and testing. 
+ +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::RwLock; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// File entry in memory +#[derive(Clone)] +struct FileEntry { + /// File data + data: Vec, + /// File mode/permissions + mode: u32, + /// Last modification time + mod_time: SystemTime, + /// Whether this is a directory + is_dir: bool, +} + +impl FileEntry { + /// Create a new file entry + fn new_file(mode: u32) -> Self { + Self { + data: Vec::new(), + mode, + mod_time: SystemTime::now(), + is_dir: false, + } + } + + /// Create a new directory entry + fn new_dir(mode: u32) -> Self { + Self { + data: Vec::new(), + mode, + mod_time: SystemTime::now(), + is_dir: true, + } + } + + /// Update modification time + fn touch(&mut self) { + self.mod_time = SystemTime::now(); + } +} + +/// In-memory file system implementation +pub struct MemFileSystem { + /// Storage for files and directories + entries: Arc>>, +} + +impl MemFileSystem { + /// Create a new MemFileSystem + pub fn new() -> Self { + let mut entries = HashMap::new(); + + // Create root directory + entries.insert( + "/".to_string(), + FileEntry::new_dir(0o755), + ); + + Self { + entries: Arc::new(RwLock::new(entries)), + } + } + + /// Normalize path (ensure it starts with /) + fn normalize_path(path: &str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + let mut normalized = path.to_string(); + if !normalized.starts_with('/') { + normalized.insert(0, '/'); + } + + // Remove trailing slash (except for root) + if normalized.len() > 1 && normalized.ends_with('/') { + normalized.pop(); + } + + normalized + } + + /// Get parent directory path + fn parent_path(path: &str) -> Option { + if path == "/" { + return None; + } + + let normalized = Self::normalize_path(path); + let parts: Vec<&str> = normalized.split('/').collect(); 
+ + if parts.len() <= 2 { + return Some("/".to_string()); + } + + Some(parts[..parts.len() - 1].join("/")) + } + + /// Get file name from path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + let normalized = Self::normalize_path(path); + normalized + .split('/') + .last() + .unwrap_or("") + .to_string() + } + + /// List entries in a directory + fn list_entries(&self, entries: &HashMap, dir_path: &str) -> Vec { + let normalized_dir = Self::normalize_path(dir_path); + let prefix = if normalized_dir == "/" { + "/".to_string() + } else { + format!("{}/", normalized_dir) + }; + + entries + .keys() + .filter(|path| { + if *path == &normalized_dir { + return false; + } + + if !path.starts_with(&prefix) { + return false; + } + + // Only direct children (no nested paths) + let relative = &path[prefix.len()..]; + !relative.contains('/') + }) + .cloned() + .collect() + } +} + +impl Default for MemFileSystem { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl FileSystem for MemFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if already exists + if entries.contains_key(&normalized) { + return Err(Error::already_exists(&normalized)); + } + + // Check parent directory exists + if let Some(parent) = Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + // Create file + entries.insert(normalized, FileEntry::new_file(0o644)); + Ok(()) + } + + async fn mkdir(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if already exists + if entries.contains_key(&normalized) { + return Err(Error::already_exists(&normalized)); + } + + // Check parent 
directory exists + if let Some(parent) = Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + // Create directory + entries.insert(normalized, FileEntry::new_dir(mode)); + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if exists + match entries.get(&normalized) { + Some(entry) if entry.is_dir => { + return Err(Error::IsADirectory(normalized)); + } + Some(_) => {} + None => return Err(Error::not_found(&normalized)), + } + + // Remove file + entries.remove(&normalized); + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if exists + if !entries.contains_key(&normalized) { + return Err(Error::not_found(&normalized)); + } + + // Remove entry and all children + let to_remove: Vec = entries + .keys() + .filter(|p| *p == &normalized || p.starts_with(&format!("{}/", normalized))) + .cloned() + .collect(); + + for path in to_remove { + entries.remove(&path); + } + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = Self::normalize_path(path); + let entries = self.entries.read().await; + + match entries.get(&normalized) { + Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)), + Some(entry) => { + let offset = offset as usize; + let data_len = entry.data.len(); + + if offset >= data_len { + return Ok(Vec::new()); + } + + let end = if size == 0 { + data_len + } else { + std::cmp::min(offset + size as usize, data_len) + }; + + Ok(entry.data[offset..end].to_vec()) + } + None => Err(Error::not_found(&normalized)), + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: 
WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + match entries.get_mut(&normalized) { + Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)), + Some(entry) => { + entry.touch(); + + match flags { + WriteFlag::Create | WriteFlag::Truncate => { + entry.data = data.to_vec(); + } + WriteFlag::Append => { + entry.data.extend_from_slice(data); + } + WriteFlag::None => { + let offset = offset as usize; + let end = offset + data.len(); + + // Extend if necessary + if end > entry.data.len() { + entry.data.resize(end, 0); + } + + entry.data[offset..end].copy_from_slice(data); + } + } + + Ok(data.len() as u64) + } + None => { + // Create file if Create flag is set + if matches!(flags, WriteFlag::Create) { + // Check parent exists + if let Some(parent) = Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + let mut entry = FileEntry::new_file(0o644); + entry.data = data.to_vec(); + entries.insert(normalized, entry); + Ok(data.len() as u64) + } else { + Err(Error::not_found(&normalized)) + } + } + } + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + let entries = self.entries.read().await; + + // Check if directory exists + match entries.get(&normalized) { + Some(entry) if !entry.is_dir => return Err(Error::NotADirectory(normalized)), + Some(_) => {} + None => return Err(Error::not_found(&normalized)), + } + + // List entries + let children = self.list_entries(&entries, &normalized); + let mut result = Vec::new(); + + for child_path in children { + if let Some(entry) = entries.get(&child_path) { + let name = Self::file_name(&child_path); + result.push(FileInfo { + name, + size: entry.data.len() as u64, + mode: entry.mode, + mod_time: entry.mod_time, + is_dir: entry.is_dir, + 
});
+            }
+        }
+
+        Ok(result)
+    }
+
+    /// Return the metadata of the file or directory at `path`.
+    ///
+    /// Returns `Error::not_found` when `path` does not exist.
+    async fn stat(&self, path: &str) -> Result<FileInfo> {
+        let normalized = Self::normalize_path(path);
+        let entries = self.entries.read().await;
+
+        match entries.get(&normalized) {
+            Some(entry) => Ok(FileInfo {
+                name: Self::file_name(&normalized),
+                size: entry.data.len() as u64,
+                mode: entry.mode,
+                mod_time: entry.mod_time,
+                is_dir: entry.is_dir,
+            }),
+            None => Err(Error::not_found(&normalized)),
+        }
+    }
+
+    /// Move the file or directory at `old_path` to `new_path`.
+    ///
+    /// Entries are stored in a flat map keyed by full path, so renaming a
+    /// directory also re-keys every descendant under the new path.
+    ///
+    /// Errors:
+    /// - `Error::InvalidOperation` when `old_path` is the root, or when a
+    ///   directory would be moved into its own subtree.
+    /// - `Error::not_found` when `old_path` or the parent of `new_path` is missing.
+    /// - `Error::already_exists` when `new_path` already exists.
+    /// - `Error::NotADirectory` when the parent of `new_path` is a file.
+    async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+        let old_normalized = Self::normalize_path(old_path);
+        let new_normalized = Self::normalize_path(new_path);
+
+        // Moving the root entry would leave every other path without a parent
+        if old_normalized == "/" {
+            return Err(Error::InvalidOperation(
+                "cannot rename the root directory".to_string(),
+            ));
+        }
+
+        let old_prefix = format!("{}/", old_normalized);
+        let mut entries = self.entries.write().await;
+
+        // Check old path exists
+        let is_dir = entries
+            .get(&old_normalized)
+            .ok_or_else(|| Error::not_found(&old_normalized))?
+            .is_dir;
+
+        // Check new path doesn't exist
+        if entries.contains_key(&new_normalized) {
+            return Err(Error::already_exists(&new_normalized));
+        }
+
+        // A directory cannot become its own descendant
+        if is_dir && new_normalized.starts_with(&old_prefix) {
+            return Err(Error::InvalidOperation(
+                "cannot move a directory into itself".to_string(),
+            ));
+        }
+
+        // Check new parent exists
+        if let Some(parent) = Self::parent_path(&new_normalized) {
+            match entries.get(&parent) {
+                Some(e) if e.is_dir => {}
+                Some(_) => return Err(Error::NotADirectory(parent)),
+                None => return Err(Error::not_found(&parent)),
+            }
+        }
+
+        // Move the entry itself (taking ownership avoids cloning file data)
+        if let Some(entry) = entries.remove(&old_normalized) {
+            entries.insert(new_normalized.clone(), entry);
+        }
+
+        // Re-key every descendant of a renamed directory
+        if is_dir {
+            let descendants: Vec<String> = entries
+                .keys()
+                .filter(|p| p.starts_with(&old_prefix))
+                .cloned()
+                .collect();
+
+            for old_child in descendants {
+                if let Some(child) = entries.remove(&old_child) {
+                    let relative = &old_child[old_prefix.len()..];
+                    entries.insert(format!("{}/{}", new_normalized, relative), child);
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Set the mode bits of `path` and refresh its modification time.
+    ///
+    /// Returns `Error::not_found` when `path` does not exist.
+    async fn chmod(&self, path: &str, mode: u32) -> Result<()> {
+        let normalized = Self::normalize_path(path);
+        let mut entries = self.entries.write().await;
+
+        match entries.get_mut(&normalized) {
+            Some(entry) => {
+                entry.mode = mode;
+                entry.touch();
+                Ok(())
+            }
+            None => Err(Error::not_found(&normalized)),
+        }
+    }
+
+    async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+        let normalized = Self::normalize_path(path);
+        let mut entries = self.entries.write().await;
+
+        match entries.get_mut(&normalized) {
+            Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)),
+            Some(entry) => {
+                entry.data.resize(size as usize, 0);
+                entry.touch();
+ Ok(()) + } + None => Err(Error::not_found(&normalized)), + } + } +} + +/// MemFS plugin +pub struct MemFSPlugin; + +#[async_trait] +impl ServicePlugin for MemFSPlugin { + fn name(&self) -> &str { + "memfs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "In-memory file system for temporary storage" + } + + fn readme(&self) -> &str { + r#"# MemFS - In-memory File System + +A simple file system that stores all data in memory. All data is lost +when the server restarts. + +## Features + +- Fast in-memory storage +- Full POSIX-like file operations +- Directory support +- No persistence (data lost on restart) + +## Usage + +Mount the filesystem: +```bash +curl -X POST http://localhost:8080/api/v1/mount \ + -H "Content-Type: application/json" \ + -d '{"plugin": "memfs", "path": "/memfs"}' +``` + +Create and write to a file: +```bash +echo "hello world" | curl -X PUT \ + "http://localhost:8080/api/v1/files?path=/memfs/test.txt" \ + --data-binary @- +``` + +Read the file: +```bash +curl "http://localhost:8080/api/v1/files?path=/memfs/test.txt" +``` + +## Configuration + +MemFS has no configuration parameters. 
+"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // MemFS has no required configuration + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(MemFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + // No configuration parameters + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_create_and_read_file() { + let fs = MemFileSystem::new(); + + // Create file + fs.create("/test.txt").await.unwrap(); + + // Write data + let data = b"hello world"; + fs.write("/test.txt", data, 0, WriteFlag::None) + .await + .unwrap(); + + // Read data + let read_data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(read_data, data); + } + + #[tokio::test] + async fn test_mkdir_and_list() { + let fs = MemFileSystem::new(); + + // Create directory + fs.mkdir("/testdir", 0o755).await.unwrap(); + + // Create files in directory + fs.create("/testdir/file1.txt").await.unwrap(); + fs.create("/testdir/file2.txt").await.unwrap(); + + // List directory + let entries = fs.read_dir("/testdir").await.unwrap(); + assert_eq!(entries.len(), 2); + } + + #[tokio::test] + async fn test_remove_file() { + let fs = MemFileSystem::new(); + + fs.create("/test.txt").await.unwrap(); + fs.remove("/test.txt").await.unwrap(); + + // Should not exist + assert!(fs.stat("/test.txt").await.is_err()); + } + + #[tokio::test] + async fn test_rename() { + let fs = MemFileSystem::new(); + + fs.create("/old.txt").await.unwrap(); + fs.write("/old.txt", b"data", 0, WriteFlag::None) + .await + .unwrap(); + + fs.rename("/old.txt", "/new.txt").await.unwrap(); + + // Old should not exist + assert!(fs.stat("/old.txt").await.is_err()); + + // New should exist with same data + let data = fs.read("/new.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_write_flags() { + let fs = MemFileSystem::new(); + + // Create with data + fs.write("/test.txt", 
b"hello", 0, WriteFlag::Create) + .await + .unwrap(); + + // Append + fs.write("/test.txt", b" world", 0, WriteFlag::Append) + .await + .unwrap(); + + let data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"hello world"); + + // Truncate + fs.write("/test.txt", b"new", 0, WriteFlag::Truncate) + .await + .unwrap(); + + let data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"new"); + } + + #[tokio::test] + async fn test_plugin() { + let plugin = MemFSPlugin; + assert_eq!(plugin.name(), "memfs"); + + let config = PluginConfig { + name: "memfs".to_string(), + mount_path: "/memfs".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/mod.rs b/crates/ragfs/src/plugins/mod.rs new file mode 100644 index 000000000..c84424db9 --- /dev/null +++ b/crates/ragfs/src/plugins/mod.rs @@ -0,0 +1,17 @@ +//! Plugins module +//! +//! This module contains all built-in filesystem plugins. + +pub mod kvfs; +pub mod memfs; +pub mod queuefs; +#[cfg(feature = "s3")] +pub mod s3fs; +pub mod sqlfs; + +pub use kvfs::{KVFSPlugin, KVFileSystem}; +pub use memfs::{MemFSPlugin, MemFileSystem}; +pub use queuefs::{QueueFSPlugin, QueueFileSystem}; +#[cfg(feature = "s3")] +pub use s3fs::{S3FSPlugin, S3FileSystem}; +pub use sqlfs::{SQLFSPlugin, SQLFileSystem}; diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs new file mode 100644 index 000000000..8dd3ba3ab --- /dev/null +++ b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -0,0 +1,519 @@ +//! QueueFS Plugin +//! +//! A filesystem-based message queue where operations are performed through control files: +//! - `/enqueue` - Write to this file to add a message to the queue +//! - `/dequeue` - Read from this file to remove and return the first message +//! - `/peek` - Read from this file to view the first message without removing it +//! 
- `/size` - Read from this file to get the current queue size +//! - `/clear` - Write to this file to clear all messages from the queue + +use crate::core::{ + errors::{Error, Result}, + filesystem::FileSystem, + plugin::ServicePlugin, + types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}, +}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; +use std::collections::VecDeque; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::Mutex; +use uuid::Uuid; + +/// A message in the queue +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier for the message + pub id: String, + /// Message data + pub data: Vec, + /// Timestamp when the message was enqueued + pub timestamp: SystemTime, +} + +impl Message { + /// Create a new message with the given data + fn new(data: Vec) -> Self { + Self { + id: Uuid::new_v4().to_string(), + data, + timestamp: SystemTime::now(), + } + } +} + +/// QueueFS - A filesystem-based message queue +pub struct QueueFileSystem { + /// The message queue + queue: Arc>>, +} + +impl QueueFileSystem { + /// Create a new QueueFileSystem + pub fn new() -> Self { + Self { + queue: Arc::new(Mutex::new(VecDeque::new())), + } + } + + /// Check if a path is a control file + fn is_control_file(path: &str) -> bool { + matches!( + path, + "/enqueue" | "/dequeue" | "/peek" | "/size" | "/clear" + ) + } + + /// Normalize path by removing trailing slashes and ensuring it starts with / + fn normalize_path(path: &str) -> String { + let path = path.trim_end_matches('/'); + if path.is_empty() || path == "/" { + "/".to_string() + } else if !path.starts_with('/') { + format!("/{}", path) + } else { + path.to_string() + } + } +} + +#[async_trait] +impl FileSystem for QueueFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let path = Self::normalize_path(path); + if Self::is_control_file(&path) { + // Control files always exist + Ok(()) + } else { + Err(Error::InvalidOperation( + 
"QueueFS only supports control files".to_string(), + )) + } + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let path = Self::normalize_path(path); + if path == "/" { + Ok(()) + } else { + Err(Error::InvalidOperation( + "QueueFS does not support directories".to_string(), + )) + } + } + + async fn read(&self, path: &str, _offset: u64, _size: u64) -> Result> { + let path = Self::normalize_path(path); + + match path.as_str() { + "/dequeue" => { + let mut queue = self.queue.lock().await; + let msg = queue + .pop_front() + .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; + Ok(msg.data) + } + "/peek" => { + let queue = self.queue.lock().await; + let msg = queue + .front() + .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; + Ok(msg.data.clone()) + } + "/size" => { + let queue = self.queue.lock().await; + let size = queue.len(); + Ok(size.to_string().into_bytes()) + } + _ => Err(Error::InvalidOperation(format!( + "Cannot read from '{}'. Use /dequeue, /peek, or /size", + path + ))), + } + } + + async fn write( + &self, + path: &str, + data: &[u8], + _offset: u64, + _flags: WriteFlag, + ) -> Result { + let path = Self::normalize_path(path); + + match path.as_str() { + "/enqueue" => { + let msg = Message::new(data.to_vec()); + let len = data.len() as u64; + self.queue.lock().await.push_back(msg); + Ok(len) + } + "/clear" => { + self.queue.lock().await.clear(); + Ok(0) + } + _ => Err(Error::InvalidOperation(format!( + "Cannot write to '{}'. 
Use /enqueue or /clear", + path + ))), + } + } + + async fn read_dir(&self, path: &str) -> Result> { + let path = Self::normalize_path(path); + + if path != "/" { + return Err(Error::NotFound(format!("directory not found: {}", path))); + } + + let now = SystemTime::now(); + Ok(vec![ + FileInfo { + name: "enqueue".to_string(), + size: 0, + mode: 0o666, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "dequeue".to_string(), + size: 0, + mode: 0o666, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "peek".to_string(), + size: 0, + mode: 0o666, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "size".to_string(), + size: 0, + mode: 0o666, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "clear".to_string(), + size: 0, + mode: 0o666, + mod_time: now, + is_dir: false, + }, + ]) + } + + async fn stat(&self, path: &str) -> Result { + let path = Self::normalize_path(path); + + if path == "/" { + return Ok(FileInfo { + name: "/".to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + if Self::is_control_file(&path) { + let name = path.trim_start_matches('/').to_string(); + Ok(FileInfo { + name, + size: 0, + mode: 0o666, + mod_time: SystemTime::now(), + is_dir: false, + }) + } else { + Err(Error::NotFound(format!("file not found: {}", path))) + } + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support rename".to_string(), + )) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support chmod".to_string(), + )) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support remove".to_string(), + )) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support remove_all".to_string(), + )) + } + + async fn truncate(&self, 
_path: &str, _size: u64) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support truncate".to_string(), + )) + } +} + +/// QueueFS Plugin +pub struct QueueFSPlugin; + +#[async_trait] +impl ServicePlugin for QueueFSPlugin { + fn name(&self) -> &str { + "queuefs" + } + + fn readme(&self) -> &str { + "QueueFS - A filesystem-based message queue\n\ + \n\ + Control files:\n\ + - /enqueue: Write to add a message to the queue\n\ + - /dequeue: Read to remove and return the first message\n\ + - /peek: Read to view the first message without removing it\n\ + - /size: Read to get the current queue size\n\ + - /clear: Write to clear all messages from the queue" + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // No configuration parameters required + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(QueueFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_queuefs_enqueue_dequeue() { + let fs = QueueFileSystem::new(); + + // Enqueue messages + let data1 = b"message 1"; + let data2 = b"message 2"; + + fs.write("/enqueue", data1, 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/enqueue", data2, 0, WriteFlag::None) + .await + .unwrap(); + + // Dequeue messages + let result1 = fs.read("/dequeue", 0, 0).await.unwrap(); + assert_eq!(result1, data1); + + let result2 = fs.read("/dequeue", 0, 0).await.unwrap(); + assert_eq!(result2, data2); + + // Queue should be empty + let result = fs.read("/dequeue", 0, 0).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_peek() { + let fs = QueueFileSystem::new(); + + // Enqueue a message + let data = b"test message"; + fs.write("/enqueue", data, 0, WriteFlag::None) + .await + .unwrap(); + + // Peek should return the message without removing it + let result1 = fs.read("/peek", 0, 0).await.unwrap(); + 
assert_eq!(result1, data); + + let result2 = fs.read("/peek", 0, 0).await.unwrap(); + assert_eq!(result2, data); + + // Dequeue should still work + let result3 = fs.read("/dequeue", 0, 0).await.unwrap(); + assert_eq!(result3, data); + } + + #[tokio::test] + async fn test_queuefs_size() { + let fs = QueueFileSystem::new(); + + // Initially empty + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + + // Add messages + fs.write("/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/enqueue", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "2"); + + // Dequeue one + fs.read("/dequeue", 0, 0).await.unwrap(); + + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "1"); + } + + #[tokio::test] + async fn test_queuefs_clear() { + let fs = QueueFileSystem::new(); + + // Add messages + fs.write("/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/enqueue", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + // Clear the queue + fs.write("/clear", b"", 0, WriteFlag::None).await.unwrap(); + + // Queue should be empty + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + + let result = fs.read("/dequeue", 0, 0).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_read_dir() { + let fs = QueueFileSystem::new(); + + let entries = fs.read_dir("/").await.unwrap(); + assert_eq!(entries.len(), 5); + + let names: Vec = entries.iter().map(|e| e.name.clone()).collect(); + assert!(names.contains(&"enqueue".to_string())); + assert!(names.contains(&"dequeue".to_string())); + assert!(names.contains(&"peek".to_string())); + assert!(names.contains(&"size".to_string())); + assert!(names.contains(&"clear".to_string())); + } + + #[tokio::test] + async fn 
test_queuefs_stat() { + let fs = QueueFileSystem::new(); + + // Stat root + let info = fs.stat("/").await.unwrap(); + assert!(info.is_dir); + + // Stat control files + let info = fs.stat("/enqueue").await.unwrap(); + assert!(!info.is_dir); + assert_eq!(info.name, "enqueue"); + + // Stat non-existent file + let result = fs.stat("/nonexistent").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_invalid_operations() { + let fs = QueueFileSystem::new(); + + // Cannot read from enqueue + let result = fs.read("/enqueue", 0, 0).await; + assert!(result.is_err()); + + // Cannot write to dequeue + let result = fs.write("/dequeue", b"data", 0, WriteFlag::None).await; + assert!(result.is_err()); + + // Cannot rename + let result = fs.rename("/enqueue", "/enqueue2").await; + assert!(result.is_err()); + + // Cannot remove + let result = fs.remove("/enqueue").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_concurrent_access() { + let fs = Arc::new(QueueFileSystem::new()); + + // Spawn multiple tasks to enqueue messages + let mut handles = vec![]; + for i in 0..10 { + let fs_clone = fs.clone(); + let handle = tokio::spawn(async move { + let data = format!("message {}", i); + fs_clone + .write("/enqueue", data.as_bytes(), 0, WriteFlag::None) + .await + .unwrap(); + }); + handles.push(handle); + } + + // Wait for all tasks to complete + for handle in handles { + handle.await.unwrap(); + } + + // Check size + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "10"); + + // Dequeue all messages + for _ in 0..10 { + fs.read("/dequeue", 0, 0).await.unwrap(); + } + + // Queue should be empty + let size = fs.read("/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + } + + #[tokio::test] + async fn test_queuefs_plugin() { + let plugin = QueueFSPlugin; + + assert_eq!(plugin.name(), "queuefs"); + assert!(!plugin.readme().is_empty()); + 
assert_eq!(plugin.config_params().len(), 0); + + let config = PluginConfig { + name: "queuefs".to_string(), + mount_path: "/queue".to_string(), + params: std::collections::HashMap::new(), + }; + + plugin.validate(&config).await.unwrap(); + let fs = plugin.initialize(config).await.unwrap(); + + // Test basic operation + fs.write("/enqueue", b"test", 0, WriteFlag::None) + .await + .unwrap(); + let result = fs.read("/dequeue", 0, 0).await.unwrap(); + assert_eq!(result, b"test"); + } +} diff --git a/crates/ragfs/src/plugins/s3fs/cache.rs b/crates/ragfs/src/plugins/s3fs/cache.rs new file mode 100644 index 000000000..65e1c9e40 --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/cache.rs @@ -0,0 +1,300 @@ +//! Dual-layer cache for S3FS +//! +//! Provides two caches: +//! - **ListDirCache**: Caches directory listing results (default TTL: 30s) +//! - **StatCache**: Caches file/directory metadata (default TTL: 60s, 5x capacity) +//! +//! Both caches use LRU eviction with TTL-based expiry. + +use crate::core::types::FileInfo; +use lru::LruCache; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::RwLock; + +/// Cache entry with timestamp for TTL +#[derive(Clone)] +struct CacheEntry { + value: T, + timestamp: Instant, +} + +/// Inner cache state (generic) +struct CacheInner { + cache: LruCache>, + ttl: Duration, + enabled: bool, +} + +/// Generic TTL-LRU cache +struct TtlLruCache { + inner: Arc>>, +} + +impl TtlLruCache { + fn new(max_size: usize, ttl: Duration, enabled: bool) -> Self { + let max_size = if max_size == 0 { 1000 } else { max_size }; + Self { + inner: Arc::new(RwLock::new(CacheInner { + cache: LruCache::new(NonZeroUsize::new(max_size).unwrap()), + ttl, + enabled, + })), + } + } + + async fn get(&self, key: &str) -> Option { + let mut inner = self.inner.write().await; + if !inner.enabled { + return None; + } + + let ttl = inner.ttl; + let result = inner.cache.get(key).and_then(|entry| { + if 
Instant::now().duration_since(entry.timestamp) > ttl { + None + } else { + Some(entry.value.clone()) + } + }); + + match result { + Some(value) => { + if let Some(entry) = inner.cache.get_mut(key) { + entry.timestamp = Instant::now(); + } + Some(value) + } + None => { + inner.cache.pop(key); + None + } + } + } + + async fn put(&self, key: String, value: T) { + let mut inner = self.inner.write().await; + if !inner.enabled { + return; + } + inner.cache.put( + key, + CacheEntry { + value, + timestamp: Instant::now(), + }, + ); + } + + async fn invalidate(&self, key: &str) { + let mut inner = self.inner.write().await; + inner.cache.pop(key); + } + + async fn invalidate_prefix(&self, prefix: &str) { + let mut inner = self.inner.write().await; + if !inner.enabled { + return; + } + + let to_remove: Vec = inner + .cache + .iter() + .filter(|(k, _)| *k == prefix || k.starts_with(&format!("{}/", prefix))) + .map(|(k, _)| k.clone()) + .collect(); + + for key in to_remove { + inner.cache.pop(&key); + } + } + + async fn invalidate_parent(&self, path: &str) { + if path == "/" { + self.invalidate("/").await; + return; + } + + let trimmed = path.trim_end_matches('/'); + if let Some(pos) = trimmed.rfind('/') { + let parent = if pos == 0 { + "/".to_string() + } else { + trimmed[..pos].to_string() + }; + self.invalidate(&parent).await; + } + } +} + +/// Directory listing cache +pub struct S3ListDirCache { + cache: TtlLruCache>, +} + +impl S3ListDirCache { + /// Create a new directory listing cache + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + Self { + cache: TtlLruCache::new( + max_size, + Duration::from_secs(if ttl_seconds == 0 { 30 } else { ttl_seconds }), + enabled, + ), + } + } + + /// Get cached listing + pub async fn get(&self, path: &str) -> Option> { + self.cache.get(path).await + } + + /// Store listing + pub async fn put(&self, path: String, files: Vec) { + self.cache.put(path, files).await; + } + + /// Invalidate a specific path + pub async fn 
invalidate(&self, path: &str) { + self.cache.invalidate(path).await; + } + + /// Invalidate all entries with a prefix + pub async fn invalidate_prefix(&self, prefix: &str) { + self.cache.invalidate_prefix(prefix).await; + } + + /// Invalidate the parent of a path + pub async fn invalidate_parent(&self, path: &str) { + self.cache.invalidate_parent(path).await; + } +} + +/// File metadata (stat) cache +pub struct S3StatCache { + cache: TtlLruCache>, +} + +impl S3StatCache { + /// Create a new stat cache (5x the capacity of dir cache) + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + let max_size = if max_size == 0 { 5000 } else { max_size * 5 }; + Self { + cache: TtlLruCache::new( + max_size, + Duration::from_secs(if ttl_seconds == 0 { 60 } else { ttl_seconds }), + enabled, + ), + } + } + + /// Get cached stat result + pub async fn get(&self, path: &str) -> Option> { + self.cache.get(path).await + } + + /// Store stat result (None means "does not exist") + pub async fn put(&self, path: String, info: Option) { + self.cache.put(path, info).await; + } + + /// Invalidate a specific path + pub async fn invalidate(&self, path: &str) { + self.cache.invalidate(path).await; + } + + /// Invalidate all entries with a prefix + pub async fn invalidate_prefix(&self, prefix: &str) { + self.cache.invalidate_prefix(prefix).await; + } + + /// Invalidate the parent of a path + pub async fn invalidate_parent(&self, path: &str) { + self.cache.invalidate_parent(path).await; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_list_dir_cache_basic() { + let cache = S3ListDirCache::new(10, 5, true); + + // Miss + assert!(cache.get("/test").await.is_none()); + + // Put and hit + let files = vec![FileInfo { + name: "file.txt".to_string(), + size: 100, + mode: 0o644, + mod_time: std::time::SystemTime::now(), + is_dir: false, + }]; + + cache.put("/test".to_string(), files.clone()).await; + let result = cache.get("/test").await; + 
assert!(result.is_some()); + assert_eq!(result.unwrap().len(), 1); + } + + #[tokio::test] + async fn test_stat_cache_basic() { + let cache = S3StatCache::new(10, 5, true); + + // Miss + assert!(cache.get("/test").await.is_none()); + + // Put file info + let info = FileInfo { + name: "file.txt".to_string(), + size: 100, + mode: 0o644, + mod_time: std::time::SystemTime::now(), + is_dir: false, + }; + + cache.put("/test".to_string(), Some(info)).await; + let result = cache.get("/test").await; + assert!(result.is_some()); + assert!(result.unwrap().is_some()); + } + + #[tokio::test] + async fn test_stat_cache_negative() { + let cache = S3StatCache::new(10, 5, true); + + // Cache a "not found" result + cache.put("/missing".to_string(), None).await; + let result = cache.get("/missing").await; + assert!(result.is_some()); // entry exists + assert!(result.unwrap().is_none()); // but value is None + } + + #[tokio::test] + async fn test_cache_invalidation() { + let cache = S3ListDirCache::new(10, 60, true); + + cache.put("/a".to_string(), vec![]).await; + cache.put("/a/b".to_string(), vec![]).await; + cache.put("/c".to_string(), vec![]).await; + + // Invalidate prefix /a + cache.invalidate_prefix("/a").await; + + assert!(cache.get("/a").await.is_none()); + assert!(cache.get("/a/b").await.is_none()); + assert!(cache.get("/c").await.is_some()); // unaffected + } + + #[tokio::test] + async fn test_cache_disabled() { + let cache = S3ListDirCache::new(10, 5, false); + + cache.put("/test".to_string(), vec![]).await; + assert!(cache.get("/test").await.is_none()); + } +} diff --git a/crates/ragfs/src/plugins/s3fs/client.rs b/crates/ragfs/src/plugins/s3fs/client.rs new file mode 100644 index 000000000..8a60ed54d --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/client.rs @@ -0,0 +1,523 @@ +//! S3 Client wrapper +//! +//! Provides a filesystem-oriented abstraction over the AWS S3 SDK. +//! Supports AWS S3 and S3-compatible services (MinIO, LocalStack, TOS). 
/// How a directory is represented inside the bucket.
#[derive(Debug, Clone, PartialEq)]
pub enum DirectoryMarkerMode {
    /// No marker objects are written; directories exist only as key prefixes.
    None,
    /// A zero-byte marker object is written (the default).
    Empty,
    /// A one-byte (newline) marker object is written, for services that
    /// reject zero-byte objects.
    NonEmpty,
}

impl DirectoryMarkerMode {
    /// Map a configuration string to a mode. `"none"` and `"nonempty"` select
    /// their modes; every other string falls back to `Empty`.
    pub fn from_str(s: &str) -> Self {
        if s == "none" {
            Self::None
        } else if s == "nonempty" {
            Self::NonEmpty
        } else {
            // default
            Self::Empty
        }
    }

    /// Bytes to upload as the marker object, or `Option::None` when this mode
    /// writes no marker at all.
    pub fn marker_data(&self) -> Option<Vec<u8>> {
        match self {
            Self::None => Option::None,
            Self::Empty => Some(vec![]),
            Self::NonEmpty => Some(vec![b'\n']),
        }
    }
}
async fn new(config: &HashMap) -> Result { + let bucket = config + .get("bucket") + .and_then(|v| v.as_string()) + .ok_or_else(|| Error::config("bucket is required for S3FS"))? + .to_string(); + + let region = config + .get("region") + .and_then(|v| v.as_string()) + .unwrap_or("us-east-1") + .to_string(); + + let endpoint = config.get("endpoint").and_then(|v| v.as_string()); + + let access_key = config + .get("access_key_id") + .and_then(|v| v.as_string()) + .map(|s| s.to_string()); + + let secret_key = config + .get("secret_access_key") + .and_then(|v| v.as_string()) + .map(|s| s.to_string()); + + let use_path_style = config + .get("use_path_style") + .and_then(|v| v.as_bool()) + .unwrap_or(true); + + let prefix = config + .get("prefix") + .and_then(|v| v.as_string()) + .unwrap_or("") + .to_string(); + + let marker_mode = config + .get("directory_marker_mode") + .and_then(|v| v.as_string()) + .map(|s| DirectoryMarkerMode::from_str(s)) + .unwrap_or(DirectoryMarkerMode::Empty); + + // Build S3 config + let mut s3_config_builder = aws_sdk_s3::Config::builder() + .behavior_version(BehaviorVersion::latest()) + .region(Region::new(region)) + .force_path_style(use_path_style); + + // Set endpoint if provided (MinIO, LocalStack, TOS) + if let Some(ep) = endpoint { + s3_config_builder = s3_config_builder.endpoint_url(ep.to_string()); + } + + // Set credentials if provided, otherwise SDK uses default chain + if let (Some(ak), Some(sk)) = (access_key, secret_key) { + let creds = Credentials::new(ak, sk, None, None, "ragfs-s3fs"); + s3_config_builder = s3_config_builder.credentials_provider(creds); + } + + let s3_config = s3_config_builder.build(); + let client = Client::from_conf(s3_config); + + Ok(Self { + client, + bucket, + prefix, + marker_mode, + }) + } + + /// Build the full S3 key from a filesystem path + pub fn build_key(&self, path: &str) -> String { + let clean = path.trim_start_matches('/'); + if self.prefix.is_empty() { + clean.to_string() + } else { + let prefix 
= self.prefix.trim_end_matches('/'); + if clean.is_empty() { + format!("{}/", prefix) + } else { + format!("{}/{}", prefix, clean) + } + } + } + + /// Strip the prefix from an S3 key to get the filesystem path + pub fn strip_prefix<'a>(&self, key: &'a str) -> &'a str { + if self.prefix.is_empty() { + key + } else { + let prefix = format!("{}/", self.prefix.trim_end_matches('/')); + key.strip_prefix(&prefix).unwrap_or(key) + } + } + + /// Get an object's contents + pub async fn get_object(&self, key: &str) -> Result> { + let resp = self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 GetObject error: {}", e)))?; + + let bytes = resp + .body + .collect() + .await + .map_err(|e| Error::internal(format!("S3 read body error: {}", e)))?; + + Ok(bytes.to_vec()) + } + + /// Get an object's contents with range request + pub async fn get_object_range( + &self, + key: &str, + offset: u64, + size: u64, + ) -> Result> { + let range = if size == 0 { + format!("bytes={}-", offset) + } else { + format!("bytes={}-{}", offset, offset + size - 1) + }; + + let resp = self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .range(range) + .send() + .await + .map_err(|e| Error::internal(format!("S3 GetObject range error: {}", e)))?; + + let bytes = resp + .body + .collect() + .await + .map_err(|e| Error::internal(format!("S3 read body error: {}", e)))?; + + Ok(bytes.to_vec()) + } + + /// Upload an object + pub async fn put_object(&self, key: &str, data: Vec) -> Result<()> { + self.client + .put_object() + .bucket(&self.bucket) + .key(key) + .body(ByteStream::from(data)) + .send() + .await + .map_err(|e| Error::internal(format!("S3 PutObject error: {}", e)))?; + + Ok(()) + } + + /// Delete a single object + pub async fn delete_object(&self, key: &str) -> Result<()> { + self.client + .delete_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 
DeleteObject error: {}", e)))?; + + Ok(()) + } + + /// Batch delete objects (up to 1000 per call) + pub async fn delete_objects(&self, keys: &[String]) -> Result<()> { + if keys.is_empty() { + return Ok(()); + } + + // S3 batch delete limit is 1000 + for chunk in keys.chunks(1000) { + let objects: Vec<_> = chunk + .iter() + .map(|k| { + aws_sdk_s3::types::ObjectIdentifier::builder() + .key(k.as_str()) + .build() + .unwrap() + }) + .collect(); + + let delete = aws_sdk_s3::types::Delete::builder() + .set_objects(Some(objects)) + .build() + .map_err(|e| Error::internal(format!("S3 build delete: {}", e)))?; + + self.client + .delete_objects() + .bucket(&self.bucket) + .delete(delete) + .send() + .await + .map_err(|e| Error::internal(format!("S3 DeleteObjects error: {}", e)))?; + } + + Ok(()) + } + + /// Get object metadata (HeadObject) + pub async fn head_object(&self, key: &str) -> Result> { + match self + .client + .head_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + { + Ok(resp) => { + let size = resp.content_length.unwrap_or(0); + let last_modified = resp + .last_modified() + .map(aws_datetime_to_systemtime) + .unwrap_or(UNIX_EPOCH); + + let is_dir_marker = key.ends_with('/'); + + Ok(Some(ObjectMeta { + key: key.to_string(), + size, + last_modified, + is_dir_marker, + })) + } + Err(sdk_err) => { + // Check if it's a 404 + let service_err = sdk_err.into_service_error(); + if service_err.is_not_found() { + Ok(None) + } else { + Err(Error::internal(format!( + "S3 HeadObject error: {}", + service_err + ))) + } + } + } + } + + /// List objects with prefix and delimiter + pub async fn list_objects( + &self, + prefix: &str, + delimiter: Option<&str>, + ) -> Result { + let mut files = Vec::new(); + let mut directories = Vec::new(); + let mut continuation_token: Option = None; + + loop { + let mut req = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(prefix); + + if let Some(d) = delimiter { + req = req.delimiter(d); + } + + if let 
Some(token) = &continuation_token { + req = req.continuation_token(token); + } + + let resp = req + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 error: {}", e)))?; + + // Process files (contents) + for obj in resp.contents() { + let key = obj.key().unwrap_or(""); + + // Skip the prefix itself and directory markers + if key == prefix || key.ends_with('/') { + continue; + } + + let size = obj.size.unwrap_or(0); + let last_modified = obj + .last_modified() + .map(aws_datetime_to_systemtime) + .unwrap_or(UNIX_EPOCH); + + files.push(ObjectMeta { + key: key.to_string(), + size, + last_modified, + is_dir_marker: false, + }); + } + + // Process directory prefixes (common prefixes) + for cp in resp.common_prefixes() { + if let Some(p) = cp.prefix() { + // Remove trailing slash for consistency + let dir = p.trim_end_matches('/').to_string(); + if !dir.is_empty() { + directories.push(dir); + } + } + } + + // Check if there are more results + if resp.is_truncated() == Some(true) { + continuation_token = resp.next_continuation_token().map(|s| s.to_string()); + } else { + break; + } + } + + Ok(ListResult { files, directories }) + } + + /// Copy an object + pub async fn copy_object(&self, src_key: &str, dst_key: &str) -> Result<()> { + let copy_source = format!("{}/{}", self.bucket, src_key); + + self.client + .copy_object() + .bucket(&self.bucket) + .copy_source(©_source) + .key(dst_key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 CopyObject error: {}", e)))?; + + Ok(()) + } + + /// Check if a directory exists (either marker or any children) + pub async fn directory_exists(&self, path: &str) -> Result { + let dir_key = self.build_key(path); + let dir_key_slash = if dir_key.ends_with('/') { + dir_key.clone() + } else { + format!("{}/", dir_key) + }; + + // Check if directory marker exists + if self.head_object(&dir_key_slash).await?.is_some() { + return Ok(true); + } + + // Check if any objects exist with this prefix + let resp = 
self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(&dir_key_slash) + .max_keys(1) + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 error: {}", e)))?; + + let has_contents = !resp.contents().is_empty(); + let has_prefixes = !resp.common_prefixes().is_empty(); + + Ok(has_contents || has_prefixes) + } + + /// Delete a directory and all its contents + pub async fn delete_directory(&self, path: &str) -> Result<()> { + let dir_key = self.build_key(path); + let prefix = if dir_key.ends_with('/') { + dir_key + } else { + format!("{}/", dir_key) + }; + + // List and delete all objects under prefix + loop { + let resp = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(&prefix) + .max_keys(1000) + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 error: {}", e)))?; + + let contents = resp.contents(); + if contents.is_empty() { + break; + } + + let keys: Vec = contents + .iter() + .filter_map(|obj: &aws_sdk_s3::types::Object| obj.key().map(|k| k.to_string())) + .collect(); + + self.delete_objects(&keys).await?; + + if contents.len() < 1000 { + break; + } + } + + Ok(()) + } + + /// Create a directory marker object + pub async fn create_directory_marker(&self, path: &str) -> Result<()> { + if let Some(data) = self.marker_mode.marker_data() { + let dir_key = self.build_key(path); + let key = if dir_key.ends_with('/') { + dir_key + } else { + format!("{}/", dir_key) + }; + + self.put_object(&key, data).await?; + } + Ok(()) + } + + /// Get the marker mode + pub fn marker_mode(&self) -> &DirectoryMarkerMode { + &self.marker_mode + } + + /// Get the bucket name + pub fn bucket(&self) -> &str { + &self.bucket + } +} diff --git a/crates/ragfs/src/plugins/s3fs/mod.rs b/crates/ragfs/src/plugins/s3fs/mod.rs new file mode 100644 index 000000000..0fdc070bb --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/mod.rs @@ -0,0 +1,776 @@ +//! S3FS - S3-backed File System +//! +//! 
A file system backed by Amazon S3 or S3-compatible object storage. +//! Supports AWS S3, MinIO, LocalStack, ByteDance TOS, and other +//! S3-compatible services. +//! +//! ## Features +//! +//! - Full POSIX-like file system operations over S3 +//! - Directory simulation via prefix/delimiter listing + marker objects +//! - Dual-layer caching (directory listings + stat metadata) +//! - Range-based reads for partial file access +//! - Configurable directory marker modes +//! - Support for custom S3 endpoints + +pub mod cache; +pub mod client; + +use async_trait::async_trait; +use std::sync::Arc; +use std::time::SystemTime; + +use cache::{S3ListDirCache, S3StatCache}; +use client::S3Client; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// S3-backed file system +pub struct S3FileSystem { + client: Arc, + dir_cache: S3ListDirCache, + stat_cache: S3StatCache, +} + +impl S3FileSystem { + /// Create a new S3FileSystem + pub async fn new(config: &PluginConfig) -> Result { + let client = S3Client::new(&config.params).await?; + + let cache_enabled = config + .params + .get("cache_enabled") + .and_then(|v| v.as_bool()) + .unwrap_or(true); + + let cache_max_size = config + .params + .get("cache_max_size") + .and_then(|v| v.as_int()) + .unwrap_or(1000) as usize; + + let cache_ttl = config + .params + .get("cache_ttl") + .and_then(|v| v.as_int()) + .unwrap_or(30) as u64; + + let stat_cache_ttl = config + .params + .get("stat_cache_ttl") + .and_then(|v| v.as_int()) + .unwrap_or(60) as u64; + + let dir_cache = S3ListDirCache::new(cache_max_size, cache_ttl, cache_enabled); + let stat_cache = S3StatCache::new(cache_max_size, stat_cache_ttl, cache_enabled); + + tracing::info!( + "S3FS initialized: bucket={}, cache={}", + client.bucket(), + cache_enabled + ); + + Ok(Self { + client: Arc::new(client), + dir_cache, + stat_cache, + }) + } + + /// Normalize path to consistent format + fn normalize_path(path: 
&str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + let mut result = if path.starts_with('/') { + path.to_string() + } else { + format!("/{}", path) + }; + + if result.len() > 1 && result.ends_with('/') { + result.pop(); + } + + while result.contains("//") { + result = result.replace("//", "/"); + } + + result + } + + /// Get file name from path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + path.rsplit('/') + .next() + .unwrap_or("") + .to_string() + } +} + +#[async_trait] +impl FileSystem for S3FileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Check if already exists + if self.client.head_object(&key).await?.is_some() { + return Err(Error::already_exists(&normalized)); + } + + // Create empty file + self.client.put_object(&key, Vec::new()).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + + // Check if already exists + if self.client.directory_exists(&normalized).await? { + return Err(Error::already_exists(&normalized)); + } + + // Create directory marker + self.client.create_directory_marker(&normalized).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + return Err(Error::invalid_operation("cannot remove root directory")); + } + + let key = self.client.build_key(&normalized); + + // Check if it's a file + if let Some(meta) = self.client.head_object(&key).await? 
{ + if !meta.is_dir_marker { + // Delete file + self.client.delete_object(&key).await?; + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + return Ok(()); + } + } + + // Check if it's a directory + if self.client.directory_exists(&normalized).await? { + // Check if directory is empty + let dir_prefix = format!("{}/", self.client.build_key(&normalized)); + let listing = self.client.list_objects(&dir_prefix, Some("/")).await?; + + if !listing.files.is_empty() || !listing.directories.is_empty() { + return Err(Error::DirectoryNotEmpty(normalized)); + } + + // Delete directory marker + let dir_key = format!("{}/", self.client.build_key(&normalized)); + self.client.delete_object(&dir_key).await?; + + self.dir_cache.invalidate_parent(&normalized).await; + self.dir_cache.invalidate(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + return Ok(()); + } + + Err(Error::not_found(&normalized)) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + // Delete everything under prefix + self.client.delete_directory("").await?; + self.dir_cache.invalidate_prefix("/").await; + self.stat_cache.invalidate_prefix("/").await; + return Ok(()); + } + + // Delete the file itself (if it exists as a file) + let key = self.client.build_key(&normalized); + let _ = self.client.delete_object(&key).await; + + // Delete directory and all children + self.client.delete_directory(&normalized).await?; + + self.dir_cache.invalidate_parent(&normalized).await; + self.dir_cache.invalidate_prefix(&normalized).await; + self.stat_cache.invalidate_prefix(&normalized).await; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Check if it's a directory + if key.ends_with('/') || 
self.client.directory_exists(&normalized).await? { + // Try to read as file first + if self.client.head_object(&key).await?.is_none() { + return Err(Error::IsADirectory(normalized)); + } + } + + if offset == 0 && size == 0 { + // Full read + self.client.get_object(&key).await + } else { + // Range read + self.client.get_object_range(&key, offset, size).await + } + } + + async fn write(&self, path: &str, data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // S3 always replaces the full object + self.client.put_object(&key, data.to_vec()).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(data.len() as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + + // Check cache + if let Some(files) = self.dir_cache.get(&normalized).await { + return Ok(files); + } + + // Build prefix for listing + let prefix = if normalized == "/" { + if self.client.build_key("").is_empty() { + String::new() + } else { + self.client.build_key("") + } + } else { + format!("{}/", self.client.build_key(&normalized)) + }; + + let listing = self.client.list_objects(&prefix, Some("/")).await?; + + let mut files = Vec::new(); + + // Add files + for obj in &listing.files { + let rel_path = self.client.strip_prefix(&obj.key); + let name = rel_path.rsplit('/').next().unwrap_or(rel_path); + + if name.is_empty() { + continue; + } + + files.push(FileInfo { + name: name.to_string(), + size: obj.size as u64, + mode: 0o644, + mod_time: obj.last_modified, + is_dir: false, + }); + } + + // Add directories + for dir_key in &listing.directories { + let rel_path = self.client.strip_prefix(dir_key); + let name = rel_path.rsplit('/').next().unwrap_or(rel_path); + + if name.is_empty() { + continue; + } + + files.push(FileInfo { + name: name.to_string(), + size: 
0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + // Sort by name + files.sort_by(|a, b| a.name.cmp(&b.name)); + + // Cache + self.dir_cache + .put(normalized.clone(), files.clone()) + .await; + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let normalized = Self::normalize_path(path); + + // Root always exists + if normalized == "/" { + return Ok(FileInfo { + name: "/".to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + // Check stat cache + if let Some(cached) = self.stat_cache.get(&normalized).await { + return cached.ok_or_else(|| Error::not_found(&normalized)); + } + + let key = self.client.build_key(&normalized); + + // Check if it's a file + if let Some(meta) = self.client.head_object(&key).await? { + if !meta.is_dir_marker { + let info = FileInfo { + name: Self::file_name(&normalized), + size: meta.size as u64, + mode: 0o644, + mod_time: meta.last_modified, + is_dir: false, + }; + self.stat_cache + .put(normalized.clone(), Some(info.clone())) + .await; + return Ok(info); + } + } + + // Check if it's a directory + if self.client.directory_exists(&normalized).await? 
{ + let info = FileInfo { + name: Self::file_name(&normalized), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }; + self.stat_cache + .put(normalized.clone(), Some(info.clone())) + .await; + return Ok(info); + } + + // Not found + self.stat_cache.put(normalized.clone(), None).await; + Err(Error::not_found(&normalized)) + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_normalized = Self::normalize_path(old_path); + let new_normalized = Self::normalize_path(new_path); + + if old_normalized == "/" || new_normalized == "/" { + return Err(Error::invalid_operation("cannot rename root directory")); + } + + let old_key = self.client.build_key(&old_normalized); + + // Check if old path exists as a file + if let Some(meta) = self.client.head_object(&old_key).await? { + if !meta.is_dir_marker { + // File rename: copy + delete + let new_key = self.client.build_key(&new_normalized); + self.client.copy_object(&old_key, &new_key).await?; + self.client.delete_object(&old_key).await?; + + self.dir_cache.invalidate_parent(&old_normalized).await; + self.dir_cache.invalidate_parent(&new_normalized).await; + self.stat_cache.invalidate(&old_normalized).await; + self.stat_cache.invalidate(&new_normalized).await; + + return Ok(()); + } + } + + // Directory rename: copy all children + delete originals + if self.client.directory_exists(&old_normalized).await? 
{ + let old_prefix = format!("{}/", self.client.build_key(&old_normalized)); + let new_prefix_base = self.client.build_key(&new_normalized); + + // List all objects under old prefix + let listing = self.client.list_objects(&old_prefix, None).await?; + + // Copy directory marker + let old_dir_key = format!("{}/", self.client.build_key(&old_normalized)); + let new_dir_key = format!("{}/", new_prefix_base); + + if self.client.head_object(&old_dir_key).await?.is_some() { + self.client + .copy_object(&old_dir_key, &new_dir_key) + .await?; + } + + // Copy all children + for obj in &listing.files { + let relative = obj.key.strip_prefix(&old_prefix).unwrap_or(&obj.key); + let new_key = format!("{}/{}", new_prefix_base, relative); + self.client.copy_object(&obj.key, &new_key).await?; + } + + // Delete old directory + self.client.delete_directory(&old_normalized).await?; + + // Also delete the old directory marker + let _ = self.client.delete_object(&old_dir_key).await; + + // Invalidate caches + self.dir_cache.invalidate_prefix(&old_normalized).await; + self.dir_cache.invalidate_parent(&old_normalized).await; + self.dir_cache.invalidate_parent(&new_normalized).await; + self.stat_cache.invalidate_prefix(&old_normalized).await; + self.stat_cache.invalidate_prefix(&new_normalized).await; + + return Ok(()); + } + + Err(Error::not_found(&old_normalized)) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + // S3 doesn't support Unix permissions - no-op + Ok(()) + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Read current data + let mut data = self.client.get_object(&key).await?; + + // Truncate + data.resize(size as usize, 0); + + // Write back + self.client.put_object(&key, data).await?; + + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } +} + +/// S3FS Plugin +pub struct S3FSPlugin { + config_params: Vec, +} + +impl 
S3FSPlugin { + /// Create a new S3FSPlugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter::required_string("bucket", "S3 bucket name"), + ConfigParameter::optional( + "region", + "string", + "us-east-1", + "AWS region", + ), + ConfigParameter::optional( + "endpoint", + "string", + "", + "Custom S3 endpoint (for MinIO, LocalStack, TOS)", + ), + ConfigParameter::optional( + "access_key_id", + "string", + "", + "AWS access key ID (falls back to AWS_ACCESS_KEY_ID env)", + ), + ConfigParameter::optional( + "secret_access_key", + "string", + "", + "AWS secret access key (falls back to AWS_SECRET_ACCESS_KEY env)", + ), + ConfigParameter::optional( + "use_path_style", + "bool", + "true", + "Use path-style addressing (bucket/key vs bucket.host/key)", + ), + ConfigParameter::optional( + "prefix", + "string", + "", + "Key prefix for namespace isolation (e.g. 'agfs/')", + ), + ConfigParameter::optional( + "directory_marker_mode", + "string", + "empty", + "Directory marker mode: none, empty, nonempty", + ), + ConfigParameter::optional( + "cache_enabled", + "bool", + "true", + "Enable caching", + ), + ConfigParameter::optional( + "cache_max_size", + "int", + "1000", + "Maximum cache entries", + ), + ConfigParameter::optional( + "cache_ttl", + "int", + "30", + "Directory listing cache TTL in seconds", + ), + ConfigParameter::optional( + "stat_cache_ttl", + "int", + "60", + "Stat cache TTL in seconds", + ), + ], + } + } +} + +impl Default for S3FSPlugin { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ServicePlugin for S3FSPlugin { + fn name(&self) -> &str { + "s3fs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "S3-backed file system (AWS S3, MinIO, LocalStack, TOS)" + } + + fn readme(&self) -> &str { + r#"# S3FS - S3-backed File System + +A file system backed by Amazon S3 or S3-compatible object storage. 
+ +## Features + +- Full POSIX-like file system operations over S3 +- Supports AWS S3, MinIO, LocalStack, ByteDance TOS +- Directory simulation via prefix/delimiter + marker objects +- Dual-layer caching (directory listings + stat metadata) +- Range-based reads for partial file access +- Configurable directory marker modes + +## Configuration + +### AWS S3 +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: my-bucket + region: us-east-1 +``` + +### MinIO (Local Testing) +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: test-bucket + endpoint: http://localhost:9000 + access_key_id: minioadmin + secret_access_key: minioadmin + use_path_style: true +``` + +### ByteDance TOS +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: my-tos-bucket + region: cn-beijing + endpoint: https://tos-cn-beijing.volces.com + use_path_style: false + directory_marker_mode: nonempty +``` + +## Directory Marker Modes + +- `empty` (default): Zero-byte marker objects for directories +- `nonempty`: Single-byte marker (for TOS and services that reject zero-byte objects) +- `none`: No markers, pure prefix-based directory detection + +## Notes + +- S3 does not support partial/offset writes (always full object replacement) +- chmod is a no-op (S3 has no Unix permissions) +- Rename is implemented as copy + delete +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // bucket is required + if config + .params + .get("bucket") + .and_then(|v| v.as_string()) + .is_none() + { + return Err(Error::config("'bucket' is required for S3FS")); + } + + // Validate directory_marker_mode if provided + if let Some(mode) = config + .params + .get("directory_marker_mode") + .and_then(|v| v.as_string()) + { + if !["none", "empty", "nonempty"].contains(&mode) { + return Err(Error::config(format!( + "invalid directory_marker_mode: {} (valid: none, empty, nonempty)", + mode + ))); + } + } + + Ok(()) + } + + async fn 
initialize(&self, config: PluginConfig) -> Result> { + let fs = S3FileSystem::new(&config).await?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_path() { + assert_eq!(S3FileSystem::normalize_path(""), "/"); + assert_eq!(S3FileSystem::normalize_path("/"), "/"); + assert_eq!(S3FileSystem::normalize_path("/foo"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("/foo/"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("foo"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("/foo//bar"), "/foo/bar"); + } + + #[test] + fn test_file_name() { + assert_eq!(S3FileSystem::file_name("/"), "/"); + assert_eq!(S3FileSystem::file_name("/foo.txt"), "foo.txt"); + assert_eq!(S3FileSystem::file_name("/dir/file.txt"), "file.txt"); + } + + #[tokio::test] + async fn test_plugin_validate() { + let plugin = S3FSPlugin::new(); + + // Missing bucket should fail + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params: std::collections::HashMap::new(), + }; + assert!(plugin.validate(&config).await.is_err()); + + // With bucket should pass + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test-bucket".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_ok()); + } + + #[tokio::test] + async fn test_plugin_validate_marker_mode() { + let plugin = S3FSPlugin::new(); + + // Invalid marker mode + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test".to_string()), + ); + params.insert( + "directory_marker_mode".to_string(), + crate::core::ConfigValue::String("invalid".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + 
mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_err()); + + // Valid marker mode + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test".to_string()), + ); + params.insert( + "directory_marker_mode".to_string(), + crate::core::ConfigValue::String("nonempty".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/sqlfs/backend.rs b/crates/ragfs/src/plugins/sqlfs/backend.rs new file mode 100644 index 000000000..5beb84cc7 --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/backend.rs @@ -0,0 +1,492 @@ +//! Database backend abstraction for SQLFS +//! +//! This module provides an abstraction over different database backends +//! (SQLite, MySQL/TiDB) to allow SQLFS to work with multiple databases. + +use crate::core::{ConfigValue, Error, Result}; +use rusqlite::{params, Connection}; +use std::collections::HashMap; +use std::sync::Mutex; + +/// Maximum file size in bytes (5MB, same as Go version) +pub const MAX_FILE_SIZE: usize = 5 * 1024 * 1024; +/// Maximum file size in MB (for display) +pub const MAX_FILE_SIZE_MB: usize = 5; + +/// Database backend trait +/// +/// All database backends must implement this trait to provide +/// uniform access to different database systems. 
+pub trait DatabaseBackend: Send + Sync { + /// Get the driver name for logging and metadata + fn driver_name(&self) -> &'static str; + + /// Check if this path exists + fn path_exists(&self, path: &str) -> Result; + + /// Check if a path is a directory + fn is_directory(&self, path: &str) -> Result; + + /// Create a new file entry + fn create_file(&self, path: &str, mode: u32, data: &[u8]) -> Result<()>; + + /// Create a new directory entry + fn create_directory(&self, path: &str, mode: u32) -> Result<()>; + + /// Delete a file or directory entry + fn delete_entry(&self, path: &str) -> Result<()>; + + /// Delete entries matching a pattern (for recursive delete) + fn delete_entries_by_pattern( + &self, + pattern: &str, + exclude_path: Option<&str>, + ) -> Result; + + /// Read file data + fn read_file(&self, path: &str) -> Result)>>; + + /// Update file data + fn update_file(&self, path: &str, data: &[u8]) -> Result<()>; + + /// Get file metadata + fn get_metadata(&self, path: &str) -> Result>; + + /// Update file mode + fn update_mode(&self, path: &str, mode: u32) -> Result<()>; + + /// Rename a path (file or directory) + fn rename_path(&self, old_path: &str, new_path: &str) -> Result<()>; + + /// Rename all children under a path (for directory rename) + fn rename_children(&self, old_path: &str, new_path: &str) -> Result<()>; + + /// List directory contents (direct children only) + fn list_directory(&self, path: &str) -> Result>; + + /// Count entries matching a pattern + fn count_by_pattern(&self, pattern: &str) -> Result; + + /// Get parent path + fn parent_path(&self, path: &str) -> String; +} + +/// File metadata from database +#[derive(Debug, Clone)] +pub struct FileMetadata { + /// Full path of the file or directory + pub path: String, + /// Whether this entry is a directory + pub is_dir: bool, + /// Unix-style file permissions + pub mode: u32, + /// File size in bytes + pub size: i64, + /// Last modification time as Unix timestamp + pub mod_time: i64, + /// 
File content data (None for metadata-only queries) + pub data: Option>, +} + +/// SQLite backend implementation +/// +/// Uses `Mutex` to satisfy `Send + Sync` requirements. +/// rusqlite's `Connection` is not `Sync` due to internal `RefCell` usage, +/// so we wrap it in a `Mutex` for thread-safe access. +pub struct SQLiteBackend { + conn: Mutex, +} + +impl SQLiteBackend { + /// Create a new SQLite backend + /// + /// Initializes the database schema and applies optimizations (WAL mode, etc.) + pub fn new(db_path: Option<&str>) -> Result { + let path = db_path.unwrap_or(":memory:"); + let conn = Connection::open(path) + .map_err(|e| Error::internal(format!("sqlite connection error: {}", e)))?; + + // Initialize schema + conn.execute_batch( + r#" + CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + is_dir INTEGER NOT NULL, + mode INTEGER NOT NULL, + size INTEGER NOT NULL, + mod_time INTEGER NOT NULL, + data BLOB + ); + CREATE INDEX IF NOT EXISTS idx_parent ON files(path); + "#, + ) + .map_err(|e| Error::internal(format!("schema init error: {}", e)))?; + + // Apply optimizations + conn.execute_batch( + r#" + PRAGMA journal_mode=WAL; + PRAGMA synchronous=NORMAL; + PRAGMA cache_size=-64000; + "#, + ) + .map_err(|e| Error::internal(format!("optimization error: {}", e)))?; + + // Ensure root directory exists + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT OR IGNORE INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params!["/", 1, 0o755, 0i64, now, None::>], + ) + .map_err(|e| Error::internal(format!("root init error: {}", e)))?; + + Ok(Self { + conn: Mutex::new(conn), + }) + } +} + +impl DatabaseBackend for SQLiteBackend { + fn driver_name(&self) -> &'static str { + "sqlite3" + } + + fn path_exists(&self, path: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT COUNT(*) FROM files WHERE path = ?1") + 
.map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let count: i64 = stmt + .query_row(params![path], |row| row.get(0)) + .unwrap_or(0); + + Ok(count > 0) + } + + fn is_directory(&self, path: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT is_dir FROM files WHERE path = ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| row.get::<_, i32>(0)) { + Ok(is_dir) => Ok(is_dir == 1), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(false), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn create_file(&self, path: &str, mode: u32, data: &[u8]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![path, 0, mode, data.len() as i64, now, data], + ) + .map_err(|e| Error::internal(format!("insert error: {}", e)))?; + Ok(()) + } + + fn create_directory(&self, path: &str, mode: u32) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![path, 1, mode, 0i64, now, None::>], + ) + .map_err(|e| Error::internal(format!("insert error: {}", e)))?; + Ok(()) + } + + fn delete_entry(&self, path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + conn.execute("DELETE FROM files WHERE path = ?1", params![path]) + .map_err(|e| Error::internal(format!("delete error: {}", e)))?; + Ok(()) + } + + fn delete_entries_by_pattern( + &self, + pattern: &str, + exclude_path: Option<&str>, + ) -> Result { + let conn = self.conn.lock().map_err(|e| 
Error::internal(e.to_string()))?; + + let result = if let Some(exclude) = exclude_path { + conn.execute( + "DELETE FROM files WHERE path LIKE ?1 AND path != ?2", + params![pattern, exclude], + ) + .map_err(|e| Error::internal(format!("delete error: {}", e)))? + } else { + conn.execute("DELETE FROM files WHERE path LIKE ?1", params![pattern]) + .map_err(|e| Error::internal(format!("delete error: {}", e)))? + }; + + Ok(result) + } + + fn read_file(&self, path: &str) -> Result)>> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT is_dir, data FROM files WHERE path = ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| { + let is_dir: i32 = row.get(0)?; + let data: Option> = row.get(1)?; + Ok((is_dir == 1, data.unwrap_or_default())) + }) { + Ok(result) => Ok(Some(result)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn update_file(&self, path: &str, data: &[u8]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "UPDATE files SET data = ?1, size = ?2, mod_time = ?3 WHERE path = ?4", + params![data, data.len() as i64, now, path], + ) + .map_err(|e| Error::internal(format!("update error: {}", e)))?; + Ok(()) + } + + fn get_metadata(&self, path: &str) -> Result> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached( + "SELECT path, is_dir, mode, size, mod_time FROM files WHERE path = ?1", + ) + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| { + Ok(FileMetadata { + path: row.get(0)?, + is_dir: row.get::<_, i32>(1)? 
== 1, + mode: row.get(2)?, + size: row.get(3)?, + mod_time: row.get(4)?, + data: None, + }) + }) { + Ok(meta) => Ok(Some(meta)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn update_mode(&self, path: &str, mode: u32) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "UPDATE files SET mode = ?1, mod_time = ?2 WHERE path = ?3", + params![mode, now, path], + ) + .map_err(|e| Error::internal(format!("update error: {}", e)))?; + Ok(()) + } + + fn rename_path(&self, old_path: &str, new_path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + conn.execute( + "UPDATE files SET path = ?1 WHERE path = ?2", + params![new_path, old_path], + ) + .map_err(|e| Error::internal(format!("rename error: {}", e)))?; + Ok(()) + } + + fn rename_children(&self, old_path: &str, new_path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let old_pattern = format!("{}/%", old_path); + let old_len = (old_path.len() + 1) as i32; + let sql = "UPDATE files SET path = ?1 || SUBSTR(path, ?2) WHERE path LIKE ?3"; + conn.execute(sql, params![new_path, old_len, old_pattern]) + .map_err(|e| Error::internal(format!("rename children error: {}", e)))?; + Ok(()) + } + + fn list_directory(&self, path: &str) -> Result> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + + // Build pattern for direct children only + // For root "/": children are like "/" (no further slashes) + // For "/dir": children are like "/dir/" (no further slashes) + let prefix = if path == "/" { + "/".to_string() + } else { + format!("{}/", path) + }; + + // Query all entries that start with the prefix, + // excluding the directory itself + let sql = "SELECT path, is_dir, mode, size, mod_time FROM files WHERE path LIKE 
?1 AND path != ?2 ORDER BY path"; + let like_pattern = format!("{}%", prefix); + + let mut stmt = conn + .prepare_cached(sql) + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let mut results = Vec::new(); + let prefix_len = prefix.len(); + + let rows = stmt + .query_map(params![like_pattern, path], |row| { + Ok(FileMetadata { + path: row.get(0)?, + is_dir: row.get::<_, i32>(1)? == 1, + mode: row.get(2)?, + size: row.get(3)?, + mod_time: row.get(4)?, + data: None, + }) + }) + .map_err(|e| Error::internal(format!("query error: {}", e)))?; + + for row_result in rows { + let meta = + row_result.map_err(|e| Error::internal(format!("row error: {}", e)))?; + + // Only include direct children (no further '/' after the prefix) + let remainder = &meta.path[prefix_len..]; + if !remainder.contains('/') { + results.push(meta); + } + } + + Ok(results) + } + + fn count_by_pattern(&self, pattern: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT COUNT(*) FROM files WHERE path LIKE ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let count: i64 = stmt + .query_row(params![pattern], |row| row.get(0)) + .map_err(|e| Error::internal(format!("query error: {}", e)))?; + + Ok(count) + } + + fn parent_path(&self, path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + // Remove trailing slash + let trimmed = path.trim_end_matches('/'); + if trimmed.is_empty() { + return "/".to_string(); + } + + // Find last slash + if let Some(pos) = trimmed.rfind('/') { + if pos == 0 { + return "/".to_string(); + } + return trimmed[..pos].to_string(); + } + + "/".to_string() + } +} + +/// Create a database backend from configuration +pub fn create_backend(config: &HashMap) -> Result> { + let backend_type = config + .get("backend") + .and_then(|v| v.as_string()) + .unwrap_or("sqlite"); + + match backend_type { + "sqlite" | "sqlite3" => { + let 
db_path = config.get("db_path").and_then(|v| v.as_string()); + let backend = SQLiteBackend::new(db_path)?; + Ok(Box::new(backend)) + } + "mysql" | "tidb" => { + // TODO: Implement MySQL/TiDB backend + Err(Error::internal("MySQL/TiDB backend not yet implemented")) + } + _ => Err(Error::config(format!( + "unsupported database backend: {} (valid options: sqlite, sqlite3)", + backend_type + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parent_path() { + let backend = SQLiteBackend::new(Some(":memory:")).unwrap(); + assert_eq!(backend.parent_path("/"), "/"); + assert_eq!(backend.parent_path("/file.txt"), "/"); + assert_eq!(backend.parent_path("/dir/"), "/"); + assert_eq!(backend.parent_path("/dir/file.txt"), "/dir"); + assert_eq!(backend.parent_path("/a/b/c/file.txt"), "/a/b/c"); + } + + #[test] + fn test_sqlite_backend_basic() { + let backend = SQLiteBackend::new(Some(":memory:")).unwrap(); + + // Root should already exist + assert!(backend.path_exists("/").unwrap()); + assert!(backend.is_directory("/").unwrap()); + + // Create a directory + backend.create_directory("/testdir", 0o755).unwrap(); + assert!(backend.path_exists("/testdir").unwrap()); + assert!(backend.is_directory("/testdir").unwrap()); + + // Create a file + backend.create_file("/testdir/file.txt", 0o644, b"hello").unwrap(); + assert!(backend.path_exists("/testdir/file.txt").unwrap()); + assert!(!backend.is_directory("/testdir/file.txt").unwrap()); + + // Read file + let result = backend.read_file("/testdir/file.txt").unwrap(); + assert!(result.is_some()); + let (is_dir, data) = result.unwrap(); + assert!(!is_dir); + assert_eq!(data, b"hello"); + + // List directory - should return only direct children + let entries = backend.list_directory("/testdir").unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path, "/testdir/file.txt"); + } + + #[test] + fn test_list_directory_direct_children() { + let backend = SQLiteBackend::new(Some(":memory:")).unwrap(); + + // 
Create nested structure: /a/b/c + backend.create_directory("/a", 0o755).unwrap(); + backend.create_directory("/a/b", 0o755).unwrap(); + backend.create_directory("/a/b/c", 0o755).unwrap(); + backend.create_file("/a/file1.txt", 0o644, b"").unwrap(); + backend.create_file("/a/b/file2.txt", 0o644, b"").unwrap(); + + // List /a - should only return /a/b and /a/file1.txt + let entries = backend.list_directory("/a").unwrap(); + assert_eq!(entries.len(), 2); + let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect(); + assert!(paths.contains(&"/a/b")); + assert!(paths.contains(&"/a/file1.txt")); + + // List / - should only return /a + let entries = backend.list_directory("/").unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path, "/a"); + } +} diff --git a/crates/ragfs/src/plugins/sqlfs/cache.rs b/crates/ragfs/src/plugins/sqlfs/cache.rs new file mode 100644 index 000000000..dc4fa105d --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/cache.rs @@ -0,0 +1,350 @@ +//! LRU cache for directory listings +//! +//! This module provides an LRU (Least Recently Used) cache with TTL +//! for directory listings in SQLFS. This significantly improves performance +//! for operations like shell tab completion and repeated directory listings. 
+ +use crate::core::types::FileInfo; +use lru::LruCache; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::RwLock; + +/// Cache entry with timestamp for TTL +#[derive(Debug, Clone)] +struct CacheEntry { + files: Vec, + timestamp: Instant, +} + +/// LRU cache for directory listings +/// +/// This cache provides: +/// - LRU eviction when max capacity is reached +/// - TTL (time-to-live) for each entry +/// - Thread-safe access for concurrent operations +/// - Cache hit/miss statistics +pub struct ListDirCache { + inner: Arc>, +} + +/// Inner cache state +struct CacheInner { + cache: LruCache, + ttl: Duration, + enabled: bool, + hit_count: u64, + miss_count: u64, +} + +impl ListDirCache { + /// Create a new directory listing cache + /// + /// # Arguments + /// * `max_size` - Maximum number of entries to cache (default: 1000) + /// * `ttl_seconds` - Time-to-live in seconds (default: 5) + /// * `enabled` - Whether caching is enabled (default: true) + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + let max_size = if max_size == 0 { 1000 } else { max_size }; + let ttl = if ttl_seconds == 0 { + Duration::from_secs(5) + } else { + Duration::from_secs(ttl_seconds) + }; + + Self { + inner: Arc::new(RwLock::new(CacheInner { + cache: LruCache::new(NonZeroUsize::new(max_size).unwrap()), + ttl, + enabled, + hit_count: 0, + miss_count: 0, + })), + } + } + + /// Get cached directory listing + /// + /// Returns None if: + /// - Cache is disabled + /// - Path is not in cache + /// - Entry has expired (TTL) + pub async fn get(&self, path: &str) -> Option> { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return None; + } + + let ttl = inner.ttl; + + // Check if entry exists and is still valid + let result = inner.cache.get(path).and_then(|entry| { + if Instant::now().duration_since(entry.timestamp) > ttl { + None // expired + } else { + Some(entry.files.clone()) + } + }); + + match 
result { + Some(files) => { + // Refresh the entry's timestamp + if let Some(entry) = inner.cache.get_mut(path) { + entry.timestamp = Instant::now(); + } + inner.hit_count += 1; + Some(files) + } + None => { + // Remove expired entry if it exists + inner.cache.pop(path); + inner.miss_count += 1; + None + } + } + } + + /// Put a directory listing into the cache + pub async fn put(&self, path: String, files: Vec) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + let entry = CacheEntry { + files, + timestamp: Instant::now(), + }; + + inner.cache.put(path, entry); + } + + /// Invalidate a specific path from the cache + pub async fn invalidate(&self, path: &str) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + inner.cache.pop(path); + } + + /// Invalidate all paths with a given prefix + /// + /// This is used when a directory or its children are modified. + pub async fn invalidate_prefix(&self, prefix: &str) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + // Collect keys to invalidate + let to_invalidate: Vec = inner + .cache + .iter() + .filter(|(path, _)| { + *path == prefix || is_descendant(path, prefix) + }) + .map(|(path, _)| path.clone()) + .collect(); + + // Remove all invalidated paths + for path in to_invalidate { + inner.cache.pop(&path); + } + } + + /// Invalidate the parent directory of a given path + /// + /// This is called when a file/directory is created, deleted, or renamed. 
+ pub async fn invalidate_parent(&self, path: &str) { + let parent = parent_path(path); + self.invalidate(&parent).await; + } + + /// Clear all entries from the cache + pub async fn clear(&self) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + inner.cache.clear(); + } + + /// Get cache statistics + pub async fn stats(&self) -> CacheStats { + let inner = self.inner.read().await; + + CacheStats { + size: inner.cache.len(), + hit_count: inner.hit_count, + miss_count: inner.miss_count, + enabled: inner.enabled, + } + } +} + +/// Cache statistics +#[derive(Debug, Clone)] +pub struct CacheStats { + /// Number of entries in cache + pub size: usize, + + /// Total cache hits + pub hit_count: u64, + + /// Total cache misses + pub miss_count: u64, + + /// Whether cache is enabled + pub enabled: bool, +} + +impl CacheStats { + /// Calculate hit rate + pub fn hit_rate(&self) -> f64 { + let total = self.hit_count + self.miss_count; + if total == 0 { + 0.0 + } else { + (self.hit_count as f64) / (total as f64) + } + } +} + +/// Get parent directory path +fn parent_path(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + // Remove trailing slash + let trimmed = path.trim_end_matches('/'); + if trimmed.is_empty() { + return "/".to_string(); + } + + // Find last slash + if let Some(pos) = trimmed.rfind('/') { + if pos == 0 { + return "/".to_string(); + } + return trimmed[..pos].to_string(); + } + + "/".to_string() +} + +/// Check if a path is a descendant of a parent path +fn is_descendant(path: &str, parent: &str) -> bool { + // A path is not a descendant of itself + if path == parent { + return false; + } + + // Special case for root: everything is a descendant except root itself + if parent == "/" { + return path != "/"; + } + + // Check if path starts with parent + "/" + if path.len() <= parent.len() { + return false; + } + + &path[..parent.len()] == parent && path.as_bytes()[parent.len()] == b'/' +} + +#[cfg(test)] 
+mod tests { + use super::*; + + #[tokio::test] + async fn test_cache_basic() { + let cache = ListDirCache::new(10, 5, true); + + // Put and get + let files = vec![FileInfo::new_file("test.txt".to_string(), 100, 0o644)]; + cache.put("/test".to_string(), files.clone()).await; + + let retrieved = cache.get("/test").await; + assert!(retrieved.is_some()); + assert_eq!(retrieved.unwrap().len(), 1); + + // Invalidate + cache.invalidate("/test").await; + assert!(cache.get("/test").await.is_none()); + } + + #[tokio::test] + async fn test_cache_invalidate_prefix() { + let cache = ListDirCache::new(100, 5, true); + + // Populate cache + cache.put("/a".to_string(), vec![]).await; + cache.put("/a/b".to_string(), vec![]).await; + cache.put("/a/b/c".to_string(), vec![]).await; + cache.put("/d".to_string(), vec![]).await; + + // Invalidate prefix /a + cache.invalidate_prefix("/a").await; + + // /a and descendants should be gone + assert!(cache.get("/a").await.is_none()); + assert!(cache.get("/a/b").await.is_none()); + assert!(cache.get("/a/b/c").await.is_none()); + + // /d should still exist + assert!(cache.get("/d").await.is_some()); + } + + #[tokio::test] + async fn test_cache_lru() { + let cache = ListDirCache::new(3, 5, true); + + cache.put("a".to_string(), vec![]).await; + cache.put("b".to_string(), vec![]).await; + cache.put("c".to_string(), vec![]).await; + + // Access 'a' to make it most recently used + cache.get("a").await; + + // Add 'd', should evict 'b' (least recently used) + cache.put("d".to_string(), vec![]).await; + + assert!(cache.get("a").await.is_some()); + assert!(cache.get("c").await.is_some()); + assert!(cache.get("d").await.is_some()); + assert!(cache.get("b").await.is_none()); + } + + #[test] + fn test_is_descendant() { + assert!(!is_descendant("/a", "/a")); + assert!(is_descendant("/a/b", "/a")); + assert!(is_descendant("/a/b/c", "/a")); + assert!(!is_descendant("/ab/c", "/a")); + assert!(!is_descendant("/b", "/a")); + + // Root special case + 
assert!(!is_descendant("/", "/")); + assert!(is_descendant("/a", "/")); + assert!(is_descendant("/a/b", "/")); + } + + #[test] + fn test_parent_path() { + assert_eq!(parent_path("/"), "/"); + assert_eq!(parent_path("/file.txt"), "/"); + assert_eq!(parent_path("/dir/"), "/"); + assert_eq!(parent_path("/dir/file.txt"), "/dir"); + assert_eq!(parent_path("/a/b/c/file.txt"), "/a/b/c"); + } +} diff --git a/crates/ragfs/src/plugins/sqlfs/mod.rs b/crates/ragfs/src/plugins/sqlfs/mod.rs new file mode 100644 index 000000000..6639908f6 --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/mod.rs @@ -0,0 +1,865 @@ +//! SQLFS - Database-backed File System +//! +//! This module provides a persistent file system implementation backed by +//! SQLite or MySQL/TiDB. Features include: +//! +//! - Persistent storage (survives server restarts) +//! - ACID transactions +//! - LRU cache for directory listings +//! - Multiple database backends +//! - Maximum file size limit (5MB) + +pub mod backend; +pub mod cache; + +use async_trait::async_trait; +use backend::{create_backend, DatabaseBackend, MAX_FILE_SIZE, MAX_FILE_SIZE_MB}; +use cache::ListDirCache; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// SQLFS - Database-backed file system +pub struct SQLFileSystem { + backend: Arc>>, + cache: ListDirCache, +} + +impl SQLFileSystem { + /// Create a new SQLFS instance + /// + /// # Arguments + /// * `config` - Plugin configuration containing database connection parameters + pub fn new(config: &PluginConfig) -> Result { + // Create database backend (schema init and optimizations happen inside) + let backend = create_backend(&config.params)?; + + tracing::info!( + "SQLFS backend created: {}", + backend.driver_name(), + ); + + // Create cache from config + let cache_enabled = config + .params + .get("cache_enabled") + .and_then(|v| 
v.as_bool()) + .unwrap_or(true); + + let cache_max_size = config + .params + .get("cache_max_size") + .and_then(|v| v.as_int()) + .unwrap_or(1000) as usize; + + let cache_ttl = config + .params + .get("cache_ttl_seconds") + .and_then(|v| v.as_int()) + .unwrap_or(5); + + let cache = ListDirCache::new(cache_max_size, cache_ttl as u64, cache_enabled); + + tracing::info!( + "SQLFS initialized with backend: {}, cache: {} (max_size: {}, ttl: {}s)", + backend.driver_name(), + cache_enabled, + cache_max_size, + cache_ttl + ); + + Ok(Self { + backend: Arc::new(RwLock::new(backend)), + cache, + }) + } + + /// Normalize path to ensure consistent format + fn normalize_path(path: &str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + // Ensure starts with / + let mut result = if path.starts_with('/') { + path.to_string() + } else { + format!("/{}", path) + }; + + // Remove trailing slash (except for root) + if result.len() > 1 && result.ends_with('/') { + result.pop(); + } + + // Collapse double slashes + while result.contains("//") { + result = result.replace("//", "/"); + } + + result + } + + /// Get file name from full path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + let normalized = Self::normalize_path(path); + normalized + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } +} + +impl Default for SQLFileSystem { + fn default() -> Self { + // Create with default SQLite in-memory database + let config = PluginConfig { + name: "sqlfs".to_string(), + mount_path: "/sqlfs".to_string(), + params: HashMap::new(), + }; + + Self::new(&config).expect("Failed to create default SQLFS") + } +} + +#[async_trait] +impl FileSystem for SQLFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + // Check parent directory exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + 
match backend.is_directory(&parent)? { + true => {} + false => { + if backend.path_exists(&parent)? { + return Err(Error::NotADirectory(parent)); + } + return Err(Error::not_found(&parent)); + } + } + } + + // Check if file already exists + if backend.path_exists(&normalized)? { + return Err(Error::already_exists(&normalized)); + } + + // Create empty file + backend.create_file(&normalized, 0o644, &[])?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + + Ok(()) + } + + async fn mkdir(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + // Check parent directory exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + match backend.is_directory(&parent)? { + true => {} + false => { + if backend.path_exists(&parent)? { + return Err(Error::NotADirectory(parent)); + } + return Err(Error::not_found(&parent)); + } + } + } + + // Check if directory already exists + if backend.path_exists(&normalized)? { + return Err(Error::already_exists(&normalized)); + } + + // Create directory + let mode_to_use = if mode == 0 { 0o755 } else { mode }; + backend.create_directory(&normalized, mode_to_use)?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + return Err(Error::invalid_operation("cannot remove root directory")); + } + + let backend = self.backend.read().await; + + // Check if exists + if !backend.path_exists(&normalized)? { + return Err(Error::not_found(&normalized)); + } + + // Check if it's a directory + if backend.is_directory(&normalized)? 
{ + // Check if directory is empty + let pattern = format!("{}/%", normalized); + let child_count = backend.count_by_pattern(&pattern)?; + if child_count > 0 { + return Err(Error::DirectoryNotEmpty(normalized)); + } + } + + // Delete entry + backend.delete_entry(&normalized)?; + + // Invalidate caches + self.cache.invalidate_parent(&normalized).await; + self.cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + const BATCH_SIZE: usize = 1000; + + if normalized == "/" { + // Delete all children except root + loop { + let deleted = backend.delete_entries_by_pattern("/%", Some("/"))?; + if deleted == 0 || deleted < BATCH_SIZE { + break; + } + } + self.cache.invalidate_prefix("/").await; + return Ok(()); + } + + // Delete path and all children + loop { + let pattern = format!("{}/%", normalized); + let deleted = backend.delete_entries_by_pattern(&pattern, None)?; + if deleted == 0 || deleted < BATCH_SIZE { + break; + } + } + + // Delete the entry itself + backend.delete_entry(&normalized)?; + + // Invalidate caches + self.cache.invalidate_parent(&normalized).await; + self.cache.invalidate_prefix(&normalized).await; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.read_file(&normalized)? 
{ + Some((is_dir, data)) => { + if is_dir { + return Err(Error::IsADirectory(normalized)); + } + + // Apply offset and size + let data_len = data.len(); + let offset = offset as usize; + + if offset >= data_len { + return Ok(Vec::new()); + } + + let end = if size == 0 { + data_len + } else { + std::cmp::min(offset + size as usize, data_len) + }; + + Ok(data[offset..end].to_vec()) + } + None => Err(Error::not_found(&normalized)), + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + + // Check file size limit + if data.len() > MAX_FILE_SIZE { + return Err(Error::invalid_operation(format!( + "file size exceeds maximum limit of {}MB (got {} bytes)", + MAX_FILE_SIZE_MB, + data.len() + ))); + } + + // SQLFS doesn't support offset writes (like object store) + if offset > 0 { + return Err(Error::invalid_operation( + "SQLFS does not support offset writes", + )); + } + + let backend = self.backend.read().await; + + let exists = backend.path_exists(&normalized)?; + + if exists { + // Check if it's a directory + if backend.is_directory(&normalized)? { + return Err(Error::IsADirectory(normalized)); + } + + // Update existing file + backend.update_file(&normalized, data)?; + } else { + // Create new file + if !matches!(flags, WriteFlag::Create) { + return Err(Error::not_found(&normalized)); + } + + // Check parent exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + if !backend.is_directory(&parent)? 
{ + return Err(Error::not_found(&parent)); + } + } + + backend.create_file(&normalized, 0o644, data)?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + } + + Ok(data.len() as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + + // Try cache first + if let Some(files) = self.cache.get(&normalized).await { + return Ok(files); + } + + let backend = self.backend.read().await; + + // Check if directory exists + if !backend.path_exists(&normalized)? { + return Err(Error::not_found(&normalized)); + } + + if !backend.is_directory(&normalized)? { + return Err(Error::NotADirectory(normalized)); + } + + // List directory + let entries = backend.list_directory(&normalized)?; + + // Convert to FileInfo + let mut files = Vec::new(); + for entry in entries { + files.push(FileInfo { + name: Self::file_name(&entry.path), + size: entry.size as u64, + mode: entry.mode, + mod_time: std::time::UNIX_EPOCH + .checked_add(std::time::Duration::from_secs(entry.mod_time as u64)) + .unwrap_or(std::time::UNIX_EPOCH), + is_dir: entry.is_dir, + }); + } + + // Cache the result + self.cache.put(normalized.clone(), files.clone()).await; + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.get_metadata(&normalized)? 
{ + Some(meta) => Ok(FileInfo { + name: Self::file_name(&normalized), + size: meta.size as u64, + mode: meta.mode, + mod_time: std::time::UNIX_EPOCH + .checked_add(std::time::Duration::from_secs(meta.mod_time as u64)) + .unwrap_or(std::time::UNIX_EPOCH), + is_dir: meta.is_dir, + }), + None => Err(Error::not_found(&normalized)), + } + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_normalized = Self::normalize_path(old_path); + let new_normalized = Self::normalize_path(new_path); + + if old_normalized == "/" || new_normalized == "/" { + return Err(Error::invalid_operation("cannot rename root directory")); + } + + let backend = self.backend.read().await; + + // Check old path exists + if !backend.path_exists(&old_normalized)? { + return Err(Error::not_found(&old_normalized)); + } + + // Check new path doesn't exist + if backend.path_exists(&new_normalized)? { + return Err(Error::already_exists(&new_normalized)); + } + + // Check new parent exists + let new_parent = backend.parent_path(&new_normalized); + if new_parent != "/" { + if !backend.is_directory(&new_parent)? { + return Err(Error::not_found(&new_parent)); + } + } + + // Rename entry + backend.rename_path(&old_normalized, &new_normalized)?; + + // If it's a directory, rename children + if backend.is_directory(&new_normalized)? { + backend.rename_children(&old_normalized, &new_normalized)?; + } + + // Invalidate caches + self.cache.invalidate_parent(&old_normalized).await; + self.cache.invalidate_parent(&new_normalized).await; + self.cache.invalidate(&old_normalized).await; + self.cache.invalidate_prefix(&old_normalized).await; + + Ok(()) + } + + async fn chmod(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + if !backend.path_exists(&normalized)? 
{ + return Err(Error::not_found(&normalized)); + } + + backend.update_mode(&normalized, mode)?; + Ok(()) + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.read_file(&normalized)? { + Some((is_dir, mut data)) => { + if is_dir { + return Err(Error::IsADirectory(normalized)); + } + + data.resize(size as usize, 0); + backend.update_file(&normalized, &data)?; + Ok(()) + } + None => Err(Error::not_found(&normalized)), + } + } +} + +/// SQLFS Plugin +pub struct SQLFSPlugin { + config_params: Vec, +} + +impl SQLFSPlugin { + /// Create a new SQLFSPlugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter::optional( + "backend", + "string", + "sqlite", + "Database backend (sqlite, mysql, tidb)", + ), + ConfigParameter::optional( + "db_path", + "string", + ":memory:", + "Database file path (SQLite only)", + ), + ConfigParameter::optional( + "host", + "string", + "127.0.0.1", + "Database host (MySQL/TiDB)", + ), + ConfigParameter::optional("port", "int", "3306", "Database port (MySQL/TiDB)"), + ConfigParameter::optional( + "user", + "string", + "root", + "Database user (MySQL/TiDB)", + ), + ConfigParameter::optional( + "password", + "string", + "", + "Database password (MySQL/TiDB)", + ), + ConfigParameter::optional( + "database", + "string", + "sqlfs", + "Database name (MySQL/TiDB)", + ), + ConfigParameter::optional( + "cache_enabled", + "bool", + "true", + "Enable directory listing cache", + ), + ConfigParameter::optional( + "cache_max_size", + "int", + "1000", + "Maximum cache entries", + ), + ConfigParameter::optional( + "cache_ttl_seconds", + "int", + "5", + "Cache TTL in seconds", + ), + ], + } + } +} + +impl Default for SQLFSPlugin { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ServicePlugin for SQLFSPlugin { + fn name(&self) -> &str { + "sqlfs" + } + + fn version(&self) -> &str { + "0.1.0" + } 
+ + fn description(&self) -> &str { + "Database-backed file system with SQLite and MySQL/TiDB support" + } + + fn readme(&self) -> &str { + r#"# SQLFS - Database-backed File System + +A persistent file system backed by SQLite or MySQL/TiDB. + +## Features + +- Persistent storage (survives server restarts) +- Full POSIX-like file system operations +- Multiple database backends (SQLite, MySQL, TiDB) +- ACID transactions +- LRU cache for directory listings +- Maximum file size: 5MB + +## Configuration + +### SQLite Backend (Local Testing) +```yaml +plugins: + sqlfs: + enabled: true + path: /sqlfs + config: + backend: sqlite + db_path: sqlfs.db + cache_enabled: true + cache_max_size: 1000 + cache_ttl_seconds: 5 +``` + +### MySQL/TiDB Backend +```yaml +plugins: + sqlfs: + enabled: true + path: /sqlfs + config: + backend: mysql + host: localhost + port: 3306 + user: root + password: password + database: sqlfs + cache_enabled: true +``` + +## Usage + +Create a directory: +``` +agfs mkdir /sqlfs/mydir +``` + +Write a file: +``` +echo "Hello, World!" 
| agfs write /sqlfs/mydir/file.txt +``` + +Read a file: +``` +agfs cat /sqlfs/mydir/file.txt +``` + +List directory: +``` +agfs ls /sqlfs/mydir +``` + +## Notes + +- SQLFS does not support offset writes (like object store) +- Maximum file size is 5MB per file +- Use MemFS or StreamFS for larger files +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // Validate backend type + let backend = config + .params + .get("backend") + .and_then(|v| v.as_string()) + .unwrap_or("sqlite"); + + let valid_backends = ["sqlite", "sqlite3", "mysql", "tidb"]; + if !valid_backends.contains(&backend) { + return Err(Error::config(format!( + "unsupported backend: {} (valid: {})", + backend, + valid_backends.join(", ") + ))); + } + + // Validate cache settings if provided + if let Some(v) = config.params.get("cache_enabled") { + v.as_bool() + .ok_or_else(|| Error::config("cache_enabled must be a boolean"))?; + } + + if let Some(v) = config.params.get("cache_max_size") { + v.as_int() + .ok_or_else(|| Error::config("cache_max_size must be an integer"))?; + } + + if let Some(v) = config.params.get("cache_ttl_seconds") { + v.as_int() + .ok_or_else(|| Error::config("cache_ttl_seconds must be an integer"))?; + } + + Ok(()) + } + + async fn initialize(&self, config: PluginConfig) -> Result> { + let fs = SQLFileSystem::new(&config)?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_sqlfs_basic() { + let config = PluginConfig { + name: "sqlfs".to_string(), + mount_path: "/sqlfs".to_string(), + params: std::collections::HashMap::new(), + }; + + let plugin = SQLFSPlugin::new(); + assert!(plugin.validate(&config).await.is_ok()); + + let fs = plugin.initialize(config).await.unwrap(); + + // Create and write + fs.write("/test.txt", b"hello", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read + let data = fs.read("/test.txt", 0, 
0).await.unwrap(); + assert_eq!(data, b"hello"); + + // Stat + let info = fs.stat("/test.txt").await.unwrap(); + assert_eq!(info.size, 5); + assert!(!info.is_dir); + } + + #[tokio::test] + async fn test_sqlfs_directories() { + let fs = SQLFileSystem::default(); + + // Create directory + fs.mkdir("/testdir", 0o755).await.unwrap(); + + // Create file in directory + fs.write("/testdir/file.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + // List directory + let entries = fs.read_dir("/testdir").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "file.txt"); + + // Cannot remove non-empty directory + assert!(fs.remove("/testdir").await.is_err()); + + // Can remove with remove_all + fs.remove_all("/testdir").await.unwrap(); + assert!(fs.stat("/testdir").await.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_rename() { + let fs = SQLFileSystem::default(); + + fs.write("/old.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.rename("/old.txt", "/new.txt").await.unwrap(); + + assert!(fs.stat("/old.txt").await.is_err()); + let data = fs.read("/new.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_sqlfs_truncate() { + let fs = SQLFileSystem::default(); + + fs.write("/trunc.txt", b"hello world", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.truncate("/trunc.txt", 5).await.unwrap(); + + let data = fs.read("/trunc.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"hello"); + } + + #[tokio::test] + async fn test_sqlfs_file_size_limit() { + let fs = SQLFileSystem::default(); + + // Create data larger than MAX_FILE_SIZE + let big_data = vec![0u8; MAX_FILE_SIZE + 1]; + + let result = fs.write("/big.txt", &big_data, 0, WriteFlag::Create).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_offset_write_rejected() { + let fs = SQLFileSystem::default(); + + let result = fs.write("/test.txt", b"data", 10, WriteFlag::Create).await; + 
assert!(result.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_nested_directories() { + let fs = SQLFileSystem::default(); + + fs.mkdir("/a", 0o755).await.unwrap(); + fs.mkdir("/a/b", 0o755).await.unwrap(); + fs.write("/a/b/file.txt", b"nested", 0, WriteFlag::Create) + .await + .unwrap(); + + // List /a should only show /a/b + let entries = fs.read_dir("/a").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "b"); + assert!(entries[0].is_dir); + + // Read nested file + let data = fs.read("/a/b/file.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"nested"); + } + + #[tokio::test] + async fn test_sqlfs_read_with_offset_and_size() { + let fs = SQLFileSystem::default(); + + fs.write("/range.txt", b"hello world", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read with offset + let data = fs.read("/range.txt", 6, 0).await.unwrap(); + assert_eq!(data, b"world"); + + // Read with offset and size + let data = fs.read("/range.txt", 0, 5).await.unwrap(); + assert_eq!(data, b"hello"); + + // Read beyond end + let data = fs.read("/range.txt", 100, 0).await.unwrap(); + assert!(data.is_empty()); + } + + #[tokio::test] + async fn test_sqlfs_chmod() { + let fs = SQLFileSystem::default(); + + fs.write("/perm.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.chmod("/perm.txt", 0o600).await.unwrap(); + + let info = fs.stat("/perm.txt").await.unwrap(); + assert_eq!(info.mode, 0o600); + } +} diff --git a/crates/ragfs/src/server/config.rs b/crates/ragfs/src/server/config.rs new file mode 100644 index 000000000..f8aea2dda --- /dev/null +++ b/crates/ragfs/src/server/config.rs @@ -0,0 +1,125 @@ +//! Server configuration module +//! +//! This module handles server configuration including address binding, +//! logging levels, and other runtime settings. 
+ +use clap::Parser; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddr; + +/// Server configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ServerConfig { + /// Server bind address + pub address: String, + + /// Log level (trace, debug, info, warn, error) + pub log_level: String, + + /// Enable CORS + pub enable_cors: bool, +} + +impl Default for ServerConfig { + fn default() -> Self { + Self { + address: "0.0.0.0:8080".to_string(), + log_level: "info".to_string(), + enable_cors: true, + } + } +} + +impl ServerConfig { + /// Parse server address into SocketAddr + pub fn socket_addr(&self) -> Result { + self.address.parse().map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid address '{}': {}", self.address, e), + ) + }) + } +} + +/// Command-line arguments +#[derive(Debug, Parser)] +#[command(name = "ragfs-server")] +#[command(about = "RAGFS HTTP Server", long_about = None)] +pub struct Args { + /// Server bind address + #[arg(short, long, default_value = "0.0.0.0:8080", env = "RAGFS_ADDRESS")] + pub address: String, + + /// Log level + #[arg(short, long, default_value = "info", env = "RAGFS_LOG_LEVEL")] + pub log_level: String, + + /// Configuration file path (optional) + #[arg(short, long, env = "RAGFS_CONFIG")] + pub config: Option, + + /// Enable CORS + #[arg(long, default_value = "true", env = "RAGFS_ENABLE_CORS")] + pub enable_cors: bool, +} + +impl Args { + /// Convert Args to ServerConfig + pub fn to_config(&self) -> ServerConfig { + ServerConfig { + address: self.address.clone(), + log_level: self.log_level.clone(), + enable_cors: self.enable_cors, + } + } + + /// Load configuration from file if specified, otherwise use CLI args + pub fn load_config(&self) -> Result> { + if let Some(config_path) = &self.config { + // Load from YAML file + let content = std::fs::read_to_string(config_path)?; + let config: ServerConfig = serde_yaml::from_str(&content)?; + Ok(config) + } else { + // Use 
CLI args + Ok(self.to_config()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = ServerConfig::default(); + assert_eq!(config.address, "0.0.0.0:8080"); + assert_eq!(config.log_level, "info"); + assert!(config.enable_cors); + } + + #[test] + fn test_socket_addr_parsing() { + let config = ServerConfig { + address: "127.0.0.1:3000".to_string(), + log_level: "debug".to_string(), + enable_cors: false, + }; + + let addr = config.socket_addr().unwrap(); + assert_eq!(addr.port(), 3000); + } + + #[test] + fn test_invalid_socket_addr() { + let config = ServerConfig { + address: "invalid".to_string(), + log_level: "info".to_string(), + enable_cors: true, + }; + + assert!(config.socket_addr().is_err()); + } +} diff --git a/crates/ragfs/src/server/handlers.rs b/crates/ragfs/src/server/handlers.rs new file mode 100644 index 000000000..a64e16f7b --- /dev/null +++ b/crates/ragfs/src/server/handlers.rs @@ -0,0 +1,359 @@ +//! HTTP handlers for RAGFS API +//! +//! This module implements all HTTP request handlers for the RAGFS REST API. 
+ +use axum::{ + extract::{Query, State}, + http::StatusCode, + response::{IntoResponse, Response}, + Json, +}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +use crate::core::{FileSystem, MountableFS, PluginConfig, WriteFlag}; + +/// Shared application state +#[derive(Clone)] +pub struct AppState { + /// The mounted filesystem + pub fs: Arc, +} + +/// Standard API response +#[derive(Debug, Serialize)] +pub struct ApiResponse { + /// Whether the operation succeeded + pub success: bool, + /// Response data (if successful) + #[serde(skip_serializing_if = "Option::is_none")] + pub data: Option, + /// Error message (if failed) + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +impl ApiResponse { + /// Create a successful response + pub fn success(data: T) -> Self { + Self { + success: true, + data: Some(data), + error: None, + } + } + + /// Create an error response + pub fn error(message: impl Into) -> ApiResponse<()> { + ApiResponse { + success: false, + data: None, + error: Some(message.into()), + } + } +} + +/// Query parameters for file operations +#[derive(Debug, Deserialize)] +pub struct FileQuery { + /// File path + pub path: String, + /// Read offset in bytes + #[serde(default)] + pub offset: u64, + /// Number of bytes to read (0 = all) + #[serde(default)] + pub size: u64, +} + +/// Query parameters for directory operations +#[derive(Debug, Deserialize)] +pub struct DirQuery { + /// Directory path + pub path: String, +} + +/// Request body for mount operation +#[derive(Debug, Deserialize)] +pub struct MountRequest { + /// Plugin name + pub plugin: String, + /// Mount path + pub path: String, + /// Plugin configuration parameters + #[serde(default)] + pub params: std::collections::HashMap, +} + +/// Request body for unmount operation +#[derive(Debug, Deserialize)] +pub struct UnmountRequest { + /// Mount path to unmount + pub path: String, +} + +/// Health check response +#[derive(Debug, Serialize)] +pub struct 
HealthResponse { + /// Health status + pub status: String, + /// Server version + pub version: String, +} + +/// Mount info response +#[derive(Debug, Serialize)] +pub struct MountInfo { + /// Mount path + pub path: String, + /// Plugin name + pub plugin: String, +} + +// ============================================================================ +// File Operations Handlers +// ============================================================================ + +/// GET /api/v1/files - Read file +pub async fn read_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.read(&query.path, query.offset, query.size).await { + Ok(data) => (StatusCode::OK, data).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// PUT /api/v1/files - Write file +pub async fn write_file( + State(state): State, + Query(query): Query, + body: bytes::Bytes, +) -> Response { + match state + .fs + .write(&query.path, &body, query.offset, WriteFlag::None) + .await + { + Ok(written) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "bytes_written": written + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/files - Create file +pub async fn create_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.create(&query.path).await { + Ok(_) => ( + StatusCode::CREATED, + Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// DELETE /api/v1/files - Delete file +pub async fn delete_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.remove(&query.path).await { + Ok(_) => ( + StatusCode::OK, + 
Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// GET /api/v1/stat - Get file metadata +pub async fn stat_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.stat(&query.path).await { + Ok(info) => (StatusCode::OK, Json(ApiResponse::success(info))).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// ============================================================================ +// Directory Operations Handlers +// ============================================================================ + +/// GET /api/v1/directories - List directory +pub async fn list_directory( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.read_dir(&query.path).await { + Ok(entries) => (StatusCode::OK, Json(ApiResponse::success(entries))).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/directories - Create directory +pub async fn create_directory( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.mkdir(&query.path, 0o755).await { + Ok(_) => ( + StatusCode::CREATED, + Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// ============================================================================ +// Mount Management Handlers +// ============================================================================ + +/// GET /api/v1/mounts - List all mounts +pub async fn list_mounts(State(state): State) -> Response { + let mounts = 
state.fs.list_mounts().await; + let mount_infos: Vec = mounts + .into_iter() + .map(|(path, plugin)| MountInfo { path, plugin }) + .collect(); + + (StatusCode::OK, Json(ApiResponse::success(mount_infos))).into_response() +} + +/// POST /api/v1/mount - Mount a filesystem +pub async fn mount_filesystem( + State(state): State, + Json(req): Json, +) -> Response { + // Convert JSON params to ConfigValue + let params = req + .params + .into_iter() + .map(|(k, v)| { + let config_value = match v { + serde_json::Value::String(s) => crate::core::ConfigValue::String(s), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + crate::core::ConfigValue::Int(i) + } else { + crate::core::ConfigValue::String(n.to_string()) + } + } + serde_json::Value::Bool(b) => crate::core::ConfigValue::Bool(b), + serde_json::Value::Array(arr) => { + let strings: Vec = arr + .into_iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(); + crate::core::ConfigValue::StringList(strings) + } + _ => crate::core::ConfigValue::String(v.to_string()), + }; + (k, config_value) + }) + .collect(); + + let config = PluginConfig { + name: req.plugin.clone(), + mount_path: req.path.clone(), + params, + }; + + match state.fs.mount(config).await { + Ok(_) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "plugin": req.plugin, + "path": req.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/unmount - Unmount a filesystem +pub async fn unmount_filesystem( + State(state): State, + Json(req): Json, +) -> Response { + match state.fs.unmount(&req.path).await { + Ok(_) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "path": req.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// 
============================================================================ +// Health Check Handler +// ============================================================================ + +/// GET /api/v1/health - Health check +pub async fn health_check() -> Response { + let response = HealthResponse { + status: "healthy".to_string(), + version: crate::VERSION.to_string(), + }; + + (StatusCode::OK, Json(ApiResponse::success(response))).into_response() +} diff --git a/crates/ragfs/src/server/main.rs b/crates/ragfs/src/server/main.rs new file mode 100644 index 000000000..0b71a4cf4 --- /dev/null +++ b/crates/ragfs/src/server/main.rs @@ -0,0 +1,88 @@ +//! RAGFS Server +//! +//! HTTP server that exposes the RAGFS filesystem through a REST API. + +use clap::Parser; +use ragfs::core::MountableFS; +use ragfs::plugins::{KVFSPlugin, MemFSPlugin, QueueFSPlugin, SQLFSPlugin}; +#[cfg(feature = "s3")] +use ragfs::plugins::S3FSPlugin; +use ragfs::server::{create_router, AppState, Args}; +use std::sync::Arc; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Parse command-line arguments + let args = Args::parse(); + + // Load configuration + let config = args.load_config()?; + + // Initialize tracing/logging + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| config.log_level.clone().into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + tracing::info!("Starting RAGFS Server v{}", ragfs::VERSION); + tracing::info!("Configuration: {:?}", config); + + // Create MountableFS + let fs = Arc::new(MountableFS::new()); + + // Register built-in plugins + tracing::info!("Registering plugins..."); + fs.register_plugin(MemFSPlugin).await; + tracing::info!(" - memfs: In-memory file system"); + fs.register_plugin(KVFSPlugin).await; + tracing::info!(" - kvfs: Key-value file system"); + fs.register_plugin(QueueFSPlugin).await; + 
tracing::info!(" - queuefs: Message queue file system"); + fs.register_plugin(SQLFSPlugin::new()).await; + tracing::info!(" - sqlfs: Database-backed file system (SQLite)"); + #[cfg(feature = "s3")] + { + fs.register_plugin(S3FSPlugin::new()).await; + tracing::info!(" - s3fs: S3-backed file system"); + } + + // Create application state + let state = AppState { fs: fs.clone() }; + + // Create router + let app = create_router(state, config.enable_cors); + + // Parse socket address + let addr = config.socket_addr()?; + + tracing::info!("Server listening on {}", addr); + tracing::info!("API endpoints:"); + tracing::info!(" GET /api/v1/health"); + tracing::info!(" GET /api/v1/files?path="); + tracing::info!(" PUT /api/v1/files?path="); + tracing::info!(" POST /api/v1/files?path="); + tracing::info!(" DELETE /api/v1/files?path="); + tracing::info!(" GET /api/v1/stat?path="); + tracing::info!(" GET /api/v1/directories?path="); + tracing::info!(" POST /api/v1/directories?path="); + tracing::info!(" GET /api/v1/mounts"); + tracing::info!(" POST /api/v1/mount"); + tracing::info!(" POST /api/v1/unmount"); + tracing::info!(""); + tracing::info!("Example: Mount MemFS"); + tracing::info!(" curl -X POST http://{}//api/v1/mount \\", addr); + tracing::info!(" -H 'Content-Type: application/json' \\"); + tracing::info!(" -d '{{\"plugin\": \"memfs\", \"path\": \"/memfs\"}}'"); + + // Create TCP listener + let listener = tokio::net::TcpListener::bind(addr).await?; + + // Start server + axum::serve(listener, app).await?; + + Ok(()) +} diff --git a/crates/ragfs/src/server/mod.rs b/crates/ragfs/src/server/mod.rs new file mode 100644 index 000000000..832c4a5a2 --- /dev/null +++ b/crates/ragfs/src/server/mod.rs @@ -0,0 +1,9 @@ +//! 
Server module for RAGFS HTTP API + +pub mod config; +pub mod handlers; +pub mod router; + +pub use config::{Args, ServerConfig}; +pub use handlers::AppState; +pub use router::create_router; diff --git a/crates/ragfs/src/server/router.rs b/crates/ragfs/src/server/router.rs new file mode 100644 index 000000000..2d140dde8 --- /dev/null +++ b/crates/ragfs/src/server/router.rs @@ -0,0 +1,73 @@ +//! Router configuration for RAGFS HTTP server +//! +//! This module sets up all the routes and middleware for the API. + +use axum::{ + routing::{delete, get, post, put}, + Router, +}; +use tower_http::{ + cors::CorsLayer, + trace::{DefaultMakeSpan, DefaultOnResponse, TraceLayer}, +}; +use tracing::Level; + +use super::handlers::{ + create_directory, create_file, delete_file, health_check, list_directory, list_mounts, + mount_filesystem, read_file, stat_file, unmount_filesystem, write_file, AppState, +}; + +/// Create the main application router +pub fn create_router(state: AppState, enable_cors: bool) -> Router { + let api_routes = Router::new() + // File operations + .route("/files", get(read_file)) + .route("/files", put(write_file)) + .route("/files", post(create_file)) + .route("/files", delete(delete_file)) + .route("/stat", get(stat_file)) + // Directory operations + .route("/directories", get(list_directory)) + .route("/directories", post(create_directory)) + // Mount management + .route("/mounts", get(list_mounts)) + .route("/mount", post(mount_filesystem)) + .route("/unmount", post(unmount_filesystem)) + // Health check + .route("/health", get(health_check)); + + let app = Router::new() + .nest("/api/v1", api_routes) + .with_state(state); + + // Add tracing middleware + let app = app.layer( + TraceLayer::new_for_http() + .make_span_with(DefaultMakeSpan::new().level(Level::INFO)) + .on_response(DefaultOnResponse::new().level(Level::INFO)), + ); + + // Add CORS if enabled + if enable_cors { + app.layer(CorsLayer::permissive()) + } else { + app + } +} + +#[cfg(test)] +mod 
tests { + use super::*; + use crate::core::MountableFS; + use std::sync::Arc; + + #[test] + fn test_router_creation() { + let state = AppState { + fs: Arc::new(MountableFS::new()), + }; + + let _router = create_router(state, true); + // If this compiles and runs, the router is correctly configured + } +} diff --git a/crates/ragfs/src/shell/main.rs b/crates/ragfs/src/shell/main.rs new file mode 100644 index 000000000..a40c5be02 --- /dev/null +++ b/crates/ragfs/src/shell/main.rs @@ -0,0 +1,8 @@ +//! RAGFS Shell +//! +//! Interactive command-line shell for RAGFS. + +fn main() { + println!("RAGFS Shell - Coming soon!"); + println!("This will be implemented in Phase 9 of the migration plan."); +} diff --git a/openviking/pyagfs/__init__.py b/openviking/pyagfs/__init__.py index 7d8b48fc0..a74e2b6c9 100644 --- a/openviking/pyagfs/__init__.py +++ b/openviking/pyagfs/__init__.py @@ -12,15 +12,19 @@ ) from .helpers import cp, download, upload -# Binding client depends on a native shared library (libagfsbinding.so/dylib/dll). -# Make it optional so the pure-HTTP AGFSClient remains usable when the native -# library is not installed (e.g. Docker images without CGO build). +# Binding client: try Rust native (ragfs-python via PyO3) first, +# then fall back to Go ctypes binding (libagfsbinding.so/dylib/dll). 
try: - from .binding_client import AGFSBindingClient - from .binding_client import FileHandle as BindingFileHandle -except (ImportError, OSError): - AGFSBindingClient = None - BindingFileHandle = None + from ragfs_python import RAGFSBindingClient as AGFSBindingClient + + BindingFileHandle = None # FileHandle not yet implemented in ragfs-python +except ImportError: + try: + from .binding_client import AGFSBindingClient + from .binding_client import FileHandle as BindingFileHandle + except (ImportError, OSError): + AGFSBindingClient = None + BindingFileHandle = None __all__ = [ "AGFSClient", From d619155082e88a2905c4367f1855da69731a20dd Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 15:32:10 +0800 Subject: [PATCH 02/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- crates/ragfs/MIGRATION_PLAN.md | 15 ++- openviking/pyagfs/__init__.py | 107 ++++++++++++++++++--- openviking/utils/agfs_utils.py | 53 ++++++---- openviking_cli/utils/config/agfs_config.py | 12 +++ 4 files changed, 156 insertions(+), 31 deletions(-) diff --git a/crates/ragfs/MIGRATION_PLAN.md b/crates/ragfs/MIGRATION_PLAN.md index ad7cab553..dd3fbfb81 100644 --- a/crates/ragfs/MIGRATION_PLAN.md +++ b/crates/ragfs/MIGRATION_PLAN.md @@ -909,13 +909,23 @@ cd third_party/agfs/agfs-server && go run ./cmd/server --port 8080 --- -### Milestone 1.0: 功能完整 (8 周) +### Milestone 1.0: 功能完整 (8 周) 🔄 进行中 **目标**: 功能与 Go 版本对等 -- [ ] 提供 python wrapper,用于 OpenViking 内联集成 +- [x] 提供 Python wrapper (ragfs-python),用于 OpenViking 内联集成 - [ ] 支持切换和功能回滚,将默认实现切换为 Rust 版本 +**当前进展**: +- ragfs-python crate 已完成 (crates/ragfs-python/): PyO3 native binding +- RAGFSBindingClient 类,API 兼容 Go AGFSBindingClient +- 支持所有核心操作: ls/read/write/create/mkdir/rm/stat/mv/chmod/touch +- 支持 mount/unmount/mounts 插件管理 +- 所有内置插件可用: memfs, kvfs, queuefs, sqlfs +- maturin develop 构建集成 +- openviking/pyagfs/__init__.py 已更新: Rust 优先 -> Go fallback +- Python 端到端测试全部通过 (memfs + sqlfs + kvfs + queuefs) + --- ## 参考资源 @@ -947,6 
+957,7 @@ cd third_party/agfs/agfs-server && go run ./cmd/server --port 8080 |------|------|---------| | 2026-04-03 | v1.0 | 初始计划创建 | | 2026-04-03 | v1.1 | 标注 Milestone 0.1/0.2 完成,阶段 1-6 完成;SQLFS 修复 18 个编译错误并通过所有测试;开始 Milestone 0.3 | +| 2026-04-03 | v1.2 | S3FS 完成并通过 MinIO 端到端验证;ragfs-python PyO3 binding 完成 (Milestone 1.0 开始) | --- diff --git a/openviking/pyagfs/__init__.py b/openviking/pyagfs/__init__.py index a74e2b6c9..a0d2a495b 100644 --- a/openviking/pyagfs/__init__.py +++ b/openviking/pyagfs/__init__.py @@ -2,6 +2,9 @@ __version__ = "0.1.7" +import logging +import os + from .client import AGFSClient, FileHandle from .exceptions import ( AGFSClientError, @@ -12,25 +15,105 @@ ) from .helpers import cp, download, upload -# Binding client: try Rust native (ragfs-python via PyO3) first, -# then fall back to Go ctypes binding (libagfsbinding.so/dylib/dll). -try: - from ragfs_python import RAGFSBindingClient as AGFSBindingClient +_logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Binding implementation selection via RAGFS_IMPL environment variable. +# +# RAGFS_IMPL=auto (default) — Rust first, Go fallback +# RAGFS_IMPL=rust — Rust only, error if unavailable +# RAGFS_IMPL=go — Go only, error if unavailable +# --------------------------------------------------------------------------- + +_RAGFS_IMPL_ENV = os.environ.get("RAGFS_IMPL", "").lower() or None + + +def _load_rust_binding(): + """Attempt to load the Rust (PyO3) binding client.""" + from ragfs_python import RAGFSBindingClient as _Rust + + return _Rust, None # FileHandle not yet implemented in ragfs-python + + +def _load_go_binding(): + """Attempt to load the Go (ctypes) binding client.""" + from .binding_client import AGFSBindingClient as _Go + from .binding_client import FileHandle as _GoFH + + return _Go, _GoFH + + +def _resolve_binding(impl: str): + """Return (AGFSBindingClient, BindingFileHandle) based on *impl*. 
+ + *impl* should be one of ``"auto"``, ``"rust"``, or ``"go"``. + """ + + if impl == "rust": + try: + client, fh = _load_rust_binding() + _logger.info("RAGFS_IMPL=rust: loaded Rust binding") + return client, fh + except ImportError as exc: + raise ImportError( + "RAGFS_IMPL=rust but ragfs_python is not installed: " + str(exc) + ) from exc + + if impl == "go": + try: + client, fh = _load_go_binding() + _logger.info("RAGFS_IMPL=go: loaded Go binding") + return client, fh + except (ImportError, OSError) as exc: + raise ImportError( + "RAGFS_IMPL=go but Go binding (libagfsbinding) is not available: " + str(exc) + ) from exc + + if impl == "auto": + # Rust first, Go fallback, silent None if neither available + try: + client, fh = _load_rust_binding() + _logger.info("RAGFS_IMPL=auto: loaded Rust binding") + return client, fh + except ImportError: + pass + + try: + client, fh = _load_go_binding() + _logger.info("RAGFS_IMPL=auto: Rust unavailable, loaded Go binding") + return client, fh + except (ImportError, OSError): + pass + + _logger.warning( + "RAGFS_IMPL=auto: neither Rust nor Go binding available; AGFSBindingClient will be None" + ) + return None, None + + raise ValueError(f"Invalid RAGFS_IMPL value: '{impl}'. Must be one of: auto, rust, go") + + +def get_binding_client(config_impl: str = "auto"): + """Resolve binding classes with env-var override. 
+ + Priority: ``RAGFS_IMPL`` env var > *config_impl* > ``"auto"`` + + Returns: + ``(AGFSBindingClient_class, BindingFileHandle_class)`` + """ + effective = _RAGFS_IMPL_ENV or config_impl or "auto" + return _resolve_binding(effective) + - BindingFileHandle = None # FileHandle not yet implemented in ragfs-python -except ImportError: - try: - from .binding_client import AGFSBindingClient - from .binding_client import FileHandle as BindingFileHandle - except (ImportError, OSError): - AGFSBindingClient = None - BindingFileHandle = None +# Module-level defaults (used when importing ``from openviking.pyagfs import AGFSBindingClient``) +AGFSBindingClient, BindingFileHandle = _resolve_binding(_RAGFS_IMPL_ENV or "auto") __all__ = [ "AGFSClient", "AGFSBindingClient", "FileHandle", "BindingFileHandle", + "get_binding_client", "AGFSClientError", "AGFSConnectionError", "AGFSTimeoutError", diff --git a/openviking/utils/agfs_utils.py b/openviking/utils/agfs_utils.py index b0415a42d..cf9e1b5d3 100644 --- a/openviking/utils/agfs_utils.py +++ b/openviking/utils/agfs_utils.py @@ -30,7 +30,11 @@ def create_agfs_client(agfs_config: Any) -> Any: if mode == "binding-client": # Import binding client if mode is binding-client - from openviking.pyagfs import AGFSBindingClient + # Use get_binding_client() to respect RAGFS_IMPL env var > config.impl > "auto" + from openviking.pyagfs import get_binding_client + + config_impl = getattr(agfs_config, "impl", "auto") + AGFSBindingClient, _ = get_binding_client(config_impl) if AGFSBindingClient is None: raise ImportError( @@ -39,24 +43,38 @@ def create_agfs_client(agfs_config: Any) -> Any: "to build and install the AGFS SDK with native bindings." 
) - lib_path = getattr(agfs_config, "lib_path", None) - if lib_path and lib_path not in ["1", "default"]: - os.environ["AGFS_LIB_PATH"] = lib_path - else: - os.environ["AGFS_LIB_PATH"] = str(Path(__file__).parent.parent / "lib") - - # Check if binding library exists + # Go ctypes binding needs AGFS_LIB_PATH and a shared library on disk. + # Rust PyO3 binding is compiled into ragfs_python — skip library checks. + actual_lib_path = None try: - from openviking.pyagfs.binding_client import _find_library - - actual_lib_path = _find_library() - except Exception: - raise ImportError( - "AGFS binding library not found. Please run 'pip install -e .' in the project root to build and install the AGFS SDK." + from openviking.pyagfs.binding_client import ( + AGFSBindingClient as _GoBindingClient, ) + is_go_binding = AGFSBindingClient is _GoBindingClient + except (ImportError, OSError): + is_go_binding = False + + if is_go_binding: + lib_path = getattr(agfs_config, "lib_path", None) + if lib_path and lib_path not in ["1", "default"]: + os.environ["AGFS_LIB_PATH"] = lib_path + else: + os.environ["AGFS_LIB_PATH"] = str(Path(__file__).parent.parent / "lib") + + try: + from openviking.pyagfs.binding_client import _find_library + + actual_lib_path = _find_library() + except Exception: + raise ImportError( + "AGFS binding library not found. Please run 'pip install -e .' in the project root to build and install the AGFS SDK." + ) + client = AGFSBindingClient() - logger.info(f"[AGFSUtils] Created AGFSBindingClient (lib_path={actual_lib_path})") + logger.info( + f"[AGFSUtils] Created AGFSBindingClient (impl={config_impl}, lib_path={actual_lib_path})" + ) # Automatically mount backend for binding client mount_agfs_backend(client, agfs_config) @@ -82,10 +100,11 @@ def mount_agfs_backend(agfs: Any, agfs_config: Any) -> None: agfs_config: AGFS configuration object containing backend settings. 
""" from openviking.agfs_manager import AGFSManager - from openviking.pyagfs import AGFSBindingClient # Only binding-client needs manual mounting. HTTP server handles its own mounting. - if AGFSBindingClient is None or not isinstance(agfs, AGFSBindingClient): + # Check for the presence of a `mount` method as the duck-type indicator for + # binding clients (works for both Rust and Go implementations). + if not callable(getattr(agfs, "mount", None)): return # 1. Mount standard plugins to align with HTTP server behavior diff --git a/openviking_cli/utils/config/agfs_config.py b/openviking_cli/utils/config/agfs_config.py index bdbf80dcb..fb02331a1 100644 --- a/openviking_cli/utils/config/agfs_config.py +++ b/openviking_cli/utils/config/agfs_config.py @@ -103,6 +103,13 @@ class AGFSConfig(BaseModel): description="AGFS client mode: 'http-client' | 'binding-client'", ) + impl: str = Field( + default="auto", + description="Binding implementation to use when mode is 'binding-client'. " + "'auto' = Rust first with Go fallback, 'rust' = Rust only, 'go' = Go only. " + "Can be overridden by the RAGFS_IMPL environment variable.", + ) + backend: str = Field( default="local", description="AGFS storage backend: 'local' | 's3' | 'memory'" ) @@ -137,6 +144,11 @@ def validate_config(self): f"Invalid AGFS mode: '{self.mode}'. Must be one of: 'http-client', 'binding-client'" ) + if self.impl not in ["auto", "rust", "go"]: + raise ValueError( + f"Invalid AGFS impl: '{self.impl}'. Must be one of: 'auto', 'rust', 'go'" + ) + if self.backend not in ["local", "s3", "memory"]: raise ValueError( f"Invalid AGFS backend: '{self.backend}'. 
Must be one of: 'local', 's3', 'memory'" From d66ed78fd8e027e50e3a93f721a5ecf414b64ee3 Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:11:40 +0800 Subject: [PATCH 03/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- .github/workflows/_build.yml | 85 ++- Dockerfile | 29 + MANIFEST.in | 4 + Makefile | 33 ++ crates/ragfs-python/src/lib.rs | 8 +- crates/ragfs/src/core/types.rs | 15 +- crates/ragfs/src/plugins/localfs/mod.rs | 464 +++++++++++++++ crates/ragfs/src/plugins/mod.rs | 4 + crates/ragfs/src/plugins/queuefs/backend.rs | 310 ++++++++++ crates/ragfs/src/plugins/queuefs/mod.rs | 561 ++++++++++++++----- crates/ragfs/src/plugins/serverinfofs/mod.rs | 361 ++++++++++++ openviking/pyagfs/__init__.py | 47 +- openviking/utils/agfs_utils.py | 11 +- pyproject.toml | 2 + setup.py | 97 ++++ 15 files changed, 1886 insertions(+), 145 deletions(-) create mode 100644 crates/ragfs/src/plugins/localfs/mod.rs create mode 100644 crates/ragfs/src/plugins/queuefs/backend.rs create mode 100644 crates/ragfs/src/plugins/serverinfofs/mod.rs diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index b77e505a7..0ec5079f0 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -237,12 +237,50 @@ jobs: mkdir -p openviking/bin cp target/${{ matrix.arch == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu' }}/release/ov openviking/bin/ chmod +x openviking/bin/ov + + - name: Build ragfs-python and extract into openviking/lib/ (Linux) + shell: bash + run: | + uv pip install maturin + TMPDIR=$(mktemp -d) + cd crates/ragfs-python + maturin build --release \ + --target ${{ matrix.arch == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu' }} \ + --out "$TMPDIR" + cd ../.. 
+ mkdir -p openviking/lib + python3 -c " +import zipfile, glob, os, sys +whls = glob.glob(os.path.join('$TMPDIR', 'ragfs_python-*.whl')) +assert whls, 'maturin produced no wheel' +with zipfile.ZipFile(whls[0]) as zf: + for name in zf.namelist(): + bn = os.path.basename(name) + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): + dst = os.path.join('openviking', 'lib', bn) + with zf.open(name) as src, open(dst, 'wb') as f: + f.write(src.read()) + os.chmod(dst, 0o755) + print(f'Extracted {bn} -> {dst}') + sys.exit(0) +print('ERROR: No ragfs_python .so/.pyd found in wheel') +sys.exit(1) + " + rm -rf "$TMPDIR" + echo "Contents of openviking/lib/:" + ls -la openviking/lib/ - name: Clean workspace (force ignore dirty) shell: bash run: | + # Back up pre-built artifacts before cleaning + cp -a openviking/bin /tmp/_ov_bin || true + cp -a openviking/lib /tmp/_ov_lib || true git reset --hard HEAD git clean -fd rm -rf openviking/_version.py openviking.egg-info + # Restore pre-built artifacts + cp -a /tmp/_ov_bin openviking/bin || true + cp -a /tmp/_ov_lib openviking/lib || true # Ignore uv.lock changes to avoid dirty state in setuptools_scm git update-index --assume-unchanged uv.lock || true @@ -257,6 +295,8 @@ jobs: git status --ignored echo "=== Check openviking/_version.py ===" if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi + echo "=== Verify pre-built artifacts survived clean ===" + ls -la openviking/bin/ openviking/lib/ || true - name: Build package (Wheel Only) run: uv build --wheel @@ -276,11 +316,8 @@ jobs: - name: Repair wheels (Linux) run: | uv pip install auditwheel - # Repair wheels and output to a temporary directory uv run auditwheel repair dist/*.whl -w dist_fixed - # Remove original non-compliant wheels rm dist/*.whl - # Move repaired wheels back to dist mv dist_fixed/*.whl dist/ rmdir dist_fixed @@ -405,12 +442,52 @@ jobs: cp target/release/ov openviking/bin/ chmod +x 
openviking/bin/ov fi + + - name: Build ragfs-python and extract into openviking/lib/ (macOS/Windows) + shell: bash + run: | + uv pip install maturin + TMPDIR=$(mktemp -d) + cd crates/ragfs-python + if [[ "${{ matrix.os }}" == "windows-latest" ]]; then + maturin build --release --target x86_64-pc-windows-msvc --out "$TMPDIR" + else + maturin build --release --out "$TMPDIR" + fi + cd ../.. + mkdir -p openviking/lib + python3 -c " +import zipfile, glob, os, sys +whls = glob.glob(os.path.join('$TMPDIR', 'ragfs_python-*.whl')) +assert whls, 'maturin produced no wheel' +with zipfile.ZipFile(whls[0]) as zf: + for name in zf.namelist(): + bn = os.path.basename(name) + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): + dst = os.path.join('openviking', 'lib', bn) + with zf.open(name) as src, open(dst, 'wb') as f: + f.write(src.read()) + os.chmod(dst, 0o755) + print(f'Extracted {bn} -> {dst}') + sys.exit(0) +print('ERROR: No ragfs_python .so/.pyd found in wheel') +sys.exit(1) + " + rm -rf "$TMPDIR" + echo "Contents of openviking/lib/:" + ls -la openviking/lib/ - name: Clean workspace (force ignore dirty) shell: bash run: | + # Back up pre-built artifacts before cleaning + cp -a openviking/bin /tmp/_ov_bin || true + cp -a openviking/lib /tmp/_ov_lib || true git reset --hard HEAD git clean -fd rm -rf openviking/_version.py openviking.egg-info + # Restore pre-built artifacts + cp -a /tmp/_ov_bin openviking/bin || true + cp -a /tmp/_ov_lib openviking/lib || true # Ignore uv.lock changes to avoid dirty state in setuptools_scm git update-index --assume-unchanged uv.lock || true @@ -425,6 +502,8 @@ jobs: git status --ignored echo "=== Check openviking/_version.py ===" if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi + echo "=== Verify pre-built artifacts survived clean ===" + ls -la openviking/bin/ openviking/lib/ || true - name: Build package (Wheel Only) run: uv build --wheel diff --git a/Dockerfile 
b/Dockerfile index 5659a0585..0f683d4e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -65,6 +65,35 @@ RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ ;; \ esac +# Build ragfs-python (Rust AGFS binding) and extract the native extension +# into the installed openviking package so it ships alongside the Go binding. +# Selection at runtime via RAGFS_IMPL env var (auto/rust/go). +RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ + uv pip install maturin && \ + export _TMPDIR=$(mktemp -d) && \ + cd crates/ragfs-python && \ + maturin build --release --out "$_TMPDIR" && \ + cd ../.. && \ + export _OV_LIB=$(/app/.venv/bin/python -c "import openviking; from pathlib import Path; print(Path(openviking.__file__).resolve().parent / 'lib')") && \ + mkdir -p "$_OV_LIB" && \ + /app/.venv/bin/python -c " \ +import zipfile, glob, os, sys; \ +tmpdir, ov_lib = os.environ['_TMPDIR'], os.environ['_OV_LIB']; \ +whls = glob.glob(os.path.join(tmpdir, 'ragfs_python-*.whl')); \ +assert whls, 'maturin produced no wheel'; \ +with zipfile.ZipFile(whls[0]) as zf: \ + for name in zf.namelist(): \ + bn = os.path.basename(name); \ + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): \ + dst = os.path.join(ov_lib, bn); \ + with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read()); \ + os.chmod(dst, 0o755); \ + print(f'ragfs-python: extracted {bn} -> {dst}'); \ + sys.exit(0); \ +print('WARNING: No ragfs_python .so/.pyd in wheel'); sys.exit(1) \ + " && \ + rm -rf "$_TMPDIR" + # Stage 4: runtime FROM python:3.13-slim-trixie diff --git a/MANIFEST.in b/MANIFEST.in index 800d1691d..e69ccc18a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,6 +10,10 @@ include LICENSE include README.md include pyproject.toml include setup.py +include Cargo.toml +include Cargo.lock +graft crates/ragfs +graft crates/ragfs-python recursive-include openviking *.yaml # sdist should be source-only: never ship runtime binaries from working tree 
diff --git a/Makefile b/Makefile index 55db08601..2e2045a74 100644 --- a/Makefile +++ b/Makefile @@ -99,6 +99,39 @@ build: check-deps check-pip echo " [OK] pip found, use pip to install..."; \ $(PYTHON) -m pip install -e .; \ fi + @echo "Building ragfs-python (Rust AGFS binding) into openviking/lib/..." + @MATURIN_CMD=""; \ + if command -v maturin > /dev/null 2>&1; then \ + MATURIN_CMD=maturin; \ + elif command -v uv > /dev/null 2>&1 && uv pip --help > /dev/null 2>&1; then \ + uv pip install maturin && MATURIN_CMD=maturin; \ + fi; \ + if [ -n "$$MATURIN_CMD" ]; then \ + TMPDIR=$$(mktemp -d); \ + cd crates/ragfs-python && $$MATURIN_CMD build --release --out "$$TMPDIR" 2>&1; \ + cd ../..; \ + mkdir -p openviking/lib; \ + $(PYTHON) -c " \ +import zipfile, glob, shutil, os, sys; \ +whls = glob.glob(os.path.join('$$TMPDIR', 'ragfs_python-*.whl')); \ +assert whls, 'maturin produced no wheel'; \ +with zipfile.ZipFile(whls[0]) as zf: \ + for name in zf.namelist(): \ + bn = os.path.basename(name); \ + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): \ + dst = os.path.join('openviking', 'lib', bn); \ + with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read()); \ + os.chmod(dst, 0o755); \ + print(f' [OK] ragfs-python: extracted {bn} -> {dst}'); \ + sys.exit(0); \ +print('[Warning] No ragfs_python .so/.pyd found in wheel'); sys.exit(1) \ + "; \ + rm -rf "$$TMPDIR"; \ + else \ + echo " [SKIP] maturin not found, ragfs-python (Rust binding) will not be built."; \ + echo " Install maturin to enable: uv pip install maturin"; \ + echo " The Go binding will be used as fallback."; \ + fi @echo "Build completed successfully." 
clean: diff --git a/crates/ragfs-python/src/lib.rs b/crates/ragfs-python/src/lib.rs index 16b3b8736..9998a69eb 100644 --- a/crates/ragfs-python/src/lib.rs +++ b/crates/ragfs-python/src/lib.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use std::time::UNIX_EPOCH; use ragfs::core::{ConfigValue, FileInfo, FileSystem, MountableFS, PluginConfig, WriteFlag}; -use ragfs::plugins::{KVFSPlugin, MemFSPlugin, QueueFSPlugin, SQLFSPlugin}; +use ragfs::plugins::{KVFSPlugin, LocalFSPlugin, MemFSPlugin, QueueFSPlugin, ServerInfoFSPlugin, SQLFSPlugin}; /// Convert a ragfs error into a Python RuntimeError fn to_py_err(e: ragfs::core::Error) -> PyErr { @@ -123,6 +123,8 @@ impl RAGFSBindingClient { fs.register_plugin(KVFSPlugin).await; fs.register_plugin(QueueFSPlugin).await; fs.register_plugin(SQLFSPlugin::new()).await; + fs.register_plugin(LocalFSPlugin::new()).await; + fs.register_plugin(ServerInfoFSPlugin::new()).await; }); Ok(Self { fs, rt }) @@ -390,12 +392,14 @@ impl RAGFSBindingClient { /// List all registered plugin names. 
fn list_plugins(&self) -> PyResult> { - // Return the names of built-in plugins + // Return names of built-in plugins Ok(vec![ "memfs".to_string(), "kvfs".to_string(), "queuefs".to_string(), "sqlfs".to_string(), + "localfs".to_string(), + "serverinfofs".to_string(), ]) } diff --git a/crates/ragfs/src/core/types.rs b/crates/ragfs/src/core/types.rs index fa81de729..175bd8abf 100644 --- a/crates/ragfs/src/core/types.rs +++ b/crates/ragfs/src/core/types.rs @@ -49,6 +49,17 @@ impl FileInfo { is_dir: true, } } + + /// Create a new FileInfo with all parameters + pub fn new(name: String, size: u64, mode: u32, mod_time: SystemTime, is_dir: bool) -> Self { + Self { + name, + size, + mode, + mod_time, + is_dir, + } + } } /// Write operation flags @@ -196,7 +207,9 @@ mod systemtime_serde { where S: Serializer, { - let duration = time.duration_since(UNIX_EPOCH).map_err(serde::ser::Error::custom)?; + let duration = time + .duration_since(UNIX_EPOCH) + .map_err(serde::ser::Error::custom)?; duration.as_secs().serialize(serializer) } diff --git a/crates/ragfs/src/plugins/localfs/mod.rs b/crates/ragfs/src/plugins/localfs/mod.rs new file mode 100644 index 000000000..7ac32c667 --- /dev/null +++ b/crates/ragfs/src/plugins/localfs/mod.rs @@ -0,0 +1,464 @@ +//! LocalFS plugin - Local file system mount +//! +//! This plugin mounts a local directory into RAGFS virtual file system, +//! providing direct access to local files and directories. 
+ +use async_trait::async_trait; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::core::errors::{Error, Result}; +use crate::core::filesystem::FileSystem; +use crate::core::plugin::ServicePlugin; +use crate::core::types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}; + +/// LocalFS - Local file system implementation +pub struct LocalFileSystem { + /// Base path of the mounted directory + base_path: PathBuf, +} + +impl LocalFileSystem { + /// Create a new LocalFileSystem + /// + /// # Arguments + /// * `base_path` - The local directory path to mount + /// + /// # Errors + /// Returns an error if the base path doesn't exist or is not a directory + pub fn new(base_path: &str) -> Result { + let path = PathBuf::from(base_path); + + // Check if path exists + if !path.exists() { + return Err(Error::plugin(format!( + "base path does not exist: {}", + base_path + ))); + } + + // Check if it's a directory + if !path.is_dir() { + return Err(Error::plugin(format!( + "base path is not a directory: {}", + base_path + ))); + } + + Ok(Self { base_path: path }) + } + + /// Resolve a virtual path to actual local path + fn resolve_path(&self, path: &str) -> PathBuf { + // Remove leading slash to make it relative + let relative = path.strip_prefix('/').unwrap_or(path); + + // Join with base path + if relative.is_empty() { + self.base_path.clone() + } else { + self.base_path.join(relative) + } + } +} + +#[async_trait] +impl FileSystem for LocalFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if file already exists + if local_path.exists() { + return Err(Error::AlreadyExists(path.to_string())); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Create empty file + fs::File::create(&local_path) + .map_err(|e| Error::plugin(format!("failed to create 
file: {}", e)))?; + + Ok(()) + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if directory already exists + if local_path.exists() { + return Err(Error::AlreadyExists(path.to_string())); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Create directory + fs::create_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to create directory: {}", e)))?; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // If directory, check if empty + if local_path.is_dir() { + let entries = fs::read_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?; + + if entries.count() > 0 { + return Err(Error::plugin(format!("directory not empty: {}", path))); + } + } + + // Remove file or empty directory + fs::remove_file(&local_path) + .or_else(|_| fs::remove_dir(&local_path)) + .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?; + + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // Remove recursively + fs::remove_dir_all(&local_path) + .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let local_path = self.resolve_path(path); + + // Check if exists and is not a directory + let metadata = fs::metadata(&local_path) + .map_err(|_| Error::NotFound(path.to_string()))?; + + if metadata.is_dir() { + return Err(Error::plugin(format!("is a directory: {}", path))); + } + 
+ // Read file + let data = fs::read(&local_path) + .map_err(|e| Error::plugin(format!("failed to read file: {}", e)))?; + + // Apply offset and size + let file_size = data.len() as u64; + let start = offset.min(file_size) as usize; + let end = if size == 0 { + data.len() + } else { + (offset + size).min(file_size) as usize + }; + + if start >= data.len() { + Ok(vec![]) + } else { + Ok(data[start..end].to_vec()) + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, _flags: WriteFlag) -> Result { + let local_path = self.resolve_path(path); + + // Check if it's a directory + if local_path.exists() && local_path.is_dir() { + return Err(Error::plugin(format!("is a directory: {}", path))); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Open or create file + let mut file = if local_path.exists() { + fs::OpenOptions::new() + .write(true) + .open(&local_path) + .map_err(|e| Error::plugin(format!("failed to open file: {}", e)))? + } else { + fs::OpenOptions::new() + .write(true) + .create(true) + .open(&local_path) + .map_err(|e| Error::plugin(format!("failed to create file: {}", e)))? 
+ }; + + // Write data + use std::io::{Seek, SeekFrom, Write}; + + if offset > 0 { + file.seek(SeekFrom::Start(offset)) + .map_err(|e| Error::plugin(format!("failed to seek: {}", e)))?; + } + + let written = file + .write(data) + .map_err(|e| Error::plugin(format!("failed to write: {}", e)))?; + + Ok(written as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let local_path = self.resolve_path(path); + + // Check if directory exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + if !local_path.is_dir() { + return Err(Error::plugin(format!("not a directory: {}", path))); + } + + // Read directory + let entries = fs::read_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?; + + let mut files = Vec::new(); + for entry in entries { + let entry = entry.map_err(|e| Error::plugin(format!("failed to read entry: {}", e)))?; + let metadata = entry + .metadata() + .map_err(|e| Error::plugin(format!("failed to get metadata: {}", e)))?; + + let name = entry.file_name().to_string_lossy().to_string(); + let mode = if metadata.is_dir() { 0o755 } else { 0o644 }; + let mod_time = metadata + .modified() + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + + files.push(FileInfo::new( + name, + metadata.len(), + mode, + mod_time, + metadata.is_dir(), + )); + } + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let local_path = self.resolve_path(path); + + // Get file metadata + let metadata = fs::metadata(&local_path) + .map_err(|_| Error::NotFound(path.to_string()))?; + + let name = Path::new(path) + .file_name() + .unwrap_or(path.as_ref()) + .to_string_lossy() + .to_string(); + let mode = if metadata.is_dir() { 0o755 } else { 0o644 }; + let mod_time = metadata + .modified() + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + + Ok(FileInfo::new( + name, + metadata.len(), + mode, + mod_time, + metadata.is_dir(), + )) + } + + async fn rename(&self, old_path: &str, new_path: &str) 
-> Result<()> { + let old_local = self.resolve_path(old_path); + let new_local = self.resolve_path(new_path); + + // Check if old path exists + if !old_local.exists() { + return Err(Error::NotFound(old_path.to_string())); + } + + // Check if new path parent directory exists + if let Some(parent) = new_local.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Rename/move + fs::rename(&old_local, &new_local) + .map_err(|e| Error::plugin(format!("failed to rename: {}", e)))?; + + Ok(()) + } + + async fn chmod(&self, path: &str, _mode: u32) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // Note: chmod is not fully implemented on all platforms + // For now, just return success + Ok(()) + } +} + +/// LocalFS plugin +pub struct LocalFSPlugin { + config_params: Vec, +} + +impl LocalFSPlugin { + /// Create a new LocalFS plugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter { + name: "local_dir".to_string(), + param_type: "string".to_string(), + required: true, + default: None, + description: "Local directory path to expose (must exist)".to_string(), + }, + ], + } + } +} + +#[async_trait] +impl ServicePlugin for LocalFSPlugin { + fn name(&self) -> &str { + "localfs" + } + + fn readme(&self) -> &str { + r#"LocalFS Plugin - Local File System Mount + +This plugin mounts a local directory into RAGFS virtual file system. 
+ +FEATURES: + - Mount any local directory into RAGFS + - Full POSIX file system operations + - Direct access to local files and directories + - Preserves file permissions and timestamps + - Efficient file operations (no copying) + +CONFIGURATION: + + Basic configuration: + [plugins.localfs] + enabled = true + path = "/local" + + [plugins.localfs.config] + local_dir = "/path/to/local/directory" + + Multiple local mounts: + [plugins.localfs_home] + enabled = true + path = "/home" + + [plugins.localfs_home.config] + local_dir = "/Users/username" + +USAGE: + + List directory: + agfs ls /local + + Read a file: + agfs cat /local/file.txt + + Write to a file: + agfs write /local/file.txt "Hello, World!" + + Create a directory: + agfs mkdir /local/newdir + + Remove a file: + agfs rm /local/file.txt + +NOTES: + - Changes are directly applied to local file system + - File permissions are preserved and can be modified + - Be careful with rm -r as it permanently deletes files + +VERSION: 1.0.0 +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // Validate local_dir parameter + let local_dir = config + .params + .get("local_dir") + .and_then(|v| match v { + crate::core::types::ConfigValue::String(s) => Some(s), + _ => None, + }) + .ok_or_else(|| Error::plugin("local_dir is required in configuration".to_string()))?; + + // Check if path exists + let path = Path::new(local_dir); + if !path.exists() { + return Err(Error::plugin(format!( + "base path does not exist: {}", + local_dir + ))); + } + + // Verify it's a directory + if !path.is_dir() { + return Err(Error::plugin(format!( + "base path is not a directory: {}", + local_dir + ))); + } + + Ok(()) + } + + async fn initialize(&self, config: PluginConfig) -> Result> { + // Parse configuration + let local_dir = config + .params + .get("local_dir") + .and_then(|v| match v { + crate::core::types::ConfigValue::String(s) => Some(s), + _ => None, + }) + .ok_or_else(|| Error::plugin("local_dir is 
required".to_string()))?; + + let fs = LocalFileSystem::new(local_dir)?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} diff --git a/crates/ragfs/src/plugins/mod.rs b/crates/ragfs/src/plugins/mod.rs index c84424db9..1fcc0c2b1 100644 --- a/crates/ragfs/src/plugins/mod.rs +++ b/crates/ragfs/src/plugins/mod.rs @@ -3,15 +3,19 @@ //! This module contains all built-in filesystem plugins. pub mod kvfs; +pub mod localfs; pub mod memfs; pub mod queuefs; #[cfg(feature = "s3")] pub mod s3fs; +pub mod serverinfofs; pub mod sqlfs; pub use kvfs::{KVFSPlugin, KVFileSystem}; +pub use localfs::{LocalFSPlugin, LocalFileSystem}; pub use memfs::{MemFSPlugin, MemFileSystem}; pub use queuefs::{QueueFSPlugin, QueueFileSystem}; #[cfg(feature = "s3")] pub use s3fs::{S3FSPlugin, S3FileSystem}; +pub use serverinfofs::{ServerInfoFSPlugin, ServerInfoFileSystem}; pub use sqlfs::{SQLFSPlugin, SQLFileSystem}; diff --git a/crates/ragfs/src/plugins/queuefs/backend.rs b/crates/ragfs/src/plugins/queuefs/backend.rs new file mode 100644 index 000000000..2fbf25dbd --- /dev/null +++ b/crates/ragfs/src/plugins/queuefs/backend.rs @@ -0,0 +1,310 @@ +//! Queue Backend Abstraction +//! +//! This module provides a pluggable backend system for QueueFS, allowing different +//! storage implementations (memory, SQLite, etc.) while maintaining a consistent interface. 
+ +use crate::core::errors::{Error, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, VecDeque}; +use std::time::SystemTime; +use uuid::Uuid; + +/// A message in the queue +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier for the message + pub id: String, + /// Message data + pub data: Vec, + /// Timestamp when the message was enqueued + pub timestamp: SystemTime, +} + +impl Message { + /// Create a new message with the given data + pub fn new(data: Vec) -> Self { + Self { + id: Uuid::new_v4().to_string(), + data, + timestamp: SystemTime::now(), + } + } +} + +/// Queue backend trait for pluggable storage implementations +pub trait QueueBackend: Send + Sync { + /// Create a new queue with the given name + fn create_queue(&mut self, name: &str) -> Result<()>; + + /// Remove a queue and all its messages + fn remove_queue(&mut self, name: &str) -> Result<()>; + + /// Check if a queue exists + fn queue_exists(&self, name: &str) -> bool; + + /// List all queues with the given prefix + /// If prefix is empty, returns all queues + fn list_queues(&self, prefix: &str) -> Vec; + + /// Add a message to the queue + fn enqueue(&mut self, queue_name: &str, msg: Message) -> Result<()>; + + /// Remove and return the first message from the queue + fn dequeue(&mut self, queue_name: &str) -> Result>; + + /// View the first message without removing it + fn peek(&self, queue_name: &str) -> Result>; + + /// Get the number of messages in the queue + fn size(&self, queue_name: &str) -> Result; + + /// Clear all messages from the queue + fn clear(&mut self, queue_name: &str) -> Result<()>; + + /// Get the last enqueue time for the queue + fn get_last_enqueue_time(&self, queue_name: &str) -> Result; +} + +/// A single queue with its messages +struct Queue { + messages: VecDeque, + last_enqueue_time: SystemTime, +} + +impl Queue { + fn new() -> Self { + Self { + messages: VecDeque::new(), + last_enqueue_time: 
SystemTime::UNIX_EPOCH, + } + } +} + +/// In-memory queue backend using HashMap +pub struct MemoryBackend { + queues: HashMap, +} + +impl MemoryBackend { + /// Create a new memory backend + pub fn new() -> Self { + Self { + queues: HashMap::new(), + } + } +} + +impl QueueBackend for MemoryBackend { + fn create_queue(&mut self, name: &str) -> Result<()> { + if self.queues.contains_key(name) { + return Err(Error::AlreadyExists(format!("queue '{}' already exists", name))); + } + self.queues.insert(name.to_string(), Queue::new()); + Ok(()) + } + + fn remove_queue(&mut self, name: &str) -> Result<()> { + if self.queues.remove(name).is_none() { + return Err(Error::NotFound(format!("queue '{}' not found", name))); + } + Ok(()) + } + + fn queue_exists(&self, name: &str) -> bool { + self.queues.contains_key(name) + } + + fn list_queues(&self, prefix: &str) -> Vec { + if prefix.is_empty() { + self.queues.keys().cloned().collect() + } else { + self.queues + .keys() + .filter(|name| name.starts_with(prefix)) + .cloned() + .collect() + } + } + + fn enqueue(&mut self, queue_name: &str, msg: Message) -> Result<()> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + queue.last_enqueue_time = SystemTime::now(); + queue.messages.push_back(msg); + Ok(()) + } + + fn dequeue(&mut self, queue_name: &str) -> Result> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.messages.pop_front()) + } + + fn peek(&self, queue_name: &str) -> Result> { + let queue = self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.messages.front().cloned()) + } + + fn size(&self, queue_name: &str) -> Result { + let queue = self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + 
Ok(queue.messages.len()) + } + + fn clear(&mut self, queue_name: &str) -> Result<()> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + queue.messages.clear(); + Ok(()) + } + + fn get_last_enqueue_time(&self, queue_name: &str) -> Result { + let queue = self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.last_enqueue_time) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_queue() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("test").unwrap(); + assert!(backend.queue_exists("test")); + + // Creating duplicate should fail + let result = backend.create_queue("test"); + assert!(result.is_err()); + } + + #[test] + fn test_remove_queue() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("test").unwrap(); + backend.remove_queue("test").unwrap(); + assert!(!backend.queue_exists("test")); + + // Removing non-existent queue should fail + let result = backend.remove_queue("test"); + assert!(result.is_err()); + } + + #[test] + fn test_list_queues() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("queue1").unwrap(); + backend.create_queue("queue2").unwrap(); + backend.create_queue("logs/errors").unwrap(); + + let all = backend.list_queues(""); + assert_eq!(all.len(), 3); + + let logs = backend.list_queues("logs"); + assert_eq!(logs.len(), 1); + assert_eq!(logs[0], "logs/errors"); + } + + #[test] + fn test_enqueue_dequeue() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + let msg1 = Message::new(b"message 1".to_vec()); + let msg2 = Message::new(b"message 2".to_vec()); + + backend.enqueue("test", msg1.clone()).unwrap(); + backend.enqueue("test", msg2.clone()).unwrap(); + + assert_eq!(backend.size("test").unwrap(), 2); + + let dequeued1 = backend.dequeue("test").unwrap().unwrap(); + 
assert_eq!(dequeued1.data, b"message 1"); + + let dequeued2 = backend.dequeue("test").unwrap().unwrap(); + assert_eq!(dequeued2.data, b"message 2"); + + assert_eq!(backend.size("test").unwrap(), 0); + assert!(backend.dequeue("test").unwrap().is_none()); + } + + #[test] + fn test_peek() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + let msg = Message::new(b"test message".to_vec()); + backend.enqueue("test", msg.clone()).unwrap(); + + let peeked1 = backend.peek("test").unwrap().unwrap(); + assert_eq!(peeked1.data, b"test message"); + + let peeked2 = backend.peek("test").unwrap().unwrap(); + assert_eq!(peeked2.data, b"test message"); + + // Size should still be 1 + assert_eq!(backend.size("test").unwrap(), 1); + } + + #[test] + fn test_clear() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + backend.enqueue("test", Message::new(b"msg1".to_vec())).unwrap(); + backend.enqueue("test", Message::new(b"msg2".to_vec())).unwrap(); + + assert_eq!(backend.size("test").unwrap(), 2); + + backend.clear("test").unwrap(); + assert_eq!(backend.size("test").unwrap(), 0); + } + + #[test] + fn test_multi_queue_isolation() { + let mut backend = MemoryBackend::new(); + backend.create_queue("queue1").unwrap(); + backend.create_queue("queue2").unwrap(); + + backend.enqueue("queue1", Message::new(b"msg1".to_vec())).unwrap(); + backend.enqueue("queue2", Message::new(b"msg2".to_vec())).unwrap(); + + assert_eq!(backend.size("queue1").unwrap(), 1); + assert_eq!(backend.size("queue2").unwrap(), 1); + + let msg1 = backend.dequeue("queue1").unwrap().unwrap(); + assert_eq!(msg1.data, b"msg1"); + + // queue2 should be unaffected + assert_eq!(backend.size("queue2").unwrap(), 1); + } + + #[test] + fn test_operations_on_nonexistent_queue() { + let mut backend = MemoryBackend::new(); + + assert!(backend.enqueue("nonexistent", Message::new(b"data".to_vec())).is_err()); + assert!(backend.dequeue("nonexistent").is_err()); + 
assert!(backend.peek("nonexistent").is_err()); + assert!(backend.size("nonexistent").is_err()); + assert!(backend.clear("nonexistent").is_err()); + } +} diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs index 8dd3ba3ab..9bb761f47 100644 --- a/crates/ragfs/src/plugins/queuefs/mod.rs +++ b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -1,11 +1,14 @@ //! QueueFS Plugin //! -//! A filesystem-based message queue where operations are performed through control files: -//! - `/enqueue` - Write to this file to add a message to the queue -//! - `/dequeue` - Read from this file to remove and return the first message -//! - `/peek` - Read from this file to view the first message without removing it -//! - `/size` - Read from this file to get the current queue size -//! - `/clear` - Write to this file to clear all messages from the queue +//! A filesystem-based message queue with multi-queue support where operations are performed +//! through control files within each queue directory: +//! - `/queue_name/enqueue` - Write to this file to add a message to the queue +//! - `/queue_name/dequeue` - Read from this file to remove and return the first message +//! - `/queue_name/peek` - Read from this file to view the first message without removing it +//! - `/queue_name/size` - Read from this file to get the current queue size +//! 
- `/queue_name/clear` - Write to this file to clear all messages from the queue + +mod backend; use crate::core::{ errors::{Error, Result}, @@ -14,55 +17,35 @@ use crate::core::{ types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}, }; use async_trait::async_trait; -use serde::{Deserialize, Serialize}; -use std::collections::VecDeque; +use backend::{MemoryBackend, Message, QueueBackend}; use std::sync::Arc; use std::time::SystemTime; use tokio::sync::Mutex; -use uuid::Uuid; - -/// A message in the queue -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - /// Unique identifier for the message - pub id: String, - /// Message data - pub data: Vec, - /// Timestamp when the message was enqueued - pub timestamp: SystemTime, -} -impl Message { - /// Create a new message with the given data - fn new(data: Vec) -> Self { - Self { - id: Uuid::new_v4().to_string(), - data, - timestamp: SystemTime::now(), - } - } +/// Parsed path information +struct ParsedPath { + queue_name: Option, + operation: Option, + is_dir: bool, } -/// QueueFS - A filesystem-based message queue +/// QueueFS - A filesystem-based message queue with multi-queue support pub struct QueueFileSystem { - /// The message queue - queue: Arc>>, + /// The queue backend + backend: Arc>>, } impl QueueFileSystem { - /// Create a new QueueFileSystem + /// Create a new QueueFileSystem with memory backend pub fn new() -> Self { Self { - queue: Arc::new(Mutex::new(VecDeque::new())), + backend: Arc::new(Mutex::new(Box::new(MemoryBackend::new()))), } } - /// Check if a path is a control file - fn is_control_file(path: &str) -> bool { - matches!( - path, - "/enqueue" | "/dequeue" | "/peek" | "/size" | "/clear" - ) + /// Check if a name is a control operation + fn is_control_operation(name: &str) -> bool { + matches!(name, "enqueue" | "dequeue" | "peek" | "size" | "clear") } /// Normalize path by removing trailing slashes and ensuring it starts with / @@ -76,13 +59,53 @@ impl QueueFileSystem { 
path.to_string() } } + + /// Parse a queue path into its components + fn parse_queue_path(path: &str) -> Result { + let path = Self::normalize_path(path); + let path = path.trim_start_matches('/'); + + // Root directory + if path.is_empty() { + return Ok(ParsedPath { + queue_name: None, + operation: None, + is_dir: true, + }); + } + + let parts: Vec<&str> = path.split('/').collect(); + let last = parts[parts.len() - 1]; + + // Check if last part is a control operation + if Self::is_control_operation(last) { + if parts.len() == 1 { + return Err(Error::InvalidOperation( + "operation without queue name".to_string(), + )); + } + let queue_name = parts[..parts.len() - 1].join("/"); + return Ok(ParsedPath { + queue_name: Some(queue_name), + operation: Some(last.to_string()), + is_dir: false, + }); + } + + // It's a directory (queue or parent) + Ok(ParsedPath { + queue_name: Some(parts.join("/")), + operation: None, + is_dir: true, + }) + } } #[async_trait] impl FileSystem for QueueFileSystem { async fn create(&self, path: &str) -> Result<()> { - let path = Self::normalize_path(path); - if Self::is_control_file(&path) { + let parsed = Self::parse_queue_path(path)?; + if !parsed.is_dir && parsed.operation.is_some() { // Control files always exist Ok(()) } else { @@ -93,42 +116,53 @@ impl FileSystem for QueueFileSystem { } async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { - let path = Self::normalize_path(path); - if path == "/" { + let parsed = Self::parse_queue_path(path)?; + if !parsed.is_dir { + return Err(Error::InvalidOperation( + "not a directory path".to_string(), + )); + } + if let Some(queue_name) = parsed.queue_name { + self.backend.lock().await.create_queue(&queue_name)?; Ok(()) } else { - Err(Error::InvalidOperation( - "QueueFS does not support directories".to_string(), - )) + // Root directory always exists + Ok(()) } } async fn read(&self, path: &str, _offset: u64, _size: u64) -> Result> { - let path = Self::normalize_path(path); + let parsed = 
Self::parse_queue_path(path)?; + + let queue_name = parsed + .queue_name + .ok_or_else(|| Error::InvalidOperation("no queue specified".to_string()))?; + let operation = parsed + .operation + .ok_or_else(|| Error::InvalidOperation("no operation specified".to_string()))?; + + let mut backend = self.backend.lock().await; - match path.as_str() { - "/dequeue" => { - let mut queue = self.queue.lock().await; - let msg = queue - .pop_front() + match operation.as_str() { + "dequeue" => { + let msg = backend + .dequeue(&queue_name)? .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; Ok(msg.data) } - "/peek" => { - let queue = self.queue.lock().await; - let msg = queue - .front() + "peek" => { + let msg = backend + .peek(&queue_name)? .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; Ok(msg.data.clone()) } - "/size" => { - let queue = self.queue.lock().await; - let size = queue.len(); + "size" => { + let size = backend.size(&queue_name)?; Ok(size.to_string().into_bytes()) } _ => Err(Error::InvalidOperation(format!( - "Cannot read from '{}'. Use /dequeue, /peek, or /size", - path + "Cannot read from '{}'. 
Use dequeue, peek, or size", + operation ))), } } @@ -140,67 +174,142 @@ impl FileSystem for QueueFileSystem { _offset: u64, _flags: WriteFlag, ) -> Result { - let path = Self::normalize_path(path); + let parsed = Self::parse_queue_path(path)?; + + let queue_name = parsed + .queue_name + .ok_or_else(|| Error::InvalidOperation("no queue specified".to_string()))?; + let operation = parsed + .operation + .ok_or_else(|| Error::InvalidOperation("no operation specified".to_string()))?; - match path.as_str() { - "/enqueue" => { + let mut backend = self.backend.lock().await; + + match operation.as_str() { + "enqueue" => { let msg = Message::new(data.to_vec()); let len = data.len() as u64; - self.queue.lock().await.push_back(msg); + backend.enqueue(&queue_name, msg)?; Ok(len) } - "/clear" => { - self.queue.lock().await.clear(); + "clear" => { + backend.clear(&queue_name)?; Ok(0) } _ => Err(Error::InvalidOperation(format!( - "Cannot write to '{}'. Use /enqueue or /clear", - path + "Cannot write to '{}'. 
Use enqueue or clear", + operation ))), } } async fn read_dir(&self, path: &str) -> Result> { - let path = Self::normalize_path(path); + let parsed = Self::parse_queue_path(path)?; - if path != "/" { - return Err(Error::NotFound(format!("directory not found: {}", path))); + if !parsed.is_dir { + return Err(Error::NotADirectory(path.to_string())); } + let backend = self.backend.lock().await; let now = SystemTime::now(); + + // Root directory: list all top-level queues + if parsed.queue_name.is_none() { + let queues = backend.list_queues(""); + let mut top_level = std::collections::HashSet::new(); + + for q in queues { + if let Some(first) = q.split('/').next() { + top_level.insert(first.to_string()); + } + } + + return Ok(top_level + .into_iter() + .map(|name| FileInfo { + name, + size: 0, + mode: 0o755, + mod_time: now, + is_dir: true, + }) + .collect()); + } + + // Queue directory: check if it has nested queues + let queue_name = parsed.queue_name.unwrap(); + let all_queues = backend.list_queues(&queue_name); + + let has_nested = all_queues + .iter() + .any(|q| q.starts_with(&format!("{}/", queue_name))); + + if has_nested { + // Return subdirectories + let prefix = format!("{}/", queue_name); + let mut subdirs = std::collections::HashSet::new(); + + for q in all_queues { + if let Some(remainder) = q.strip_prefix(&prefix) { + if let Some(first) = remainder.split('/').next() { + subdirs.insert(first.to_string()); + } + } + } + + return Ok(subdirs + .into_iter() + .map(|name| FileInfo { + name, + size: 0, + mode: 0o755, + mod_time: now, + is_dir: true, + }) + .collect()); + } + + // Leaf queue: return control files + if !backend.queue_exists(&queue_name) { + return Err(Error::NotFound(format!( + "queue not found: {}", + queue_name + ))); + } + Ok(vec![ FileInfo { name: "enqueue".to_string(), size: 0, - mode: 0o666, + mode: 0o222, mod_time: now, is_dir: false, }, FileInfo { name: "dequeue".to_string(), size: 0, - mode: 0o666, + mode: 0o444, mod_time: now, is_dir: 
false, }, FileInfo { name: "peek".to_string(), size: 0, - mode: 0o666, + mode: 0o444, mod_time: now, is_dir: false, }, FileInfo { name: "size".to_string(), size: 0, - mode: 0o666, + mode: 0o444, mod_time: now, is_dir: false, }, FileInfo { name: "clear".to_string(), size: 0, - mode: 0o666, + mode: 0o222, mod_time: now, is_dir: false, }, @@ -208,9 +317,10 @@ impl FileSystem for QueueFileSystem { } async fn stat(&self, path: &str) -> Result { - let path = Self::normalize_path(path); + let parsed = Self::parse_queue_path(path)?; - if path == "/" { + // Root directory + if parsed.queue_name.is_none() { return Ok(FileInfo { name: "/".to_string(), size: 0, @@ -220,17 +330,36 @@ impl FileSystem for QueueFileSystem { }); } - if Self::is_control_file(&path) { - let name = path.trim_start_matches('/').to_string(); + let backend = self.backend.lock().await; + + if parsed.is_dir { + // Queue directory + let queue_name = parsed.queue_name.unwrap(); + if backend.queue_exists(&queue_name) { + Ok(FileInfo { + name: queue_name.split('/').last().unwrap_or(&queue_name).to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }) + } else { + Err(Error::NotFound(format!("queue not found: {}", queue_name))) + } + } else { + // Control file + let operation = parsed.operation.as_ref().unwrap(); Ok(FileInfo { - name, + name: operation.clone(), size: 0, - mode: 0o666, + mode: if matches!(operation.as_str(), "enqueue" | "clear") { + 0o222 + } else { + 0o444 + }, mod_time: SystemTime::now(), is_dir: false, }) - } else { - Err(Error::NotFound(format!("file not found: {}", path))) } } @@ -252,10 +381,23 @@ impl FileSystem for QueueFileSystem { )) } - async fn remove_all(&self, _path: &str) -> Result<()> { - Err(Error::InvalidOperation( - "QueueFS does not support remove_all".to_string(), - )) + async fn remove_all(&self, path: &str) -> Result<()> { + let parsed = Self::parse_queue_path(path)?; + + if !parsed.is_dir { + return Err(Error::InvalidOperation( + "not a 
directory".to_string(), + )); + } + + if let Some(queue_name) = parsed.queue_name { + self.backend.lock().await.remove_queue(&queue_name)?; + Ok(()) + } else { + Err(Error::InvalidOperation( + "cannot remove root directory".to_string(), + )) + } } async fn truncate(&self, _path: &str, _size: u64) -> Result<()> { @@ -275,14 +417,37 @@ impl ServicePlugin for QueueFSPlugin { } fn readme(&self) -> &str { - "QueueFS - A filesystem-based message queue\n\ + "QueueFS - A filesystem-based message queue with multi-queue support\n\ + \n\ + Usage:\n\ + 1. Create a queue:\n\ + mkdir /queuefs/Embedding\n\ + \n\ + 2. Enqueue messages:\n\ + echo 'message data' > /queuefs/Embedding/enqueue\n\ + \n\ + 3. Dequeue messages:\n\ + cat /queuefs/Embedding/dequeue\n\ + \n\ + 4. Peek at messages:\n\ + cat /queuefs/Embedding/peek\n\ + \n\ + 5. Check queue size:\n\ + cat /queuefs/Embedding/size\n\ \n\ - Control files:\n\ - - /enqueue: Write to add a message to the queue\n\ - - /dequeue: Read to remove and return the first message\n\ - - /peek: Read to view the first message without removing it\n\ - - /size: Read to get the current queue size\n\ - - /clear: Write to clear all messages from the queue" + 6. 
Clear queue:\n\ + echo '' > /queuefs/Embedding/clear\n\ + \n\ + Control files per queue:\n\ + - enqueue: Write to add a message to the queue\n\ + - dequeue: Read to remove and return the first message\n\ + - peek: Read to view the first message without removing it\n\ + - size: Read to get the current queue size\n\ + - clear: Write to clear all messages from the queue\n\ + \n\ + Supports nested queues:\n\ + mkdir /queuefs/logs/errors\n\ + echo 'error message' > /queuefs/logs/errors/enqueue" } async fn validate(&self, _config: &PluginConfig) -> Result<()> { @@ -307,26 +472,29 @@ mod tests { async fn test_queuefs_enqueue_dequeue() { let fs = QueueFileSystem::new(); + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + // Enqueue messages let data1 = b"message 1"; let data2 = b"message 2"; - fs.write("/enqueue", data1, 0, WriteFlag::None) + fs.write("/test/enqueue", data1, 0, WriteFlag::None) .await .unwrap(); - fs.write("/enqueue", data2, 0, WriteFlag::None) + fs.write("/test/enqueue", data2, 0, WriteFlag::None) .await .unwrap(); // Dequeue messages - let result1 = fs.read("/dequeue", 0, 0).await.unwrap(); + let result1 = fs.read("/test/dequeue", 0, 0).await.unwrap(); assert_eq!(result1, data1); - let result2 = fs.read("/dequeue", 0, 0).await.unwrap(); + let result2 = fs.read("/test/dequeue", 0, 0).await.unwrap(); assert_eq!(result2, data2); // Queue should be empty - let result = fs.read("/dequeue", 0, 0).await; + let result = fs.read("/test/dequeue", 0, 0).await; assert!(result.is_err()); } @@ -334,21 +502,24 @@ mod tests { async fn test_queuefs_peek() { let fs = QueueFileSystem::new(); + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + // Enqueue a message let data = b"test message"; - fs.write("/enqueue", data, 0, WriteFlag::None) + fs.write("/test/enqueue", data, 0, WriteFlag::None) .await .unwrap(); // Peek should return the message without removing it - let result1 = fs.read("/peek", 0, 0).await.unwrap(); + let result1 = 
fs.read("/test/peek", 0, 0).await.unwrap(); assert_eq!(result1, data); - let result2 = fs.read("/peek", 0, 0).await.unwrap(); + let result2 = fs.read("/test/peek", 0, 0).await.unwrap(); assert_eq!(result2, data); // Dequeue should still work - let result3 = fs.read("/dequeue", 0, 0).await.unwrap(); + let result3 = fs.read("/test/dequeue", 0, 0).await.unwrap(); assert_eq!(result3, data); } @@ -356,25 +527,28 @@ mod tests { async fn test_queuefs_size() { let fs = QueueFileSystem::new(); + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + // Initially empty - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "0"); // Add messages - fs.write("/enqueue", b"msg1", 0, WriteFlag::None) + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) .await .unwrap(); - fs.write("/enqueue", b"msg2", 0, WriteFlag::None) + fs.write("/test/enqueue", b"msg2", 0, WriteFlag::None) .await .unwrap(); - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "2"); // Dequeue one - fs.read("/dequeue", 0, 0).await.unwrap(); + fs.read("/test/dequeue", 0, 0).await.unwrap(); - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "1"); } @@ -382,22 +556,27 @@ mod tests { async fn test_queuefs_clear() { let fs = QueueFileSystem::new(); + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + // Add messages - fs.write("/enqueue", b"msg1", 0, WriteFlag::None) + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) .await .unwrap(); - fs.write("/enqueue", b"msg2", 0, WriteFlag::None) + fs.write("/test/enqueue", b"msg2", 0, WriteFlag::None) .await .unwrap(); // Clear the queue - fs.write("/clear", b"", 0, WriteFlag::None).await.unwrap(); + fs.write("/test/clear", b"", 0, 
WriteFlag::None) + .await + .unwrap(); // Queue should be empty - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "0"); - let result = fs.read("/dequeue", 0, 0).await; + let result = fs.read("/test/dequeue", 0, 0).await; assert!(result.is_err()); } @@ -405,7 +584,17 @@ mod tests { async fn test_queuefs_read_dir() { let fs = QueueFileSystem::new(); + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Root should list the queue let entries = fs.read_dir("/").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "test"); + assert!(entries[0].is_dir); + + // Queue directory should list control files + let entries = fs.read_dir("/test").await.unwrap(); assert_eq!(entries.len(), 5); let names: Vec = entries.iter().map(|e| e.name.clone()).collect(); @@ -420,16 +609,23 @@ mod tests { async fn test_queuefs_stat() { let fs = QueueFileSystem::new(); + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + // Stat root let info = fs.stat("/").await.unwrap(); assert!(info.is_dir); + // Stat queue directory + let info = fs.stat("/test").await.unwrap(); + assert!(info.is_dir); + // Stat control files - let info = fs.stat("/enqueue").await.unwrap(); + let info = fs.stat("/test/enqueue").await.unwrap(); assert!(!info.is_dir); assert_eq!(info.name, "enqueue"); - // Stat non-existent file + // Stat non-existent queue let result = fs.stat("/nonexistent").await; assert!(result.is_err()); } @@ -438,20 +634,23 @@ mod tests { async fn test_queuefs_invalid_operations() { let fs = QueueFileSystem::new(); + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + // Cannot read from enqueue - let result = fs.read("/enqueue", 0, 0).await; + let result = fs.read("/test/enqueue", 0, 0).await; assert!(result.is_err()); // Cannot write to dequeue - let result = fs.write("/dequeue", b"data", 0, WriteFlag::None).await; + let result = 
fs.write("/test/dequeue", b"data", 0, WriteFlag::None).await; assert!(result.is_err()); // Cannot rename - let result = fs.rename("/enqueue", "/enqueue2").await; + let result = fs.rename("/test/enqueue", "/test/enqueue2").await; assert!(result.is_err()); - // Cannot remove - let result = fs.remove("/enqueue").await; + // Cannot remove control files + let result = fs.remove("/test/enqueue").await; assert!(result.is_err()); } @@ -459,6 +658,9 @@ mod tests { async fn test_queuefs_concurrent_access() { let fs = Arc::new(QueueFileSystem::new()); + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + // Spawn multiple tasks to enqueue messages let mut handles = vec![]; for i in 0..10 { @@ -466,7 +668,7 @@ mod tests { let handle = tokio::spawn(async move { let data = format!("message {}", i); fs_clone - .write("/enqueue", data.as_bytes(), 0, WriteFlag::None) + .write("/test/enqueue", data.as_bytes(), 0, WriteFlag::None) .await .unwrap(); }); @@ -479,16 +681,16 @@ mod tests { } // Check size - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "10"); // Dequeue all messages for _ in 0..10 { - fs.read("/dequeue", 0, 0).await.unwrap(); + fs.read("/test/dequeue", 0, 0).await.unwrap(); } // Queue should be empty - let size = fs.read("/size", 0, 0).await.unwrap(); + let size = fs.read("/test/size", 0, 0).await.unwrap(); assert_eq!(String::from_utf8(size).unwrap(), "0"); } @@ -509,11 +711,110 @@ mod tests { plugin.validate(&config).await.unwrap(); let fs = plugin.initialize(config).await.unwrap(); + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + // Test basic operation - fs.write("/enqueue", b"test", 0, WriteFlag::None) + fs.write("/test/enqueue", b"test", 0, WriteFlag::None) .await .unwrap(); - let result = fs.read("/dequeue", 0, 0).await.unwrap(); + let result = fs.read("/test/dequeue", 0, 0).await.unwrap(); assert_eq!(result, b"test"); } + + 
#[tokio::test] + async fn test_multi_queue() { + let fs = QueueFileSystem::new(); + + // Create two queues + fs.mkdir("/Embedding", 0o755).await.unwrap(); + fs.mkdir("/Semantic", 0o755).await.unwrap(); + + // Enqueue to both + fs.write("/Embedding/enqueue", b"embed1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/Semantic/enqueue", b"semantic1", 0, WriteFlag::None) + .await + .unwrap(); + + // Verify isolation + let size1 = fs.read("/Embedding/size", 0, 0).await.unwrap(); + let size2 = fs.read("/Semantic/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size1).unwrap(), "1"); + assert_eq!(String::from_utf8(size2).unwrap(), "1"); + + // Dequeue from specific queue + let msg = fs.read("/Embedding/dequeue", 0, 0).await.unwrap(); + assert_eq!(msg, b"embed1"); + + // Other queue unaffected + let size2 = fs.read("/Semantic/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size2).unwrap(), "1"); + } + + #[tokio::test] + async fn test_nested_queues() { + let fs = QueueFileSystem::new(); + + // Create nested structure + fs.mkdir("/logs", 0o755).await.unwrap(); + fs.mkdir("/logs/errors", 0o755).await.unwrap(); + fs.mkdir("/logs/warnings", 0o755).await.unwrap(); + + // List /logs should show subdirectories + let entries = fs.read_dir("/logs").await.unwrap(); + assert_eq!(entries.len(), 2); + let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect(); + assert!(names.contains(&"errors")); + assert!(names.contains(&"warnings")); + + // Can enqueue to nested queue + fs.write("/logs/errors/enqueue", b"error1", 0, WriteFlag::None) + .await + .unwrap(); + let msg = fs.read("/logs/errors/dequeue", 0, 0).await.unwrap(); + assert_eq!(msg, b"error1"); + } + + #[tokio::test] + async fn test_queue_lifecycle() { + let fs = QueueFileSystem::new(); + + // Create queue + fs.mkdir("/temp", 0o755).await.unwrap(); + fs.write("/temp/enqueue", b"data", 0, WriteFlag::None) + .await + .unwrap(); + + // Verify exists + let size = fs.read("/temp/size", 0, 
0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "1"); + + // Delete queue + fs.remove_all("/temp").await.unwrap(); + + // Verify deleted + let result = fs.stat("/temp").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_path_parsing() { + let fs = QueueFileSystem::new(); + + // Create queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Various path formats should work + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/test/enqueue/", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "2"); + } } diff --git a/crates/ragfs/src/plugins/serverinfofs/mod.rs b/crates/ragfs/src/plugins/serverinfofs/mod.rs new file mode 100644 index 000000000..13cdabadb --- /dev/null +++ b/crates/ragfs/src/plugins/serverinfofs/mod.rs @@ -0,0 +1,361 @@ +//! ServerInfoFS plugin - Server metadata and information +//! +//! This plugin provides runtime information about RAGFS server. 
+ +use async_trait::async_trait; +use std::time::{Duration, Instant, UNIX_EPOCH}; + +use crate::core::errors::{Error, Result}; +use crate::core::filesystem::FileSystem; +use crate::core::plugin::ServicePlugin; +use crate::core::types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}; + +/// ServerInfoFS - Server metadata filesystem +pub struct ServerInfoFileSystem { + /// Server start time + start_time: Instant, + /// Server version + version: String, +} + +impl ServerInfoFileSystem { + /// Create a new ServerInfoFileSystem + pub fn new(version: &str) -> Self { + Self { + start_time: Instant::now(), + version: version.to_string(), + } + } + + /// Check if path is valid + fn is_valid_path(path: &str) -> bool { + matches!( + path, + "/" | "/server_info" | "/uptime" | "/version" | "/stats" | "/README" + ) + } + + /// Get server info as JSON + fn get_server_info(&self) -> String { + let uptime = self.start_time.elapsed(); + let uptime_secs = uptime.as_secs(); + + format!( + r#"{{ + "version": "{}", + "uptime": "{}", + "start_time": "{}", + "rust_version": "{}" +}}"#, + self.version, + format_duration(uptime), + format_timestamp(UNIX_EPOCH.elapsed().unwrap_or(Duration::from_secs(0)).as_secs() - uptime_secs), + env!("CARGO_PKG_RUST_VERSION") + ) + } + + /// Get uptime string + fn get_uptime(&self) -> String { + format_duration(self.start_time.elapsed()) + } + + /// Get stats as JSON + fn get_stats(&self) -> String { + format!( + r#"{{ + "uptime_seconds": {}, + "uptime": "{}" +}}"#, + self.start_time.elapsed().as_secs(), + format_duration(self.start_time.elapsed()) + ) + } + + /// Get readme content + fn get_readme(&self) -> String { + format!( + r#"ServerInfoFS Plugin - Server Metadata and Information + +This plugin provides runtime information about RAGFS server. 
+ +USAGE: + View server version: + cat /serverinfofs/version + + View server uptime: + cat /serverinfofs/uptime + + View server info: + cat /serverinfofs/server_info + + View runtime stats: + cat /serverinfofs/stats + +FILES: + /server_info - Complete server information (JSON) + /uptime - Server uptime since start + /version - Server version + /stats - Runtime statistics + /README - This file + +EXAMPLES: + # Check server version + agfs:/> cat /serverinfofs/version + {} + + # Check uptime + agfs:/> cat /serverinfofs/uptime + {} + + # Get complete info + agfs:/> cat /serverinfofs/server_info + {{ + "version": "{}", + "uptime": "{}", + ... + }} + +VERSION: 1.0.0 +"#, + self.version, + format_duration(self.start_time.elapsed()), + self.version, + format_duration(self.start_time.elapsed()) + ) + } +} + +#[async_trait] +impl FileSystem for ServerInfoFileSystem { + async fn create(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + if !Self::is_valid_path(path) { + return Err(Error::NotFound(path.to_string())); + } + + if path == "/" { + return Err(Error::plugin("is a directory: /".to_string())); + } + + let data = match path { + "/server_info" => self.get_server_info(), + "/uptime" => self.get_uptime(), + "/version" => self.version.clone(), + "/stats" => self.get_stats(), + "/README" => self.get_readme(), + _ => return Err(Error::NotFound(path.to_string())), + }; + + // Add 
newline if not present + let data = if data.ends_with('\n') { + data + } else { + format!("{}\n", data) + }; + + // Apply offset and size + let bytes = data.as_bytes(); + let file_size = bytes.len() as u64; + let start = offset.min(file_size) as usize; + let end = if size == 0 { + bytes.len() + } else { + (offset + size).min(file_size) as usize + }; + + if start >= bytes.len() { + Ok(vec![]) + } else { + Ok(bytes[start..end].to_vec()) + } + } + + async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn read_dir(&self, path: &str) -> Result> { + if path != "/" { + return Err(Error::plugin(format!("not a directory: {}", path))); + } + + let now = std::time::SystemTime::now(); + + // Generate content for each file to get accurate sizes + let server_info = self.get_server_info(); + let uptime = self.get_uptime(); + let version = self.version.clone(); + let stats = self.get_stats(); + let readme = self.get_readme(); + + Ok(vec![ + FileInfo::new("README".to_string(), readme.len() as u64, 0o444, now, false), + FileInfo::new("server_info".to_string(), server_info.len() as u64, 0o444, now, false), + FileInfo::new("uptime".to_string(), uptime.len() as u64, 0o444, now, false), + FileInfo::new("version".to_string(), version.len() as u64, 0o444, now, false), + FileInfo::new("stats".to_string(), stats.len() as u64, 0o444, now, false), + ]) + } + + async fn stat(&self, path: &str) -> Result { + if !Self::is_valid_path(path) { + return Err(Error::NotFound(path.to_string())); + } + + let now = std::time::SystemTime::now(); + + if path == "/" { + return Ok(FileInfo::new("/".to_string(), 0, 0o555, now, true)); + } + + // For files, read content to get size + let data = match path { + "/server_info" => self.get_server_info(), + "/uptime" => self.get_uptime(), + "/version" => self.version.clone(), + "/stats" => self.get_stats(), + "/README" => 
self.get_readme(), + _ => return Err(Error::NotFound(path.to_string())), + }; + + let name = path.strip_prefix('/').unwrap_or(path); + Ok(FileInfo::new(name.to_string(), data.len() as u64, 0o444, now, false)) + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } +} + +/// ServerInfoFS plugin +pub struct ServerInfoFSPlugin { + config_params: Vec, +} + +impl ServerInfoFSPlugin { + /// Create a new ServerInfoFS plugin + pub fn new() -> Self { + Self { + config_params: vec![], + } + } +} + +#[async_trait] +impl ServicePlugin for ServerInfoFSPlugin { + fn name(&self) -> &str { + "serverinfofs" + } + + fn readme(&self) -> &str { + r#"ServerInfoFS Plugin - Server Metadata and Information + +This plugin provides runtime information about RAGFS server. 
+ +USAGE: + View server version: + cat /serverinfofs/version + + View server uptime: + cat /serverinfofs/uptime + + View server info: + cat /serverinfofs/server_info + + View runtime stats: + cat /serverinfofs/stats + +FILES: + /server_info - Complete server information (JSON) + /uptime - Server uptime since start + /version - Server version + /stats - Runtime statistics + /README - This file + +VERSION: 1.0.0 +"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // No validation needed + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + let fs = ServerInfoFileSystem::new(env!("CARGO_PKG_VERSION")); + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +/// Format duration as human-readable string +fn format_duration(duration: Duration) -> String { + let secs = duration.as_secs(); + let days = secs / 86400; + let hours = (secs % 86400) / 3600; + let minutes = (secs % 3600) / 60; + let seconds = secs % 60; + + if days > 0 { + format!("{}d{}h{}m{}s", days, hours, minutes, seconds) + } else if hours > 0 { + format!("{}h{}m{}s", hours, minutes, seconds) + } else if minutes > 0 { + format!("{}m{}s", minutes, seconds) + } else { + format!("{}s", seconds) + } +} + +/// Format timestamp as RFC3339 string +fn format_timestamp(secs: u64) -> String { + let s = secs; + let days = s / 86400; + let time_of_day = s % 86400; + let h = time_of_day / 3600; + let m = (time_of_day % 3600) / 60; + let sec = time_of_day % 60; + + let (year, month, day) = days_to_ymd(days); + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + year, month, day, h, m, sec + ) +} + +/// Convert days since Unix epoch to (year, month, day) +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + let z = days + 719468; + let era = z / 146097; + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 
100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} diff --git a/openviking/pyagfs/__init__.py b/openviking/pyagfs/__init__.py index a0d2a495b..728c115b3 100644 --- a/openviking/pyagfs/__init__.py +++ b/openviking/pyagfs/__init__.py @@ -2,8 +2,12 @@ __version__ = "0.1.7" +import glob +import importlib.util import logging import os +import sysconfig +from pathlib import Path from .client import AGFSClient, FileHandle from .exceptions import ( @@ -17,6 +21,9 @@ _logger = logging.getLogger(__name__) +# Directory that ships pre-built native libraries (Go .so/.dylib and Rust .so/.dylib). +_LIB_DIR = Path(__file__).resolve().parent.parent / "lib" + # --------------------------------------------------------------------------- # Binding implementation selection via RAGFS_IMPL environment variable. # @@ -28,11 +35,41 @@ _RAGFS_IMPL_ENV = os.environ.get("RAGFS_IMPL", "").lower() or None +def _find_ragfs_so(): + """Locate the ragfs_python native extension inside openviking/lib/. + + Returns the path to the ``.so`` / ``.dylib`` / ``.pyd`` file, or *None*. + """ + ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") or ".so" + # Exact match first: ragfs_python.cpython-312-darwin.so + exact = _LIB_DIR / f"ragfs_python{ext_suffix}" + if exact.exists(): + return str(exact) + # Glob fallback: ragfs_python.cpython-*.so / ragfs_python.*.pyd + for pattern in ("ragfs_python.cpython-*", "ragfs_python.*"): + matches = glob.glob(str(_LIB_DIR / pattern)) + if matches: + return matches[0] + return None + + def _load_rust_binding(): - """Attempt to load the Rust (PyO3) binding client.""" + """Attempt to load the Rust (PyO3) binding client. + + Searches openviking/lib/ for the pre-built native extension first, + then falls back to a pip-installed ``ragfs_python`` package. 
+ """ + so_path = _find_ragfs_so() + if so_path: + spec = importlib.util.spec_from_file_location("ragfs_python", so_path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod.RAGFSBindingClient, None + + # Fallback: maybe ragfs_python was pip-installed (dev environment) from ragfs_python import RAGFSBindingClient as _Rust - return _Rust, None # FileHandle not yet implemented in ragfs-python + return _Rust, None def _load_go_binding(): @@ -56,7 +93,7 @@ def _resolve_binding(impl: str): return client, fh except ImportError as exc: raise ImportError( - "RAGFS_IMPL=rust but ragfs_python is not installed: " + str(exc) + "RAGFS_IMPL=rust but ragfs_python native library is not available: " + str(exc) ) from exc if impl == "go": @@ -73,14 +110,14 @@ def _resolve_binding(impl: str): # Rust first, Go fallback, silent None if neither available try: client, fh = _load_rust_binding() - _logger.info("RAGFS_IMPL=auto: loaded Rust binding") + _logger.info("RAGFS_IMPL=auto: loaded Rust binding (ragfs-python)") return client, fh except ImportError: pass try: client, fh = _load_go_binding() - _logger.info("RAGFS_IMPL=auto: Rust unavailable, loaded Go binding") + _logger.info("RAGFS_IMPL=auto: Rust unavailable, loaded Go binding (libagfsbinding)") return client, fh except (ImportError, OSError): pass diff --git a/openviking/utils/agfs_utils.py b/openviking/utils/agfs_utils.py index cf9e1b5d3..deae50683 100644 --- a/openviking/utils/agfs_utils.py +++ b/openviking/utils/agfs_utils.py @@ -34,6 +34,8 @@ def create_agfs_client(agfs_config: Any) -> Any: from openviking.pyagfs import get_binding_client config_impl = getattr(agfs_config, "impl", "auto") + env_impl = os.environ.get("RAGFS_IMPL", "").lower() or None + effective_impl = env_impl or config_impl or "auto" AGFSBindingClient, _ = get_binding_client(config_impl) if AGFSBindingClient is None: @@ -45,7 +47,6 @@ def create_agfs_client(agfs_config: Any) -> Any: # Go ctypes binding needs AGFS_LIB_PATH 
and a shared library on disk. # Rust PyO3 binding is compiled into ragfs_python — skip library checks. - actual_lib_path = None try: from openviking.pyagfs.binding_client import ( AGFSBindingClient as _GoBindingClient, @@ -65,15 +66,17 @@ def create_agfs_client(agfs_config: Any) -> Any: try: from openviking.pyagfs.binding_client import _find_library - actual_lib_path = _find_library() + _find_library() except Exception: raise ImportError( "AGFS binding library not found. Please run 'pip install -e .' in the project root to build and install the AGFS SDK." ) client = AGFSBindingClient() - logger.info( - f"[AGFSUtils] Created AGFSBindingClient (impl={config_impl}, lib_path={actual_lib_path})" + binding_type = "Rust (ragfs-python)" if not is_go_binding else "Go (libagfsbinding)" + logger.warning( + f"[AGFS] Binding impl selected: {binding_type} " + f"(RAGFS_IMPL={effective_impl}, env={env_impl}, config={config_impl})" ) # Automatically mount backend for binding client diff --git a/pyproject.toml b/pyproject.toml index 4c9e9d54a..b0ff8d5c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,6 +200,8 @@ openviking = [ "lib/libagfsbinding.so", "lib/libagfsbinding.dylib", "lib/libagfsbinding.dll", + "lib/ragfs_python*.so", + "lib/ragfs_python*.pyd", "bin/ov", "bin/ov.exe", "storage/vectordb/engine/*.abi3.so", diff --git a/setup.py b/setup.py index b162775a6..2a062b553 100644 --- a/setup.py +++ b/setup.py @@ -75,6 +75,7 @@ class OpenVikingBuildExt(build_ext): def run(self): self.build_agfs_artifacts() self.build_ov_cli_artifact() + self.build_ragfs_python_artifact() self.cmake_executable = CMAKE_PATH for ext in self.extensions: @@ -374,6 +375,100 @@ def _build_ov_cli_artifact_impl(self, ov_cli_dir, binary_name, ov_target_binary) else: print("[Warning] Cargo not found. 
Cannot build ov CLI from source.") + def build_ragfs_python_artifact(self): + """Build ragfs-python (Rust AGFS binding) via maturin and copy the native + extension into ``openviking/lib/`` so it ships inside the openviking wheel. + + This is a best-effort build — the Go binding serves as fallback, + so failure here is non-fatal. + """ + ragfs_python_dir = Path("crates/ragfs-python").resolve() + ragfs_lib_dir = Path("openviking/lib").resolve() + + if not ragfs_python_dir.exists(): + print("[Info] ragfs-python source directory not found. Skipping.") + return + + if os.environ.get("OV_SKIP_RAGFS_BUILD") == "1": + print("[OK] Skipping ragfs-python build (OV_SKIP_RAGFS_BUILD=1)") + return + + maturin_cmd = shutil.which("maturin") + if not maturin_cmd: + print( + "[SKIP] maturin not found. ragfs-python (Rust binding) will not be built.\n" + " Install maturin to enable: pip install maturin\n" + " The Go binding will be used as fallback." + ) + return + + import tempfile + import zipfile + + with tempfile.TemporaryDirectory() as tmpdir: + try: + print("Building ragfs-python (Rust AGFS binding) via maturin...") + env = os.environ.copy() + build_args = [maturin_cmd, "build", "--release", "--out", tmpdir] + # Respect CARGO_BUILD_TARGET for cross-compilation + target = env.get("CARGO_BUILD_TARGET") + if target: + build_args.extend(["--target", target]) + + result = subprocess.run( + build_args, + cwd=str(ragfs_python_dir), + env=env, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.stdout: + print(result.stdout.decode("utf-8", errors="replace")) + if result.stderr: + print(result.stderr.decode("utf-8", errors="replace")) + + # Extract the native .so/.pyd from the built wheel. + whl_files = list(Path(tmpdir).glob("ragfs_python-*.whl")) + if not whl_files: + print("[Warning] maturin produced no wheel. 
Skipping ragfs-python.") + return + + ragfs_lib_dir.mkdir(parents=True, exist_ok=True) + extracted = False + with zipfile.ZipFile(str(whl_files[0])) as zf: + for name in zf.namelist(): + basename = Path(name).name + # Match: ragfs_python.cpython-312-darwin.so, ragfs_python.cp312-win_amd64.pyd, etc. + if basename.startswith("ragfs_python") and ( + basename.endswith(".so") or basename.endswith(".pyd") + ): + target_path = ragfs_lib_dir / basename + with zf.open(name) as src, open(target_path, "wb") as dst: + dst.write(src.read()) + if sys.platform != "win32": + os.chmod(str(target_path), 0o755) + print(f"[OK] ragfs-python: extracted {basename} -> {target_path}") + extracted = True + break + + if not extracted: + print("[Warning] Could not find ragfs_python .so/.pyd in built wheel.") + else: + self._copy_artifacts_to_build_lib(target_lib=target_path) + + except Exception as exc: + error_detail = "" + if isinstance(exc, subprocess.CalledProcessError): + if exc.stdout: + error_detail += exc.stdout.decode("utf-8", errors="replace") + if exc.stderr: + error_detail += exc.stderr.decode("utf-8", errors="replace") + print(f"[Warning] Failed to build ragfs-python: {exc}") + if error_detail: + print(error_detail) + print(" The Go binding will be used as fallback.") + def build_extension(self, ext): """Build a single Python native extension artifact using CMake.""" if getattr(self, "_engine_extensions_built", False): @@ -478,6 +573,8 @@ def finalize_options(self): "lib/libagfsbinding.so", "lib/libagfsbinding.dylib", "lib/libagfsbinding.dll", + "lib/ragfs_python*.so", + "lib/ragfs_python*.pyd", "bin/ov", "bin/ov.exe", "console/static/**/*", From f771df3f873500fe181dcb7c4fedeb76b6299eee Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:25:20 +0800 Subject: [PATCH 04/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- crates/ragfs/src/core/errors.rs | 7 +++++++ crates/ragfs/src/plugins/queuefs/mod.rs | 24 ++++++++++++++++++++++-- 2 files 
changed, 29 insertions(+), 2 deletions(-) diff --git a/crates/ragfs/src/core/errors.rs b/crates/ragfs/src/core/errors.rs index 53c58ea8e..b2f802842 100644 --- a/crates/ragfs/src/core/errors.rs +++ b/crates/ragfs/src/core/errors.rs @@ -5,6 +5,7 @@ //! and clear error messages. use std::io; +use serde_json; /// Result type alias for RAGFS operations pub type Result = std::result::Result; @@ -81,6 +82,12 @@ pub enum Error { Internal(String), } +impl From for Error { + fn from(err: serde_json::Error) -> Self { + Self::Serialization(err.to_string()) + } +} + impl Error { /// Create a NotFound error pub fn not_found(path: impl Into) -> Self { diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs index 9bb761f47..0b53e6719 100644 --- a/crates/ragfs/src/plugins/queuefs/mod.rs +++ b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -18,10 +18,18 @@ use crate::core::{ }; use async_trait::async_trait; use backend::{MemoryBackend, Message, QueueBackend}; +use serde::Serialize; use std::sync::Arc; use std::time::SystemTime; use tokio::sync::Mutex; +/// Dequeue response format (matches Go libagfsbinding format) +#[derive(Debug, Serialize)] +struct QueueMessage { + id: String, + data: String, +} + /// Parsed path information struct ParsedPath { queue_name: Option, @@ -148,13 +156,25 @@ impl FileSystem for QueueFileSystem { let msg = backend .dequeue(&queue_name)? .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; - Ok(msg.data) + // Return in Go libagfsbinding format: {"id": "...", "data": "..."} + let data_str = String::from_utf8_lossy(&msg.data).to_string(); + let response = QueueMessage { + id: msg.id, + data: data_str, + }; + Ok(serde_json::to_vec(&response)?) } "peek" => { let msg = backend .peek(&queue_name)? 
.ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; - Ok(msg.data.clone()) + // Return in Go libagfsbinding format: {"id": "...", "data": "..."} + let data_str = String::from_utf8_lossy(&msg.data).to_string(); + let response = QueueMessage { + id: msg.id.clone(), + data: data_str, + }; + Ok(serde_json::to_vec(&response)?) } "size" => { let size = backend.size(&queue_name)?; From a2b19a2477ab0b5d857a5ad052b309190ebe8bcc Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:36:09 +0800 Subject: [PATCH 05/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- crates/ragfs/src/plugins/queuefs/backend.rs | 14 ++++++++++++++ crates/ragfs/src/plugins/queuefs/mod.rs | 19 ++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/crates/ragfs/src/plugins/queuefs/backend.rs b/crates/ragfs/src/plugins/queuefs/backend.rs index 2fbf25dbd..8e6a1d57b 100644 --- a/crates/ragfs/src/plugins/queuefs/backend.rs +++ b/crates/ragfs/src/plugins/queuefs/backend.rs @@ -63,6 +63,9 @@ pub trait QueueBackend: Send + Sync { /// Get the last enqueue time for the queue fn get_last_enqueue_time(&self, queue_name: &str) -> Result; + + /// Acknowledge (delete) a message by ID + fn ack(&mut self, queue_name: &str, msg_id: &str) -> Result; } /// A single queue with its messages @@ -176,6 +179,17 @@ impl QueueBackend for MemoryBackend { Ok(queue.last_enqueue_time) } + + fn ack(&mut self, queue_name: &str, msg_id: &str) -> Result { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + // Find and remove message by ID + let original_len = queue.messages.len(); + queue.messages.retain(|msg| msg.id != msg_id); + Ok(queue.messages.len() != original_len) + } } #[cfg(test)] diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs index 0b53e6719..220853c62 100644 --- a/crates/ragfs/src/plugins/queuefs/mod.rs +++ 
b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -7,6 +7,7 @@ //! - `/queue_name/peek` - Read from this file to view the first message without removing it //! - `/queue_name/size` - Read from this file to get the current queue size //! - `/queue_name/clear` - Write to this file to clear all messages from the queue +//! - `/queue_name/ack` - Write message ID to this file to acknowledge and delete it mod backend; @@ -53,7 +54,7 @@ impl QueueFileSystem { /// Check if a name is a control operation fn is_control_operation(name: &str) -> bool { - matches!(name, "enqueue" | "dequeue" | "peek" | "size" | "clear") + matches!(name, "enqueue" | "dequeue" | "peek" | "size" | "clear" | "ack") } /// Normalize path by removing trailing slashes and ensuring it starts with / @@ -216,8 +217,13 @@ impl FileSystem for QueueFileSystem { backend.clear(&queue_name)?; Ok(0) } + "ack" => { + let msg_id = String::from_utf8_lossy(data).trim().to_string(); + backend.ack(&queue_name, &msg_id)?; + Ok(0) + } _ => Err(Error::InvalidOperation(format!( - "Cannot write to '{}'. Use enqueue or clear", + "Cannot write to '{}'. 
Use enqueue, clear, or ack", operation ))), } @@ -333,6 +339,13 @@ impl FileSystem for QueueFileSystem { mod_time: now, is_dir: false, }, + FileInfo { + name: "ack".to_string(), + size: 0, + mode: 0o222, + mod_time: now, + is_dir: false, + }, ]) } @@ -372,7 +385,7 @@ impl FileSystem for QueueFileSystem { Ok(FileInfo { name: operation.clone(), size: 0, - mode: if matches!(operation.as_str(), "enqueue" | "clear") { + mode: if matches!(operation.as_str(), "enqueue" | "clear" | "ack") { 0o222 } else { 0o444 From 330b97f588bc86691824a6ac95b20cb27d099e6b Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:44:31 +0800 Subject: [PATCH 06/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- crates/ragfs/ORIGIN.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 crates/ragfs/ORIGIN.md diff --git a/crates/ragfs/ORIGIN.md b/crates/ragfs/ORIGIN.md new file mode 100644 index 000000000..616fa2ee8 --- /dev/null +++ b/crates/ragfs/ORIGIN.md @@ -0,0 +1,11 @@ +# RAGFS Origin + +This crate (RAGFS) is a Rust reimplementation of the AGFS project originally authored by [c44pt0r](https://github.com/c44pt0r). + +## Source + +RAGFS is based on the Go implementation of AGFS located at `third_party/agfs/` in this repository. + +## License + +The original AGFS project is open source. This Rust implementation maintains compatibility with and references the original AGFS license. 
From 787cf5270446c6a95bd8f46485dfed9c5b7b13cf Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:56:57 +0800 Subject: [PATCH 07/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- Makefile | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 2e2045a74..a02586393 100644 --- a/Makefile +++ b/Makefile @@ -111,21 +111,22 @@ build: check-deps check-pip cd crates/ragfs-python && $$MATURIN_CMD build --release --out "$$TMPDIR" 2>&1; \ cd ../..; \ mkdir -p openviking/lib; \ - $(PYTHON) -c " \ -import zipfile, glob, shutil, os, sys; \ -whls = glob.glob(os.path.join('$$TMPDIR', 'ragfs_python-*.whl')); \ -assert whls, 'maturin produced no wheel'; \ -with zipfile.ZipFile(whls[0]) as zf: \ - for name in zf.namelist(): \ - bn = os.path.basename(name); \ - if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): \ - dst = os.path.join('openviking', 'lib', bn); \ - with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read()); \ - os.chmod(dst, 0o755); \ - print(f' [OK] ragfs-python: extracted {bn} -> {dst}'); \ - sys.exit(0); \ -print('[Warning] No ragfs_python .so/.pyd found in wheel'); sys.exit(1) \ - "; \ + echo "import zipfile, glob, shutil, os, sys" > /tmp/extract_ragfs.py; \ + echo "whls = glob.glob(os.path.join('$$TMPDIR', 'ragfs_python-*.whl'))" >> /tmp/extract_ragfs.py; \ + echo "assert whls, 'maturin produced no wheel'" >> /tmp/extract_ragfs.py; \ + echo "with zipfile.ZipFile(whls[0]) as zf:" >> /tmp/extract_ragfs.py; \ + echo " for name in zf.namelist():" >> /tmp/extract_ragfs.py; \ + echo " bn = os.path.basename(name)" >> /tmp/extract_ragfs.py; \ + echo " if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')):" >> /tmp/extract_ragfs.py; \ + echo " dst = os.path.join('openviking', 'lib', bn)" >> /tmp/extract_ragfs.py; \ + echo " with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read())" >> 
/tmp/extract_ragfs.py; \ + echo " os.chmod(dst, 0o755)" >> /tmp/extract_ragfs.py; \ + echo " print(f' [OK] ragfs-python: extracted {bn} -> {dst}')" >> /tmp/extract_ragfs.py; \ + echo " sys.exit(0)" >> /tmp/extract_ragfs.py; \ + echo "print('[Warning] No ragfs_python .so/.pyd found in wheel')" >> /tmp/extract_ragfs.py; \ + echo "sys.exit(1)" >> /tmp/extract_ragfs.py; \ + $(PYTHON) /tmp/extract_ragfs.py; \ + rm -f /tmp/extract_ragfs.py; \ rm -rf "$$TMPDIR"; \ else \ echo " [SKIP] maturin not found, ragfs-python (Rust binding) will not be built."; \ From a8d9a234d0b7924762baaa86c9b731ca9aa52fb3 Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 18:59:33 +0800 Subject: [PATCH 08/16] reorg: rewrite agfs with rust, and named with ragfs, keep License --- crates/ragfs/MIGRATION_PLAN.md | 976 --------------------------------- crates/ragfs/ORIGIN.md | 5 + 2 files changed, 5 insertions(+), 976 deletions(-) delete mode 100644 crates/ragfs/MIGRATION_PLAN.md diff --git a/crates/ragfs/MIGRATION_PLAN.md b/crates/ragfs/MIGRATION_PLAN.md deleted file mode 100644 index dd3fbfb81..000000000 --- a/crates/ragfs/MIGRATION_PLAN.md +++ /dev/null @@ -1,976 +0,0 @@ -# RAGFS Migration Plan -# Rust 实现的 AGFS 重构计划 - -**文档版本**: v1.0 -**创建日期**: 2026-04-03 -**目标**: 将 third_party/agfs (Go 实现迁移到 crates/ragfs (Rust 实现)) 以支持后续扩展 -**策略**: 允许 Go/Rust 双实现并存,支持自由切换和回退 -**致谢**: 谢谢 c44pt0r 的 AGFS 项目贡献了开源的 Go 实现,为本计划提供基础,我们会持续保持对原项目的 License 引用。 - ---- - -## 目录 - -1. [概述](#概述) -2. [架构设计](#架构设计) -3. [迁移阶段](#迁移阶段) -4. [纯 Rust 插件系统设计](#纯rust-插件系统设计) -5. [Go/Rust 切换机制](#go-rust-切换机制) -6. [技术选型](#技术选型) -7. [风险评估](#风险评估) -8. [里程碑](#里程碑) - ---- - -## 概述 - -### 背景 - -当前 AGFS 完全使用 Go 实现,包含约 33,000 行代码、14 种内置插件。随着 OpenViking 项目引入 Rust 生态(ov_cli),使用 Rust 重写 AGFS 可以: - -- 提升性能(无 GC,编译时优化) -- 增强安全性(内存安全保证) -- 统一技术栈(与 ov_cli 一致,移除项目对 Golang 的依赖) -- 简化部署(无需编译 Go 代码) - -### 设计原则 - -1. **渐进式迁移**: Go 和 Rust 版本可同时存在,独立运行 -2. **API 兼容性**: HTTP API 和 Python SDK 保持完全兼容 -3. **纯 Rust 插件**: 使用 trait 定义插件系统,不依赖 C ABI -4. 
**可观测性**: 完善的日志、指标和追踪支持,文件目录结构尽量与 Go 实现保持一致 -5. **测试驱动**: 每个模块都有对应的测试用例,确保功能一致 - -### 项目结构 - -``` -crates/ragfs/ -├── Cargo.toml # Workspace 配置 -├── MIGRATION_PLAN.md # 本文档 -├── src/ -│ ├── lib.rs # 库入口,导出公共 API -│ ├── core/ # 核心文件系统抽象 -│ │ ├── mod.rs -│ │ ├── filesystem.rs # FileSystem trait -│ │ ├── mountable.rs # MountableFS 实现 -│ │ ├── handle.rs # 文件句柄管理 -│ │ └── errors.rs # 错误类型定义 -│ ├── server/ # HTTP 服务器 -│ │ ├── mod.rs -│ │ ├── main.rs # 服务器入口 -│ │ ├── config.rs # 配置管理 -│ │ ├── handlers.rs # HTTP 处理器 -│ │ └── middleware.rs # 中间件 -│ ├── shell/ # 交互式 shell -│ │ ├── mod.rs -│ │ └── main.rs -│ └── plugins/ # 内置插件 -│ ├── mod.rs -│ ├── memfs/ -│ ├── kvfs/ -│ ├── queuefs/ -│ ├── s3fs/ -│ ├── sqlfs/ -│ └── ... -└── tests/ # 集成测试 -``` - ---- - -## 架构设计 - -### 核心组件 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ HTTP Client / SDK │ -└────────────────────────┬────────────────────────────────────┘ - │ HTTP/JSON API - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ HTTP Server │ -│ ┌────────────────────────────────────────────────────┐ │ -│ │ Router (axum) │ │ -│ │ - /api/v1/files │ │ -│ │ - /api/v1/directories │ │ -│ │ - /api/v1/mounts │ │ -│ │ - /api/v1/plugins │ │ -│ └────────────────────────────────────────────────────┘ │ -└────────────────────────┬────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ MountableFS │ -│ ┌────────────────────────────────────────────────────┐ │ -│ │ Radix Trie (路径路由) │ │ -│ │ /memfs -> MemFS Plugin │ │ -│ │ /kvfs -> KVFS Plugin │ │ -│ │ /queuefs -> QueueFS Plugin │ │ -│ └────────────────────────────────────────────────────┘ │ -└────────────────────────┬────────────────────────────────────┘ - │ - ┌──────────┼──────────┐ - ▼ ▼ ▼ - ┌──────┐ ┌──────┐ ┌──────┐ - │MemFS │ │KVFS │ │QueueFS│ - └──────┘ └──────┘ └──────┘ -``` - -### 核心数据结构 - -#### FileSystem Trait - -```rust -/// 文件系统抽象接口 -#[async_trait] -pub trait FileSystem: Send + Sync 
{ - /// 创建空文件 - async fn create(&self, path: &str) -> Result<()>; - - /// 创建目录 - async fn mkdir(&self, path: &str, mode: u32) -> Result<()>; - - /// 删除文件 - async fn remove(&self, path: &str) -> Result<()>; - - /// 递归删除 - async fn remove_all(&self, path: &str) -> Result<()>; - - /// 读取文件内容 - async fn read(&self, path: &str, offset: u64, size: u64) -> Result>; - - /// 写入文件 - async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result; - - /// 列出目录 - async fn read_dir(&self, path: &str) -> Result>; - - /// 获取文件信息 - async fn stat(&self, path: &str) -> Result; - - /// 重命名 - async fn rename(&self, old_path: &str, new_path: &str) -> Result<()>; - - /// 修改权限 - async fn chmod(&self, path: &str, mode: u32) -> Result<()>; -} -``` - -#### ServicePlugin Trait - -```rust -/// 服务插件接口 -#[async_trait] -pub trait ServicePlugin: Send + Sync { - /// 插件名称 - fn name(&self) -> &str; - - /// 验证配置 - async fn validate(&self, config: &PluginConfig) -> Result<()>; - - /// 初始化插件 - async fn initialize(&self, config: PluginConfig) -> Result>; - - /// 获取文档 - fn get_readme(&self) -> &str; - - /// 获取配置参数定义 - fn get_config_params(&self) -> Vec; - - /// 关闭插件 - async fn shutdown(&self) -> Result<()>; -} -``` - ---- - -## 迁移阶段 - -### 阶段 1: 基础设施 (2-3 周) ✅ 已完成 - -**目标**: 建立项目骨架和核心抽象 - -#### 任务清单 - -- [x] 创建 Cargo.toml 和项目结构 -- [x] 定义 `FileSystem` trait (`src/core/filesystem.rs`) -- [x] 定义 `ServicePlugin` trait (`src/core/plugin.rs`) -- [x] 定义错误类型 (`src/core/errors.rs`) -- [x] 定义文件信息结构 (`src/core/types.rs`) -- [x] 创建配置模块 (`src/server/config.rs`) -- [x] 编写基础单元测试 - -#### 验收标准 - -- ✅ 可以编译 `ragfs` 库 -- ✅ 所有 trait 有清晰的文档和测试 -- ✅ 错误处理完善 - ---- - -### 阶段 2: MountableFS 核心实现 (2 周) ✅ 已完成 - -**目标**: 实现核心的挂载文件系统 - -#### 任务清单 - -- [x] 实现 Radix Trie 路由 (使用 `radix_trie` crate) -- [x] 实现 `MountableFS` 结构体 -- [x] 实现插件注册机制 -- [x] 实现路径解析和路由 (`find_mount`) -- [x] 实现所有 `FileSystem` 方法的委托 -- [x] 实现并发安全 (使用 `Arc>`) -- [x] 编写并发测试 - -#### 对应 Go 代码 - -- 
`third_party/agfs/agfs-server/pkg/mountablefs/mountablefs.go` (967 行) - -#### 验收标准 - -- ✅ 可以动态挂载/卸载插件 -- [x] 高并发场景下无数据竞争 -- [x] 所有文件系统操作正确委托到挂载点 - ---- - -### 阶段 3: HTTP 服务器 (2 周) ✅ 已完成 - -**目标**: 实现与 Go 版本兼容的 HTTP API - -#### 任务清单 - -- [x] 实现 axum 路由 -- [x] 实现文件操作处理器 (`/api/v1/files`) -- [x] 实现目录操作处理器 (`/api/v1/directories`) -- [x] 实现挂载管理处理器 (`/api/v1/mount`, `/api/v1/unmount`) -- [x] 实现健康检查 (`/api/v1/health`) -- [x] 实现日志中间件 (tower TraceLayer) -- [ ] 实现指标收集 -- [x] 添加 CORS 支持 - -#### API 兼容性 - -必须与 Go 版本保持完全兼容的端点: - -``` -GET /api/v1/files?path=... # 读取文件 -PUT /api/v1/files?path=... # 写入文件 -POST /api/v1/files?path=... # 创建文件 -DELETE /api/v1/files?path=... # 删除文件 -GET /api/v1/stat?path=... # 获取元数据 - -GET /api/v1/directories?path=... # 列出目录 -POST /api/v1/directories?path=... # 创建目录 - -GET /api/v1/mounts # 列出挂载点 -POST /api/v1/mount # 挂载插件 -POST /api/v1/unmount # 卸载插件 - -GET /api/v1/health # 健康检查 -``` - -#### 验收标准 - -- ✅ 所有 API 端点可访问 -- ✅ API 响应格式与 Go 版本一致 -- ✅ 可以使用现有的 Python SDK 和 agfs-shell 连接 - ---- - -### 阶段 4: 基础插件 - MemFS (1 周) ✅ 已完成 - -**目标**: 实现最简单的内存文件系统 - -#### 任务清单 - -- [x] 实现 `MemFSPlugin` 结构体 -- [x] 实现内存存储 (使用 `Arc>>`) -- [x] 实现所有文件系统操作 -- [x] 编写完整的测试用例 -- [ ] 添加性能基准测试 - -#### 对应 Go 代码 - -- `third_party/agfs/agfs-server/pkg/plugins/memfs/memfs.go` -- `third_party/agfs/agfs-server/pkg/plugins/memfs/memoryfs.go` - -#### 验收标准 - -- ✅ 可以创建/读取/写入/删除文件 -- ✅ 可以列出目录 -- ✅ 数据存储在内存中,重启后丢失 -- [ ] 性能与 Go 版本相当或更好 - ---- - -### 阶段 5: 基础插件 - KVFS (1 周) ✅ 已完成 - -**目标**:**: 实现键值存储文件系统 - -#### 任务清单 - -- [x] 实现 `KVFSPlugin` 结构体 -- [x] 实现键值存储后端 (内存) -- [x] 实现文件名到键的映射 -- [ ] 实现持久化(可选) - -#### 对应 Go 代码 - -- `third_party/agfs/agfs-server/pkg/plugins/kvfs/` - -#### 验收标准 - -- ✅ 写入操作将文件内容作为值存储 -- ✅ 读取操作返回对应键的值 -- ✅ 列出目录返回所有键 - ---- - -### 阶段 6: 基础插件 - QueueFS (1-2 周) ✅ 已完成 - -**目标**: 实现消息队列文件系统 - -#### 任务清单 - -- [x] 实现 `QueueFSPlugin` 结构体 -- [x] 实现队列后端 (内存 VecDeque) -- [x] 实现控制文件: - - `enqueue`: 写入消息 - - `dequeue`: 读取并移除消息 - - `peek`: 查看但不移除 - - `size`: 获取队列长度 - - `clear`: 清空队列 -- 
[x] 实现消息 ID 生成 (UUID) -- [x] 编写并发测试 (多生产者/多消费者) - -#### 对应 Go 代码 - -- `third_party/agfs/agfs-server/pkg/plugins/queuefs/` - -#### 验收标准 - -- ✅ 多个写入者可以并发入队 -- ✅ 多个读取者可以并发出队 -- ✅ 消息不丢失、不重复 -- ✅ 性能满足实际使用需求 - ---- - -### 阶段 7: 高级插件 🔄 进行中 - -**目标**: 实现生产环境必需的插件 - -#### S3FS ✅ 已完成 - -- [x] 集成 AWS SDK for Rust (`aws-sdk-s3`, feature-gated under `s3`) -- [x] 实现文件上传/下载 (get_object, put_object, get_object_range) -- [x] 实现目录列举 (list_objects with prefix/delimiter, pagination) -- [x] 支持大文件分片上传 (batch delete 1000 per call) -- [x] S3Client wrapper (client.rs): 全面支持 AWS S3/MinIO/LocalStack/TOS -- [x] Dual-layer LRU cache (cache.rs): ListDirCache (30s TTL) + StatCache (60s TTL, 5x capacity) -- [x] S3FileSystem: 完整 FileSystem trait 实现 -- [x] S3FSPlugin: 13 个配置参数, validate, readme -- [x] 3 种 Directory Marker Modes: none/empty/nonempty (TOS 兼容) -- [x] Feature-gated: `cargo build --features s3` (不影响无 S3 需求的构建) -- [x] 9 个单元测试 (cache + path normalization + plugin validate) - -#### SQLFS ✅ 已完成 - -- [x] 使用 `rusqlite` -- [x] 支持 SQLite (MySQL/TiDB 预留接口) -- [x] 实现文件元数据存储 -- [x] 实现文件数据存储 (数据库 BLOB) -- [x] LRU 缓存 (目录列表) -- [x] Mutex 线程安全 -- [x] 17 个单元测试 - -#### ProxyFS - -- [ ] 实现对远程 AGFS 的代理 -- [ ] 实现请求转发 -- [ ] 实现连接池 - -#### 其他插件 - -- [ ] HTTPFS (提供文件下载服务) -- [ ] StreamFS (流式数据) -- [ ] HeartbeatFS (心跳监控) -- [ ] LocalFS (本地文件系统挂载) - ---- - -### 阶段 8: 配置系统 ⏳ 部分完成 - -**目标**: 支持与 Go 版本兼容的 YAML 配置 - -#### 任务清单 - -- [x] 定义配置结构体 (使用 `serde`) -- [x] 实现配置文件加载 (`config.yaml`) -- [x] 支持环境变量覆盖 -- [x] 实现配置验证 (基础) -- [ ] 提供示例配置文件 -- [ ] 支持从 YAML 配置自动挂载插件 - -#### 配置格式 (兼容 Go 版本) - -```yaml -server: - address: ":8080" - log_level: "info" - -plugins: - memfs: - enabled: true - path: "/memfs" - - kvfs: - enabled: true - path: "/kvfs" - - queuefs: - enabled: true - path: "/queuefs" -``` - ---- - -### 阶段 9: Shell 客户端 (2 周) - -**目标**: 实现交互式 shell (可选) - -#### 任务清单 - -- [ ] 实现 REPL (使用 `rustyline`) -- [ ] 实现内置命令 (`ls`, `cat`, `echo`, `mkdir`, `rm`, 等) -- [ ] 实现命令补全 -- [ ] 支持脚本执行 -- [ ] 添加颜色输出 - ---- - -### 阶段 10: 
测试与优化 (2-3 周) - -**目标**: 完善测试覆盖和性能优化 - -#### 任务清单 - -- [ ] 编写集成测试 (端到端) -- [ ] 性能基准测试 (与 Go 版本对比) -- [ ] 压力测试 (高并发场景) -- [ ] 内存泄漏检测 -- [ ] 代码覆盖率 > 80% -- [ ] 文档完善 - ---- - -## 纯 Rust 插件系统设计 - -### 设计理念 - -1. **类型安全**: 使用 trait 确保编译时类型检查 -2. **零抽象成本**: 没有虚函数调用开销 (通过 monomorphization) -3. **异步优先**: 所有操作都是异步的 -4. **易于测试**: 插件可以 mock 和单元测试 - -### 插件接口 - -```rust -/// 插件配置参数元数据 -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ConfigParameter { - pub name: String, - pub r#type: String, // "string", "int", "bool", "string_list" - pub required: bool, - pub default: Option, - pub description: String, -} - -/// 插件配置值 -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PluginConfig { - pub name: String, - pub mount_path: String, - pub params: HashMap, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum ConfigValue { - String(String), - Int(i64), - Bool(bool), - StringList(Vec), -} - -/// 服务插件 trait -#[async_trait] -pub trait ServicePlugin: Send + Sync + 'static { - /// 插件唯一名称 - fn name(&self) -> &str; - - /// 插件版本 - fn version(&self) -> &str { - "0.1.0" - } - - /// 插件描述 - fn description(&self) -> &str { - "" - } - - /// 获取 README 文档 - fn readme(&self) -> &str; - - /// 验证配置 - async fn validate(&self, config: &PluginConfig) -> Result<()>; - - /// 初始化插件,返回文件系统实例 - async fn initialize( - &self, - config: PluginConfig - ) -> Result>; - - /// 关闭插件 - async fn shutdown(&self) -> Result<()>; - - /// 获取支持的配置参数 - fn config_params(&self) -> &[ConfigParameter]; -} - -/// 插件注册表 -pub struct PluginRegistry { - plugins: HashMap>, -} - -impl PluginRegistry { - pub fn new() -> Self { - Self { - plugins: HashMap::new(), - } - } - - /// 注册插件 - pub fn register(&mut self, plugin: P) { - let name = plugin.name().to_string(); - self.plugins.insert(name, Box::new(plugin)); - } - - /// 获取插件 - pub fn get(&self, name: &str) -> Option<&dyn ServicePlugin> { - self.plugins.get(name).map(|p| p.as_ref()) - } - - /// 列出所有插件 - pub fn list(&self) -> Vec<&str> 
{ - self.plugins.keys().map(|s| s.as_str()).collect() - } -} -``` - -### 插件实现示例: MemFS - -```rust -use crate::core::{FileSystem, ServicePlugin, PluginConfig, ConfigParameter}; -use std::collections::HashMap; -use std::sync::Arc; -use tokio::sync::RwLock; - -/// MemFS 插件 -pub struct MemFSPlugin; - -impl ServicePlugin for MemFSPlugin { - fn name(&self) -> &str { - "memfs" - } - - fn readme(&self) -> &str { - r#"MemFS - In-memory File System - -A simple file system that stores data in memory. All data is lost -when the server restarts. - -Usage: - echo "hello" > /memfs/test.txt - cat /memfs/test.txt -"# - } - - async fn validate(&self, config: &PluginConfig) -> Result<()> { - // MemFS 没有必需的配置参数 - Ok(()) - } - - async fn initialize( - &self, - _config: PluginConfig - ) -> Result> { - Ok(Box::new(MemFileSystem::new())) - } - - async fn shutdown(&self) -> Result<()> { - Ok(()) - } - - fn config_params(&self) -> &[ConfigParameter] { - &[] - } -} - -/// MemFS 实现文件系统 -struct MemFileSystem { - files: Arc>>>, -} - -impl MemFileSystem { - fn new() -> Self { - Self { - files: Arc::new(RwLock::new(HashMap::new())), - } - } -} - -#[async_trait] -impl FileSystem for MemFileSystem { - async fn create(&self, path: &str) -> Result<()> { - let mut files = self.files.write().await; - files.insert(path.to_string(), vec![]); - Ok(()) - } - - async fn read(&self, path: &str, _offset: u64, _size: u64) -> Result> { - let files = self.files.read().await; - files.get(path) - .cloned() - .ok_or_else(|| Error::NotFound(path.to_string())) - } - - async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { - let mut files = self.files.write().await; - let file = files.entry(path.to_string()).or_insert_with(Vec::new); - - let offset = offset as usize; - if offset >= file.len() { - file.resize(offset, 0); - } - - let write_end = offset + data.len(); - file[offset..write_end].copy_from_slice(data); - Ok(data.len() as u64) - } - - // ... 
其他方法实现 -} -``` - ---- - -## Go/Rust 切换机制 - -### 策略 - -Go 和 Rust 版本作为独立进程运行,通过以下方式切换: - -1. **环境变量控制** - ```bash - export AGFS_IMPL=rust # 使用 Rust 版本 - export AGFS_IMPL=go # 使用 Go 版本 (默认) - ``` - -2. **统一入口脚本** - ```bash - # scripts/agfs-server - if [[ "$AGFS_IMPL" == "rust" ]]; then - cargo run --bin ragfs-server -- "$@" - else - go run ./third_party/agfs/agfs-server/cmd/server/main.go "$@" - fi - ``` - -3. **Makefile 目标** - ```makefile - # 运行 Go 版本 - run-agfs-go: - cd third_party/agfs/agfs-server && make run - - # 运行 Rust 版本 - run-agfs-rust: - cargo run --bin ragfs-server - - # 运行 (根据 AGFS_IMPL 环境变量) - run-agfs: - @echo "Running AGFS (impl=$(AGFS_IMPL))..." - @if [ "$(AGFS_IMPL)" = "rust" ]; then \ - cargo run --bin ragfs-server; \ - else \ - cd third_party/agfs/agfs-server && make run; \ - fi - ``` - -### 兼容性保证 - -#### 1. API 兼容 - -两个版本必须实现完全相同的 HTTP API,包括: -- 请求格式 -- 响应格式 -- 错误码 -- 超时行为 - -#### 2. 配置兼容 - -使用相同的 `config.yaml` 格式,Rust 版本可以读取 Go 版本的配置。 - -#### 3. SDK 兼容 - -Python SDK 应该能够无缝连接到任一版本。 - -### 回退机制 - -如果 Rust 版本出现问题,可以通过以下方式快速回退: - -```bash -# 立即切换回 Go 版本 -export AGFS_IMPL=go - -# 重启服务 -systemctl restart agfs -``` - -### A/B 测试 - -支持同时运行两个版本进行对比: - -```bash -# 在不同端口运行 -cargo run --bin ragfs-server -- --port 8081 -cd third_party/agfs/agfs-server && go run ./cmd/server --port 8080 - -# 运行对比测试 -./scripts/compare_servers.sh http://localhost:8080 http://localhost:8081 -``` - ---- - -## 技术选型 - -### 运行时与网络 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| HTTP 服务器 | net/http | axum | 基于 tower 生态,类型安全,性能优秀 | -| 异步运行时 | goroutines | tokio | 最成熟,生态完善 | -| HTTP 客户端 | net/http | reqwest/hyper | 与 tokio 集成良好 | - -### 数据结构 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| 并发控制 | sync.RWMutex | tokio::sync::RwLock | 异步安全 | -| 路径路由 | go-immutable-radix | radix_trie | 无锁读取,性能好 | -| UUID | google/uuid | uuid (serde) | 功能完整 | - -### 序列化与配置 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| JSON | 
encoding/json | serde_json | serde 生态,编译时检查 | -| YAML | gopkg.in/yaml.v3 | serde_yaml | 基于 serde | -| TOML | - | toml (可选) | 配置文件可选格式 | - -### 数据库 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| SQLite | mattn/go-sqlite3 | rusqlite | 纯 Rust,无 CGO | -| SQL 通用 | - | sqlx | 编译时查询检查 | - -### 云服务 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| AWS SDK | aws-sdk-go-v2 | aws-sdk-rust | 官方支持,性能好 | -| S3 | aws-sdk-go-v2/service/s3 | aws-s3 | 简化的 S3 客户端 | - -### 日志与追踪 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| 日志 | logrus | tracing | 结构化日志,与 tokio 集成 | -| 追踪 | - | tracing-opentelemetry | OpenTelemetry 支持 | - -### 开发工具 - -| 组件 | Go 实现 | Rust 选择 | 理由 | -|------|---------|-----------|------| -| CLI 解析 | flag | clap | 功能强大,derive 宏 | -| 测试 | testing | built-in + criterion | 内置测试 + 性能基准 | -| 格式化 | gofmt | rustfmt | 官方工具 | -| Linting | golangci-lint | clippy | 编译器内建 | - ---- - -## 风险评估 - -### 高风险 - -1. **插件系统设计变更** - - 风险: 从 C ABI 改为 Rust trait,外部插件需要重写 - - 缓解: 提供 Go 版本插件作为参考,提供迁移指南 - -2. **性能回归** - - 风险: 初始实现可能性能不如 Go 版本 - - 缓解: 并行开发,进行性能对比和优化 - -3. **API 不兼容** - - 风险: 实现细节差异导致行为不同 - - 缓解: 使用相同的测试套件测试两个版本 - -### 中风险 - -1. **异步编程复杂度** - - 风险: Tokio 异步模型比 goroutine 更复杂 - - 缓解: 团队培训,使用成熟的模式 - -2. **学习曲线** - - 风险: 团队成员不熟悉 Rust - - 缓解: 提供培训,结对编程 - -3. **依赖更新** - - 风险: Rust 生态快速变化,API 可能变动 - - 缓解: 使用稳定版本,定期更新 - -### 低风险 - -1. **测试覆盖** - - 风险: 重写时遗漏测试 - - 缓解: 测试驱动开发,代码评审 - -2. 
**文档缺失** - - 风险: 代码复杂但文档不完善 - - 缓解: 要求所有公共 API 有文档 - ---- - -## 里程碑 - -### Milestone 0.1: MVP (4 周) ✅ 已完成 - -**目标**: 可以运行的最小可用版本 - -- [x] 基础设施和核心 trait -- [x] MountableFS 实现 -- [x] HTTP 服务器 -- [x] MemFS 插件 -- [x] API 兼容性验证 - -**验收**: 可以运行 Rust 版本服务器,与 Python SDK 交互 - -**完成情况**: -- `core/` 模块完整: filesystem.rs, mountable.rs, plugin.rs, errors.rs, types.rs -- MountableFS 支持动态 mount/unmount,路径路由,并发安全 -- HTTP 服务器 (axum): 完整 REST API (files/directories/mounts/health) -- MemFS 插件: 完整文件系统操作 + 6 个测试 -- 所有 62 个单元测试通过 - ---- - -### Milestone 0.2: 基础插件完整 (2 周) ✅ 已完成 - -**目标**: 完成所有基础插件 - -- [x] KVFS — 内存键值存储,支持嵌套 key,6 个测试 -- [x] QueueFS — 基于控制文件的消息队列 (enqueue/dequeue/peek/size/clear),UUID 消息 ID,并发安全,8 个测试 -- [x] 基础配置系统 — CLI args (clap) + YAML 配置文件加载 + 环境变量 - -**验收**: 可以使用所有基础插件功能 ✅ - ---- - -### Milestone 0.3: 生产就绪 (4 周) 🔄 进行中 - -**目标**: 可以在生产环境使用 - -- [x] SQLFS — SQLite 后端,Mutex 线程安全,LRU 缓存,5MB 文件限制,17 个测试 -- [x] S3FS — AWS SDK for Rust, S3/MinIO/TOS 兼容, dual-layer cache, feature-gated, 9 个测试 -- [ ] 完善的日志和指标 -- [ ] 完整的测试覆盖 -- [ ] 文档完善 - -**当前进展**: -- SQLFS 已完成并通过所有测试 (backend.rs + cache.rs + mod.rs) -- S3FS 已完成: client.rs + cache.rs + mod.rs, feature-gated under `s3` -- SQLFSPlugin 和 S3FSPlugin 已注册到 server/main.rs -- 全部 71 个单元测试通过 (含 s3 feature) -- 下一步: 完善日志/指标、测试覆盖、文档 - -**验收**: 可以在生产环境部署并切换 - ---- - -### Milestone 1.0: 功能完整 (8 周) 🔄 进行中 - -**目标**: 功能与 Go 版本对等 - -- [x] 提供 Python wrapper (ragfs-python),用于 OpenViking 内联集成 -- [ ] 支持切换和功能回滚,将默认实现切换为 Rust 版本 - -**当前进展**: -- ragfs-python crate 已完成 (crates/ragfs-python/): PyO3 native binding -- RAGFSBindingClient 类,API 兼容 Go AGFSBindingClient -- 支持所有核心操作: ls/read/write/create/mkdir/rm/stat/mv/chmod/touch -- 支持 mount/unmount/mounts 插件管理 -- 所有内置插件可用: memfs, kvfs, queuefs, sqlfs -- maturin develop 构建集成 -- openviking/pyagfs/__init__.py 已更新: Rust 优先 -> Go fallback -- Python 端到端测试全部通过 (memfs + sqlfs + kvfs + queuefs) - ---- - -## 参考资源 - -### Go 源代码 - -- Server: `third_party/agfs/agfs-server/` -- SDK: `third_party/agfs/agfs-sdk/` -- 
Shell: `third_party/agfs/agfs-shell/` -- FUSE: `third_party/agfs/agfs-fuse/` - -### Rust 生态 - -- axum: https://docs.rs/axum/latest/axum/ -- tokio: https://tokio.rs/ -- sqlx: https://docs.rs/sqlx/latest/sqlx/ -- aws-sdk-rust: https://github.com/awslabs/aws-sdk-rust - -### 相关项目 - -- Riker: https://github.com/riker-rs/riker (Actor 模型) -- async-std: https://async.rs/ (替代 tokio 的选择) - ---- - -## 更新日志 - -| 日期 | 版本 | 变更内容 | -|------|------|---------| -| 2026-04-03 | v1.0 | 初始计划创建 | -| 2026-04-03 | v1.1 | 标注 Milestone 0.1/0.2 完成,阶段 1-6 完成;SQLFS 修复 18 个编译错误并通过所有测试;开始 Milestone 0.3 | -| 2026-04-03 | v1.2 | S3FS 完成并通过 MinIO 端到端验证;ragfs-python PyO3 binding 完成 (Milestone 1.0 开始) | - ---- - -## 贡献 - -本计划是动态文档,随着项目进展持续更新。更新时请: - -1. 在更新日志中记录变更 -2. 更新相关章节 -3. 同步到团队 - ---- - -## 联系方式 - -如有问题或建议,请联系 OpenViking 团队。 diff --git a/crates/ragfs/ORIGIN.md b/crates/ragfs/ORIGIN.md index 616fa2ee8..453dbac44 100644 --- a/crates/ragfs/ORIGIN.md +++ b/crates/ragfs/ORIGIN.md @@ -9,3 +9,8 @@ RAGFS is based on the Go implementation of AGFS located at `third_party/agfs/` i ## License The original AGFS project is open source. This Rust implementation maintains compatibility with and references the original AGFS license. 
+ +## Switch +export RAGFS_IMPL=auto (default to rust, with fallback to go) +export RAGFS_IMPL=rust +export RAGFS_IMPL=go \ No newline at end of file From 2c626bea0af97bac514b85b61d414dc03c522b0a Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 19:54:42 +0800 Subject: [PATCH 09/16] fix: grep level limit --- crates/ov_cli/src/client.rs | 2 ++ crates/ov_cli/src/commands/search.rs | 3 ++- crates/ov_cli/src/main.rs | 10 ++++++++-- openviking/client/local.py | 2 ++ openviking/server/routers/search.py | 2 ++ openviking/service/fs_service.py | 2 ++ openviking/storage/viking_fs.py | 24 ++++++++++++++++++++---- 7 files changed, 38 insertions(+), 7 deletions(-) diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 766878e24..b41dd361f 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -518,6 +518,7 @@ impl HttpClient { pattern: &str, ignore_case: bool, node_limit: i32, + level_limit: i32, ) -> Result { let body = serde_json::json!({ "uri": uri, @@ -525,6 +526,7 @@ impl HttpClient { "pattern": pattern, "case_insensitive": ignore_case, "node_limit": node_limit, + "level_limit": level_limit, }); self.post("/api/v1/search/grep", &body).await } diff --git a/crates/ov_cli/src/commands/search.rs b/crates/ov_cli/src/commands/search.rs index 02828fc02..d9bf713a0 100644 --- a/crates/ov_cli/src/commands/search.rs +++ b/crates/ov_cli/src/commands/search.rs @@ -48,11 +48,12 @@ pub async fn grep( pattern: &str, ignore_case: bool, node_limit: i32, + level_limit: i32, output_format: OutputFormat, compact: bool, ) -> Result<()> { let result = client - .grep(uri, exclude_uri, pattern, ignore_case, node_limit) + .grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit) .await?; output_success(&result, output_format, compact); Ok(()) diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 3bae0bf51..eab8715de 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -421,6 +421,9 @@ 
enum Commands { default_value = "256" )] node_limit: i32, + /// Maximum depth level to traverse (default: 10) + #[arg(short = 'L', long = "level-limit", default_value = "10")] + level_limit: i32, }, /// Run file glob pattern search Glob { @@ -808,7 +811,8 @@ async fn main() { pattern, ignore_case, node_limit, - } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, ctx).await, + level_limit, + } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit, ctx).await, Commands::Glob { pattern, @@ -1433,9 +1437,10 @@ async fn handle_grep( pattern: String, ignore_case: bool, node_limit: i32, + level_limit: i32, ctx: CliContext, ) -> Result<()> { - let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit)]; + let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit), format!("-L {}", level_limit)]; if let Some(excluded) = &exclude_uri { params.push(format!("-x {}", excluded)); } @@ -1452,6 +1457,7 @@ async fn handle_grep( &pattern, ignore_case, node_limit, + level_limit, ctx.output_format, ctx.compact, ) diff --git a/openviking/client/local.py b/openviking/client/local.py index 94674c484..241d8a193 100644 --- a/openviking/client/local.py +++ b/openviking/client/local.py @@ -327,6 +327,7 @@ async def grep( case_insensitive: bool = False, node_limit: Optional[int] = None, exclude_uri: Optional[str] = None, + level_limit: int = 10, ) -> Dict[str, Any]: """Content search with pattern.""" return await self._service.fs.grep( @@ -336,6 +337,7 @@ async def grep( case_insensitive=case_insensitive, node_limit=node_limit, exclude_uri=exclude_uri, + level_limit=level_limit, ) async def glob(self, pattern: str, uri: str = "viking://") -> Dict[str, Any]: diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py index a0aa97dbf..39f6feda7 100644 --- a/openviking/server/routers/search.py +++ b/openviking/server/routers/search.py @@ -67,6 +67,7 @@ class GrepRequest(BaseModel): pattern: 
str case_insensitive: bool = False node_limit: Optional[int] = None + level_limit: int = 10 class GlobRequest(BaseModel): @@ -162,6 +163,7 @@ async def grep( exclude_uri=request.exclude_uri, case_insensitive=request.case_insensitive, node_limit=request.node_limit, + level_limit=request.level_limit, ) return Response(status="ok", result=result) diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index ef37bb57c..a333ac6bf 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -165,6 +165,7 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, + level_limit: int = 10, ) -> Dict: """Content search.""" viking_fs = self._ensure_initialized() @@ -174,6 +175,7 @@ async def grep( exclude_uri=exclude_uri, case_insensitive=case_insensitive, node_limit=node_limit, + level_limit=level_limit, ctx=ctx, ) diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index 6619ca177..577063ca7 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -539,11 +539,21 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, + level_limit: int = 10, ctx: Optional[RequestContext] = None, ) -> Dict: """Content search by pattern or keywords. Grep search implemented at VikingFS layer, supports encrypted files. 
+ + Args: + uri: Viking URI + pattern: Regular expression pattern to search for + exclude_uri: Optional URI prefix to exclude from search + case_insensitive: Whether to perform case-insensitive matching + node_limit: Maximum number of results to return + level_limit: Maximum depth level to traverse (default: 10) + ctx: Request context """ self._ensure_access(uri, ctx) @@ -555,11 +565,15 @@ async def grep( self._ensure_access(excluded_prefix, ctx) results = [] + files_scanned = 0 - async def search_recursive(current_uri: str): + async def search_recursive(current_uri: str, current_depth: int): if node_limit and len(results) >= node_limit: return + if current_depth > level_limit: + return + normalized_current_uri = self._normalize_uri(current_uri) if excluded_prefix and ( normalized_current_uri == excluded_prefix @@ -585,8 +599,10 @@ async def search_recursive(current_uri: str): continue if entry.get("isDir"): - await search_recursive(entry_uri) + await search_recursive(entry_uri, current_depth + 1) else: + nonlocal files_scanned + files_scanned += 1 try: content = await self.read(entry_uri, ctx=ctx) if isinstance(content, bytes): @@ -607,9 +623,9 @@ async def search_recursive(current_uri: str): except Exception as e: logger.debug(f"Failed to grep {entry_uri}: {e}") - await search_recursive(uri) + await search_recursive(uri, 0) - return {"matches": results, "count": len(results)} + return {"matches": results, "match_count": len(results), "files_scanned": files_scanned} async def stat(self, uri: str, ctx: Optional[RequestContext] = None) -> Dict[str, Any]: """ From f2be89b8d954aa054c6ca21506e3857556f5a626 Mon Sep 17 00:00:00 2001 From: openviking Date: Fri, 3 Apr 2026 20:04:06 +0800 Subject: [PATCH 10/16] fix: grep root --- crates/ov_cli/src/main.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index eab8715de..8bcea277f 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ 
-1440,6 +1440,20 @@ async fn handle_grep( level_limit: i32, ctx: CliContext, ) -> Result<()> { + // Prevent grep from root directory to avoid excessive server load and timeouts + if uri == "viking://" || uri == "viking:///" { + eprintln!( + "Error: Cannot grep from root directory 'viking://'.\n\ + Grep from root would search across all scopes (resources, user, agent, session, queue, temp),\n\ + which may cause server timeout or excessive load.\n\ + Please specify a more specific scope, e.g.:\n\ + ov grep --uri=viking://resources '{}'\n\ + ov grep --uri=viking://user '{}'", + pattern, pattern + ); + std::process::exit(1); + } + let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit), format!("-L {}", level_limit)]; if let Some(excluded) = &exclude_uri { params.push(format!("-x {}", excluded)); From bc1d27a3c3ea678604c79168a195fc612a6081da Mon Sep 17 00:00:00 2001 From: openviking Date: Sat, 4 Apr 2026 23:36:12 +0800 Subject: [PATCH 11/16] fix: import error --- openviking/pyagfs/__init__.py | 68 ++++++++++++++++++++++------------- uv.lock | 49 +++++++++++++++++++------ 2 files changed, 81 insertions(+), 36 deletions(-) diff --git a/openviking/pyagfs/__init__.py b/openviking/pyagfs/__init__.py index 728c115b3..75704f3d2 100644 --- a/openviking/pyagfs/__init__.py +++ b/openviking/pyagfs/__init__.py @@ -40,16 +40,19 @@ def _find_ragfs_so(): Returns the path to the ``.so`` / ``.dylib`` / ``.pyd`` file, or *None*. 
""" - ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") or ".so" - # Exact match first: ragfs_python.cpython-312-darwin.so - exact = _LIB_DIR / f"ragfs_python{ext_suffix}" - if exact.exists(): - return str(exact) - # Glob fallback: ragfs_python.cpython-*.so / ragfs_python.*.pyd - for pattern in ("ragfs_python.cpython-*", "ragfs_python.*"): - matches = glob.glob(str(_LIB_DIR / pattern)) - if matches: - return matches[0] + try: + ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") or ".so" + # Exact match first: ragfs_python.cpython-312-darwin.so + exact = _LIB_DIR / f"ragfs_python{ext_suffix}" + if exact.exists(): + return str(exact) + # Glob fallback: ragfs_python.cpython-*.so / ragfs_python.*.pyd + for pattern in ("ragfs_python.cpython-*", "ragfs_python.*"): + matches = glob.glob(str(_LIB_DIR / pattern)) + if matches: + return matches[0] + except Exception: + pass return None @@ -59,25 +62,31 @@ def _load_rust_binding(): Searches openviking/lib/ for the pre-built native extension first, then falls back to a pip-installed ``ragfs_python`` package. 
""" - so_path = _find_ragfs_so() - if so_path: - spec = importlib.util.spec_from_file_location("ragfs_python", so_path) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - return mod.RAGFSBindingClient, None + try: + so_path = _find_ragfs_so() + if so_path: + spec = importlib.util.spec_from_file_location("ragfs_python", so_path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod.RAGFSBindingClient, None - # Fallback: maybe ragfs_python was pip-installed (dev environment) - from ragfs_python import RAGFSBindingClient as _Rust + # Fallback: maybe ragfs_python was pip-installed (dev environment) + from ragfs_python import RAGFSBindingClient as _Rust - return _Rust, None + return _Rust, None + except Exception: + raise ImportError("Rust binding not available") def _load_go_binding(): """Attempt to load the Go (ctypes) binding client.""" - from .binding_client import AGFSBindingClient as _Go - from .binding_client import FileHandle as _GoFH + try: + from .binding_client import AGFSBindingClient as _Go + from .binding_client import FileHandle as _GoFH - return _Go, _GoFH + return _Go, _GoFH + except Exception: + raise ImportError("Go binding not available") def _resolve_binding(impl: str): @@ -112,14 +121,14 @@ def _resolve_binding(impl: str): client, fh = _load_rust_binding() _logger.info("RAGFS_IMPL=auto: loaded Rust binding (ragfs-python)") return client, fh - except ImportError: + except Exception: pass try: client, fh = _load_go_binding() _logger.info("RAGFS_IMPL=auto: Rust unavailable, loaded Go binding (libagfsbinding)") return client, fh - except (ImportError, OSError): + except Exception: pass _logger.warning( @@ -143,7 +152,16 @@ def get_binding_client(config_impl: str = "auto"): # Module-level defaults (used when importing ``from openviking.pyagfs import AGFSBindingClient``) -AGFSBindingClient, BindingFileHandle = _resolve_binding(_RAGFS_IMPL_ENV or "auto") +# Ensure module import never fails, 
even if bindings are unavailable +try: + AGFSBindingClient, BindingFileHandle = _resolve_binding(_RAGFS_IMPL_ENV or "auto") +except Exception: + _logger.warning( + "Failed to initialize AGFSBindingClient during module import; " + "AGFSBindingClient will be None. Use get_binding_client() for explicit handling." + ) + AGFSBindingClient = None + BindingFileHandle = None __all__ = [ "AGFSClient", diff --git a/uv.lock b/uv.lock index 4b6d38957..4e85bb7ff 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,7 +1550,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = 
"https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1558,7 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1567,7 +1565,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = 
"https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1576,7 +1573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = 
"https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1585,7 +1581,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = 
"https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1594,7 +1589,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { 
url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -3397,6 +3391,15 @@ dependencies = [ ] [package.optional-dependencies] +benchmark = [ + { name = "datasets" }, + { name = "langchain" }, + { name = "langchain-core" }, + { name = "langchain-openai" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tiktoken" }, +] bot = [ { name = "beautifulsoup4" }, { name = "croniter" }, @@ -3489,6 +3492,7 @@ build = [ dev = [ { name = "mypy" }, { name = "ruff" }, + { name = "setuptools-scm" }, ] doc = [ { name = "myst-parser", version = "4.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -3544,6 +3548,7 @@ requires-dist = [ { name = "cmake", marker = "extra == 'build'", specifier = ">=3.15" }, { name = "croniter", marker = "extra == 'bot'", specifier = ">=2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, + { name = "datasets", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "ddgs", marker = "extra == 'bot'", specifier = ">=9.0.0" }, @@ -3561,9 +3566,12 @@ 
requires-dist = [ { name = "hvac", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "json-repair", specifier = ">=0.25.0" }, + { name = "langchain", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-core", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-openai", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, { name = "langfuse", marker = "extra == 'bot-langfuse'", specifier = ">=3.0.0" }, { name = "lark-oapi", marker = "extra == 'bot-feishu'", specifier = ">=1.0.0" }, - { name = "litellm", specifier = ">=1.0.0,<1.82.6" }, + { name = "litellm", specifier = ">=1.0.0,<1.83.1" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markdownify", specifier = ">=0.11.0" }, { name = "msgpack", marker = "extra == 'bot'", specifier = ">=1.0.8" }, @@ -3576,6 +3584,7 @@ requires-dist = [ { name = "opensandbox", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "opensandbox-server", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "openviking", extras = ["bot", "bot-dingtalk", "bot-feishu", "bot-fuse", "bot-langfuse", "bot-opencode", "bot-qq", "bot-sandbox", "bot-slack", "bot-telegram"], marker = "extra == 'bot-full'" }, + { name = "pandas", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "pdfminer-six", specifier = ">=20251230" }, @@ -3607,12 +3616,14 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, { name = "setuptools", marker = "extra == 'build'", specifier = ">=61.0" }, { name = "setuptools-scm", marker = "extra == 'build'", specifier = ">=8.0" }, + { name = "setuptools-scm", marker = "extra == 'dev'", specifier = ">=10.0.0" }, { name = "slack-sdk", marker = "extra == 'bot-slack'", specifier = ">=3.26.0" 
}, { name = "socksio", marker = "extra == 'bot'", specifier = ">=1.0.0" }, { name = "sphinx", marker = "extra == 'doc'", specifier = ">=7.0.0" }, { name = "sphinx-rtd-theme", marker = "extra == 'doc'", specifier = ">=1.3.0" }, { name = "tabulate", specifier = ">=0.9.0" }, { name = "tavily-python", marker = "extra == 'bot'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'benchmark'", specifier = ">=0.5.0" }, { name = "tree-sitter", specifier = ">=0.23.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.0" }, { name = "tree-sitter-cpp", specifier = ">=0.23.0" }, @@ -3635,7 +3646,7 @@ requires-dist = [ { name = "xlrd", specifier = ">=2.0.1" }, { name = "xxhash", specifier = ">=3.0.0" }, ] -provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full"] +provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full", "benchmark"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] @@ -5332,16 +5343,18 @@ wheels = [ [[package]] name = "setuptools-scm" -version = "9.2.2" +version = "10.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, { name = "setuptools" }, { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "vcs-versioning" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7b/b1/19587742aad604f1988a8a362e660e8c3ac03adccdb71c96d86526e5eb62/setuptools_scm-9.2.2.tar.gz", hash = "sha256:1c674ab4665686a0887d7e24c03ab25f24201c213e82ea689d2f3e169ef7ef57", size = 203385, upload-time = "2025-10-19T22:08:05.608Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/a5/b1/2a6a8ecd6f9e263754036a0b573360bdbd6873b595725e49e11139722041/setuptools_scm-10.0.5.tar.gz", hash = "sha256:bbba8fe754516cdefd017f4456721775e6ef9662bd7887fb52ae26813d4838c3", size = 56748, upload-time = "2026-03-27T15:57:05.751Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/ea/ac2bf868899d0d2e82ef72d350d97a846110c709bacf2d968431576ca915/setuptools_scm-9.2.2-py3-none-any.whl", hash = "sha256:30e8f84d2ab1ba7cb0e653429b179395d0c33775d54807fc5f1dd6671801aef7", size = 62975, upload-time = "2025-10-19T22:08:04.007Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e1/342c4434df56aa537f6ce7647eefee521d96fbb828b08acd709865767652/setuptools_scm-10.0.5-py3-none-any.whl", hash = "sha256:f611037d8aae618221503b8fa89319f073438252ae3420e01c9ceec249131a0a", size = 21695, upload-time = "2026-03-27T15:57:03.969Z" }, ] [[package]] @@ -6180,6 +6193,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" }, ] +[[package]] +name = "vcs-versioning" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/42/d97a7795055677961c63a1eef8e7b19d5968ed992ed3a70ab8eb012efad8/vcs_versioning-1.1.1.tar.gz", hash = "sha256:fabd75a3cab7dd8ac02fe24a3a9ba936bf258667b5a62ed468c9a1da0f5775bc", size = 97575, upload-time = "2026-03-27T20:42:41.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/60/73603fbcdbe5e803855bcce4414f94eaeed449083bd8183e67161af78188/vcs_versioning-1.1.1-py3-none-any.whl", hash = 
"sha256:b541e2ba79fc6aaa3850f8a7f88af43d97c1c80649c01142ee4146eddbc599e4", size = 79851, upload-time = "2026-03-27T20:42:40.45Z" }, +] + [[package]] name = "volcengine" version = "1.0.216" From 20a7110bc05e8535d0312648fbddd4673e7198ff Mon Sep 17 00:00:00 2001 From: openviking Date: Sat, 4 Apr 2026 23:55:51 +0800 Subject: [PATCH 12/16] fix: rust code optimazation --- crates/ragfs/src/core/mountable.rs | 46 ++++++++++++----------- crates/ragfs/src/plugins/kvfs/mod.rs | 44 ++++++++++++++++++++++ crates/ragfs/src/plugins/memfs/mod.rs | 37 +++++++++++++++++- crates/ragfs/src/plugins/queuefs/mod.rs | 23 +++++++++--- crates/ragfs/src/plugins/sqlfs/backend.rs | 8 ++-- openviking/storage/viking_fs.py | 7 +++- 6 files changed, 134 insertions(+), 31 deletions(-) diff --git a/crates/ragfs/src/core/mountable.rs b/crates/ragfs/src/core/mountable.rs index 29bdd25f0..7bee90cfd 100644 --- a/crates/ragfs/src/core/mountable.rs +++ b/crates/ragfs/src/core/mountable.rs @@ -155,34 +155,38 @@ impl MountableFS { let normalized_path = normalize_path(path); let mounts = self.mounts.read().await; - // Find the longest matching prefix - let mut best_match: Option<(&String, &MountInfo)> = None; - - for (mount_path, mount_info) in mounts.iter() { - if normalized_path.starts_with(mount_path.as_str()) { - if let Some((best_path, _)) = best_match { - if mount_path.len() > best_path.len() { - best_match = Some((mount_path, mount_info)); - } - } else { - best_match = Some((mount_path, mount_info)); - } - } + // Find the longest matching prefix using radix trie + // Check for exact match first + if let Some(mount_info) = mounts.get(&normalized_path) { + return Ok((mount_info.clone(), "/".to_string())); } - match best_match { - Some((mount_path, mount_info)) => { - // Calculate relative path - let relative_path = if normalized_path.len() == mount_path.len() { - "/".to_string() + // Iterate through ancestors to find longest prefix match + // Start with the longest possible prefix and work backwards + 
let mut current = normalized_path.as_str(); + loop { + if let Some(mount_info) = mounts.get(current) { + let relative_path = if current == "/" { + normalized_path.clone() } else { - normalized_path[mount_path.len()..].to_string() + normalized_path[current.len()..].to_string() }; + return Ok((mount_info.clone(), relative_path)); + } - Ok((mount_info.clone(), relative_path)) + if current == "/" { + break; + } + + // Find parent path by removing last component + match current.rfind('/') { + Some(0) => current = "/", + Some(pos) => current = &current[..pos], + None => break, } - None => Err(Error::MountPointNotFound(normalized_path)), } + + Err(Error::MountPointNotFound(normalized_path)) } } diff --git a/crates/ragfs/src/plugins/kvfs/mod.rs b/crates/ragfs/src/plugins/kvfs/mod.rs index 6d981ee3f..3ced5969c 100644 --- a/crates/ragfs/src/plugins/kvfs/mod.rs +++ b/crates/ragfs/src/plugins/kvfs/mod.rs @@ -291,18 +291,62 @@ impl FileSystem for KVFileSystem { let new_key = Self::path_to_key(new_path); let mut store = self.store.write().await; + // Check old key exists let entry = store .get(&old_key) .ok_or_else(|| Error::not_found(old_path))? 
.clone(); + // Check new key doesn't exist if store.contains_key(&new_key) { return Err(Error::already_exists(new_path)); } + // Collect all child keys with old prefix + let old_prefix = if old_key == "/" { + "".to_string() + } else { + format!("{}/", old_key) + }; + let new_prefix = if new_key == "/" { + "".to_string() + } else { + format!("{}/", new_key) + }; + + let mut to_move = Vec::new(); + for key in store.keys() { + if key == &old_key { + continue; + } + if !old_prefix.is_empty() && key.starts_with(&old_prefix) { + // Check for conflicts with new path + let new_child_key = format!("{}{}", new_prefix, &key[old_prefix.len()..]); + if store.contains_key(&new_child_key) { + // Convert back to path for error message + let new_child_path = if new_child_key == "/" { + "/".to_string() + } else { + format!("/{}", new_child_key) + }; + return Err(Error::already_exists(&new_child_path)); + } + to_move.push(key.clone()); + } + } + + // Move the main entry store.remove(&old_key); store.insert(new_key, entry); + // Move all child entries + for old_child_key in to_move { + let new_child_key = format!("{}{}", new_prefix, &old_child_key[old_prefix.len()..]); + if let Some(child_entry) = store.remove(&old_child_key) { + store.insert(new_child_key, child_entry); + } + } + Ok(()) } diff --git a/crates/ragfs/src/plugins/memfs/mod.rs b/crates/ragfs/src/plugins/memfs/mod.rs index 301461269..3d9757a73 100644 --- a/crates/ragfs/src/plugins/memfs/mod.rs +++ b/crates/ragfs/src/plugins/memfs/mod.rs @@ -400,10 +400,45 @@ impl FileSystem for MemFileSystem { } } - // Move entry + // Collect all child entries if renaming a directory + let old_prefix = if old_normalized == "/" { + "/".to_string() + } else { + format!("{}/", old_normalized) + }; + let new_prefix = if new_normalized == "/" { + "/".to_string() + } else { + format!("{}/", new_normalized) + }; + + let mut to_move = Vec::new(); + for (path, _) in entries.iter() { + if path == &old_normalized { + continue; + } + if 
path.starts_with(&old_prefix) { + // Check for conflicts with new path + let new_child_path = format!("{}{}", new_prefix, &path[old_prefix.len()..]); + if entries.contains_key(&new_child_path) { + return Err(Error::already_exists(&new_child_path)); + } + to_move.push(path.clone()); + } + } + + // Move the main entry entries.remove(&old_normalized); entries.insert(new_normalized, entry); + // Move all child entries + for old_child_path in to_move { + let new_child_path = format!("{}{}", new_prefix, &old_child_path[old_prefix.len()..]); + if let Some(child_entry) = entries.remove(&old_child_path) { + entries.insert(new_child_path, child_entry); + } + } + Ok(()) } diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs index 220853c62..e851ec03f 100644 --- a/crates/ragfs/src/plugins/queuefs/mod.rs +++ b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -500,6 +500,14 @@ impl ServicePlugin for QueueFSPlugin { #[cfg(test)] mod tests { use super::*; + use serde::Deserialize; + + /// Helper struct to deserialize queue messages in tests + #[derive(Debug, Deserialize)] + struct TestQueueMessage { + id: String, + data: String, + } #[tokio::test] async fn test_queuefs_enqueue_dequeue() { @@ -521,10 +529,12 @@ mod tests { // Dequeue messages let result1 = fs.read("/test/dequeue", 0, 0).await.unwrap(); - assert_eq!(result1, data1); + let msg1: TestQueueMessage = serde_json::from_slice(&result1).unwrap(); + assert_eq!(msg1.data.as_bytes(), data1); let result2 = fs.read("/test/dequeue", 0, 0).await.unwrap(); - assert_eq!(result2, data2); + let msg2: TestQueueMessage = serde_json::from_slice(&result2).unwrap(); + assert_eq!(msg2.data.as_bytes(), data2); // Queue should be empty let result = fs.read("/test/dequeue", 0, 0).await; @@ -546,14 +556,17 @@ mod tests { // Peek should return the message without removing it let result1 = fs.read("/test/peek", 0, 0).await.unwrap(); - assert_eq!(result1, data); + let msg1: TestQueueMessage = 
serde_json::from_slice(&result1).unwrap(); + assert_eq!(msg1.data.as_bytes(), data); let result2 = fs.read("/test/peek", 0, 0).await.unwrap(); - assert_eq!(result2, data); + let msg2: TestQueueMessage = serde_json::from_slice(&result2).unwrap(); + assert_eq!(msg2.data.as_bytes(), data); // Dequeue should still work let result3 = fs.read("/test/dequeue", 0, 0).await.unwrap(); - assert_eq!(result3, data); + let msg3: TestQueueMessage = serde_json::from_slice(&result3).unwrap(); + assert_eq!(msg3.data.as_bytes(), data); } #[tokio::test] diff --git a/crates/ragfs/src/plugins/sqlfs/backend.rs b/crates/ragfs/src/plugins/sqlfs/backend.rs index 5beb84cc7..7c32dc3e3 100644 --- a/crates/ragfs/src/plugins/sqlfs/backend.rs +++ b/crates/ragfs/src/plugins/sqlfs/backend.rs @@ -157,9 +157,11 @@ impl DatabaseBackend for SQLiteBackend { .prepare_cached("SELECT COUNT(*) FROM files WHERE path = ?1") .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; - let count: i64 = stmt - .query_row(params![path], |row| row.get(0)) - .unwrap_or(0); + let count: i64 = match stmt.query_row(params![path], |row| row.get(0)) { + Ok(count) => count, + Err(rusqlite::Error::QueryReturnedNoRows) => 0, + Err(e) => return Err(Error::internal(format!("query error: {}", e))), + }; Ok(count > 0) } diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index 577063ca7..c6cef5dfd 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -625,7 +625,12 @@ async def search_recursive(current_uri: str, current_depth: int): await search_recursive(uri, 0) - return {"matches": results, "match_count": len(results), "files_scanned": files_scanned} + return { + "matches": results, + "count": len(results), + "match_count": len(results), + "files_scanned": files_scanned, + } async def stat(self, uri: str, ctx: Optional[RequestContext] = None) -> Dict[str, Any]: """ From 328f6aeee418d43f3870fd094ba5febb74e8bd76 Mon Sep 17 00:00:00 2001 From: openviking Date: 
Sat, 4 Apr 2026 23:58:45 +0800 Subject: [PATCH 13/16] fix: CI error --- .github/workflows/api_test.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml index f82e562e1..189dfc7e6 100644 --- a/.github/workflows/api_test.yml +++ b/.github/workflows/api_test.yml @@ -57,14 +57,6 @@ jobs: with: python-version: '3.10' - - name: Cache Go modules - uses: actions/cache@v5 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go- - - name: Cache C++ extensions uses: actions/cache@v5 with: @@ -94,7 +86,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v6 with: - go-version: '1.22' + go-version: '1.25.1' - name: Install system dependencies (Ubuntu) if: runner.os == 'Linux' From 7595048ad95eeb0b5122a98b7235ff29dad52289 Mon Sep 17 00:00:00 2001 From: openviking Date: Sun, 5 Apr 2026 00:06:31 +0800 Subject: [PATCH 14/16] fix: CI go mod cache --- .github/workflows/api_test.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml index 189dfc7e6..2c94b5bb1 100644 --- a/.github/workflows/api_test.yml +++ b/.github/workflows/api_test.yml @@ -57,6 +57,15 @@ jobs: with: python-version: '3.10' + - name: Cache Go modules + uses: actions/cache@v5 + continue-on-error: true + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('third_party/agfs/**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + - name: Cache C++ extensions uses: actions/cache@v5 with: From 3c51dc77befe3f5f3ccde052cfddb45c26d45a86 Mon Sep 17 00:00:00 2001 From: openviking Date: Sun, 5 Apr 2026 00:10:46 +0800 Subject: [PATCH 15/16] fix: grep level limit --- openviking/client/local.py | 2 +- openviking/server/routers/search.py | 2 +- openviking/service/fs_service.py | 2 +- openviking/storage/viking_fs.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/openviking/client/local.py b/openviking/client/local.py index 241d8a193..a994843a6 100644 --- a/openviking/client/local.py +++ b/openviking/client/local.py @@ -327,7 +327,7 @@ async def grep( case_insensitive: bool = False, node_limit: Optional[int] = None, exclude_uri: Optional[str] = None, - level_limit: int = 10, + level_limit: int = 5, ) -> Dict[str, Any]: """Content search with pattern.""" return await self._service.fs.grep( diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py index 39f6feda7..f8c14ead8 100644 --- a/openviking/server/routers/search.py +++ b/openviking/server/routers/search.py @@ -67,7 +67,7 @@ class GrepRequest(BaseModel): pattern: str case_insensitive: bool = False node_limit: Optional[int] = None - level_limit: int = 10 + level_limit: int = 5 class GlobRequest(BaseModel): diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index a333ac6bf..02a909ca7 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -165,7 +165,7 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, - level_limit: int = 10, + level_limit: int = 5, ) -> Dict: """Content search.""" viking_fs = self._ensure_initialized() diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index c6cef5dfd..3cbdfa439 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -539,7 +539,7 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, - level_limit: int = 10, + level_limit: int = 5, ctx: Optional[RequestContext] = None, ) -> Dict: """Content search by pattern or keywords. 
@@ -552,7 +552,7 @@ async def grep( exclude_uri: Optional URI prefix to exclude from search case_insensitive: Whether to perform case-insensitive matching node_limit: Maximum number of results to return - level_limit: Maximum depth level to traverse (default: 10) + level_limit: Maximum depth level to traverse (default: 5) ctx: Request context """ self._ensure_access(uri, ctx) From a2d3a46457ec123f14687e0bbf11623d82c76717 Mon Sep 17 00:00:00 2001 From: openviking Date: Sun, 5 Apr 2026 14:03:17 +0800 Subject: [PATCH 16/16] fix: CI --- .github/workflows/api_test.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml index 2c94b5bb1..dacfa4d6d 100644 --- a/.github/workflows/api_test.yml +++ b/.github/workflows/api_test.yml @@ -66,14 +66,6 @@ jobs: restore-keys: | ${{ runner.os }}-go- - - name: Cache C++ extensions - uses: actions/cache@v5 - with: - path: openviking/pyagfs - key: ${{ runner.os }}-cpp-${{ hashFiles('**/CMakeLists.txt', '**/*.cpp', '**/*.h') }} - restore-keys: | - ${{ runner.os }}-cpp- - - name: Cache Python dependencies (Unix) if: runner.os != 'Windows' uses: actions/cache@v5