diff --git a/.github/workflows/ci-pipeline.yaml b/.github/workflows/ci-pipeline.yaml index bf8c66e..ef5c025 100644 --- a/.github/workflows/ci-pipeline.yaml +++ b/.github/workflows/ci-pipeline.yaml @@ -30,6 +30,9 @@ jobs: fetch-depth: 0 persist-credentials: false + - name: Free up storage + run: sudo rm -rf /usr/local/lib/android + - name: Build container and push to local registry env: CONTAINER: ${{ env.NAME }}:${{ env.TAG }} @@ -63,6 +66,9 @@ jobs: fetch-depth: 0 persist-credentials: false + - name: Free up storage + run: sudo rm -rf /usr/local/lib/android + - name: Download image uses: actions/download-artifact@v5 with: @@ -89,32 +95,32 @@ jobs: -e POSTGRES_PASSWORD=pass \ -e POSTGRES_DB=mydb \ -p 5432:5432 \ - ankane/pgvector + pgvector/pgvector:0.8.1-pg18-trixie - name: Start Redis if: matrix.db == 'redis' run: | podman run -d --name redis-test \ -p 6379:6379 \ - redis/redis-stack-server:6.2.6-v19 + redis/redis-stack-server:7.2.0-v19 - name: Start Elasticsearch if: matrix.db == 'elastic' run: | podman run -d --name es-test \ -e "discovery.type=single-node" \ - -e "xpack.security.enabled=true" \ + -e "xpack.security.enabled=false" \ + -e "xpack.security.http.ssl.enabled=false" \ -e "ELASTIC_PASSWORD=changeme" \ - -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ -p 9200:9200 \ - elasticsearch:8.11.1 + docker.elastic.co/elasticsearch/elasticsearch:8.12.1 - name: Start Qdrant if: matrix.db == 'qdrant' run: | podman run -d --name qdrant-test \ -p 6333:6333 \ - qdrant/qdrant + qdrant/qdrant:latest-unprivileged - name: Wait for DB to start run: sleep 30 diff --git a/README.md b/README.md index c0966c0..a978b80 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # 📚 vector-embedder -![Version: 1.0.0](https://img.shields.io/badge/Version-1.0.0-informational?style=flat-square) +![Version: 1.0.1](https://img.shields.io/badge/Version-1.0.1-informational?style=flat-square) [![Quay Repository](https://img.shields.io/badge/Quay.io-vector--embedder-blue?logo=quay)](https://quay.io/repository/validatedpatterns/vector-embedder) [![CI Pipeline](https://github.com/validatedpatterns-sandbox/vector-embedder/actions/workflows/ci-pipeline.yaml/badge.svg?branch=main)](https://github.com/validatedpatterns-sandbox/vector-embedder/actions/workflows/ci-pipeline.yaml) diff --git a/requirements.txt b/requirements.txt index a6ecb1f..400c0e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,47 +8,48 @@ aiofiles==24.1.0 # via unstructured-client aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.8 +aiohttp==3.13.0 # via langchain-community -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp annotated-types==0.7.0 # via pydantic -anyio==4.9.0 - # via httpx +anyio==4.11.0 + # via + # httpx + # langchain-redis asyncpg==0.30.0 # via langchain-postgres -attrs==25.3.0 +attrs==25.4.0 # via aiohttp -azure-core==1.34.0 +azure-core==1.35.1 # via azure-identity -azure-identity==1.23.0 +azure-identity==1.25.1 # via langchain-sqlserver backoff==2.2.1 # via unstructured -beautifulsoup4==4.13.4 +beautifulsoup4==4.14.2 # via # -r requirements.in # unstructured -certifi==2025.4.26 +certifi==2025.10.5 # via # elastic-transport # httpcore # httpx + # langchain-redis # requests -cffi==1.17.1 +cffi==2.0.0 # via cryptography -chardet==5.2.0 - # via unstructured -charset-normalizer==3.4.2 - # via requests -click==8.2.1 +charset-normalizer==3.4.3 + # via + # requests + # unstructured +click==8.3.0 # via # nltk # python-oxmsg -coloredlogs==15.0.1 - # via redisvl -cryptography==45.0.3 +cryptography==46.0.2 # via # azure-identity # msal @@ -60,34 +61,34 @@ dataclasses-json==0.6.7 # unstructured elastic-transport==8.17.1 # via elasticsearch -elasticsearch==8.18.1 +elasticsearch==8.19.1 # via langchain-elasticsearch -emoji==2.14.1 +emoji==2.15.0 # via unstructured -filelock==3.18.0 +filelock==3.19.1 # via # huggingface-hub # torch # transformers filetype==1.2.0 # via unstructured -frozenlist==1.6.2 +frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==2025.5.1 +fsspec==2025.9.0 # via # huggingface-hub # torch -greenlet==3.2.2 +greenlet==3.2.4 # via sqlalchemy -grpcio==1.72.1 +grpcio==1.75.1 # via qdrant-client h11==0.16.0 # via httpcore -h2==4.2.0 +h2==4.3.0 # via httpx -hf-xet==1.1.3 +hf-xet==1.1.10 # via # -r requirements.in # huggingface-hub @@ -96,22 +97,23 @@ hpack==4.1.0 html5lib==1.1 # via unstructured httpcore==1.0.9 - # via httpx + # via + # httpx + # langchain-redis + # unstructured-client httpx==0.28.1 # via # langsmith # qdrant-client # unstructured-client -httpx-sse==0.4.0 +httpx-sse==0.4.2 # via langchain-community -huggingface-hub==0.32.4 +huggingface-hub==0.35.3 # via # langchain-huggingface # sentence-transformers # tokenizers # transformers -humanfriendly==10.0 - # via coloredlogs hyperframe==6.1.0 # via h2 idna==3.10 @@ -124,21 +126,23 @@ jinja2==3.1.6 # via # langchain-redis # torch -joblib==1.5.1 +joblib==1.5.2 # via # nltk # scikit-learn jsonpatch==1.33 # via langchain-core +jsonpath-ng==1.7.0 + # via redisvl jsonpointer==3.0.0 # via jsonpatch -langchain==0.3.25 +langchain==0.3.27 # via # -r requirements.in # langchain-community -langchain-community==0.3.24 +langchain-community==0.3.31 # via -r requirements.in -langchain-core==0.3.63 +langchain-core==0.3.78 # via # langchain # langchain-community @@ -151,61 +155,58 @@ langchain-core==0.3.63 # langchain-text-splitters langchain-elasticsearch==0.3.2 # via -r requirements.in -langchain-huggingface==0.2.0 +langchain-huggingface==0.3.1 # via -r requirements.in -langchain-postgres==0.0.14 +langchain-postgres==0.0.15 # via -r requirements.in -langchain-qdrant==0.2.0 +langchain-qdrant==0.2.1 # via -r requirements.in -langchain-redis==0.2.0 +langchain-redis==0.2.4 # via -r requirements.in langchain-sqlserver==0.1.2 # via -r requirements.in -langchain-text-splitters==0.3.8 +langchain-text-splitters==0.3.11 # via langchain langdetect==1.0.9 # via unstructured -langsmith==0.3.44 +langsmith==0.4.33 # via # langchain # langchain-community # langchain-core -lxml==5.4.0 +lxml==6.0.2 # via unstructured -markdown==3.8 +markdown==3.9 # via unstructured -markupsafe==3.0.2 +markupsafe==3.0.3 # via jinja2 marshmallow==3.26.1 # via dataclasses-json -ml-dtypes==0.4.1 +ml-dtypes==0.5.3 # via redisvl mpmath==1.3.0 # via sympy -msal==1.32.3 +msal==1.34.0 # via # azure-identity # msal-extensions msal-extensions==1.3.1 # via azure-identity -multidict==6.4.4 +multidict==6.7.0 # via # aiohttp # yarl mypy-extensions==1.1.0 # via typing-inspect -nest-asyncio==1.6.0 - # via unstructured-client networkx==3.5 # via torch -nltk==3.9.1 +nltk==3.9.2 # via unstructured numpy==1.26.4 # via # elasticsearch # langchain-community # langchain-postgres - # langchain-redis # langchain-sqlserver # ml-dtypes # pgvector @@ -215,48 +216,48 @@ numpy==1.26.4 # scipy # transformers # unstructured -nvidia-cublas-cu12==12.6.4.1 +nvidia-cublas-cu12==12.8.4.1 # via # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch -nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-cupti-cu12==12.8.90 # via torch -nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-nvrtc-cu12==12.8.93 # via torch -nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.8.90 # via torch -nvidia-cudnn-cu12==9.5.1.17 +nvidia-cudnn-cu12==9.10.2.21 # via torch -nvidia-cufft-cu12==11.3.0.4 +nvidia-cufft-cu12==11.3.3.83 # via torch -nvidia-cufile-cu12==1.11.1.6 +nvidia-cufile-cu12==1.13.1.3 # via torch -nvidia-curand-cu12==10.3.7.77 +nvidia-curand-cu12==10.3.9.90 # via torch -nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusolver-cu12==11.7.3.90 # via torch -nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparse-cu12==12.5.8.93 # via # nvidia-cusolver-cu12 # torch -nvidia-cusparselt-cu12==0.6.3 +nvidia-cusparselt-cu12==0.7.1 # via torch -nvidia-nccl-cu12==2.26.2 +nvidia-nccl-cu12==2.27.3 # via torch -nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvjitlink-cu12==12.8.93 # via # nvidia-cufft-cu12 # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 # torch -nvidia-nvtx-cu12==12.6.77 +nvidia-nvtx-cu12==12.8.90 # via torch olefile==0.47 # via python-oxmsg -orjson==3.10.18 +orjson==3.11.3 # via langsmith -packaging==24.2 +packaging==25.0 # via # huggingface-hub # langchain-core @@ -265,27 +266,29 @@ packaging==24.2 # transformers pgvector==0.3.6 # via langchain-postgres -pillow==11.2.1 +pillow==11.3.0 # via sentence-transformers -portalocker==2.10.1 +ply==3.11 + # via jsonpath-ng +portalocker==3.2.0 # via qdrant-client -propcache==0.3.1 +propcache==0.4.0 # via # aiohttp # yarl -protobuf==6.31.1 +protobuf==6.32.1 # via qdrant-client -psutil==7.0.0 +psutil==7.1.0 # via unstructured -psycopg==3.2.9 +psycopg==3.2.10 # via langchain-postgres -psycopg-binary==3.2.9 +psycopg-binary==3.2.10 # via -r requirements.in psycopg-pool==3.2.6 # via langchain-postgres -pycparser==2.22 +pycparser==2.23 # via cffi -pydantic==2.11.5 +pydantic==2.12.0 # via # langchain # langchain-core @@ -295,9 +298,9 @@ pydantic==2.11.5 # qdrant-client # redisvl # unstructured-client -pydantic-core==2.33.2 +pydantic-core==2.41.1 # via pydantic -pydantic-settings==2.9.1 +pydantic-settings==2.11.0 # via langchain-community pyjwt==2.10.1 # via @@ -307,13 +310,13 @@ pyodbc==5.2.0 # via # -r requirements.in # langchain-sqlserver -pypdf==5.6.0 +pypdf==6.1.1 # via # -r requirements.in # unstructured-client python-dateutil==2.9.0.post0 # via elasticsearch -python-dotenv==1.1.0 +python-dotenv==1.1.1 # via # -r requirements.in # pydantic-settings @@ -323,11 +326,11 @@ python-magic==0.4.27 # via unstructured python-oxmsg==0.0.2 # via unstructured -python-ulid==3.0.0 +python-ulid==3.1.0 # via # langchain-redis # redisvl -pyyaml==6.0.2 +pyyaml==6.0.3 # via # huggingface-hub # langchain @@ -335,21 +338,21 @@ pyyaml==6.0.2 # langchain-core # redisvl # transformers -qdrant-client==1.14.2 +qdrant-client==1.15.1 # via # -r requirements.in # langchain-qdrant -rapidfuzz==3.13.0 +rapidfuzz==3.14.1 # via unstructured -redis==5.2.1 +redis==6.4.0 # via redisvl -redisvl==0.4.1 +redisvl==0.9.1 # via langchain-redis -regex==2024.11.6 +regex==2025.9.18 # via # nltk # transformers -requests==2.32.3 +requests==2.32.5 # via # azure-core # huggingface-hub @@ -364,19 +367,17 @@ requests-toolbelt==1.0.0 # via # langsmith # unstructured-client -safetensors==0.5.3 +safetensors==0.6.2 # via transformers -scikit-learn==1.6.1 +scikit-learn==1.7.2 # via sentence-transformers -scipy==1.15.3 +scipy==1.16.2 # via # scikit-learn # sentence-transformers -sentence-transformers==4.1.0 - # via - # -r requirements.in - # langchain-huggingface -simsimd==6.4.7 +sentence-transformers==5.1.1 + # via -r requirements.in +simsimd==6.5.3 # via elasticsearch six==1.17.0 # via @@ -386,9 +387,9 @@ six==1.17.0 # python-dateutil sniffio==1.3.1 # via anyio -soupsieve==2.7 +soupsieve==2.8 # via beautifulsoup4 -sqlalchemy==2.0.41 +sqlalchemy==2.0.43 # via # langchain # langchain-community @@ -396,8 +397,6 @@ sqlalchemy==2.0.41 # langchain-sqlserver sympy==1.14.0 # via torch -tabulate==0.9.0 - # via redisvl tenacity==9.1.2 # via # langchain-community @@ -405,11 +404,11 @@ tenacity==9.1.2 # redisvl threadpoolctl==3.6.0 # via scikit-learn -tokenizers==0.21.1 +tokenizers==0.22.1 # via # langchain-huggingface # transformers -torch==2.7.0 +torch==2.8.0 # via sentence-transformers tqdm==4.67.1 # via @@ -418,21 +417,22 @@ tqdm==4.67.1 # sentence-transformers # transformers # unstructured -transformers==4.52.4 - # via - # langchain-huggingface - # sentence-transformers -triton==3.3.0 +transformers==4.57.0 + # via sentence-transformers +triton==3.4.0 # via torch -typing-extensions==4.14.0 +typing-extensions==4.15.0 # via + # aiosignal # anyio # azure-core # azure-identity # beautifulsoup4 # elasticsearch + # grpcio # huggingface-hub # langchain-core + # langchain-redis # psycopg # psycopg-pool # pydantic @@ -446,26 +446,27 @@ typing-extensions==4.14.0 # unstructured typing-inspect==0.9.0 # via dataclasses-json -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via # pydantic # pydantic-settings -unstructured==0.17.2 +unstructured==0.18.15 # via -r requirements.in -unstructured-client==0.36.0 +unstructured-client==0.42.3 # via unstructured -urllib3==2.4.0 +urllib3==2.5.0 # via # elastic-transport + # langchain-redis # qdrant-client # requests webencodings==0.5.1 # via html5lib -wrapt==1.17.2 +wrapt==1.17.3 # via unstructured -yarl==1.20.0 +yarl==1.22.0 # via aiohttp -zstandard==0.23.0 +zstandard==0.25.0 # via langsmith # The following packages are considered to be unsafe in a requirements file: diff --git a/vector_db/elastic_provider.py b/vector_db/elastic_provider.py index adf27e3..f4ae00e 100644 --- a/vector_db/elastic_provider.py +++ b/vector_db/elastic_provider.py @@ -85,4 +85,4 @@ def add_documents(self, docs: List[Document]) -> None: Args: docs (List[Document]): List of documents to index. """ - self.db.add_documents(docs) + self.db.add_documents(documents=docs)