From ed1686526c85f6baa9a073e9f9396d1115fdbc2e Mon Sep 17 00:00:00 2001
From: cbornet
Date: Mon, 17 Mar 2025 18:52:15 +0100
Subject: [PATCH 1/2] Add tests in CI

---
Review notes (text between the separator above and the diffstat is not part
of the commit message):
 * retrievers.py: removed debug=True from Configuration(). A library should
   not force the client's debug mode on every caller. NOTE(review): in
   generated OpenAPI clients this flag is expected to enable verbose HTTP
   logging, which can include the Authorization header -- confirm against
   vectorize_client.Configuration.
 * lint.yml: setup-python now takes matrix.python-version, as
   python_test.yml does. With python-version-file the matrix value was only
   used in the job name, so both matrix legs resolved the same interpreter.
 * Both edits replace one line with one line, so hunk line counts and the
   diffstat are unchanged. The post-image blob ids on the "index" lines
   were not recomputed.
 * NOTE(review): line breaks and leading whitespace were reconstructed from
   a whitespace-collapsed copy of this series; verify context lines against
   the tree before applying.

 .github/workflows/lint.yml                  | 51 ++++++++++++
 .github/workflows/python_test.yml           | 50 ++++++++++++
 langchain/langchain_vectorize/retrievers.py | 22 ++++-
 langchain/tests/test_retrievers.py          | 90 ++++++++++++++-------
 langchain/uv.lock                           | 14 ----
 5 files changed, 182 insertions(+), 45 deletions(-)
 create mode 100644 .github/workflows/lint.yml
 create mode 100644 .github/workflows/python_test.yml

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..33bf19e
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,51 @@
+name: Python lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  lint_langchain:
+    name: Lint LangChain - Python ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./langchain
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.13']
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "langchain/uv.lock"
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Restore uv cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.uv-cache
+          key: uv-langchain-${{ hashFiles('langchain/uv.lock') }}
+          restore-keys: |
+            uv-langchain-${{ hashFiles('langchain/uv.lock') }}
+            uv-${{ runner.os }}
+      - name: Install the project
+        run: uv sync --dev
+      - name: Run ruff format check
+        run: uv run ruff format --check
+      - name: Run ruff check
+        run: uv run ruff check
+      # - name: Run mypy
+      #   run: uv run mypy .
+      - name: Minimize uv cache
+        run: uv cache prune --ci
diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
new file mode 100644
index 0000000..a5c22da
--- /dev/null
+++ b/.github/workflows/python_test.yml
@@ -0,0 +1,50 @@
+name: Python tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build-langchain:
+    name: LangChain Unit Tests - Python ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./langchain
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "langchain/uv.lock"
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Restore uv cache
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.uv-cache
+          key: uv-langchain-${{ hashFiles('uv.lock') }}
+          restore-keys: |
+            uv-langchain-${{ hashFiles('uv.lock') }}
+      - name: Install the project
+        run: uv sync --dev
+      - name: Run unit tests
+        env:
+          VECTORIZE_TOKEN: ${{ secrets.VECTORIZE_TOKEN }}
+          VECTORIZE_ORG: ${{ secrets.VECTORIZE_ORG }}
+          VECTORIZE_ENV: dev
+        run: uv run pytest tests -vv
+      - name: Minimize uv cache
+        run: uv cache prune --ci
diff --git a/langchain/langchain_vectorize/retrievers.py b/langchain/langchain_vectorize/retrievers.py
index a1d327a..54d1ed7 100644
--- a/langchain/langchain_vectorize/retrievers.py
+++ b/langchain/langchain_vectorize/retrievers.py
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Literal, Optional

 import vectorize_client
 from langchain_core.documents import Document
@@ -40,6 +40,8 @@ class VectorizeRetriever(BaseRetriever):

     api_token: str
     """The Vectorize API token."""
+    environment: Literal["prod", "dev", "local", "staging"] = "prod"
+    """The Vectorize API environment."""
     organization: Optional[str] = None  # noqa: UP007
     """The Vectorize organization ID."""
     pipeline_id: Optional[str] = None  # noqa: UP007
@@ -55,7 +57,23 @@ class VectorizeRetriever(BaseRetriever):

     @override
     def model_post_init(self, /, context: Any) -> None:
-        api = ApiClient(Configuration(access_token=self.api_token))
+        header_name = None
+        header_value = None
+        if self.environment == "prod":
+            host = "https://api.vectorize.io/v1"
+        elif self.environment == "dev":
+            host = "https://api-dev.vectorize.io/v1"
+        elif self.environment == "local":
+            host = "http://localhost:3000/api"
+            header_name = "x-lambda-api-key"
+            header_value = self.api_token
+        else:
+            host = "https://api-staging.vectorize.io/v1"
+        api = ApiClient(
+            Configuration(host=host, access_token=self.api_token),
+            header_name,
+            header_value,
+        )
         self._pipelines = PipelinesApi(api)

     @staticmethod
diff --git a/langchain/tests/test_retrievers.py b/langchain/tests/test_retrievers.py
index cc404a3..e676666 100644
--- a/langchain/tests/test_retrievers.py
+++ b/langchain/tests/test_retrievers.py
@@ -1,28 +1,24 @@
 import json
 import logging
 import os
+import time
 from collections.abc import Iterator
-from dataclasses import dataclass
 from pathlib import Path
+from typing import Literal

 import pytest
+import urllib3
 import vectorize_client as v
+from vectorize_client import ApiClient

 from langchain_vectorize.retrievers import VectorizeRetriever


-@dataclass
-class TestContext:
-    api_client: v.ApiClient
-    api_token: str
-    org_id: str
-
-
 @pytest.fixture(scope="session")
 def api_token() -> str:
     token = os.getenv("VECTORIZE_TOKEN")
     if not token:
-        msg = "Please set VECTORIZE_TOKEN environment variable"
+        msg = "Please set the VECTORIZE_TOKEN environment variable"
         raise ValueError(msg)
     return token

@@ -31,21 +27,29 @@ def api_token() -> str:
 def org_id() -> str:
     org = os.getenv("VECTORIZE_ORG")
     if not org:
-        msg = "Please set VECTORIZE_ORG environment variable"
+        msg = "Please set the VECTORIZE_ORG environment variable"
         raise ValueError(msg)
     return org


 @pytest.fixture(scope="session")
-def api_client(api_token: str) -> Iterator[TestContext]:
+def environment() -> Literal["prod", "dev", "local", "staging"]:
     env = os.getenv("VECTORIZE_ENV", "prod")
+    if env not in ["prod", "dev", "local", "staging"]:
+        msg = "Invalid VECTORIZE_ENV environment variable."
+        raise ValueError(msg)
+    return env
+
+
+@pytest.fixture(scope="session")
+def api_client(api_token: str, environment: str) -> Iterator[ApiClient]:
     header_name = None
     header_value = None
-    if env == "prod":
+    if environment == "prod":
         host = "https://api.vectorize.io/v1"
-    elif env == "dev":
+    elif environment == "dev":
         host = "https://api-dev.vectorize.io/v1"
-    elif env == "local":
+    elif environment == "local":
         host = "http://localhost:3000/api"
         header_name = "x-lambda-api-key"
         header_value = api_token
@@ -87,8 +91,6 @@ def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]:
         ),
     )

-    import urllib3
-
     http = urllib3.PoolManager()
     this_dir = Path(__file__).parent
     file_path = this_dir / "research.pdf"
@@ -137,7 +139,9 @@ def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]:
                 config={},
             ),
             ai_platform=v.AIPlatformSchema(
-                id=builtin_ai_platform, type=v.AIPlatformType.VECTORIZE, config=v.AIPlatformConfigSchema()
+                id=builtin_ai_platform,
+                type=v.AIPlatformType.VECTORIZE,
+                config=v.AIPlatformConfigSchema(),
             ),
             pipeline_name="Test pipeline",
             schedule=v.ScheduleSchema(type=v.ScheduleSchemaType.MANUAL),
@@ -154,20 +158,48 @@ def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]:
         logging.exception("Failed to delete pipeline %s", pipeline_id)


-def test_retrieve_init_args(api_token: str, org_id: str, pipeline_id: str) -> None:
+def test_retrieve_init_args(
+    environment: Literal["prod", "dev", "local", "staging"],
+    api_token: str,
+    org_id: str,
+    pipeline_id: str,
+) -> None:
     retriever = VectorizeRetriever(
-        api_token=api_token, organization=org_id, pipeline_id=pipeline_id, num_results=2
-    )
-    docs = retriever.invoke(input="What are you?")
-    assert len(docs) == 2
-
-
-def test_retrieve_invoke_args(api_token: str, org_id: str, pipeline_id: str) -> None:
-    retriever = VectorizeRetriever(api_token=api_token)
-    docs = retriever.invoke(
-        input="What are you?",
+        environment=environment,
+        api_token=api_token,
         organization=org_id,
         pipeline_id=pipeline_id,
         num_results=2,
     )
-    assert len(docs) == 2
+    start = time.time()
+    while True:
+        docs = retriever.invoke(input="What are you?")
+        if len(docs) == 2:
+            break
+        if time.time() - start > 180:
+            msg = "Docs not retrieved in time"
+            raise RuntimeError(msg)
+        time.sleep(1)
+
+
+def test_retrieve_invoke_args(
+    environment: Literal["prod", "dev", "local", "staging"],
+    api_token: str,
+    org_id: str,
+    pipeline_id: str,
+) -> None:
+    retriever = VectorizeRetriever(environment=environment, api_token=api_token)
+    start = time.time()
+    while True:
+        docs = retriever.invoke(
+            input="What are you?",
+            organization=org_id,
+            pipeline_id=pipeline_id,
+            num_results=2,
+        )
+        if len(docs) == 2:
+            break
+        if time.time() - start > 180:
+            msg = "Docs not retrieved in time"
+            raise RuntimeError(msg)
+        time.sleep(1)
diff --git a/langchain/uv.lock b/langchain/uv.lock
index 598cd84..0b7a37e 100644
--- a/langchain/uv.lock
+++ b/langchain/uv.lock
@@ -307,7 +307,6 @@ dependencies = [
 dev = [
     { name = "mypy" },
     { name = "pytest" },
-    { name = "pytest-asyncio" },
     { name = "ruff" },
 ]

@@ -321,7 +320,6 @@ requires-dist = [
 dev = [
     { name = "mypy", specifier = ">=1.13.0" },
     { name = "pytest", specifier = ">=8.3.3" },
-    { name = "pytest-asyncio", specifier = ">=0.24.0" },
     { name = "ruff", specifier = ">=0.9.0,<0.10" },
 ]

@@ -624,18 +622,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 },
 ]

-[[package]]
-name = "pytest-asyncio"
-version = "0.25.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pytest" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 },
-]
-
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"

From aebdf7988119ead9d9bc83ea700109f4e0d60055 Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Mon, 17 Mar 2025 23:13:23 +0100
Subject: [PATCH 2/2] Update .github/workflows/python_test.yml

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .github/workflows/python_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
index a5c22da..9ffe449 100644
--- a/.github/workflows/python_test.yml
+++ b/.github/workflows/python_test.yml
@@ -35,9 +35,9 @@ jobs:
         uses: actions/cache@v4
         with:
           path: /tmp/.uv-cache
-          key: uv-langchain-${{ hashFiles('uv.lock') }}
+          key: uv-langchain-${{ hashFiles('langchain/uv.lock') }}
           restore-keys: |
-            uv-langchain-${{ hashFiles('uv.lock') }}
+            uv-langchain-${{ hashFiles('langchain/uv.lock') }}
       - name: Install the project
         run: uv sync --dev
       - name: Run unit tests