From 20f258f84e9e9bc0c1653d3675321c393cd6c17b Mon Sep 17 00:00:00 2001
From: adeelehsan <aadeel.ehsan@gmail.com>
Date: Tue, 14 Apr 2026 20:40:44 +0500
Subject: [PATCH 1/2] Replace base_client.py with Fern-generated version
 including all modules

Replaces the manually maintained base_client.py with the Fern-generated
version that includes all new API modules: agents, agent_sessions,
agent_events, agent_artifacts, agent_schedules, tools, tool_servers,
hallucination_correctors, factual_consistency, instructions, metadata,
queries, table_extractors, llm.

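Adds an api_key-only authentication path (Fern only generates the token and
OAuth paths, but Vectara supports direct API key auth). This path is the
fallback: it is used only when no token and no client_id/client_secret pair
is supplied, so OAuth credentials now take precedence over api_key.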

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/vectara/base_client.py | 1856 +++++++++++++-----------------------
 1 file changed, 649 insertions(+), 1207 deletions(-)

diff --git a/src/vectara/base_client.py b/src/vectara/base_client.py
index 0431445..7c76d3e 100644
--- a/src/vectara/base_client.py
+++ b/src/vectara/base_client.py
@@ -1,63 +1,46 @@
 # This file was auto-generated by Fern from our API Definition.
 
-import typing
-from .environment import VectaraEnvironment
+from __future__ import annotations
+
 import os
+import typing
+
 import httpx
 from .core.api_error import ApiError
-from .core.oauth_token_provider import OAuthTokenProvider
-from .core.client_wrapper import SyncClientWrapper
-from .corpora.client import CorporaClient
-from .upload.client import UploadClient
-from .documents.client import DocumentsClient
-from .chats.client import ChatsClient
-from .llms.client import LlmsClient
-from .generation_presets.client import GenerationPresetsClient
-from .encoders.client import EncodersClient
-from .rerankers.client import RerankersClient
-from .jobs.client import JobsClient
-from .users.client import UsersClient
-from .api_keys.client import ApiKeysClient
-from .app_clients.client import AppClientsClient
-from .query_history.client import QueryHistoryClient
-from .auth.client import AuthClient
-from .types.search_corpora_parameters import SearchCorporaParameters
-from .types.generation_parameters import GenerationParameters
-from .core.request_options import RequestOptions
-from .types.query_streamed_response import QueryStreamedResponse
-from .core.serialization import convert_and_respect_annotation_metadata
-import httpx_sse
-from .core.pydantic_utilities import parse_obj_as
-import json
-from .errors.bad_request_error import BadRequestError
-from .types.bad_request_error_body import BadRequestErrorBody
-from .errors.forbidden_error import ForbiddenError
-from .types.error import Error
-from .errors.not_found_error import NotFoundError
-from .types.not_found_error_body import NotFoundErrorBody
-from json.decoder import JSONDecodeError
-from .types.query_full_response import QueryFullResponse
-from .types.chat_parameters import ChatParameters
-from .types.chat_streamed_response import ChatStreamedResponse
-from .types.chat_full_response import ChatFullResponse
-from .core.client_wrapper import AsyncClientWrapper
-from .corpora.client import AsyncCorporaClient
-from .upload.client import AsyncUploadClient
-from .documents.client import AsyncDocumentsClient
-from .chats.client import AsyncChatsClient
-from .llms.client import AsyncLlmsClient
-from .generation_presets.client import AsyncGenerationPresetsClient
-from .encoders.client import AsyncEncodersClient
-from .rerankers.client import AsyncRerankersClient
-from .jobs.client import AsyncJobsClient
-from .users.client import AsyncUsersClient
-from .api_keys.client import AsyncApiKeysClient
-from .app_clients.client import AsyncAppClientsClient
-from .query_history.client import AsyncQueryHistoryClient
-from .auth.client import AsyncAuthClient
-
-# this is used as the default value for optional parameters
-OMIT = typing.cast(typing.Any, ...)
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from .core.logging import LogConfig, Logger
+from .core.oauth_token_provider import AsyncOAuthTokenProvider, OAuthTokenProvider
+from .environment import VectaraEnvironment
+
+if typing.TYPE_CHECKING:
+    from .agent_artifacts.client import AgentArtifactsClient, AsyncAgentArtifactsClient
+    from .agent_events.client import AgentEventsClient, AsyncAgentEventsClient
+    from .agent_schedules.client import AgentSchedulesClient, AsyncAgentSchedulesClient
+    from .agent_sessions.client import AgentSessionsClient, AsyncAgentSessionsClient
+    from .agents.client import AgentsClient, AsyncAgentsClient
+    from .api_keys.client import ApiKeysClient, AsyncApiKeysClient
+    from .app_clients.client import AppClientsClient, AsyncAppClientsClient
+    from .auth.client import AsyncAuthClient, AuthClient
+    from .chats.client import AsyncChatsClient, ChatsClient
+    from .corpora.client import AsyncCorporaClient, CorporaClient
+    from .documents.client import AsyncDocumentsClient, DocumentsClient
+    from .encoders.client import AsyncEncodersClient, EncodersClient
+    from .factual_consistency.client import AsyncFactualConsistencyClient, FactualConsistencyClient
+    from .generation_presets.client import AsyncGenerationPresetsClient, GenerationPresetsClient
+    from .hallucination_correctors.client import AsyncHallucinationCorrectorsClient, HallucinationCorrectorsClient
+    from .instructions.client import AsyncInstructionsClient, InstructionsClient
+    from .jobs.client import AsyncJobsClient, JobsClient
+    from .llm.client import AsyncLlmClient, LlmClient
+    from .llms.client import AsyncLlmsClient, LlmsClient
+    from .metadata.client import AsyncMetadataClient, MetadataClient
+    from .queries.client import AsyncQueriesClient, QueriesClient
+    from .query_history.client import AsyncQueryHistoryClient, QueryHistoryClient
+    from .rerankers.client import AsyncRerankersClient, RerankersClient
+    from .table_extractors.client import AsyncTableExtractorsClient, TableExtractorsClient
+    from .tool_servers.client import AsyncToolServersClient, ToolServersClient
+    from .tools.client import AsyncToolsClient, ToolsClient
+    from .upload.client import AsyncUploadClient, UploadClient
+    from .users.client import AsyncUsersClient, UsersClient
 
 
 class BaseVectara:
@@ -66,19 +49,27 @@ class BaseVectara:
 
     Parameters
     ----------
-    environment : VectaraEnvironment
-        The environment to use for requests from the client. from .environment import VectaraEnvironment
 
+    client_id : str
+        The client identifier used for authentication.
+
+    client_secret : str
+        The client secret used for authentication.
+
+    timeout : typing.Optional[float]
+        The timeout to be used, in seconds, for requests. By default the timeout is 60 seconds, unless a custom httpx client is used, in which case this default is not enforced.
 
+    follow_redirects : typing.Optional[bool]
+        Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in.
 
-        Defaults to VectaraEnvironment.PRODUCTION
+    httpx_client : typing.Optional[httpx.Client]
+        The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
 
+    # or ...
 
+    token : typing.Callable[[], str]
+        Authenticate by providing a callable that returns a pre-generated bearer token. In this mode, OAuth client credentials are not required.
 
-    api_key : typing.Optional[str]
-    client_id : typing.Optional[str]
-    client_secret : typing.Optional[str]
-    _token_getter_override : typing.Optional[typing.Callable[[], str]]
     timeout : typing.Optional[float]
         The timeout to be used, in seconds, for requests. By default the timeout is 60 seconds, unless a custom httpx client is used, in which case this default is not enforced.
 
@@ -92,53 +83,96 @@ class BaseVectara:
     --------
     from vectara import Vectara
 
+    client = Vectara()
+
+    # or ...
+
+    from vectara import Vectara
+
     client = Vectara(
-        api_key="YOUR_API_KEY",
-        client_id="YOUR_CLIENT_ID",
-        client_secret="YOUR_CLIENT_SECRET",
+        # `token` is a zero-argument callable that returns the bearer token.
+        token=lambda: "YOUR_BEARER_TOKEN",
     )
     """
 
+    @typing.overload
     def __init__(
         self,
         *,
         environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
         api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+        timeout: typing.Optional[float] = None,
+        follow_redirects: typing.Optional[bool] = True,
+        httpx_client: typing.Optional[httpx.Client] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
         client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"),
         client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"),
+    ): ...
+    @typing.overload
+    def __init__(
+        self,
+        *,
+        environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
+        api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+        timeout: typing.Optional[float] = None,
+        follow_redirects: typing.Optional[bool] = True,
+        httpx_client: typing.Optional[httpx.Client] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
+        token: typing.Callable[[], str],
+    ): ...
+    def __init__(
+        self,
+        *,
+        environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
+        api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+        client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"),
+        client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"),
+        token: typing.Optional[typing.Callable[[], str]] = None,
         _token_getter_override: typing.Optional[typing.Callable[[], str]] = None,
         timeout: typing.Optional[float] = None,
         follow_redirects: typing.Optional[bool] = True,
         httpx_client: typing.Optional[httpx.Client] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
     ):
-        _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None
-        if api_key is not None:
+        _defaulted_timeout = (
+            timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
+        )
+        if token is not None:
             self._client_wrapper = SyncClientWrapper(
                 environment=environment,
                 api_key=api_key,
+                headers=headers,
                 httpx_client=httpx_client
                 if httpx_client is not None
                 else httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
                 if follow_redirects is not None
                 else httpx.Client(timeout=_defaulted_timeout),
                 timeout=_defaulted_timeout,
-            )            
-        elif client_id is not None and client_secret is not None: 
+                logging=logging,
+                token=_token_getter_override if _token_getter_override is not None else token,
+            )
+        elif client_id is not None and client_secret is not None:
             oauth_token_provider = OAuthTokenProvider(
                 client_id=client_id,
                 client_secret=client_secret,
                 client_wrapper=SyncClientWrapper(
                     environment=environment,
                     api_key=api_key,
+                    headers=headers,
                     httpx_client=httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
                     if follow_redirects is not None
                     else httpx.Client(timeout=_defaulted_timeout),
                     timeout=_defaulted_timeout,
+                    logging=logging,
                 ),
             )
             self._client_wrapper = SyncClientWrapper(
                 environment=environment,
                 api_key=api_key,
+                headers=headers,
                 token=_token_getter_override if _token_getter_override is not None else oauth_token_provider.get_token,
                 httpx_client=httpx_client
                 if httpx_client is not None
@@ -146,583 +180,277 @@ def __init__(
                 if follow_redirects is not None
                 else httpx.Client(timeout=_defaulted_timeout),
                 timeout=_defaulted_timeout,
+                logging=logging,
+            )
+        elif api_key is not None:
+            self._client_wrapper = SyncClientWrapper(
+                environment=environment,
+                api_key=api_key,
+                headers=headers,
+                httpx_client=httpx_client
+                if httpx_client is not None
+                else httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
+                if follow_redirects is not None
+                else httpx.Client(timeout=_defaulted_timeout),
+                timeout=_defaulted_timeout,
+                logging=logging,
             )
-        else: 
+        else:
             raise ApiError(
-                body="The client must be instantiated be either passing in api_key, client_id or client_secret"
-            )  
-        self.corpora = CorporaClient(client_wrapper=self._client_wrapper)
-        self.upload = UploadClient(client_wrapper=self._client_wrapper)
-        self.documents = DocumentsClient(client_wrapper=self._client_wrapper)
-        self.chats = ChatsClient(client_wrapper=self._client_wrapper)
-        self.llms = LlmsClient(client_wrapper=self._client_wrapper)
-        self.generation_presets = GenerationPresetsClient(client_wrapper=self._client_wrapper)
-        self.encoders = EncodersClient(client_wrapper=self._client_wrapper)
-        self.rerankers = RerankersClient(client_wrapper=self._client_wrapper)
-        self.jobs = JobsClient(client_wrapper=self._client_wrapper)
-        self.users = UsersClient(client_wrapper=self._client_wrapper)
-        self.api_keys = ApiKeysClient(client_wrapper=self._client_wrapper)
-        self.app_clients = AppClientsClient(client_wrapper=self._client_wrapper)
-        self.query_history = QueryHistoryClient(client_wrapper=self._client_wrapper)
-        self.auth = AuthClient(client_wrapper=self._client_wrapper)
-
-    def query_stream(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Iterator[QueryStreamedResponse]:
-        """
-        Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
-
-        * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
-        * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
-        * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
-        will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
-        * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
-        * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
-        * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
-
-        For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
-
-        Parameters
-        ----------
-        query : str
-            The search query string, which is the question the user is asking.
-
-        search : SearchCorporaParameters
-
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
-
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
-
-        generation : typing.Optional[GenerationParameters]
-
-        save_history : typing.Optional[bool]
-            Indicates whether to save the query in the query history.
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
-
-        Yields
-        ------
-        typing.Iterator[QueryStreamedResponse]
-
-
-        Examples
-        --------
-        from vectara import (
-            CitationParameters,
-            ContextConfiguration,
-            GenerationParameters,
-            KeyedSearchCorpus,
-            SearchCorporaParameters,
-            Vectara,
-        )
+                body="The client must be instantiated with either 'api_key', 'token', or both 'client_id' and 'client_secret'"
+            )
+        self._corpora: typing.Optional[CorporaClient] = None
+        self._upload: typing.Optional[UploadClient] = None
+        self._documents: typing.Optional[DocumentsClient] = None
+        self._metadata: typing.Optional[MetadataClient] = None
+        self._queries: typing.Optional[QueriesClient] = None
+        self._query_history: typing.Optional[QueryHistoryClient] = None
+        self._chats: typing.Optional[ChatsClient] = None
+        self._llms: typing.Optional[LlmsClient] = None
+        self._llm: typing.Optional[LlmClient] = None
+        self._generation_presets: typing.Optional[GenerationPresetsClient] = None
+        self._factual_consistency: typing.Optional[FactualConsistencyClient] = None
+        self._encoders: typing.Optional[EncodersClient] = None
+        self._rerankers: typing.Optional[RerankersClient] = None
+        self._table_extractors: typing.Optional[TableExtractorsClient] = None
+        self._hallucination_correctors: typing.Optional[HallucinationCorrectorsClient] = None
+        self._jobs: typing.Optional[JobsClient] = None
+        self._users: typing.Optional[UsersClient] = None
+        self._api_keys: typing.Optional[ApiKeysClient] = None
+        self._app_clients: typing.Optional[AppClientsClient] = None
+        self._auth: typing.Optional[AuthClient] = None
+        self._tool_servers: typing.Optional[ToolServersClient] = None
+        self._tools: typing.Optional[ToolsClient] = None
+        self._instructions: typing.Optional[InstructionsClient] = None
+        self._agents: typing.Optional[AgentsClient] = None
+        self._agent_sessions: typing.Optional[AgentSessionsClient] = None
+        self._agent_events: typing.Optional[AgentEventsClient] = None
+        self._agent_artifacts: typing.Optional[AgentArtifactsClient] = None
+        self._agent_schedules: typing.Optional[AgentSchedulesClient] = None
+
+    @property
+    def corpora(self):
+        if self._corpora is None:
+            from .corpora.client import CorporaClient  # noqa: E402
+
+            self._corpora = CorporaClient(client_wrapper=self._client_wrapper)
+        return self._corpora
+
+    @property
+    def upload(self):
+        if self._upload is None:
+            from .upload.client import UploadClient  # noqa: E402
+
+            self._upload = UploadClient(client_wrapper=self._client_wrapper)
+        return self._upload
+
+    @property
+    def documents(self):
+        if self._documents is None:
+            from .documents.client import DocumentsClient  # noqa: E402
+
+            self._documents = DocumentsClient(client_wrapper=self._client_wrapper)
+        return self._documents
+
+    @property
+    def metadata(self):
+        if self._metadata is None:
+            from .metadata.client import MetadataClient  # noqa: E402
+
+            self._metadata = MetadataClient(client_wrapper=self._client_wrapper)
+        return self._metadata
+
+    @property
+    def queries(self):
+        if self._queries is None:
+            from .queries.client import QueriesClient  # noqa: E402
+
+            self._queries = QueriesClient(client_wrapper=self._client_wrapper)
+        return self._queries
+
+    @property
+    def query_history(self):
+        if self._query_history is None:
+            from .query_history.client import QueryHistoryClient  # noqa: E402
+
+            self._query_history = QueryHistoryClient(client_wrapper=self._client_wrapper)
+        return self._query_history
+
+    @property
+    def chats(self):
+        if self._chats is None:
+            from .chats.client import ChatsClient  # noqa: E402
+
+            self._chats = ChatsClient(client_wrapper=self._client_wrapper)
+        return self._chats
+
+    @property
+    def llms(self):
+        if self._llms is None:
+            from .llms.client import LlmsClient  # noqa: E402
+
+            self._llms = LlmsClient(client_wrapper=self._client_wrapper)
+        return self._llms
+
+    @property
+    def llm(self):
+        if self._llm is None:
+            from .llm.client import LlmClient  # noqa: E402
+
+            self._llm = LlmClient(client_wrapper=self._client_wrapper)
+        return self._llm
+
+    @property
+    def generation_presets(self):
+        if self._generation_presets is None:
+            from .generation_presets.client import GenerationPresetsClient  # noqa: E402
+
+            self._generation_presets = GenerationPresetsClient(client_wrapper=self._client_wrapper)
+        return self._generation_presets
+
+    @property
+    def factual_consistency(self):
+        if self._factual_consistency is None:
+            from .factual_consistency.client import FactualConsistencyClient  # noqa: E402
+
+            self._factual_consistency = FactualConsistencyClient(client_wrapper=self._client_wrapper)
+        return self._factual_consistency
+
+    @property
+    def encoders(self):
+        if self._encoders is None:
+            from .encoders.client import EncodersClient  # noqa: E402
+
+            self._encoders = EncodersClient(client_wrapper=self._client_wrapper)
+        return self._encoders
+
+    @property
+    def rerankers(self):
+        if self._rerankers is None:
+            from .rerankers.client import RerankersClient  # noqa: E402
 
-        client = Vectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-        response = client.query_stream(
-            query="hello, world?",
-            search=SearchCorporaParameters(
-                corpora=[
-                    KeyedSearchCorpus(
-                        lexical_interpolation=0.005,
-                    )
-                ],
-                offset=0,
-                limit=10,
-                context_configuration=ContextConfiguration(
-                    sentences_before=2,
-                    sentences_after=2,
-                    start_tag="<em>",
-                    end_tag="</em>",
-                ),
-            ),
-            generation=GenerationParameters(
-                max_used_search_results=5,
-                citations=CitationParameters(
-                    style="none",
-                ),
-                response_language="auto",
-            ),
-        )
-        for chunk in response:
-            yield chunk
-        """
-        with self._client_wrapper.httpx_client.stream(
-            "v2/query",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": True,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        ) as _response:
-            try:
-                if 200 <= _response.status_code < 300:
-                    _event_source = httpx_sse.EventSource(_response)
-                    for _sse in _event_source.iter_sse():
-                        try:
-                            yield typing.cast(
-                                QueryStreamedResponse,
-                                parse_obj_as(
-                                    type_=QueryStreamedResponse,  # type: ignore
-                                    object_=json.loads(_sse.data),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
-                _response.read()
-                if _response.status_code == 400:
-                    raise BadRequestError(
-                        typing.cast(
-                            BadRequestErrorBody,
-                            parse_obj_as(
-                                type_=BadRequestErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 403:
-                    raise ForbiddenError(
-                        typing.cast(
-                            Error,
-                            parse_obj_as(
-                                type_=Error,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 404:
-                    raise NotFoundError(
-                        typing.cast(
-                            NotFoundErrorBody,
-                            parse_obj_as(
-                                type_=NotFoundErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                _response_json = _response.json()
-            except JSONDecodeError:
-                raise ApiError(status_code=_response.status_code, body=_response.text)
-            raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    def query(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> QueryFullResponse:
-        """
-        Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
+            self._rerankers = RerankersClient(client_wrapper=self._client_wrapper)
+        return self._rerankers
 
-        * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
-        * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
-        * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
-        will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
-        * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
-        * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
-        * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
+    @property
+    def table_extractors(self):
+        if self._table_extractors is None:
+            from .table_extractors.client import TableExtractorsClient  # noqa: E402
 
-        For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
+            self._table_extractors = TableExtractorsClient(client_wrapper=self._client_wrapper)
+        return self._table_extractors
 
-        Parameters
-        ----------
-        query : str
-            The search query string, which is the question the user is asking.
+    @property
+    def hallucination_correctors(self):
+        if self._hallucination_correctors is None:
+            from .hallucination_correctors.client import HallucinationCorrectorsClient  # noqa: E402
 
-        search : SearchCorporaParameters
+            self._hallucination_correctors = HallucinationCorrectorsClient(client_wrapper=self._client_wrapper)
+        return self._hallucination_correctors
 
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
+    @property
+    def jobs(self):
+        if self._jobs is None:
+            from .jobs.client import JobsClient  # noqa: E402
 
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
+            self._jobs = JobsClient(client_wrapper=self._client_wrapper)
+        return self._jobs
 
-        generation : typing.Optional[GenerationParameters]
+    @property
+    def users(self):
+        if self._users is None:
+            from .users.client import UsersClient  # noqa: E402
 
-        save_history : typing.Optional[bool]
-            Indicates whether to save the query in the query history.
+            self._users = UsersClient(client_wrapper=self._client_wrapper)
+        return self._users
 
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
+    @property
+    def api_keys(self):
+        if self._api_keys is None:
+            from .api_keys.client import ApiKeysClient  # noqa: E402
 
-        Returns
-        -------
-        QueryFullResponse
+            self._api_keys = ApiKeysClient(client_wrapper=self._client_wrapper)
+        return self._api_keys
 
+    @property
+    def app_clients(self):
+        if self._app_clients is None:
+            from .app_clients.client import AppClientsClient  # noqa: E402
 
-        Examples
-        --------
-        from vectara import SearchCorporaParameters, Vectara
+            self._app_clients = AppClientsClient(client_wrapper=self._client_wrapper)
+        return self._app_clients
 
-        client = Vectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-        client.query(
-            query="Am I allowed to bring pets to work?",
-            search=SearchCorporaParameters(),
-        )
-        """
-        _response = self._client_wrapper.httpx_client.request(
-            "v2/query",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": False,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        )
-        try:
-            if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    QueryFullResponse,
-                    parse_obj_as(
-                        type_=QueryFullResponse,  # type: ignore
-                        object_=_response.json(),
-                    ),
-                )
-            if _response.status_code == 400:
-                raise BadRequestError(
-                    typing.cast(
-                        BadRequestErrorBody,
-                        parse_obj_as(
-                            type_=BadRequestErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 403:
-                raise ForbiddenError(
-                    typing.cast(
-                        Error,
-                        parse_obj_as(
-                            type_=Error,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 404:
-                raise NotFoundError(
-                    typing.cast(
-                        NotFoundErrorBody,
-                        parse_obj_as(
-                            type_=NotFoundErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            _response_json = _response.json()
-        except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    def chat_stream(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        chat: typing.Optional[ChatParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Iterator[ChatStreamedResponse]:
-        """
-        Create a chat while specifying the default retrieval parameters used by the prompt.
+    @property
+    def auth(self):
+        if self._auth is None:
+            from .auth.client import AuthClient  # noqa: E402
 
-        Parameters
-        ----------
-        query : str
-            The chat message or question.
+            self._auth = AuthClient(client_wrapper=self._client_wrapper)
+        return self._auth
 
-        search : SearchCorporaParameters
+    @property
+    def tool_servers(self):
+        if self._tool_servers is None:
+            from .tool_servers.client import ToolServersClient  # noqa: E402
 
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
+            self._tool_servers = ToolServersClient(client_wrapper=self._client_wrapper)
+        return self._tool_servers
 
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
+    @property
+    def tools(self):
+        if self._tools is None:
+            from .tools.client import ToolsClient  # noqa: E402
 
-        generation : typing.Optional[GenerationParameters]
+            self._tools = ToolsClient(client_wrapper=self._client_wrapper)
+        return self._tools
 
-        chat : typing.Optional[ChatParameters]
+    @property
+    def instructions(self):
+        if self._instructions is None:
+            from .instructions.client import InstructionsClient  # noqa: E402
 
-        save_history : typing.Optional[bool]
-            Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+            self._instructions = InstructionsClient(client_wrapper=self._client_wrapper)
+        return self._instructions
 
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
+    @property
+    def agents(self):
+        if self._agents is None:
+            from .agents.client import AgentsClient  # noqa: E402
 
-        Yields
-        ------
-        typing.Iterator[ChatStreamedResponse]
+            self._agents = AgentsClient(client_wrapper=self._client_wrapper)
+        return self._agents
 
+    @property
+    def agent_sessions(self):
+        if self._agent_sessions is None:
+            from .agent_sessions.client import AgentSessionsClient  # noqa: E402
 
-        Examples
-        --------
-        from vectara import SearchCorporaParameters, Vectara
+            self._agent_sessions = AgentSessionsClient(client_wrapper=self._client_wrapper)
+        return self._agent_sessions
 
-        client = Vectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-        response = client.chat_stream(
-            query="How can I use the Vectara platform?",
-            search=SearchCorporaParameters(),
-        )
-        for chunk in response:
-            yield chunk
-        """
-        with self._client_wrapper.httpx_client.stream(
-            "v2/chats",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "chat": convert_and_respect_annotation_metadata(
-                    object_=chat, annotation=ChatParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": True,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        ) as _response:
-            try:
-                if 200 <= _response.status_code < 300:
-                    _event_source = httpx_sse.EventSource(_response)
-                    for _sse in _event_source.iter_sse():
-                        try:
-                            yield typing.cast(
-                                ChatStreamedResponse,
-                                parse_obj_as(
-                                    type_=ChatStreamedResponse,  # type: ignore
-                                    object_=json.loads(_sse.data),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
-                _response.read()
-                if _response.status_code == 400:
-                    raise BadRequestError(
-                        typing.cast(
-                            BadRequestErrorBody,
-                            parse_obj_as(
-                                type_=BadRequestErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 403:
-                    raise ForbiddenError(
-                        typing.cast(
-                            Error,
-                            parse_obj_as(
-                                type_=Error,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 404:
-                    raise NotFoundError(
-                        typing.cast(
-                            NotFoundErrorBody,
-                            parse_obj_as(
-                                type_=NotFoundErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                _response_json = _response.json()
-            except JSONDecodeError:
-                raise ApiError(status_code=_response.status_code, body=_response.text)
-            raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    def chat(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        chat: typing.Optional[ChatParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> ChatFullResponse:
-        """
-        Create a chat while specifying the default retrieval parameters used by the prompt.
-
-        Parameters
-        ----------
-        query : str
-            The chat message or question.
-
-        search : SearchCorporaParameters
+    @property
+    def agent_events(self):
+        if self._agent_events is None:
+            from .agent_events.client import AgentEventsClient  # noqa: E402
 
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
+            self._agent_events = AgentEventsClient(client_wrapper=self._client_wrapper)
+        return self._agent_events
 
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
+    @property
+    def agent_artifacts(self):
+        if self._agent_artifacts is None:
+            from .agent_artifacts.client import AgentArtifactsClient  # noqa: E402
 
-        generation : typing.Optional[GenerationParameters]
+            self._agent_artifacts = AgentArtifactsClient(client_wrapper=self._client_wrapper)
+        return self._agent_artifacts
 
-        chat : typing.Optional[ChatParameters]
+    @property
+    def agent_schedules(self):
+        if self._agent_schedules is None:
+            from .agent_schedules.client import AgentSchedulesClient  # noqa: E402
 
-        save_history : typing.Optional[bool]
-            Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
-
-        Returns
-        -------
-        ChatFullResponse
-
-
-        Examples
-        --------
-        from vectara import SearchCorporaParameters, Vectara
-
-        client = Vectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-        client.chat(
-            query="How can I use the Vectara platform?",
-            search=SearchCorporaParameters(),
-        )
-        """
-        _response = self._client_wrapper.httpx_client.request(
-            "v2/chats",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "chat": convert_and_respect_annotation_metadata(
-                    object_=chat, annotation=ChatParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": False,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        )
-        try:
-            if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    ChatFullResponse,
-                    parse_obj_as(
-                        type_=ChatFullResponse,  # type: ignore
-                        object_=_response.json(),
-                    ),
-                )
-            if _response.status_code == 400:
-                raise BadRequestError(
-                    typing.cast(
-                        BadRequestErrorBody,
-                        parse_obj_as(
-                            type_=BadRequestErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 403:
-                raise ForbiddenError(
-                    typing.cast(
-                        Error,
-                        parse_obj_as(
-                            type_=Error,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 404:
-                raise NotFoundError(
-                    typing.cast(
-                        NotFoundErrorBody,
-                        parse_obj_as(
-                            type_=NotFoundErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            _response_json = _response.json()
-        except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            self._agent_schedules = AgentSchedulesClient(client_wrapper=self._client_wrapper)
+        return self._agent_schedules
 
 
 class AsyncBaseVectara:
@@ -731,19 +459,27 @@ class AsyncBaseVectara:
 
     Parameters
     ----------
-    environment : VectaraEnvironment
-        The environment to use for requests from the client. from .environment import VectaraEnvironment
 
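+    client_id : typing.Optional[str]
+        The client identifier used for OAuth authentication. Defaults to the value of the VECTARA_CLIENT_ID environment variable, read once when this module is imported.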
 
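+    client_secret : typing.Optional[str]
+        The client secret used for OAuth authentication. Defaults to the value of the VECTARA_CLIENT_SECRET environment variable, read once when this module is imported.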
 
-        Defaults to VectaraEnvironment.PRODUCTION
+    timeout : typing.Optional[float]
+        The timeout to be used, in seconds, for requests. By default the timeout is 60 seconds, unless a custom httpx client is used, in which case this default is not enforced.
 
+    follow_redirects : typing.Optional[bool]
+        Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in.
 
+    httpx_client : typing.Optional[httpx.AsyncClient]
+        The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
+
+    # or ...
+
+    token : typing.Callable[[], str]
+        Authenticate by providing a callable that returns a pre-generated bearer token. In this mode, OAuth client credentials are not required.
 
-    api_key : typing.Optional[str]
-    client_id : typing.Optional[str]
-    client_secret : typing.Optional[str]
-    _token_getter_override : typing.Optional[typing.Callable[[], str]]
     timeout : typing.Optional[float]
         The timeout to be used, in seconds, for requests. By default the timeout is 60 seconds, unless a custom httpx client is used, in which case this default is not enforced.
 
@@ -757,666 +493,372 @@ class AsyncBaseVectara:
     --------
     from vectara import AsyncVectara
 
+    client = AsyncVectara()
+
+    # or ...
+
+    from vectara import AsyncVectara
+
     client = AsyncVectara(
-        api_key="YOUR_API_KEY",
-        client_id="YOUR_CLIENT_ID",
-        client_secret="YOUR_CLIENT_SECRET",
+        # token must be a zero-argument callable that returns the bearer token
+        token=lambda: "YOUR_BEARER_TOKEN",
     )
     """
 
+    @typing.overload
+    def __init__(
+        self,
+        *,
+        environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
+        api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+        timeout: typing.Optional[float] = None,
+        follow_redirects: typing.Optional[bool] = True,
+        httpx_client: typing.Optional[httpx.AsyncClient] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
+        client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"),
+        client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"),
+    ): ...
+    @typing.overload
+    def __init__(
+        self,
+        *,
+        environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
+        api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+        timeout: typing.Optional[float] = None,
+        follow_redirects: typing.Optional[bool] = True,
+        httpx_client: typing.Optional[httpx.AsyncClient] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
+        token: typing.Callable[[], str],
+    ): ...
     def __init__(
         self,
         *,
         environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION,
         api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"),
+        headers: typing.Optional[typing.Dict[str, str]] = None,
         client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"),
         client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"),
+        token: typing.Optional[typing.Callable[[], str]] = None,
         _token_getter_override: typing.Optional[typing.Callable[[], str]] = None,
         timeout: typing.Optional[float] = None,
         follow_redirects: typing.Optional[bool] = True,
         httpx_client: typing.Optional[httpx.AsyncClient] = None,
+        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,
     ):
-        _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None
-        if api_key is not None:
+        _defaulted_timeout = (
+            timeout if timeout is not None else 60 if httpx_client is None else httpx_client.timeout.read
+        )
+        if token is not None:
             self._client_wrapper = AsyncClientWrapper(
                 environment=environment,
                 api_key=api_key,
+                headers=headers,
                 httpx_client=httpx_client
                 if httpx_client is not None
                 else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
                 if follow_redirects is not None
                 else httpx.AsyncClient(timeout=_defaulted_timeout),
                 timeout=_defaulted_timeout,
-            )            
-        elif client_id is not None and client_secret is not None: 
-            oauth_token_provider = OAuthTokenProvider(
+                logging=logging,
+                token=_token_getter_override if _token_getter_override is not None else token,
+            )
+        elif client_id is not None and client_secret is not None:
+            oauth_token_provider = AsyncOAuthTokenProvider(
                 client_id=client_id,
                 client_secret=client_secret,
-                client_wrapper=SyncClientWrapper(
+                client_wrapper=AsyncClientWrapper(
                     environment=environment,
                     api_key=api_key,
-                    httpx_client=httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
+                    headers=headers,
+                    httpx_client=httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
                     if follow_redirects is not None
-                    else httpx.Client(timeout=_defaulted_timeout),
+                    else httpx.AsyncClient(timeout=_defaulted_timeout),
                     timeout=_defaulted_timeout,
+                    logging=logging,
                 ),
             )
             self._client_wrapper = AsyncClientWrapper(
                 environment=environment,
                 api_key=api_key,
-                token=_token_getter_override if _token_getter_override is not None else oauth_token_provider.get_token,
+                headers=headers,
+                token=_token_getter_override,
+                async_token=oauth_token_provider.get_token,
                 httpx_client=httpx_client
                 if httpx_client is not None
                 else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
                 if follow_redirects is not None
                 else httpx.AsyncClient(timeout=_defaulted_timeout),
                 timeout=_defaulted_timeout,
+                logging=logging,
             )
-        else: 
-            raise ApiError(
-                body="The client must be instantiated be either passing in api_key, client_id or client_secret"
-            )  
-        self.corpora = AsyncCorporaClient(client_wrapper=self._client_wrapper)
-        self.upload = AsyncUploadClient(client_wrapper=self._client_wrapper)
-        self.documents = AsyncDocumentsClient(client_wrapper=self._client_wrapper)
-        self.chats = AsyncChatsClient(client_wrapper=self._client_wrapper)
-        self.llms = AsyncLlmsClient(client_wrapper=self._client_wrapper)
-        self.generation_presets = AsyncGenerationPresetsClient(client_wrapper=self._client_wrapper)
-        self.encoders = AsyncEncodersClient(client_wrapper=self._client_wrapper)
-        self.rerankers = AsyncRerankersClient(client_wrapper=self._client_wrapper)
-        self.jobs = AsyncJobsClient(client_wrapper=self._client_wrapper)
-        self.users = AsyncUsersClient(client_wrapper=self._client_wrapper)
-        self.api_keys = AsyncApiKeysClient(client_wrapper=self._client_wrapper)
-        self.app_clients = AsyncAppClientsClient(client_wrapper=self._client_wrapper)
-        self.query_history = AsyncQueryHistoryClient(client_wrapper=self._client_wrapper)
-        self.auth = AsyncAuthClient(client_wrapper=self._client_wrapper)
-
-    async def query_stream(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.AsyncIterator[QueryStreamedResponse]:
-        """
-        Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
-
-        * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
-        * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
-        * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
-        will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
-        * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
-        * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
-        * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
-
-        For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
-
-        Parameters
-        ----------
-        query : str
-            The search query string, which is the question the user is asking.
-
-        search : SearchCorporaParameters
-
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
-
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
-
-        generation : typing.Optional[GenerationParameters]
-
-        save_history : typing.Optional[bool]
-            Indicates whether to save the query in the query history.
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
-
-        Yields
-        ------
-        typing.AsyncIterator[QueryStreamedResponse]
-
-
-        Examples
-        --------
-        import asyncio
-
-        from vectara import (
-            AsyncVectara,
-            CitationParameters,
-            ContextConfiguration,
-            GenerationParameters,
-            KeyedSearchCorpus,
-            SearchCorporaParameters,
-        )
-
-        client = AsyncVectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-
-
-        async def main() -> None:
-            response = await client.query_stream(
-                query="hello, world?",
-                search=SearchCorporaParameters(
-                    corpora=[
-                        KeyedSearchCorpus(
-                            lexical_interpolation=0.005,
-                        )
-                    ],
-                    offset=0,
-                    limit=10,
-                    context_configuration=ContextConfiguration(
-                        sentences_before=2,
-                        sentences_after=2,
-                        start_tag="<em>",
-                        end_tag="</em>",
-                    ),
-                ),
-                generation=GenerationParameters(
-                    max_used_search_results=5,
-                    citations=CitationParameters(
-                        style="none",
-                    ),
-                    response_language="auto",
-                ),
+        elif api_key is not None:
+            self._client_wrapper = AsyncClientWrapper(
+                environment=environment,
+                api_key=api_key,
+                headers=headers,
+                httpx_client=httpx_client
+                if httpx_client is not None
+                else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects)
+                if follow_redirects is not None
+                else httpx.AsyncClient(timeout=_defaulted_timeout),
+                timeout=_defaulted_timeout,
+                logging=logging,
             )
-            async for chunk in response:
-                yield chunk
-
-
-        asyncio.run(main())
-        """
-        async with self._client_wrapper.httpx_client.stream(
-            "v2/query",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": True,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        ) as _response:
-            try:
-                if 200 <= _response.status_code < 300:
-                    _event_source = httpx_sse.EventSource(_response)
-                    async for _sse in _event_source.aiter_sse():
-                        try:
-                            yield typing.cast(
-                                QueryStreamedResponse,
-                                parse_obj_as(
-                                    type_=QueryStreamedResponse,  # type: ignore
-                                    object_=json.loads(_sse.data),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
-                await _response.aread()
-                if _response.status_code == 400:
-                    raise BadRequestError(
-                        typing.cast(
-                            BadRequestErrorBody,
-                            parse_obj_as(
-                                type_=BadRequestErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 403:
-                    raise ForbiddenError(
-                        typing.cast(
-                            Error,
-                            parse_obj_as(
-                                type_=Error,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 404:
-                    raise NotFoundError(
-                        typing.cast(
-                            NotFoundErrorBody,
-                            parse_obj_as(
-                                type_=NotFoundErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                _response_json = _response.json()
-            except JSONDecodeError:
-                raise ApiError(status_code=_response.status_code, body=_response.text)
-            raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    async def query(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> QueryFullResponse:
-        """
-        Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
-
-        * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
-        * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
-        * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
-        will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
-        * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
-        * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
-        * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
-
-        For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
-
-        Parameters
-        ----------
-        query : str
-            The search query string, which is the question the user is asking.
-
-        search : SearchCorporaParameters
-
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
-
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
-
-        generation : typing.Optional[GenerationParameters]
-
-        save_history : typing.Optional[bool]
-            Indicates whether to save the query in the query history.
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
-
-        Returns
-        -------
-        QueryFullResponse
-
-
-        Examples
-        --------
-        import asyncio
-
-        from vectara import AsyncVectara, SearchCorporaParameters
-
-        client = AsyncVectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
-
-
-        async def main() -> None:
-            await client.query(
-                query="Am I allowed to bring pets to work?",
-                search=SearchCorporaParameters(),
+        else:
+            raise ApiError(
+                body="The client must be instantiated with either 'api_key', 'token', or both 'client_id' and 'client_secret'"
             )
+        self._corpora: typing.Optional[AsyncCorporaClient] = None
+        self._upload: typing.Optional[AsyncUploadClient] = None
+        self._documents: typing.Optional[AsyncDocumentsClient] = None
+        self._metadata: typing.Optional[AsyncMetadataClient] = None
+        self._queries: typing.Optional[AsyncQueriesClient] = None
+        self._query_history: typing.Optional[AsyncQueryHistoryClient] = None
+        self._chats: typing.Optional[AsyncChatsClient] = None
+        self._llms: typing.Optional[AsyncLlmsClient] = None
+        self._llm: typing.Optional[AsyncLlmClient] = None
+        self._generation_presets: typing.Optional[AsyncGenerationPresetsClient] = None
+        self._factual_consistency: typing.Optional[AsyncFactualConsistencyClient] = None
+        self._encoders: typing.Optional[AsyncEncodersClient] = None
+        self._rerankers: typing.Optional[AsyncRerankersClient] = None
+        self._table_extractors: typing.Optional[AsyncTableExtractorsClient] = None
+        self._hallucination_correctors: typing.Optional[AsyncHallucinationCorrectorsClient] = None
+        self._jobs: typing.Optional[AsyncJobsClient] = None
+        self._users: typing.Optional[AsyncUsersClient] = None
+        self._api_keys: typing.Optional[AsyncApiKeysClient] = None
+        self._app_clients: typing.Optional[AsyncAppClientsClient] = None
+        self._auth: typing.Optional[AsyncAuthClient] = None
+        self._tool_servers: typing.Optional[AsyncToolServersClient] = None
+        self._tools: typing.Optional[AsyncToolsClient] = None
+        self._instructions: typing.Optional[AsyncInstructionsClient] = None
+        self._agents: typing.Optional[AsyncAgentsClient] = None
+        self._agent_sessions: typing.Optional[AsyncAgentSessionsClient] = None
+        self._agent_events: typing.Optional[AsyncAgentEventsClient] = None
+        self._agent_artifacts: typing.Optional[AsyncAgentArtifactsClient] = None
+        self._agent_schedules: typing.Optional[AsyncAgentSchedulesClient] = None
+
+    @property
+    def corpora(self):
+        if self._corpora is None:
+            from .corpora.client import AsyncCorporaClient  # noqa: E402
+
+            self._corpora = AsyncCorporaClient(client_wrapper=self._client_wrapper)
+        return self._corpora
+
+    @property
+    def upload(self):
+        if self._upload is None:
+            from .upload.client import AsyncUploadClient  # noqa: E402
+
+            self._upload = AsyncUploadClient(client_wrapper=self._client_wrapper)
+        return self._upload
+
+    @property
+    def documents(self):
+        if self._documents is None:
+            from .documents.client import AsyncDocumentsClient  # noqa: E402
+
+            self._documents = AsyncDocumentsClient(client_wrapper=self._client_wrapper)
+        return self._documents
+
+    @property
+    def metadata(self):
+        if self._metadata is None:
+            from .metadata.client import AsyncMetadataClient  # noqa: E402
+
+            self._metadata = AsyncMetadataClient(client_wrapper=self._client_wrapper)
+        return self._metadata
+
+    @property
+    def queries(self):
+        if self._queries is None:
+            from .queries.client import AsyncQueriesClient  # noqa: E402
+
+            self._queries = AsyncQueriesClient(client_wrapper=self._client_wrapper)
+        return self._queries
+
+    @property
+    def query_history(self):
+        if self._query_history is None:
+            from .query_history.client import AsyncQueryHistoryClient  # noqa: E402
+
+            self._query_history = AsyncQueryHistoryClient(client_wrapper=self._client_wrapper)
+        return self._query_history
+
+    @property
+    def chats(self):
+        if self._chats is None:
+            from .chats.client import AsyncChatsClient  # noqa: E402
+
+            self._chats = AsyncChatsClient(client_wrapper=self._client_wrapper)
+        return self._chats
+
+    @property
+    def llms(self):
+        if self._llms is None:
+            from .llms.client import AsyncLlmsClient  # noqa: E402
+
+            self._llms = AsyncLlmsClient(client_wrapper=self._client_wrapper)
+        return self._llms
+
+    @property
+    def llm(self):
+        if self._llm is None:
+            from .llm.client import AsyncLlmClient  # noqa: E402
+
+            self._llm = AsyncLlmClient(client_wrapper=self._client_wrapper)
+        return self._llm
+
+    @property
+    def generation_presets(self):
+        if self._generation_presets is None:
+            from .generation_presets.client import AsyncGenerationPresetsClient  # noqa: E402
+
+            self._generation_presets = AsyncGenerationPresetsClient(client_wrapper=self._client_wrapper)
+        return self._generation_presets
+
+    @property
+    def factual_consistency(self):
+        if self._factual_consistency is None:
+            from .factual_consistency.client import AsyncFactualConsistencyClient  # noqa: E402
+
+            self._factual_consistency = AsyncFactualConsistencyClient(client_wrapper=self._client_wrapper)
+        return self._factual_consistency
+
+    @property
+    def encoders(self):
+        if self._encoders is None:
+            from .encoders.client import AsyncEncodersClient  # noqa: E402
+
+            self._encoders = AsyncEncodersClient(client_wrapper=self._client_wrapper)
+        return self._encoders
+
+    @property
+    def rerankers(self):
+        if self._rerankers is None:
+            from .rerankers.client import AsyncRerankersClient  # noqa: E402
 
+            self._rerankers = AsyncRerankersClient(client_wrapper=self._client_wrapper)
+        return self._rerankers
 
-        asyncio.run(main())
-        """
-        _response = await self._client_wrapper.httpx_client.request(
-            "v2/query",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": False,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        )
-        try:
-            if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    QueryFullResponse,
-                    parse_obj_as(
-                        type_=QueryFullResponse,  # type: ignore
-                        object_=_response.json(),
-                    ),
-                )
-            if _response.status_code == 400:
-                raise BadRequestError(
-                    typing.cast(
-                        BadRequestErrorBody,
-                        parse_obj_as(
-                            type_=BadRequestErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 403:
-                raise ForbiddenError(
-                    typing.cast(
-                        Error,
-                        parse_obj_as(
-                            type_=Error,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 404:
-                raise NotFoundError(
-                    typing.cast(
-                        NotFoundErrorBody,
-                        parse_obj_as(
-                            type_=NotFoundErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            _response_json = _response.json()
-        except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    async def chat_stream(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        chat: typing.Optional[ChatParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.AsyncIterator[ChatStreamedResponse]:
-        """
-        Create a chat while specifying the default retrieval parameters used by the prompt.
+    @property
+    def table_extractors(self):
+        if self._table_extractors is None:
+            from .table_extractors.client import AsyncTableExtractorsClient  # noqa: E402
 
-        Parameters
-        ----------
-        query : str
-            The chat message or question.
+            self._table_extractors = AsyncTableExtractorsClient(client_wrapper=self._client_wrapper)
+        return self._table_extractors
 
-        search : SearchCorporaParameters
+    @property
+    def hallucination_correctors(self):
+        if self._hallucination_correctors is None:
+            from .hallucination_correctors.client import AsyncHallucinationCorrectorsClient  # noqa: E402
 
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
+            self._hallucination_correctors = AsyncHallucinationCorrectorsClient(client_wrapper=self._client_wrapper)
+        return self._hallucination_correctors
 
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
+    @property
+    def jobs(self):
+        if self._jobs is None:
+            from .jobs.client import AsyncJobsClient  # noqa: E402
 
-        generation : typing.Optional[GenerationParameters]
+            self._jobs = AsyncJobsClient(client_wrapper=self._client_wrapper)
+        return self._jobs
 
-        chat : typing.Optional[ChatParameters]
+    @property
+    def users(self):
+        if self._users is None:
+            from .users.client import AsyncUsersClient  # noqa: E402
 
-        save_history : typing.Optional[bool]
-            Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+            self._users = AsyncUsersClient(client_wrapper=self._client_wrapper)
+        return self._users
 
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
+    @property
+    def api_keys(self):
+        if self._api_keys is None:
+            from .api_keys.client import AsyncApiKeysClient  # noqa: E402
 
-        Yields
-        ------
-        typing.AsyncIterator[ChatStreamedResponse]
+            self._api_keys = AsyncApiKeysClient(client_wrapper=self._client_wrapper)
+        return self._api_keys
 
+    @property
+    def app_clients(self):
+        if self._app_clients is None:
+            from .app_clients.client import AsyncAppClientsClient  # noqa: E402
 
-        Examples
-        --------
-        import asyncio
+            self._app_clients = AsyncAppClientsClient(client_wrapper=self._client_wrapper)
+        return self._app_clients
 
-        from vectara import AsyncVectara, SearchCorporaParameters
+    @property
+    def auth(self):
+        if self._auth is None:
+            from .auth.client import AsyncAuthClient  # noqa: E402
 
-        client = AsyncVectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
+            self._auth = AsyncAuthClient(client_wrapper=self._client_wrapper)
+        return self._auth
 
+    @property
+    def tool_servers(self):
+        if self._tool_servers is None:
+            from .tool_servers.client import AsyncToolServersClient  # noqa: E402
 
-        async def main() -> None:
-            response = await client.chat_stream(
-                query="How can I use the Vectara platform?",
-                search=SearchCorporaParameters(),
-            )
-            async for chunk in response:
-                yield chunk
-
-
-        asyncio.run(main())
-        """
-        async with self._client_wrapper.httpx_client.stream(
-            "v2/chats",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "chat": convert_and_respect_annotation_metadata(
-                    object_=chat, annotation=ChatParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": True,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        ) as _response:
-            try:
-                if 200 <= _response.status_code < 300:
-                    _event_source = httpx_sse.EventSource(_response)
-                    async for _sse in _event_source.aiter_sse():
-                        try:
-                            yield typing.cast(
-                                ChatStreamedResponse,
-                                parse_obj_as(
-                                    type_=ChatStreamedResponse,  # type: ignore
-                                    object_=json.loads(_sse.data),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
-                await _response.aread()
-                if _response.status_code == 400:
-                    raise BadRequestError(
-                        typing.cast(
-                            BadRequestErrorBody,
-                            parse_obj_as(
-                                type_=BadRequestErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 403:
-                    raise ForbiddenError(
-                        typing.cast(
-                            Error,
-                            parse_obj_as(
-                                type_=Error,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                if _response.status_code == 404:
-                    raise NotFoundError(
-                        typing.cast(
-                            NotFoundErrorBody,
-                            parse_obj_as(
-                                type_=NotFoundErrorBody,  # type: ignore
-                                object_=_response.json(),
-                            ),
-                        )
-                    )
-                _response_json = _response.json()
-            except JSONDecodeError:
-                raise ApiError(status_code=_response.status_code, body=_response.text)
-            raise ApiError(status_code=_response.status_code, body=_response_json)
-
-    async def chat(
-        self,
-        *,
-        query: str,
-        search: SearchCorporaParameters,
-        request_timeout: typing.Optional[int] = None,
-        request_timeout_millis: typing.Optional[int] = None,
-        generation: typing.Optional[GenerationParameters] = OMIT,
-        chat: typing.Optional[ChatParameters] = OMIT,
-        save_history: typing.Optional[bool] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> ChatFullResponse:
-        """
-        Create a chat while specifying the default retrieval parameters used by the prompt.
-
-        Parameters
-        ----------
-        query : str
-            The chat message or question.
+            self._tool_servers = AsyncToolServersClient(client_wrapper=self._client_wrapper)
+        return self._tool_servers
 
-        search : SearchCorporaParameters
+    @property
+    def tools(self):
+        if self._tools is None:
+            from .tools.client import AsyncToolsClient  # noqa: E402
 
-        request_timeout : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified seconds or time out.
+            self._tools = AsyncToolsClient(client_wrapper=self._client_wrapper)
+        return self._tools
 
-        request_timeout_millis : typing.Optional[int]
-            The API will make a best effort to complete the request in the specified milliseconds or time out.
+    @property
+    def instructions(self):
+        if self._instructions is None:
+            from .instructions.client import AsyncInstructionsClient  # noqa: E402
 
-        generation : typing.Optional[GenerationParameters]
+            self._instructions = AsyncInstructionsClient(client_wrapper=self._client_wrapper)
+        return self._instructions
 
-        chat : typing.Optional[ChatParameters]
+    @property
+    def agents(self):
+        if self._agents is None:
+            from .agents.client import AsyncAgentsClient  # noqa: E402
 
-        save_history : typing.Optional[bool]
-            Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+            self._agents = AsyncAgentsClient(client_wrapper=self._client_wrapper)
+        return self._agents
 
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
+    @property
+    def agent_sessions(self):
+        if self._agent_sessions is None:
+            from .agent_sessions.client import AsyncAgentSessionsClient  # noqa: E402
 
-        Returns
-        -------
-        ChatFullResponse
+            self._agent_sessions = AsyncAgentSessionsClient(client_wrapper=self._client_wrapper)
+        return self._agent_sessions
 
+    @property
+    def agent_events(self):
+        if self._agent_events is None:
+            from .agent_events.client import AsyncAgentEventsClient  # noqa: E402
 
-        Examples
-        --------
-        import asyncio
+            self._agent_events = AsyncAgentEventsClient(client_wrapper=self._client_wrapper)
+        return self._agent_events
 
-        from vectara import AsyncVectara, SearchCorporaParameters
-
-        client = AsyncVectara(
-            api_key="YOUR_API_KEY",
-            client_id="YOUR_CLIENT_ID",
-            client_secret="YOUR_CLIENT_SECRET",
-        )
+    @property
+    def agent_artifacts(self):
+        if self._agent_artifacts is None:
+            from .agent_artifacts.client import AsyncAgentArtifactsClient  # noqa: E402
 
+            self._agent_artifacts = AsyncAgentArtifactsClient(client_wrapper=self._client_wrapper)
+        return self._agent_artifacts
 
-        async def main() -> None:
-            await client.chat(
-                query="How can I use the Vectara platform?",
-                search=SearchCorporaParameters(),
-            )
+    @property
+    def agent_schedules(self):
+        if self._agent_schedules is None:
+            from .agent_schedules.client import AsyncAgentSchedulesClient  # noqa: E402
 
-
-        asyncio.run(main())
-        """
-        _response = await self._client_wrapper.httpx_client.request(
-            "v2/chats",
-            base_url=self._client_wrapper.get_environment().default,
-            method="POST",
-            json={
-                "query": query,
-                "search": convert_and_respect_annotation_metadata(
-                    object_=search, annotation=SearchCorporaParameters, direction="write"
-                ),
-                "generation": convert_and_respect_annotation_metadata(
-                    object_=generation, annotation=GenerationParameters, direction="write"
-                ),
-                "chat": convert_and_respect_annotation_metadata(
-                    object_=chat, annotation=ChatParameters, direction="write"
-                ),
-                "save_history": save_history,
-                "stream_response": False,
-            },
-            headers={
-                "Request-Timeout": str(request_timeout) if request_timeout is not None else None,
-                "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None,
-            },
-            request_options=request_options,
-            omit=OMIT,
-        )
-        try:
-            if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    ChatFullResponse,
-                    parse_obj_as(
-                        type_=ChatFullResponse,  # type: ignore
-                        object_=_response.json(),
-                    ),
-                )
-            if _response.status_code == 400:
-                raise BadRequestError(
-                    typing.cast(
-                        BadRequestErrorBody,
-                        parse_obj_as(
-                            type_=BadRequestErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 403:
-                raise ForbiddenError(
-                    typing.cast(
-                        Error,
-                        parse_obj_as(
-                            type_=Error,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            if _response.status_code == 404:
-                raise NotFoundError(
-                    typing.cast(
-                        NotFoundErrorBody,
-                        parse_obj_as(
-                            type_=NotFoundErrorBody,  # type: ignore
-                            object_=_response.json(),
-                        ),
-                    )
-                )
-            _response_json = _response.json()
-        except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
\ No newline at end of file
+            self._agent_schedules = AsyncAgentSchedulesClient(client_wrapper=self._client_wrapper)
+        return self._agent_schedules

From c654170e590d3e0d40d3356cd75531f041913bed Mon Sep 17 00:00:00 2001
From: adeelehsan <aadeel.ehsan@gmail.com>
Date: Tue, 14 Apr 2026 20:44:12 +0500
Subject: [PATCH 2/2] Add base_client.py back to .fernignore to protect custom
 api_key auth

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .fernignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.fernignore b/.fernignore
index f9cfa38..ea72b31 100644
--- a/.fernignore
+++ b/.fernignore
@@ -4,7 +4,8 @@
 .github/ISSUE_TEMPLATE/
 
 src/vectara/client.py
-# src/vectara/base_client.py -- removed to let Fern regenerate with new modules (agents, tools, etc.)
+# base_client.py carries the custom api_key auth path; keep Fern from regenerating it
+src/vectara/base_client.py
 src/vectara/auth/client.py
 
 src/vectara/config/