52 changes: 48 additions & 4 deletions llm-complete-guide/README.md
@@ -57,9 +57,9 @@ export ZENML_PROJECT_SECRET_NAME=llm-complete

### Setting up Supabase

[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
database. It's simple to use and has a free tier that should be sufficient for
this project. Once you've created a Supabase account and organisation, you'll
this project. Once you've created a Supabase account and organization, you'll
need to create a new project.

![](.assets/supabase-create-project.png)
@@ -76,7 +76,7 @@ string from the Supabase dashboard.

![](.assets/supabase-connection-string.png)

In case supabase is not an option for you, you can use a different database as the backend.
In case Supabase is not an option for you, you can use a different database as the backend.

### Running the RAG pipeline

@@ -114,6 +114,51 @@ Note that Claude will require a different API key from Anthropic. See [the
`litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
this up.
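
If you want to sanity-check the Anthropic key outside the pipeline, a minimal `litellm` call looks roughly like this (the model name is illustrative and not pinned by this repo):

```python
from litellm import completion

# Assumes the ANTHROPIC_API_KEY environment variable is already set;
# the model name below is illustrative.
response = completion(
    model="claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "What is retrieval-augmented generation?"}],
)
print(response.choices[0].message.content)
```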

### Deploying the RAG pipeline

![](.assets/huggingface-space-rag-deployment.png)

You'll need to update and add some secrets to make this work with your Hugging
Face account. To get your ZenML service account API token and store URL, you can
first create a new service account:

```bash
zenml service-account create <SERVICE_ACCOUNT_NAME>
```

For more information on this part of the process, please refer to the [ZenML
documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).

Once you have your service account API token and store URL (the URL of your
deployed ZenML tenant), you can update the secrets with the following command:

```bash
zenml secret update llm-complete --zenml_api_token=<YOUR_ZENML_SERVICE_ACCOUNT_API_TOKEN> --zenml_store_url=<YOUR_ZENML_STORE_URL>
```
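
As a quick check, you can read those values back with the ZenML client — the same pattern the pipeline code uses to fetch the `hf_token` secret:

```python
from zenml.client import Client

# Sketch: read back the secret values stored above.
secret = Client().get_secret("llm-complete")
store_url = secret.secret_values["zenml_store_url"]
api_token = secret.secret_values["zenml_api_token"]
print(store_url)  # avoid printing the token itself
```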

To set the Hugging Face user and Space used for the Gradio app deployment,
set the following environment variables:

```bash
export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
```
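
The deployment step presumably resolves these variables with a fallback for the Space name; a sketch of that lookup (illustrative, not the repo's actual code):

```python
import os

# Sketch: resolve the target Hugging Face Space from the environment.
hf_username = os.environ["ZENML_HF_USERNAME"]  # required
hf_space_name = os.environ.get("ZENML_HF_SPACE_NAME", "llm-complete-guide-rag")
space_id = f"{hf_username}/{hf_space_name}"
```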

To deploy the RAG pipeline, you can use the following command:

```shell
python run.py --deploy
```

Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:

```shell
python run.py --rag --deploy
```

This will open a Hugging Face Space in your browser where you can interact with
the RAG pipeline.
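
You can also query the deployed app programmatically; here is a sketch using `gradio_client`, where the Space ID and the `/chat` endpoint name are assumptions based on Gradio's `ChatInterface` defaults:

```python
from gradio_client import Client

# Sketch: send one question to the deployed chat app.
client = Client("<YOUR_HF_USERNAME>/llm-complete-guide-rag")
answer = client.predict("How do I get started with ZenML?", api_name="/chat")
print(answer)
```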

### Run the LLM RAG evaluation pipeline

To run the evaluation pipeline, you can use the following command:
@@ -157,7 +202,6 @@ will need to change the hf repo urls to a space you have permissions to.
zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
```
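
The pipelines then connect to Argilla with these credentials; a minimal sketch of that connection, assuming the Argilla v2 client API used elsewhere in this repo:

```python
import argilla as rg
from zenml.client import Client

# Sketch: build an Argilla client from the secret stored above.
secrets = Client().get_secret("llm-complete").secret_values
client = rg.Argilla(
    api_url=secrets["argilla_api_url"],
    api_key=secrets["argilla_api_key"],
)
```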


### Finetune the embeddings

As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
13 changes: 13 additions & 0 deletions llm-complete-guide/deployment_hf.py
@@ -0,0 +1,13 @@
import gradio as gr
from utils.llm_utils import process_input_with_retrieval


def predict(message, history):
    # Answer each chat message with the RAG pipeline: retrieve the top 20
    # chunks, rerank them, and pass the result to the LLM.
    return process_input_with_retrieval(
        input=message,
        n_items_retrieved=20,
        use_reranking=True,
    )


# Expose predict as a chat UI; this file serves as the Space's Gradio app.
gr.ChatInterface(predict, type="messages").launch()
61 changes: 29 additions & 32 deletions llm-complete-guide/gh_action_rag.py
@@ -21,11 +21,10 @@

import click
import yaml
from pipelines.llm_basic_rag import llm_basic_rag
from zenml.client import Client
from zenml.exceptions import ZenKeyError

from pipelines.llm_basic_rag import llm_basic_rag


@click.command(
help="""
@@ -39,7 +38,6 @@
default=False,
help="Disable cache.",
)

@click.option(
"--create-template",
"create_template",
@@ -51,26 +49,26 @@
"--config",
"config",
default="rag_local_dev.yaml",
help="Specify a configuration file"
help="Specify a configuration file",
)
@click.option(
"--service-account-id",
"service_account_id",
default=None,
help="Specify a service account ID"
help="Specify a service account ID",
)
@click.option(
"--event-source-id",
"event_source_id",
default=None,
help="Specify an event source ID"
help="Specify an event source ID",
)
def main(
no_cache: bool = False,
config: Optional[str]= "rag_local_dev.yaml",
config: Optional[str] = "rag_local_dev.yaml",
create_template: bool = False,
service_account_id: Optional[str] = None,
event_source_id: Optional[str] = None
event_source_id: Optional[str] = None,
):
"""
Executes the pipeline to train a basic RAG model.
@@ -86,43 +84,43 @@ def main(
client = Client()
config_path = Path(__file__).parent / "configs" / config

with (open(config_path,"r") as file):
with open(config_path, "r") as file:
config = yaml.safe_load(file)

if create_template:

# run pipeline
run = llm_basic_rag.with_options(
config_path=str(config_path),
enable_cache=not no_cache
config_path=str(config_path), enable_cache=not no_cache
)()
# create new run template
rt = client.create_run_template(
name=f"production-llm-complete-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
deployment_id=run.deployment_id
deployment_id=run.deployment_id,
)

try:
# Check if an action has already been configured for this pipeline
action = client.get_action(
name_id_or_prefix="LLM Complete (production)",
allow_name_prefix_match=True
allow_name_prefix_match=True,
)
except ZenKeyError:
if not event_source_id:
raise RuntimeError("An event source is required for this workflow.")
raise RuntimeError(
"An event source is required for this workflow."
)

if not service_account_id:
service_account_id = client.create_service_account(
name="github-action-sa",
description="To allow triggered pipelines to run with M2M authentication."
description="To allow triggered pipelines to run with M2M authentication.",
).id

action_id = client.create_action(
name="LLM Complete (production)",
configuration={
"template_id": str(rt.id),
"run_config": pop_restricted_configs(config)
"run_config": pop_restricted_configs(config),
},
service_account_id=service_account_id,
auth_window=0,
@@ -132,7 +130,7 @@
event_source_id=UUID(event_source_id),
event_filter={"event_type": "tag_event"},
action_id=action_id,
description="Trigger pipeline to reindex everytime the docs are updated through git."
description="Trigger pipeline to reindex everytime the docs are updated through git.",
)
else:
# update the action with the new template
@@ -141,14 +139,13 @@
name_id_or_prefix=action.id,
configuration={
"template_id": str(rt.id),
"run_config": pop_restricted_configs(config)
}
"run_config": pop_restricted_configs(config),
},
)

else:
llm_basic_rag.with_options(
config_path=str(config_path),
enable_cache=not no_cache
config_path=str(config_path), enable_cache=not no_cache
)()


@@ -162,22 +159,22 @@ def pop_restricted_configs(run_configuration: dict) -> dict:
Modified dictionary with restricted items removed
"""
# Pop top-level restricted items
run_configuration.pop('parameters', None)
run_configuration.pop('build', None)
run_configuration.pop('schedule', None)
run_configuration.pop("parameters", None)
run_configuration.pop("build", None)
run_configuration.pop("schedule", None)

# Pop docker settings if they exist
if 'settings' in run_configuration:
run_configuration['settings'].pop('docker', None)
if "settings" in run_configuration:
run_configuration["settings"].pop("docker", None)

# Pop docker settings from steps if they exist
if 'steps' in run_configuration:
for step in run_configuration['steps'].values():
if 'settings' in step:
step['settings'].pop('docker', None)
if "steps" in run_configuration:
for step in run_configuration["steps"].values():
if "settings" in step:
step["settings"].pop("docker", None)

return run_configuration


if __name__ == "__main__":
main()
main()
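
For illustration, `pop_restricted_configs` strips everything a triggered run is not allowed to override; a minimal sketch of its effect on a hypothetical run configuration:

```python
# Hypothetical run configuration, for illustration only.
config = {
    "parameters": {"docs_url": "https://docs.zenml.io"},
    "schedule": {"cron_expression": "0 * * * *"},
    "settings": {"docker": {"requirements": "requirements.txt"}, "resources": {}},
    "steps": {"url_scraper": {"settings": {"docker": {}}}},
}

cleaned = pop_restricted_configs(config)
# cleaned == {
#     "settings": {"resources": {}},
#     "steps": {"url_scraper": {"settings": {}}},
# }
```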
3 changes: 1 addition & 2 deletions llm-complete-guide/pipelines/llm_basic_rag.py
@@ -15,15 +15,14 @@
# limitations under the License.
#

from zenml import pipeline

from steps.populate_index import (
generate_embeddings,
index_generator,
preprocess_documents,
)
from steps.url_scraper import url_scraper
from steps.web_url_loader import web_url_loader
from zenml import pipeline


@pipeline
14 changes: 4 additions & 10 deletions llm-complete-guide/pipelines/llm_eval.py
@@ -13,12 +13,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from pathlib import Path
from typing import Optional

import click

from steps.eval_e2e import e2e_evaluation, e2e_evaluation_llm_judged
from steps.eval_retrieval import (
retrieval_evaluation_full,
@@ -82,12 +80,9 @@ def llm_eval() -> None:
"--config",
"config",
default="rag_local_dev.yaml",
help="Specify a configuration file"
help="Specify a configuration file",
)
def main(
no_cache: bool = False,
config: Optional[str] = "rag_eval.yaml"
):
def main(no_cache: bool = False, config: Optional[str] = "rag_eval.yaml"):
"""
Executes the pipeline to evaluate the RAG model.

@@ -98,10 +93,9 @@ def main(
config_path = Path(__file__).parent.parent / "configs" / config

llm_eval.with_options(
config_path=str(config_path),
enable_cache=not no_cache
config_path=str(config_path), enable_cache=not no_cache
)()


if __name__ == "__main__":
main()
main()
4 changes: 1 addition & 3 deletions llm-complete-guide/requirements.txt
@@ -1,13 +1,11 @@
zenml[server]>=0.68.1
langchain-community
ratelimit
langchain>=0.0.325
langchain-openai
pgvector
psycopg2-binary
beautifulsoup4
unstructured
pandas
openai
numpy
sentence-transformers>=3
transformers
6 changes: 3 additions & 3 deletions llm-complete-guide/run.py
@@ -151,7 +151,7 @@
"--config",
"config",
default=None,
help="Generate chunks for Hugging Face dataset",
help="Path to config",
)
def main(
rag: bool = False,
@@ -181,7 +181,7 @@ def main(
argilla (bool): If `True`, the Argilla annotations will be used.
chunks (bool): If `True`, the chunks pipeline will be run.
reranked (bool): If `True`, rerankers will be used
config (str: Path to config
config (str): Path to config
"""
pipeline_args = {"enable_cache": not no_cache}
embeddings_finetune_args = {
@@ -264,4 +264,4 @@ def main(
materializer_registry.register_materializer_type(
Document, DocumentMaterializer
)
main()
main()
5 changes: 3 additions & 2 deletions llm-complete-guide/steps/finetune_embeddings.py
@@ -23,7 +23,8 @@
DATASET_NAME_DISTILABEL,
EMBEDDINGS_MODEL_ID_BASELINE,
EMBEDDINGS_MODEL_ID_FINE_TUNED,
EMBEDDINGS_MODEL_MATRYOSHKA_DIMS, SECRET_NAME,
EMBEDDINGS_MODEL_MATRYOSHKA_DIMS,
SECRET_NAME,
)
from datasets import DatasetDict, concatenate_datasets, load_dataset
from datasets.arrow_dataset import Dataset
@@ -294,7 +295,7 @@ def finetune(
trainer.model.push_to_hub(
f"zenml/{EMBEDDINGS_MODEL_ID_FINE_TUNED}",
exist_ok=True,
token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"]
token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"],
)

log_model_metadata(
3 changes: 1 addition & 2 deletions llm-complete-guide/steps/push_to_argilla.py
@@ -16,7 +16,6 @@
import argilla as rg
import torch
from argilla._exceptions import ConflictError

from constants import (
DATASET_NAME_ARGILLA,
EMBEDDINGS_MODEL_ID_BASELINE,
@@ -115,7 +114,7 @@ def push_to_argilla(train_dataset: Dataset, test_dataset: Dataset) -> None:
try:
ds.create()
except ConflictError:
ds = client.datasets(DATASET_NAME_ARGILLA)
ds = client.datasets(DATASET_NAME_ARGILLA)

# process original HF dataset
try: