diff --git a/llm-complete-guide/.assets/huggingface-space-rag-deployment.png b/llm-complete-guide/.assets/huggingface-space-rag-deployment.png
new file mode 100644
index 00000000..2fecf64b
Binary files /dev/null and b/llm-complete-guide/.assets/huggingface-space-rag-deployment.png differ
diff --git a/llm-complete-guide/README.md b/llm-complete-guide/README.md
index 033de4d6..5e5844c4 100644
--- a/llm-complete-guide/README.md
+++ b/llm-complete-guide/README.md
@@ -57,9 +57,9 @@ export ZENML_PROJECT_SECRET_NAME=llm-complete
 
 ### Setting up Supabase
 
-[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
+[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
 database. It's simple to use and has a free tier that should be sufficient for
-this project. Once you've created a Supabase account and organisation, you'll
+this project. Once you've created a Supabase account and organization, you'll
 need to create a new project.
 
 ![](.assets/supabase-create-project.png)
@@ -76,7 +76,7 @@ string from the Supabase dashboard.
 
 ![](.assets/supabase-connection-string.png)
 
-In case supabase is not an option for you, you can use a different database as the backend.
+If Supabase is not an option for you, you can use a different PostgreSQL database as the backend.
 
 ### Running the RAG pipeline
 
@@ -114,6 +114,51 @@ Note that Claude will require a different API key from Anthropic. See [the
 `litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
 this up.
 
+### Deploying the RAG pipeline
+
+![](.assets/huggingface-space-rag-deployment.png)
+
+You'll need to update and add some secrets to make this work with your Hugging
+Face account. To get your ZenML service account API token and store URL, you can
+first create a new service account:
+
+```bash
+zenml service-account create <SERVICE_ACCOUNT_NAME>
+```
+
+For more information on this part of the process, please refer to the [ZenML
+documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).
+
+Once you have your service account API token and store URL (the URL of your
+deployed ZenML tenant), you can update the secrets with the following command:
+
+```bash
+zenml secret update llm-complete --zenml_api_token=<YOUR_API_TOKEN> --zenml_store_url=<YOUR_STORE_URL>
+```
+
+To set the Hugging Face user and Space that get used for the Gradio app
+deployment, set the following environment variables:
+
+```bash
+export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
+export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
+```
+
+To deploy the RAG pipeline, you can use the following command:
+
+```shell
+python run.py --deploy
+```
+
+Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
+
+```shell
+python run.py --rag --deploy
+```
+
+This will deploy the Gradio app to a private Hugging Face Space and open it in
+your browser, where you can interact with the RAG pipeline.
+
 ### Run the LLM RAG evaluation pipeline
 
 To run the evaluation pipeline, you can use the following command:
@@ -157,7 +202,6 @@ will need to change the hf repo urls to a space you have permissions to.
 zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
 ```
-
 ### Finetune the embeddings
 
 As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
diff --git a/llm-complete-guide/deployment_hf.py b/llm-complete-guide/deployment_hf.py
new file mode 100644
index 00000000..6724fc0f
--- /dev/null
+++ b/llm-complete-guide/deployment_hf.py
@@ -0,0 +1,13 @@
+import gradio as gr
+from utils.llm_utils import process_input_with_retrieval
+
+
+def predict(message, history):
+    return process_input_with_retrieval(
+        input=message,
+        n_items_retrieved=20,
+        use_reranking=True,
+    )
+
+
+gr.ChatInterface(predict, type="messages").launch()
diff --git a/llm-complete-guide/gh_action_rag.py b/llm-complete-guide/gh_action_rag.py
index 49c8c0f3..4828b57d 100644
--- a/llm-complete-guide/gh_action_rag.py
+++ b/llm-complete-guide/gh_action_rag.py
@@ -21,11 +21,10 @@
 
 import click
 import yaml
+from pipelines.llm_basic_rag import llm_basic_rag
 from zenml.client import Client
 from zenml.exceptions import ZenKeyError
 
-from pipelines.llm_basic_rag import llm_basic_rag
-
 
 @click.command(
     help="""
@@ -39,7 +38,6 @@
     default=False,
     help="Disable cache.",
 )
-
 @click.option(
     "--create-template",
     "create_template",
@@ -51,26 +49,26 @@
     "--config",
     "config",
     default="rag_local_dev.yaml",
-    help="Specify a configuration file"
+    help="Specify a configuration file",
 )
 @click.option(
     "--service-account-id",
     "service_account_id",
     default=None,
-    help="Specify a service account ID"
+    help="Specify a service account ID",
 )
 @click.option(
     "--event-source-id",
     "event_source_id",
     default=None,
-    help="Specify an event source ID"
+    help="Specify an event source ID",
 )
 def main(
     no_cache: bool = False,
-    config: Optional[str]= "rag_local_dev.yaml",
+    config: Optional[str] = "rag_local_dev.yaml",
     create_template: bool = False,
     service_account_id: Optional[str] = None,
-    event_source_id: Optional[str] = None
+    event_source_id: Optional[str] = None,
 ):
     """
-    Executes the pipeline to train a basic RAG model.
+    Executes the basic RAG pipeline.
@@ -86,43 +84,43 @@ def main(
     client = Client()
 
     config_path = Path(__file__).parent / "configs" / config
-    with (open(config_path,"r") as file):
+    with open(config_path, "r") as file:
         config = yaml.safe_load(file)
 
     if create_template:
-        # run pipeline
         run = llm_basic_rag.with_options(
-            config_path=str(config_path),
-            enable_cache=not no_cache
+            config_path=str(config_path), enable_cache=not no_cache
         )()
         # create new run template
         rt = client.create_run_template(
             name=f"production-llm-complete-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
-            deployment_id=run.deployment_id
+            deployment_id=run.deployment_id,
         )
 
         try:
-            # Check if an action ahs already be configured for this pipeline
+            # Check if an action has already been configured for this pipeline
             action = client.get_action(
                 name_id_or_prefix="LLM Complete (production)",
-                allow_name_prefix_match=True
+                allow_name_prefix_match=True,
             )
         except ZenKeyError:
             if not event_source_id:
-                raise RuntimeError("An event source is required for this workflow.")
+                raise RuntimeError(
+                    "An event source is required for this workflow."
+                )
 
             if not service_account_id:
                 service_account_id = client.create_service_account(
                     name="github-action-sa",
-                    description="To allow triggered pipelines to run with M2M authentication."
+                    description="To allow triggered pipelines to run with M2M authentication.",
                 ).id
 
             action_id = client.create_action(
                 name="LLM Complete (production)",
                 configuration={
                     "template_id": str(rt.id),
-                    "run_config": pop_restricted_configs(config)
+                    "run_config": pop_restricted_configs(config),
                 },
                 service_account_id=service_account_id,
                 auth_window=0,
@@ -132,7 +130,7 @@
                 event_source_id=UUID(event_source_id),
                 event_filter={"event_type": "tag_event"},
                 action_id=action_id,
-                description="Trigger pipeline to reindex everytime the docs are updated through git."
+                description="Trigger pipeline to reindex every time the docs are updated through git.",
             )
         else:
             # update the action with the new template
@@ -141,14 +139,13 @@
                 name_id_or_prefix=action.id,
                 configuration={
                     "template_id": str(rt.id),
-                    "run_config": pop_restricted_configs(config)
-                }
+                    "run_config": pop_restricted_configs(config),
+                },
             )
 
     else:
         llm_basic_rag.with_options(
-            config_path=str(config_path),
-            enable_cache=not no_cache
+            config_path=str(config_path), enable_cache=not no_cache
         )()
 
 
@@ -162,22 +159,22 @@ def pop_restricted_configs(run_configuration: dict) -> dict:
         Modified dictionary with restricted items removed
     """
     # Pop top-level restricted items
-    run_configuration.pop('parameters', None)
-    run_configuration.pop('build', None)
-    run_configuration.pop('schedule', None)
+    run_configuration.pop("parameters", None)
+    run_configuration.pop("build", None)
+    run_configuration.pop("schedule", None)
 
     # Pop docker settings if they exist
-    if 'settings' in run_configuration:
-        run_configuration['settings'].pop('docker', None)
+    if "settings" in run_configuration:
+        run_configuration["settings"].pop("docker", None)
 
     # Pop docker settings from steps if they exist
-    if 'steps' in run_configuration:
-        for step in run_configuration['steps'].values():
-            if 'settings' in step:
-                step['settings'].pop('docker', None)
+    if "steps" in run_configuration:
+        for step in run_configuration["steps"].values():
+            if "settings" in step:
+                step["settings"].pop("docker", None)
 
     return run_configuration
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/llm-complete-guide/pipelines/llm_basic_rag.py b/llm-complete-guide/pipelines/llm_basic_rag.py
index 3cfb4051..6cf99f08 100644
--- a/llm-complete-guide/pipelines/llm_basic_rag.py
+++ b/llm-complete-guide/pipelines/llm_basic_rag.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 #
 
-from zenml import pipeline
-
 from steps.populate_index import (
     generate_embeddings,
     index_generator,
@@ -24,6 +22,7 @@
 )
 from steps.url_scraper import url_scraper
 from steps.web_url_loader import web_url_loader
+from zenml import pipeline
 
 
 @pipeline
diff --git a/llm-complete-guide/pipelines/llm_eval.py b/llm-complete-guide/pipelines/llm_eval.py
index d310fd18..8f604dac 100644
--- a/llm-complete-guide/pipelines/llm_eval.py
+++ b/llm-complete-guide/pipelines/llm_eval.py
@@ -13,12 +13,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
 from pathlib import Path
 from typing import Optional
 
 import click
-
 from steps.eval_e2e import e2e_evaluation, e2e_evaluation_llm_judged
 from steps.eval_retrieval import (
     retrieval_evaluation_full,
@@ -82,12 +80,9 @@ def llm_eval() -> None:
     "--config",
     "config",
     default="rag_local_dev.yaml",
-    help="Specify a configuration file"
+    help="Specify a configuration file",
 )
-def main(
-    no_cache: bool = False,
-    config: Optional[str] = "rag_eval.yaml"
-):
+def main(no_cache: bool = False, config: Optional[str] = "rag_eval.yaml"):
     """
-    Executes the pipeline to train a basic RAG model.
+    Executes the LLM RAG evaluation pipeline.
 
@@ -98,10 +93,9 @@ def main(
     config_path = Path(__file__).parent.parent / "configs" / config
 
     llm_eval.with_options(
-        config_path=str(config_path),
-        enable_cache=not no_cache
+        config_path=str(config_path), enable_cache=not no_cache
     )()
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/llm-complete-guide/requirements.txt b/llm-complete-guide/requirements.txt
index 13563b92..2c107e4b 100644
--- a/llm-complete-guide/requirements.txt
+++ b/llm-complete-guide/requirements.txt
@@ -1,13 +1,11 @@
 zenml[server]>=0.68.1
-langchain-community
 ratelimit
-langchain>=0.0.325
-langchain-openai
 pgvector
 psycopg2-binary
 beautifulsoup4
 unstructured
 pandas
+openai
 numpy
 sentence-transformers>=3
 transformers
diff --git a/llm-complete-guide/run.py b/llm-complete-guide/run.py
index 50bbf8fe..2152fda4 100644
--- a/llm-complete-guide/run.py
+++ b/llm-complete-guide/run.py
@@ -151,7 +151,7 @@
     "--config",
     "config",
     default=None,
-    help="Generate chunks for Hugging Face dataset",
+    help="Path to config",
 )
 def main(
     rag: bool = False,
@@ -181,7 +181,7 @@ def main(
         argilla (bool): If `True`, the Argilla annotations will be used.
         chunks (bool): If `True`, the chunks pipeline will be run.
         reranked (bool): If `True`, rerankers will be used
-        config (str: Path to config
+        config (str): Path to config
     """
     pipeline_args = {"enable_cache": not no_cache}
     embeddings_finetune_args = {
@@ -264,4 +264,4 @@ def main(
     materializer_registry.register_materializer_type(
         Document, DocumentMaterializer
     )
-    main()
\ No newline at end of file
+    main()
diff --git a/llm-complete-guide/steps/finetune_embeddings.py b/llm-complete-guide/steps/finetune_embeddings.py
index ad9d9469..3117c473 100644
--- a/llm-complete-guide/steps/finetune_embeddings.py
+++ b/llm-complete-guide/steps/finetune_embeddings.py
@@ -23,7 +23,8 @@
     DATASET_NAME_DISTILABEL,
     EMBEDDINGS_MODEL_ID_BASELINE,
     EMBEDDINGS_MODEL_ID_FINE_TUNED,
-    EMBEDDINGS_MODEL_MATRYOSHKA_DIMS, SECRET_NAME,
+    EMBEDDINGS_MODEL_MATRYOSHKA_DIMS,
+    SECRET_NAME,
 )
 from datasets import DatasetDict, concatenate_datasets, load_dataset
 from datasets.arrow_dataset import Dataset
@@ -294,7 +295,7 @@
     trainer.model.push_to_hub(
         f"zenml/{EMBEDDINGS_MODEL_ID_FINE_TUNED}",
         exist_ok=True,
-        token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"]
+        token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"],
     )
 
     log_model_metadata(
diff --git a/llm-complete-guide/steps/push_to_argilla.py b/llm-complete-guide/steps/push_to_argilla.py
index 90c3d2d9..e67bf621 100644
--- a/llm-complete-guide/steps/push_to_argilla.py
+++ b/llm-complete-guide/steps/push_to_argilla.py
@@ -16,7 +16,6 @@
 import argilla as rg
 import torch
 from argilla._exceptions import ConflictError
-
 from constants import (
     DATASET_NAME_ARGILLA,
     EMBEDDINGS_MODEL_ID_BASELINE,
@@ -115,7 +114,7 @@ def push_to_argilla(train_dataset: Dataset, test_dataset: Dataset) -> None:
     try:
         ds.create()
     except ConflictError:
-        ds = client.datasets(DATASET_NAME_ARGILLA)
+        ds = client.datasets(DATASET_NAME_ARGILLA)
 
     # process original HF dataset
     try:
diff --git a/llm-complete-guide/steps/rag_deployment.py b/llm-complete-guide/steps/rag_deployment.py
index 7779339c..a750dde6 100644
--- a/llm-complete-guide/steps/rag_deployment.py
+++ b/llm-complete-guide/steps/rag_deployment.py
@@ -1,8 +1,44 @@
-import time
+import os
+import webbrowser
 
-import gradio as gr
+from huggingface_hub import HfApi
 from utils.llm_utils import process_input_with_retrieval
 from zenml import step
+from zenml.client import Client
+from zenml.integrations.registry import integration_registry
+
+secret = Client().get_secret("llm-complete")
+
+ZENML_API_TOKEN = secret.secret_values["zenml_api_token"]
+ZENML_STORE_URL = secret.secret_values["zenml_store_url"]
+HF_TOKEN = os.getenv("HF_TOKEN")
+SPACE_USERNAME = os.environ.get("ZENML_HF_USERNAME", "zenml")
+SPACE_NAME = os.environ.get("ZENML_HF_SPACE_NAME", "llm-complete-guide-rag")
+
+hf_repo_id = f"{SPACE_USERNAME}/{SPACE_NAME}"
+gcp_reqs = integration_registry.select_integration_requirements("gcp")
+
+hf_repo_requirements = f"""
+zenml>=0.68.1
+ratelimit
+pgvector
+psycopg2-binary
+beautifulsoup4
+pandas
+openai
+numpy
+sentence-transformers>=3
+transformers
+litellm
+tiktoken
+matplotlib
+pyarrow
+rerankers[flashrank]
+datasets
+torch
+huggingface-hub
+{chr(10).join(gcp_reqs)}
+"""
 
 
 def predict(message, history):
@@ -13,15 +49,69 @@
     )
 
 
-@step
+def upload_files_to_repo(
+    api, repo_id: str, files_mapping: dict, token: str = HF_TOKEN
+):
+    """Upload multiple files to a Hugging Face repository
+
+    Args:
+        api: Hugging Face API client
+        repo_id: Target repository ID
+        files_mapping: Dict mapping local files to repo destinations
+        token: HF API token
+
""" + for local_path, repo_path in files_mapping.items(): + content = ( + local_path.encode() + if isinstance(local_path, str) and not os.path.exists(local_path) + else local_path + ) + api.upload_file( + path_or_fileobj=content, + path_in_repo=repo_path, + repo_id=repo_id, + repo_type="space", + token=token, + ) + + +@step(enable_cache=False) def gradio_rag_deployment() -> None: """Launches a Gradio chat interface with the slow echo demo. Starts a web server with a chat interface that echoes back user messages. The server runs indefinitely until manually stopped. """ - demo = gr.ChatInterface(predict, type="messages") - demo.launch(share=True, inbrowser=True) - # Keep the step running - while True: - time.sleep(1) + api = HfApi() + api.create_repo( + repo_id=hf_repo_id, + repo_type="space", + space_sdk="gradio", + private=True, + exist_ok=True, + token=HF_TOKEN, + ) + api.add_space_secret( + repo_id=hf_repo_id, + key="ZENML_STORE_API_KEY", + value=ZENML_API_TOKEN, + ) + api.add_space_secret( + repo_id=hf_repo_id, + key="ZENML_STORE_URL", + value=ZENML_STORE_URL, + ) + + files_to_upload = { + "deployment_hf.py": "app.py", + "utils/llm_utils.py": "utils/llm_utils.py", + "utils/openai_utils.py": "utils/openai_utils.py", + "utils/__init__.py": "utils/__init__.py", + "constants.py": "constants.py", + "structures.py": "structures.py", + hf_repo_requirements: "requirements.txt", + } + + upload_files_to_repo(api, hf_repo_id, files_to_upload, HF_TOKEN) + + webbrowser.open(f"https://huggingface.co/spaces/{hf_repo_id}")