zenml-io · AlexejPenner · Oct 28, 2024 · Oct 22, 2024 · Oct 22, 2024 · Oct 22, 2024
diff --git a/.github/workflows/run_complete_llm.yml b/.github/workflows/run_complete_llm.yml
@@ -0,0 +1,50 @@
+name: Staging Trigger LLM-COMPLETE
+on:
+  pull_request:
+    types: [opened, synchronize]
+    branches: [staging, main]  # base branches a PR must target to trigger this
+concurrency:
+  # One group per workflow + ref: a newer commit cancels the run still in flight
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run-staging-workflow:
+    runs-on: ubuntu-dind-runners
+    env:
+      ZENML_HOST: ${{ secrets.ZENML_HOST }}
+      ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }}
+      ZENML_STAGING_STACK: 51a49786-b82a-4646-bde7-a460efb0a9c5
+      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
+      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
+      ZENML_DEBUG: 'true'  # env values are strings; quoted to make that explicit
+      ZENML_ANALYTICS_OPT_IN: 'false'
+      ZENML_LOGGING_VERBOSITY: INFO
+      ZENML_PROJECT_SECRET_NAME: llm-complete  # same name as the README setup
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Install requirements
+        run: |
+          pip3 install -r requirements.txt
+          zenml integration install gcp -y
+
+      - name: Connect to ZenML server
+        run: |
+          zenml connect --url "$ZENML_HOST" --api-key "$ZENML_API_KEY"
+
+      - name: Set stack (Staging)
+        if: ${{ github.base_ref == 'staging' }}  # skipped for PRs into main
+        run: |
+          zenml stack set "$ZENML_STAGING_STACK"
+
+      - name: Run pipeline (Staging)
+        if: ${{ github.base_ref == 'staging' }}  # skipped for PRs into main
+        run: |
+          python run.py --rag --evaluation --no-cache
diff --git a/.gitignore b/.gitignore
@@ -162,6 +162,8 @@ llm-lora-finetuning/configs/shopify.yaml
 finetuned-matryoshka/
 finetuned-all-MiniLM-L6-v2/
 finetuned-snowflake-arctic-embed-m/
+finetuned-snowflake-arctic-embed-m-v1.5/
+.gradio/
 
 # ollama ignores
 nohup.out
diff --git a/llm-complete-guide/.assets/argilla_secret.png b/llm-complete-guide/.assets/argilla_secret.png
diff --git a/llm-complete-guide/README.md b/llm-complete-guide/README.md
@@ -43,11 +43,16 @@ environment and install the dependencies using the following command:
 pip install -r requirements.txt
 ```
 
+Depending on your hardware, the `pip install` command may fail while installing the
+`flash_attn` package. In that case, running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation`
+may help.
+
 In order to use the default LLM for this query, you'll need an account and an
-API key from OpenAI specified as another environment variable:
+API key from OpenAI specified as a ZenML secret:
 
 ```shell
-export OPENAI_API_KEY=<your-openai-api-key>
+zenml secret create llm-complete --openai_api_key=<your-openai-api-key>
+export ZENML_PROJECT_SECRET_NAME=llm-complete
 ```
 
 ### Setting up Supabase
@@ -63,22 +68,15 @@ You'll want to save the Supabase database password as a ZenML secret so that it
 isn't stored in plaintext. You can do this by running the following command:
 
 ```shell
-zenml secret create supabase_postgres_db --password="YOUR_PASSWORD"
+zenml secret update llm-complete -v '{"supabase_password": "YOUR_PASSWORD", "supabase_user": "YOUR_USER", "supabase_host": "YOUR_HOST", "supabase_port": "YOUR_PORT"}'
 ```
 
-You'll then want to connect to this database instance by getting the connection
+You can find the user, host and port for this database instance in the connection
 string from the Supabase dashboard.
 
 ![](.assets/supabase-connection-string.png)
 
-You can use these details to populate some environment variables where the
-pipeline code expects them:
-
-```shell
-export ZENML_POSTGRES_USER=<your-supabase-user>
-export ZENML_POSTGRES_HOST=<your-supabase-host>
-export ZENML_POSTGRES_PORT=<your-supabase-port>
-```
+If Supabase is not an option for you, you can use a different database as the backend.
 
 ### Running the RAG pipeline
 
@@ -151,16 +149,17 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.
 
-### Finetune the embeddings
-
-To run the pipeline for finetuning the embeddings, you can use the following
-commands:
+Please add these credentials to the same ZenML secret created above. Here we are also
+setting a Hugging Face write token. For the rest of the pipeline to work for you, you
+will need to change the Hugging Face repo URLs to a space you have write permissions for.
 
-```shell
-pip install -r requirements-argilla.txt # special requirements
-python run.py --embeddings
+```bash
+zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
 ```
 
+
+### Finetune the embeddings
+
 As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
 to work. Please follow the instructions in the [Argilla
 documentation](https://docs.argilla.io/latest/getting_started/quickstart/)
@@ -170,6 +169,17 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.
 
+The pipeline assumes that your Argilla secret is stored within a ZenML secret called `argilla_secrets`.
+![Argilla Secret](.assets/argilla_secret.png)
+
+To run the pipeline for finetuning the embeddings, you can use the following
+commands:
+
+```shell
+pip install -r requirements-argilla.txt # special requirements
+python run.py --embeddings
+```
+
 *Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
 loss function](https://www.philschmid.de/fine-tune-embedding-model-for-rag) which we adapted for this project.*
 

diff --git a/llm-complete-guide/configs/embeddings.yaml b/llm-complete-guide/configs/embeddings.yaml
@@ -0,0 +1,39 @@
+# enable_cache: False
+
+# Docker settings for the pipeline image: parent image, pip requirements, env vars
+settings:
+  docker:
+    parent_image: "zenmldocker/prepare-release:base-0.68.0"
+    requirements:
+      - langchain-community
+      - ratelimit
+      - langchain>=0.0.325
+      - langchain-openai
+      - pgvector
+      - psycopg2-binary
+      - beautifulsoup4
+      - unstructured
+      - pandas
+      - numpy
+      - sentence-transformers>=3
+      - transformers[torch]
+      - litellm
+      - ollama
+      - tiktoken
+      - umap-learn
+      - matplotlib
+      - pyarrow
+      - rerankers[flashrank]
+      - datasets
+      - torch
+    environment:
+      ZENML_PROJECT_SECRET_NAME: llm-complete  # must match the secret created in the README
+
+
+# configuration of the Model Control Plane
+model:
+  name: finetuned-zenml-docs-embeddings
+  version: latest
+  license: Apache 2.0
+  description: Finetuned LLM on ZenML docs
+  tags: ["rag", "finetuned"]
diff --git a/llm-complete-guide/configs/rag_eval.yaml b/llm-complete-guide/configs/rag_eval.yaml
@@ -0,0 +1,21 @@
+enable_cache: false
+
+# Docker settings: pip requirements installed into the pipeline image
+settings:
+  docker:
+    requirements:
+      - unstructured
+      - sentence-transformers>=3
+      - pgvector
+      - datasets
+      - litellm
+      - numpy
+      - psycopg2-binary
+      - tiktoken
+
+# Model Control Plane entry associated with this pipeline
+model:
+  name: finetuned-zenml-docs-embeddings
+  license: Apache 2.0
+  description: Finetuned LLM on ZenML docs
+  tags: ['rag', 'finetuned']
diff --git a/llm-complete-guide/configs/rag_gcp.yaml b/llm-complete-guide/configs/rag_gcp.yaml
@@ -0,0 +1,36 @@
+# Docker settings for the pipeline image: pip requirements and env vars
+settings:
+  docker:
+    requirements:
+      - unstructured
+      - sentence-transformers>=3
+      - pgvector
+      - datasets
+      - litellm
+      - numpy
+      - psycopg2-binary
+      - tiktoken
+      - ratelimit
+    environment:
+      ZENML_PROJECT_SECRET_NAME: llm-complete  # must match the secret created in the README
+      ZENML_ENABLE_RICH_TRACEBACK: FALSE
+      ZENML_LOGGING_VERBOSITY: INFO
+
+steps:
+  url_scraper:
+    parameters:
+      docs_url: https://docs.zenml.io
+  generate_embeddings:
+    step_operator: "terraform-gcp-6c0fd52233ca"
+    settings:
+      step_operator.vertex:
+        accelerator_type: "NVIDIA_TESLA_P100"
+        accelerator_count: 1
+        machine_type: "n1-standard-8"
+
+# configuration of the Model Control Plane
+model:
+  name: finetuned-zenml-docs-embeddings
+  license: Apache 2.0
+  description: Finetuned LLM on ZenML docs
+  tags: ["rag", "finetuned"]
diff --git a/llm-complete-guide/configs/rag_local_dev.yaml b/llm-complete-guide/configs/rag_local_dev.yaml
@@ -0,0 +1,32 @@
+enable_cache: false
+
+# Docker settings for the pipeline image: pip requirements and env vars
+settings:
+  docker:
+    requirements:
+      - unstructured
+      - sentence-transformers>=3
+      - pgvector
+      - datasets
+      - litellm
+      - numpy
+      - psycopg2-binary
+      - tiktoken
+      - ratelimit
+    environment:
+      ZENML_PROJECT_SECRET_NAME: llm-complete  # must match the secret created in the README
+      ZENML_ENABLE_RICH_TRACEBACK: FALSE
+      ZENML_LOGGING_VERBOSITY: INFO
+
+
+# configuration of the Model Control Plane
+model:
+  name: finetuned-zenml-docs-embeddings
+  license: Apache 2.0
+  description: Finetuned LLM on ZenML docs
+  tags: ["rag", "finetuned"]
+
+steps:
+  url_scraper:
+    parameters:
+      docs_url: https://docs.zenml.io/stack-components/orchestrators
diff --git a/llm-complete-guide/configs/synthetic.yaml b/llm-complete-guide/configs/synthetic.yaml
@@ -0,0 +1,39 @@
+# enable_cache: False
+
+# Docker settings for the pipeline image: pip requirements and env vars
+settings:
+  docker:
+    requirements:
+      - langchain-community
+      - ratelimit
+      - langchain>=0.0.325
+      - langchain-openai
+      - pgvector
+      - psycopg2-binary
+      - beautifulsoup4
+      - unstructured
+      - pandas
+      - numpy
+      - sentence-transformers>=3
+      - transformers
+      - litellm
+      - ollama
+      - tiktoken
+      - umap-learn
+      - matplotlib
+      - pyarrow
+      - rerankers[flashrank]
+      - datasets
+      - torch
+      - distilabel
+    environment:
+      ZENML_PROJECT_SECRET_NAME: llm-complete  # must match the secret created in the README
+
+
+# configuration of the Model Control Plane
+model:
+  name: finetuned-zenml-docs-embeddings
+  version: latest
+  license: Apache 2.0
+  description: Finetuned LLM on ZenML docs
+  tags: ["rag", "finetuned"]
diff --git a/llm-complete-guide/constants.py b/llm-complete-guide/constants.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
 
 # Vector Store constants
 CHUNK_SIZE = 2000
@@ -57,20 +58,23 @@
 
 # embeddings finetuning constants
 EMBEDDINGS_MODEL_NAME_ZENML = "finetuned-zenml-docs-embeddings"
-DATASET_NAME_DEFAULT = "zenml/rag_qa_embedding_questions_0_60_0"
+# DATASET_NAME_DEFAULT = "zenml/rag_qa_embedding_questions_0_60_0"
+DATASET_NAME_DEFAULT = "zenml/rag_qa_embedding_questions"
 DATASET_NAME_DISTILABEL = f"{DATASET_NAME_DEFAULT}_distilabel"
 DATASET_NAME_ARGILLA = DATASET_NAME_DEFAULT.replace("zenml/", "")
 OPENAI_MODEL_GEN = "gpt-4o"
 OPENAI_MODEL_GEN_KWARGS_EMBEDDINGS = {
     "temperature": 0.7,
     "max_new_tokens": 512,
 }
-EMBEDDINGS_MODEL_ID_BASELINE = "Snowflake/snowflake-arctic-embed-m"
-EMBEDDINGS_MODEL_ID_FINE_TUNED = "finetuned-snowflake-arctic-embed-m"
+EMBEDDINGS_MODEL_ID_BASELINE = "Snowflake/snowflake-arctic-embed-m-v1.5"
+EMBEDDINGS_MODEL_ID_FINE_TUNED = "finetuned-snowflake-arctic-embed-m-v1.5"
 EMBEDDINGS_MODEL_MATRYOSHKA_DIMS: list[int] = [
     384,
     256,
     128,
     64,
 ]  # Important: large to small
 USE_ARGILLA_ANNOTATIONS = False
+
+SECRET_NAME = os.getenv("ZENML_PROJECT_SECRET_NAME", "llm-complete")  # read from the env var; falls back to "llm-complete" when unset
diff --git a/llm-complete-guide/most_basic_eval.py b/llm-complete-guide/most_basic_eval.py
@@ -20,6 +20,8 @@
 
 from openai import OpenAI
 
+from utils.openai_utils import get_openai_api_key
+
 
 def preprocess_text(text):
     text = text.lower()
@@ -51,7 +53,7 @@ def answer_question(query, corpus, top_n=2):
         return "I don't have enough information to answer the question."
 
     context = "\n".join(relevant_chunks)
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    client = OpenAI(api_key=get_openai_api_key())
     chat_completion = client.chat.completions.create(
         messages=[
             {
@@ -117,7 +119,7 @@ def evaluate_retrieval(question, expected_answer, corpus, top_n=2):
 
 
 def evaluate_generation(question, expected_answer, generated_answer):
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    client = OpenAI(api_key=get_openai_api_key())
     chat_completion = client.chat.completions.create(
         messages=[
             {