GPU support in Docker, other Docker updates #1690

Open · wants to merge 2 commits into base: main
22 changes: 15 additions & 7 deletions Dockerfile.local
@@ -1,5 +1,3 @@
- ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
- ### You will run into a segfault in mac
FROM python:3.11.6-slim-bookworm as base

# Install poetry
@@ -20,31 +18,41 @@ RUN apt update && apt install -y \
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

#########################
FROM base as dependencies
#########################

WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"

################
FROM base as app
################

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080

# Prepare a non-root user
- RUN adduser --system worker
+ RUN adduser worker
WORKDIR /home/worker/app

- RUN mkdir local_data; chown worker local_data
- RUN mkdir models; chown worker models
+ RUN mkdir -p local_data; chown -R worker local_data
+ RUN mkdir -p models; chown -R worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
+ COPY --chown=worker pyproject.toml poetry.lock ./

+ # Copy the entry point script into the container and make it executable
+ COPY --chown=worker entrypoint.sh /entrypoint.sh
+ RUN chmod +x /entrypoint.sh

ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

USER worker
- ENTRYPOINT python -m private_gpt
+ ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]
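For context, a minimal sketch of building and running this CPU image by hand; the image tag is illustrative, and the port, environment, and volume mappings mirror docker-compose.yaml:

# Build the CPU image (the tag "private-gpt:local" is illustrative).
docker build -f Dockerfile.local -t private-gpt:local .

# Run it with the same mounts and environment the compose file uses.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=llamacpp \
  -e PGPT_EMBEDDING_MODE=huggingface \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  private-gpt:local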
75 changes: 75 additions & 0 deletions Dockerfile.local.gpu
@@ -0,0 +1,75 @@
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base

# For tzdata
ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"

# Install Python 3.11 and set it as default
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.11 python3.11-venv python3-pip && \
ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
python3 --version

# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

# Dependencies to build llama-cpp
RUN apt update && apt install -y \
    libopenblas-dev \
    ninja-build \
    build-essential \
    pkg-config \
    wget \
    gcc

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

#########################
FROM base as dependencies
#########################

WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"

# Enable GPU support
ENV LLAMA_CUBLAS=1
RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python

################
FROM base as app
################

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080

# Prepare a non-root user
RUN adduser worker
WORKDIR /home/worker/app

RUN mkdir -p local_data; chown -R worker local_data
RUN mkdir -p models; chown -R worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
COPY --chown=worker pyproject.toml poetry.lock ./

# Copy the entry point script into the container and make it executable
COPY --chown=worker entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]
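As a sanity check and usage sketch for this file (assuming the NVIDIA Container Toolkit is installed on the host; the image tag is illustrative):

# Verify the NVIDIA runtime is wired up before building anything.
docker run --rm --gpus all nvidia/cuda:12.2.2-base-ubuntu22.04 nvidia-smi

# Build and run the GPU image by hand; --gpus all exposes the host GPUs.
docker build -f Dockerfile.local.gpu -t private-gpt:gpu .
docker run --rm --gpus all -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=llamacpp \
  -e PGPT_EMBEDDING_MODE=huggingface \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  private-gpt:gpu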
21 changes: 21 additions & 0 deletions docker-compose-gpu.yaml
@@ -0,0 +1,21 @@
services:
  private-gpt-gpu:
    build:
      dockerfile: Dockerfile.local.gpu
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    ports:
      - 8001:8080
    environment:
      PORT: 8080
      PGPT_PROFILES: docker
      PGPT_LLM_MODE: llamacpp
      PGPT_EMBEDDING_MODE: huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
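The deploy.resources.reservations.devices block is how Compose requests GPU access; a short usage sketch (requires Docker Compose v2 and the NVIDIA Container Toolkit on the host):

# Build and start the GPU service defined above; the UI is then on http://localhost:8001.
docker compose -f docker-compose-gpu.yaml up --build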
3 changes: 2 additions & 1 deletion docker-compose.yaml
@@ -10,5 +10,6 @@ services:
    environment:
      PORT: 8080
      PGPT_PROFILES: docker
-     PGPT_MODE: local
+     PGPT_LLM_MODE: llamacpp
+     PGPT_EMBEDDING_MODE: huggingface
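The single PGPT_MODE switch is split into independent LLM and embedding switches, so the two backends can now differ. A hedged sketch of exercising that outside compose (image tag illustrative):

# Run with a mock LLM but real huggingface embeddings, e.g. for UI testing.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=mock \
  -e PGPT_EMBEDDING_MODE=huggingface \
  private-gpt:local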

17 changes: 17 additions & 0 deletions entrypoint.sh
@@ -0,0 +1,17 @@
#!/bin/sh

## Choose the model, tokenizer and prompt style
export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
export PGPT_PROMPT_STYLE="mistral"

## Optionally, choose a different embedding model
# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"

## Download the embedding and model files
echo "Running setup script"
poetry run python scripts/setup

## Execute the main container command
exec "$@"
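Note that these exports unconditionally overwrite any values passed in with docker run -e. A hypothetical variation, not part of this PR, that keeps them overridable via POSIX default expansion:

# Use ${VAR:-default} so values supplied at `docker run -e` time win over the defaults.
export PGPT_HF_REPO_ID="${PGPT_HF_REPO_ID:-TheBloke/Mistral-7B-Instruct-v0.2-GGUF}"
export PGPT_HF_MODEL_FILE="${PGPT_HF_MODEL_FILE:-mistral-7b-instruct-v0.2.Q4_K_M.gguf}"
export PGPT_TOKENIZER="${PGPT_TOKENIZER:-mistralai/Mistral-7B-Instruct-v0.2}"
export PGPT_PROMPT_STYLE="${PGPT_PROMPT_STYLE:-mistral}"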
10 changes: 6 additions & 4 deletions settings-docker.yaml
@@ -3,14 +3,16 @@ server:
  port: ${PORT:8080}

llm:
-  mode: ${PGPT_MODE:mock}
+  mode: ${PGPT_LLM_MODE:mock}
+  tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}

embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBEDDING_MODE:sagemaker}

llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  prompt_style: ${PGPT_PROMPT_STYLE:mistral}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}

huggingface:
  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
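The ${VAR:default} syntax here is privateGPT's settings interpolation: each value resolves to the environment variable when it is set, and to the text after the colon otherwise. A hedged sketch of overriding one of them at run time (image tag and model choice illustrative; this variable is not clobbered by entrypoint.sh, so a -e override takes effect):

# Swap the embedding model without rebuilding; everything else keeps its default.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-base-en-v1.5" \
  private-gpt:local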