GPU support in Docker, other Docker updates #1690

Open · wants to merge 2 commits into base: main
22 changes: 15 additions & 7 deletions Dockerfile.local
@@ -1,5 +1,3 @@
- ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
- ### You will run into a segfault in mac
FROM python:3.11.6-slim-bookworm as base

# Install poetry
@@ -20,31 +18,41 @@ RUN apt update && apt install -y \
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

#########################
FROM base as dependencies
#########################

WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"

################
FROM base as app
################

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080

# Prepare a non-root user
- RUN adduser --system worker
+ RUN adduser worker
WORKDIR /home/worker/app

- RUN mkdir local_data; chown worker local_data
- RUN mkdir models; chown worker models
+ RUN mkdir -p local_data; chown -R worker local_data
+ RUN mkdir -p models; chown -R worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
+ COPY --chown=worker pyproject.toml poetry.lock ./

+ # Copy the entry point script into the container and make it executable
+ COPY --chown=worker entrypoint.sh /entrypoint.sh
+ RUN chmod +x /entrypoint.sh

ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

USER worker
- ENTRYPOINT python -m private_gpt
+ ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]
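For context, a minimal sketch of building and running this CPU image by hand; the image tag is illustrative, and the port, environment, and volume mappings mirror docker-compose.yaml:

# Build the CPU image (the tag "private-gpt:local" is illustrative).
docker build -f Dockerfile.local -t private-gpt:local .

# Run it with the same mounts and environment the compose file uses.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=llamacpp \
  -e PGPT_EMBEDDING_MODE=huggingface \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  private-gpt:local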
75 changes: 75 additions & 0 deletions Dockerfile.local.gpu
@@ -0,0 +1,75 @@
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base

# For tzdata
ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"

# Install Python 3.11 and set it as default
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.11 python3.11-venv python3-pip && \
ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
python3 --version

# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

# Dependencies to build llama-cpp
RUN apt update && apt install -y \
    libopenblas-dev \
    ninja-build \
    build-essential \
    pkg-config \
    wget \
    gcc

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

#########################
FROM base as dependencies
#########################

WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"

# Enable GPU support
ENV LLAMA_CUBLAS=1
RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python

################
FROM base as app
################

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080

# Prepare a non-root user
RUN adduser worker
WORKDIR /home/worker/app

RUN mkdir -p local_data; chown -R worker local_data
RUN mkdir -p models; chown -R worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
COPY --chown=worker pyproject.toml poetry.lock ./

# Copy the entry point script into the container and make it executable
COPY --chown=worker entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]
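As a sanity check and usage sketch for this file (assuming the NVIDIA Container Toolkit is installed on the host; the image tag is illustrative):

# Verify the NVIDIA runtime is wired up before building anything.
docker run --rm --gpus all nvidia/cuda:12.2.2-base-ubuntu22.04 nvidia-smi

# Build and run the GPU image by hand; --gpus all exposes the host GPUs.
docker build -f Dockerfile.local.gpu -t private-gpt:gpu .
docker run --rm --gpus all -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=llamacpp \
  -e PGPT_EMBEDDING_MODE=huggingface \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  private-gpt:gpu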
21 changes: 21 additions & 0 deletions docker-compose-gpu.yaml
@@ -0,0 +1,21 @@
services:
  private-gpt-gpu:
    build:
      dockerfile: Dockerfile.local.gpu
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    ports:
      - 8001:8080
    environment:
      PORT: 8080
      PGPT_PROFILES: docker
      PGPT_LLM_MODE: llamacpp
      PGPT_EMBEDDING_MODE: huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
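The deploy.resources.reservations.devices block is how Compose requests GPU access; a short usage sketch (requires Docker Compose v2 and the NVIDIA Container Toolkit on the host):

# Build and start the GPU service defined above; the UI is then on http://localhost:8001.
docker compose -f docker-compose-gpu.yaml up --build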
3 changes: 2 additions & 1 deletion docker-compose.yaml
@@ -10,5 +10,6 @@ services:
    environment:
      PORT: 8080
      PGPT_PROFILES: docker
-     PGPT_MODE: local
+     PGPT_LLM_MODE: llamacpp
+     PGPT_EMBEDDING_MODE: huggingface
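The single PGPT_MODE switch is split into independent LLM and embedding switches, so the two backends can now differ. A hedged sketch of exercising that outside compose (image tag illustrative):

# Run with a mock LLM but real huggingface embeddings, e.g. for UI testing.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_LLM_MODE=mock \
  -e PGPT_EMBEDDING_MODE=huggingface \
  private-gpt:local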

17 changes: 17 additions & 0 deletions entrypoint.sh
@@ -0,0 +1,17 @@
#!/bin/sh

## Choose the model, tokenizer and prompt style
export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
export PGPT_PROMPT_STYLE="mistral"

## Optionally, choose a different embedding model
# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"

## Download the embedding and model files
echo "Running setup script"
poetry run python scripts/setup

## Execute the main container command
exec "$@"
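Note that these exports unconditionally overwrite any values passed in with docker run -e. A hypothetical variation, not part of this PR, that keeps them overridable via POSIX default expansion:

# Use ${VAR:-default} so values supplied at `docker run -e` time win over the defaults.
export PGPT_HF_REPO_ID="${PGPT_HF_REPO_ID:-TheBloke/Mistral-7B-Instruct-v0.2-GGUF}"
export PGPT_HF_MODEL_FILE="${PGPT_HF_MODEL_FILE:-mistral-7b-instruct-v0.2.Q4_K_M.gguf}"
export PGPT_TOKENIZER="${PGPT_TOKENIZER:-mistralai/Mistral-7B-Instruct-v0.2}"
export PGPT_PROMPT_STYLE="${PGPT_PROMPT_STYLE:-mistral}"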
10 changes: 6 additions & 4 deletions settings-docker.yaml
@@ -3,14 +3,16 @@ server:
  port: ${PORT:8080}

llm:
-  mode: ${PGPT_MODE:mock}
+  mode: ${PGPT_LLM_MODE:mock}
+  tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}

embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBEDDING_MODE:sagemaker}

llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  prompt_style: ${PGPT_PROMPT_STYLE:mistral}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}

huggingface:
  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
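The ${VAR:default} syntax here is privateGPT's settings interpolation: each value resolves to the environment variable when it is set, and to the text after the colon otherwise. A hedged sketch of overriding one of them at run time (image tag and model choice illustrative; this variable is not clobbered by entrypoint.sh, so a -e override takes effect):

# Swap the embedding model without rebuilding; everything else keeps its default.
docker run --rm -p 8001:8080 \
  -e PGPT_PROFILES=docker \
  -e PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-base-en-v1.5" \
  private-gpt:local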