fix(docker): add retry logic for ML model downloads (#248)

slayoffer · claude · web-flow · commit ecc590cb7928 · 2026-01-30T09:42:16.000+01:00
- Add 3 retries with exponential backoff (10s -> 20s -> 40s)
- Set HF_HUB_DOWNLOAD_TIMEOUT=600 for longer timeout
- Fixes transient network failures during HuggingFace downloads
- Applied to both api-only and standalone stages

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/docker/standalone/Dockerfile b/docker/standalone/Dockerfile
@@ -169,16 +169,33 @@ ENV PATH="/app/api/.venv/bin:${PATH}"
 
 # Pre-download ML models to avoid runtime download (conditional)
 # Only runs if both PRELOAD_ML_MODELS=true AND INCLUDE_LOCAL_MODELS=true
+# Retries the download up to 3 times (waiting 10s, then 20s, between attempts) to ride out
+# transient network failures; the build fails unless one attempt caches BOTH models.
 ARG PRELOAD_ML_MODELS
 ARG INCLUDE_LOCAL_MODELS
+ENV HF_HUB_DOWNLOAD_TIMEOUT=600
 RUN if [ "$PRELOAD_ML_MODELS" = "true" ] && [ "$INCLUDE_LOCAL_MODELS" = "true" ]; then \
-    /app/api/.venv/bin/python -c "\
+    MAX_RETRIES=3; RETRY_DELAY=10; MODELS_CACHED=false; \
+    for i in $(seq 1 "$MAX_RETRIES"); do \
+      echo "Attempt $i/$MAX_RETRIES: Downloading ML models..."; \
+      /app/api/.venv/bin/python -c "\
+import os; os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '600'; \
 from sentence_transformers import SentenceTransformer, CrossEncoder; \
 print('Downloading embedding model...'); \
 SentenceTransformer('BAAI/bge-small-en-v1.5'); \
 print('Downloading cross-encoder model...'); \
 CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \
-print('Models cached successfully')"; \
+print('Models cached successfully')" && { MODELS_CACHED=true; break; }; \
+      if [ "$i" -lt "$MAX_RETRIES" ]; then \
+        echo "Attempt $i failed, retrying in ${RETRY_DELAY}s..."; \
+        sleep "$RETRY_DELAY"; \
+        RETRY_DELAY=$((RETRY_DELAY * 2)); \
+      fi; \
+    done; \
+    if [ "$MODELS_CACHED" != "true" ]; then \
+      echo "ERROR: Failed to download models after $MAX_RETRIES attempts" >&2; \
+      exit 1; \
+    fi; \
     elif [ "$INCLUDE_LOCAL_MODELS" != "true" ]; then echo "Skipping ML model preload (local-models not included)"; \
     else echo "Skipping ML model preload"; fi
 
@@ -277,16 +294,33 @@ ENV PATH="/app/api/.venv/bin:${PATH}"
 
 # Pre-download ML models to avoid runtime download (conditional)
 # Only runs if both PRELOAD_ML_MODELS=true AND INCLUDE_LOCAL_MODELS=true
+# Retries the download up to 3 times (waiting 10s, then 20s, between attempts) to ride out
+# transient network failures; the build fails unless one attempt caches BOTH models.
 ARG PRELOAD_ML_MODELS
 ARG INCLUDE_LOCAL_MODELS
+ENV HF_HUB_DOWNLOAD_TIMEOUT=600
 RUN if [ "$PRELOAD_ML_MODELS" = "true" ] && [ "$INCLUDE_LOCAL_MODELS" = "true" ]; then \
-    /app/api/.venv/bin/python -c "\
+    MAX_RETRIES=3; RETRY_DELAY=10; MODELS_CACHED=false; \
+    for i in $(seq 1 "$MAX_RETRIES"); do \
+      echo "Attempt $i/$MAX_RETRIES: Downloading ML models..."; \
+      /app/api/.venv/bin/python -c "\
+import os; os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '600'; \
 from sentence_transformers import SentenceTransformer, CrossEncoder; \
 print('Downloading embedding model...'); \
 SentenceTransformer('BAAI/bge-small-en-v1.5'); \
 print('Downloading cross-encoder model...'); \
 CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \
-print('Models cached successfully')"; \
+print('Models cached successfully')" && { MODELS_CACHED=true; break; }; \
+      if [ "$i" -lt "$MAX_RETRIES" ]; then \
+        echo "Attempt $i failed, retrying in ${RETRY_DELAY}s..."; \
+        sleep "$RETRY_DELAY"; \
+        RETRY_DELAY=$((RETRY_DELAY * 2)); \
+      fi; \
+    done; \
+    if [ "$MODELS_CACHED" != "true" ]; then \
+      echo "ERROR: Failed to download models after $MAX_RETRIES attempts" >&2; \
+      exit 1; \
+    fi; \
     elif [ "$INCLUDE_LOCAL_MODELS" != "true" ]; then echo "Skipping ML model preload (local-models not included)"; \
     else echo "Skipping ML model preload"; fi