@@ -169,16 +169,33 @@ ENV PATH="/app/api/.venv/bin:${PATH}"
169169
# Pre-download ML models to avoid runtime download (conditional).
# Only runs if both PRELOAD_ML_MODELS=true AND INCLUDE_LOCAL_MODELS=true.
#
# The download is attempted up to 3 times with exponential backoff (10s, then
# 20s) to ride out transient network failures. Success is taken from the exit
# status of the download command itself, so the build fails unless a single
# attempt fetched BOTH the embedding model and the cross-encoder. (Re-loading
# only the embedding model afterwards would let a missing cross-encoder slip
# through and would hide the underlying error output.)
ARG PRELOAD_ML_MODELS
ARG INCLUDE_LOCAL_MODELS
# Seconds to wait for a server response when downloading from the Hugging Face
# Hub. ENV exports it to the python process below, so the script does not need
# to set it again.
ENV HF_HUB_DOWNLOAD_TIMEOUT=600
RUN if [ "$PRELOAD_ML_MODELS" = "true" ] && [ "$INCLUDE_LOCAL_MODELS" = "true" ]; then \
        max_retries=3; \
        retry_delay=10; \
        attempt=1; \
        downloaded=false; \
        while [ "$attempt" -le "$max_retries" ]; do \
            echo "Attempt $attempt/$max_retries: Downloading ML models..." ; \
            if /app/api/.venv/bin/python -c "\
from sentence_transformers import SentenceTransformer, CrossEncoder; \
print('Downloading embedding model...'); \
SentenceTransformer('BAAI/bge-small-en-v1.5'); \
print('Downloading cross-encoder model...'); \
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \
print('Models cached successfully')"; then \
                downloaded=true; \
                break; \
            fi; \
            if [ "$attempt" -lt "$max_retries" ]; then \
                echo "Attempt $attempt failed, retrying in ${retry_delay}s..." ; \
                sleep "$retry_delay"; \
                retry_delay=$((retry_delay * 2)); \
            fi; \
            attempt=$((attempt + 1)); \
        done; \
        if [ "$downloaded" != "true" ]; then \
            echo "ERROR: Failed to download models after $max_retries attempts" >&2 ; \
            exit 1; \
        fi; \
    elif [ "$INCLUDE_LOCAL_MODELS" != "true" ]; then echo "Skipping ML model preload (local-models not included)" ; \
    else echo "Skipping ML model preload" ; fi
184201
@@ -277,16 +294,33 @@ ENV PATH="/app/api/.venv/bin:${PATH}"
277294
# Pre-download ML models to avoid runtime download (conditional).
# Only runs if both PRELOAD_ML_MODELS=true AND INCLUDE_LOCAL_MODELS=true.
#
# The download is attempted up to 3 times with exponential backoff (10s, then
# 20s) to ride out transient network failures. Success is taken from the exit
# status of the download command itself, so the build fails unless a single
# attempt fetched BOTH the embedding model and the cross-encoder. (Re-loading
# only the embedding model afterwards would let a missing cross-encoder slip
# through and would hide the underlying error output.)
ARG PRELOAD_ML_MODELS
ARG INCLUDE_LOCAL_MODELS
# Seconds to wait for a server response when downloading from the Hugging Face
# Hub. ENV exports it to the python process below, so the script does not need
# to set it again.
ENV HF_HUB_DOWNLOAD_TIMEOUT=600
RUN if [ "$PRELOAD_ML_MODELS" = "true" ] && [ "$INCLUDE_LOCAL_MODELS" = "true" ]; then \
        max_retries=3; \
        retry_delay=10; \
        attempt=1; \
        downloaded=false; \
        while [ "$attempt" -le "$max_retries" ]; do \
            echo "Attempt $attempt/$max_retries: Downloading ML models..." ; \
            if /app/api/.venv/bin/python -c "\
from sentence_transformers import SentenceTransformer, CrossEncoder; \
print('Downloading embedding model...'); \
SentenceTransformer('BAAI/bge-small-en-v1.5'); \
print('Downloading cross-encoder model...'); \
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \
print('Models cached successfully')"; then \
                downloaded=true; \
                break; \
            fi; \
            if [ "$attempt" -lt "$max_retries" ]; then \
                echo "Attempt $attempt failed, retrying in ${retry_delay}s..." ; \
                sleep "$retry_delay"; \
                retry_delay=$((retry_delay * 2)); \
            fi; \
            attempt=$((attempt + 1)); \
        done; \
        if [ "$downloaded" != "true" ]; then \
            echo "ERROR: Failed to download models after $max_retries attempts" >&2 ; \
            exit 1; \
        fi; \
    elif [ "$INCLUDE_LOCAL_MODELS" != "true" ]; then echo "Skipping ML model preload (local-models not included)" ; \
    else echo "Skipping ML model preload" ; fi
292326
0 commit comments