From d68d8abdb95b5c7135e0f320659c8f03cbde5974 Mon Sep 17 00:00:00 2001
From: JaredforReal
Date: Wed, 24 Sep 2025 13:05:41 +0800
Subject: [PATCH 1/3] docs: network tips for CN Mainland

Signed-off-by: JaredforReal
---
 .../docs/getting-started/cn-network-tips.md | 188 ++++++++++++++++++
 website/sidebars.js                         |   1 +
 2 files changed, 189 insertions(+)
 create mode 100644 website/docs/getting-started/cn-network-tips.md

diff --git a/website/docs/getting-started/cn-network-tips.md b/website/docs/getting-started/cn-network-tips.md
new file mode 100644
index 00000000..891afa44
--- /dev/null
+++ b/website/docs/getting-started/cn-network-tips.md
@@ -0,0 +1,188 @@
+---
+title: China Mainland Network Tips
+sidebar_label: China Network Tips
+---
+
+This guide shows exactly how to build and run in Mainland China without modifying repo files. You’ll use small local override files and a compose override so the codebase stays clean.
+
+What you’ll solve:
+
+- Hugging Face model downloads blocked/slow
+- Go modules fetching blocked during Docker build
+- PyPI access for the mock-vLLM test image
+
+## TL;DR: Choose your path
+
+- Fastest and most reliable: use local models in `./models` and skip HF network entirely.
+- Otherwise: mount an HF cache + set mirror env vars via a compose override.
+- For building: use a CN-override Dockerfile to set Go mirrors.
+- For mock-vllm: use a CN-override Dockerfile to set pip mirror.
+
+You can mix these based on your situation.
+
+## 1. Hugging Face models
+
+The router will download embedding models on first run unless you provide them locally. Prefer Option A if possible.
+
+### Option A — Use local models (no external network)
+
+1) Download the required model(s) with any reachable method (VPN/offline) into the repo’s `./models` folder. Example layout:
+
+   - `models/all-MiniLM-L12-v2/`
+   - `models/category_classifier_modernbert-base_model`
+
+2) In `config/config.yaml`, point to the local path. Example:
+
+   ```yaml
+   bert_model:
+     # point to a local folder under /app/models (already mounted by compose)
+     model_id: /app/models/all-MiniLM-L12-v2
+   ```
+
+3) No extra env is required. `docker-compose.yml` already mounts `./models:/app/models:ro`.
+
+### Option B — Use HF cache + mirror
+
+Create a compose override to persist cache and use a China mirror. Save as `docker-compose.override.yml` in the repo root:
+
+```yaml
+services:
+  semantic-router:
+    volumes:
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    environment:
+      - HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface
+      - HF_HUB_ENABLE_HF_TRANSFER=1
+      - HF_ENDPOINT=https://hf-mirror.com
+```
+
+Optional: pre-warm cache on the host (only if you have `huggingface_hub` installed):
+
+```bash
+python -m pip install -U huggingface_hub
+python - <<'PY'
+from huggingface_hub import snapshot_download
+# downloads into the default HF cache (~/.cache/huggingface/hub), which the override above mounts
+snapshot_download(repo_id="sentence-transformers/all-MiniLM-L6-v2")
+PY
+```
+
+## 2. Build with Go mirrors (Dockerfile override)
+
+When building `Dockerfile.extproc`, the Go stage may hang on `proxy.golang.org`. Create a China override Dockerfile that enables mirrors without touching the original.
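+
+Tip: before starting a long image build, you can check that the mirror answers from your host. This is a minimal sketch, assuming `curl` is available; the `go env` line additionally assumes a local Go toolchain and can be skipped otherwise:
+
+```bash
+# Confirm the Go module mirror responds before relying on it in the override Dockerfile
+curl -sSf -o /dev/null https://goproxy.cn && echo "goproxy.cn reachable"
+
+# With a local Go toolchain installed, verify the values the build stage will use
+GOPROXY=https://goproxy.cn,direct GOSUMDB=sum.golang.google.cn go env GOPROXY GOSUMDB
+```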
+ +1) Create `Dockerfile.extproc.cn` at repo root with this content: + +```Dockerfile +# syntax=docker/dockerfile:1 + +FROM rust:1.85 AS rust-builder +RUN apt-get update && apt-get install -y make build-essential pkg-config && rm -rf /var/lib/apt/lists/* +WORKDIR /app +COPY tools/make/ tools/make/ +COPY Makefile ./ +COPY candle-binding/Cargo.toml candle-binding/ +COPY candle-binding/src/ candle-binding/src/ +RUN make rust + +FROM golang:1.24 AS go-builder +WORKDIR /app + +# China-friendly Go mirrors +ENV GOPROXY=https://goproxy.cn,direct +ENV GOSUMDB=sum.golang.google.cn + +RUN mkdir -p src/semantic-router +COPY src/semantic-router/go.mod src/semantic-router/go.sum src/semantic-router/ +COPY candle-binding/go.mod candle-binding/semantic-router.go candle-binding/ + +# Pre-download modules to fail fast if mirrors are unreachable +RUN cd src/semantic-router && go mod download && \ + cd /app/candle-binding && go mod download + +COPY src/semantic-router/ src/semantic-router/ +COPY --from=rust-builder /app/candle-binding/target/release/libcandle_semantic_router.so /app/candle-binding/target/release/ + +ENV CGO_ENABLED=1 +ENV LD_LIBRARY_PATH=/app/candle-binding/target/release +RUN mkdir -p bin && cd src/semantic-router && go build -o ../../bin/router cmd/main.go + +FROM quay.io/centos/centos:stream9 +WORKDIR /app +COPY --from=go-builder /app/bin/router /app/extproc-server +COPY --from=go-builder /app/candle-binding/target/release/libcandle_semantic_router.so /app/lib/ +COPY config/config.yaml /app/config/ +ENV LD_LIBRARY_PATH=/app/lib +EXPOSE 50051 +COPY scripts/entrypoint.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh +ENTRYPOINT ["/app/entrypoint.sh"] +``` + +2) Point compose to the CN Dockerfile by extending `docker-compose.override.yml`: + +```yaml +services: + semantic-router: + build: + dockerfile: Dockerfile.extproc.cn +``` + +## 3. Mock vLLM (PyPI mirror via Dockerfile override) + +For the optional testing profile, create a CN Dockerfile to configure pip mirrors. + +1) Create `tools/mock-vllm/Dockerfile.cn`: + +```Dockerfile +FROM python:3.11-slim +WORKDIR /app +RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* + +# Pip mirror (TUNA) +RUN python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ + python -m pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +COPY app.py /app/app.py +EXPOSE 8000 +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +2) Extend `docker-compose.override.yml` to use the CN Dockerfile for `mock-vllm`: + +```yaml +services: + mock-vllm: + build: + dockerfile: Dockerfile.cn +``` + +## 4. Build and run + +With the overrides in place, build and run normally (Compose will auto-merge): + +```bash +# Build all images with overrides +docker compose -f docker-compose.yml -f docker-compose.override.yml build + +# Run router + envoy +docker compose -f docker-compose.yml -f docker-compose.override.yml up -d + +# If you need the testing profile (mock-vllm) +docker compose -f docker-compose.yml -f docker-compose.override.yml --profile testing up -d +``` + +## 5. Troubleshooting + +- Go modules still time out: + - Verify `GOPROXY` and `GOSUMDB` are present in the go-builder stage logs. + - Try a clean build: `docker compose build --no-cache`. + +- HF models still download slowly: + - Prefer Option A (local models). 
+ - Ensure the cache volume is mounted and `HF_ENDPOINT`/`HF_HUB_ENABLE_HF_TRANSFER` are set. + +- PyPI slow for mock-vllm: + - Confirm the CN Dockerfile is being used for that service. diff --git a/website/sidebars.js b/website/sidebars.js index ff075eeb..df6c6673 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -51,6 +51,7 @@ const sidebars = { 'getting-started/docker-quickstart', 'getting-started/reasoning', 'getting-started/configuration', + 'getting-started/cn-network-tips', ], }, { From 0056c9c7870f1fe769c34fd761dbb3824c65aacc Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 24 Sep 2025 21:55:53 +0800 Subject: [PATCH 2/3] using CN just for example Signed-off-by: JaredforReal --- docker-compose.yml | 29 +++++++++++++++++++ .../network-tips.md} | 26 ++++++++--------- website/sidebars.js | 8 ++++- 3 files changed, 49 insertions(+), 14 deletions(-) rename website/docs/{getting-started/cn-network-tips.md => troubleshooting/network-tips.md} (82%) diff --git a/docker-compose.yml b/docker-compose.yml index afc7e7e1..d9084ff3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -63,6 +63,35 @@ services: retries: 5 start_period: 5s + # # Monitoring Stack: Prometheus + Grafana + # prometheus: + # image: prom/prometheus:v2.53.0 + # container_name: prometheus + # volumes: + # - ./config/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + # command: + # - --config.file=/etc/prometheus/prometheus.yaml + # - --storage.tsdb.retention.time=15d + # ports: + # - "9090:9090" + # networks: + # - semantic-network + + # grafana: + # image: grafana/grafana:11.5.1 + # container_name: grafana + # environment: + # - GF_SECURITY_ADMIN_USER=admin + # - GF_SECURITY_ADMIN_PASSWORD=admin + # ports: + # - "3000:3000" + # volumes: + # - ./config/grafana/datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + # - ./config/grafana/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro + # - ./deploy/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + # networks: + # - semantic-network + networks: semantic-network: driver: bridge diff --git a/website/docs/getting-started/cn-network-tips.md b/website/docs/troubleshooting/network-tips.md similarity index 82% rename from website/docs/getting-started/cn-network-tips.md rename to website/docs/troubleshooting/network-tips.md index 891afa44..88610311 100644 --- a/website/docs/getting-started/cn-network-tips.md +++ b/website/docs/troubleshooting/network-tips.md @@ -1,9 +1,9 @@ --- -title: China Mainland Network Tips -sidebar_label: China Network Tips +title: Network Tips +sidebar_label: Network Tips --- -This guide shows exactly how to build and run in Mainland China without modifying repo files. You’ll use small local override files and a compose override so the codebase stays clean. +This guide shows how to build and run in restricted or slow network environments without modifying repo files. You’ll use small local override files and a compose override so the codebase stays clean. What you’ll solve: @@ -15,8 +15,8 @@ What you’ll solve: - Fastest and most reliable: use local models in `./models` and skip HF network entirely. - Otherwise: mount an HF cache + set mirror env vars via a compose override. -- For building: use a CN-override Dockerfile to set Go mirrors. -- For mock-vllm: use a CN-override Dockerfile to set pip mirror. +- For building: use an override Dockerfile to set Go mirrors (examples provided). 
+- For mock-vllm: use an override Dockerfile to set pip mirror (examples provided). You can mix these based on your situation. @@ -43,7 +43,7 @@ The router will download embedding models on first run unless you provide them l ### Option B — Use HF cache + mirror -Create a compose override to persist cache and use a China mirror. Save as `docker-compose.override.yml` in the repo root: +Create a compose override to persist cache and use a regional mirror (example below uses a China mirror). Save as `docker-compose.override.yml` in the repo root: ```yaml services: @@ -53,7 +53,7 @@ services: environment: - HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface - HF_HUB_ENABLE_HF_TRANSFER=1 - - HF_ENDPOINT=https://hf-mirror.com + - HF_ENDPOINT=https://hf-mirror.com # example mirror endpoint (China) ``` Optional: pre-warm cache on the host (only if you have `huggingface_hub` installed): @@ -68,7 +68,7 @@ PY ## 2. Build with Go mirrors (Dockerfile override) -When building `Dockerfile.extproc`, the Go stage may hang on `proxy.golang.org`. Create a China override Dockerfile that enables mirrors without touching the original. +When building `Dockerfile.extproc`, the Go stage may hang on `proxy.golang.org`. Create an override Dockerfile that enables mirrors without touching the original. 1) Create `Dockerfile.extproc.cn` at repo root with this content: @@ -87,7 +87,7 @@ RUN make rust FROM golang:1.24 AS go-builder WORKDIR /app -# China-friendly Go mirrors +# Go module mirrors (example: goproxy.cn) ENV GOPROXY=https://goproxy.cn,direct ENV GOSUMDB=sum.golang.google.cn @@ -118,7 +118,7 @@ RUN chmod +x /app/entrypoint.sh ENTRYPOINT ["/app/entrypoint.sh"] ``` -2) Point compose to the CN Dockerfile by extending `docker-compose.override.yml`: +2) Point compose to the override Dockerfile by extending `docker-compose.override.yml`: ```yaml services: @@ -129,7 +129,7 @@ services: ## 3. Mock vLLM (PyPI mirror via Dockerfile override) -For the optional testing profile, create a CN Dockerfile to configure pip mirrors. +For the optional testing profile, create an override Dockerfile to configure pip mirrors. 
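+
+Tip: you can confirm the mirror serves package indexes before building the image. This is a quick, optional host-side check; it assumes `curl` is installed and uses the TUNA mirror shown in the example Dockerfile below:
+
+```bash
+# A healthy PyPI mirror returns HTTP 200 for a known package's simple index
+curl -sSf -o /dev/null https://pypi.tuna.tsinghua.edu.cn/simple/pip/ && echo "PyPI mirror reachable"
+```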
1) Create `tools/mock-vllm/Dockerfile.cn`: @@ -138,7 +138,7 @@ FROM python:3.11-slim WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* -# Pip mirror (TUNA) +# Pip mirror (example: TUNA mirror in China) RUN python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ python -m pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn @@ -150,7 +150,7 @@ EXPOSE 8000 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] ``` -2) Extend `docker-compose.override.yml` to use the CN Dockerfile for `mock-vllm`: +2) Extend `docker-compose.override.yml` to use the override Dockerfile for `mock-vllm`: ```yaml services: diff --git a/website/sidebars.js b/website/sidebars.js index df6c6673..f9f1a376 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -51,7 +51,6 @@ const sidebars = { 'getting-started/docker-quickstart', 'getting-started/reasoning', 'getting-started/configuration', - 'getting-started/cn-network-tips', ], }, { @@ -70,6 +69,13 @@ const sidebars = { 'api/classification', ], }, + { + type: 'category', + label: 'Troubleshooting', + items: [ + 'troubleshooting/network-tips', + ], + }, ], } From 2c85feaaf175888ee74c5fe7ea7e9d3a7bde0d4a Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 24 Sep 2025 23:12:37 +0800 Subject: [PATCH 3/3] get rid of grafana and prometheus docker compose Signed-off-by: JaredforReal --- docker-compose.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index d9084ff3..afc7e7e1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -63,35 +63,6 @@ services: retries: 5 start_period: 5s - # # Monitoring Stack: Prometheus + Grafana - # prometheus: - # image: prom/prometheus:v2.53.0 - # container_name: prometheus - # volumes: - # - ./config/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro - # command: - # - --config.file=/etc/prometheus/prometheus.yaml - # - --storage.tsdb.retention.time=15d - # ports: - # - "9090:9090" - # networks: - # - semantic-network - - # grafana: - # image: grafana/grafana:11.5.1 - # container_name: grafana - # environment: - # - GF_SECURITY_ADMIN_USER=admin - # - GF_SECURITY_ADMIN_PASSWORD=admin - # ports: - # - "3000:3000" - # volumes: - # - ./config/grafana/datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro - # - ./config/grafana/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro - # - ./deploy/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro - # networks: - # - semantic-network - networks: semantic-network: driver: bridge