From e6de94398a5e38a717dd216b6cd4ec135a4c9d1d Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Tue, 9 Sep 2025 00:00:53 -0700
Subject: [PATCH 1/5] Add DCP to CI

Signed-off-by: Ming Yang
---
 .buildkite/test-pipeline.yaml              | 17 ++++++++++++++++-
 tests/distributed/test_context_parallel.py | 11 +++++++----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b0d4c4456d33..b2c2eb8de7e9 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -851,7 +851,6 @@ steps:
   commands:
   - pytest -v -s distributed/test_pp_cudagraph.py
   - pytest -v -s distributed/test_pipeline_parallel.py
-  # - pytest -v -s distributed/test_context_parallel.py # TODO: enable it on Hopper runners or add triton MLA support
 
 - label: LoRA TP Test (Distributed) # 17 min
   timeout_in_minutes: 30
@@ -925,9 +924,25 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 
+##### H200 test #####
 - label: Qwen MoE EP Test # optional
   gpu: h200
   optional: true
   num_gpus: 2
   commands:
   - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
+
+
+- label: Hopper Decode Context Parallelism Test # optional
+  gpu: h200
+  optional: true
+  num_gpus: 2
+  commands:
+  - pytest -v -s tests/distributed/test_context_parallel.py
+
+- label: Blackwell Decode Context Parallelism Test # optional
+  gpu: b200
+  optional: true
+  num_gpus: 2
+  commands:
+  - pytest -v -s tests/distributed/test_context_parallel.py
diff --git a/tests/distributed/test_context_parallel.py b/tests/distributed/test_context_parallel.py
index 23be703a3068..11685bc90c41 100644
--- a/tests/distributed/test_context_parallel.py
+++ b/tests/distributed/test_context_parallel.py
@@ -71,12 +71,13 @@ def detailed(
         parallel_setups = []
         for eager_mode_val in [False]:
             for pp_multiplier in [1]:
-                for dcp_multiplier in [2, 4]:
+                for dcp_multiplier in [0.5, 1]:
                     for chunked_prefill_val in [True]:
                         parallel_setups.append(
                             ParallelSetup(tp_size=tp_base,
                                           pp_size=pp_multiplier * pp_base,
-                                          dcp_size=dcp_multiplier * dcp_base,
+                                          dcp_size=int(dcp_multiplier *
+                                                       tp_base),
                                           eager_mode=eager_mode_val,
                                           chunked_prefill=chunked_prefill_val))
         return CPTestSettings(
@@ -223,7 +224,9 @@ def _compare_cp_with_tp(
 
 CP_TEXT_GENERATION_MODELS = {
     # [MLA attention only]
-    "deepseek-ai/DeepSeek-V2-Lite-Chat": CPTestSettings.detailed(),
+    "deepseek-ai/DeepSeek-V2-Lite-Chat":
+    [CPTestSettings.detailed(),
+     CPTestSettings.detailed(tp_base=2)],
 }
 
 CP_TEST_MODELS = [
@@ -238,7 +241,7 @@ def _compare_cp_with_tp(
      "runner", "test_options"),
     [
         params for model_id, settings in CP_TEXT_GENERATION_MODELS.items()
-        for params in settings.iter_params(model_id)
+        for setting in settings for params in setting.iter_params(model_id)
         if model_id in CP_TEST_MODELS
     ],
 )

From e1a4326750b439f35d5fdd8f7007733d489bad69 Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Tue, 9 Sep 2025 00:03:48 -0700
Subject: [PATCH 2/5] add comment

Signed-off-by: Ming Yang
---
 .buildkite/test-pipeline.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b2c2eb8de7e9..c2e9b1e55f45 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -940,6 +940,7 @@ steps:
   commands:
   - pytest -v -s tests/distributed/test_context_parallel.py
 
+##### B200 test #####
 - label: Blackwell Decode Context Parallelism Test # optional
   gpu: b200
   optional: true

From 07a0b956fa1bc8ddf66e0271c579e621f188aeff Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Tue, 9 Sep 2025 11:11:11 -0700
Subject: [PATCH 3/5] address comment: combine CI tests

Signed-off-by: Ming Yang
---
 .buildkite/test-pipeline.yaml | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index c2e9b1e55f45..8b23c3481a37 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -925,23 +925,16 @@ steps:
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 
 ##### H200 test #####
-- label: Qwen MoE EP Test # optional
+- label: Distrubted Tests (H200) # optional
   gpu: h200
   optional: true
   num_gpus: 2
   commands:
   - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
-
-
-- label: Hopper Decode Context Parallelism Test # optional
-  gpu: h200
-  optional: true
-  num_gpus: 2
-  commands:
   - pytest -v -s tests/distributed/test_context_parallel.py
 
 ##### B200 test #####
-- label: Blackwell Decode Context Parallelism Test # optional
+- label: Distributed Tests (B200) # optional
   gpu: b200
   optional: true
   num_gpus: 2

From 0d62d75202254c09e0408707d2979ee848d0738a Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Fri, 12 Sep 2025 20:19:41 -0700
Subject: [PATCH 4/5] add working_dir; fix relative path

Signed-off-by: Ming Yang
---
 .buildkite/test-pipeline.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index f3f776973a03..32ad01aa150d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -975,15 +975,17 @@ steps:
 - label: Distrubted Tests (H200) # optional
   gpu: h200
   optional: true
+  working_dir: "/vllm-workspace/"
   num_gpus: 2
   commands:
-  - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
+  - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
   - pytest -v -s tests/distributed/test_context_parallel.py
 
 ##### B200 test #####
 - label: Distributed Tests (B200) # optional
   gpu: b200
   optional: true
+  working_dir: "/vllm-workspace/"
   num_gpus: 2
   commands:
   - pytest -v -s tests/distributed/test_context_parallel.py

From 8375e797c21c29016b018a09ea713a6b5495dd29 Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Fri, 12 Sep 2025 23:35:17 -0700
Subject: [PATCH 5/5] run dcp test before moe (for testing purpose)

Signed-off-by: Ming Yang
---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 32ad01aa150d..0d944e911be1 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -978,8 +978,8 @@ steps:
   working_dir: "/vllm-workspace/"
   num_gpus: 2
   commands:
-  - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
   - pytest -v -s tests/distributed/test_context_parallel.py
+  - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
 
 ##### B200 test #####
 - label: Distributed Tests (B200) # optional
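
For reference, a sketch of how the H200 and B200 sections of .buildkite/test-pipeline.yaml should read once the whole series is applied. It is assembled from the hunks above; surrounding steps, exact line positions, and the rest of the pipeline are omitted, and label text is kept exactly as committed.

# Assembled from PATCH 1/5 through PATCH 5/5; neighbouring steps omitted.
##### H200 test #####
- label: Distrubted Tests (H200) # optional
  gpu: h200
  optional: true
  working_dir: "/vllm-workspace/"
  num_gpus: 2
  commands:
  # DCP test runs first (PATCH 5/5 reordered it ahead of the MoE EP command)
  - pytest -v -s tests/distributed/test_context_parallel.py
  # relative path works because working_dir is /vllm-workspace/ (PATCH 4/5)
  - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048

##### B200 test #####
- label: Distributed Tests (B200) # optional
  gpu: b200
  optional: true
  working_dir: "/vllm-workspace/"
  num_gpus: 2
  commands:
  - pytest -v -s tests/distributed/test_context_parallel.py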