Skip to content

Commit 26258af

Browse files
committed
add test_health to test health api
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
1 parent e3e2159 commit 26258af

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
import threading
5+
import time
6+
from http import HTTPStatus
7+
8+
import pytest
9+
import requests
10+
11+
from tests.utils import RemoteOpenAIServer
12+
13+
# Model identifier passed to RemoteOpenAIServer by the `server` fixture and
# used as the `model` argument of the completion request in the tests below.
MODEL_NAME = "Qwen/Qwen3-0.6B"
14+
15+
16+
@pytest.fixture(scope="class")
def server():
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``.

    The fixture is class-scoped, so pytest creates one server per test class
    and reuses it for every test in that class. The server object is used as
    a context manager and is exited once the tests that requested the
    fixture have finished.
    """
    # Command-line flags handed to the server, grouped flag-by-flag.
    cli_flags = ["--enforce-eager"]
    cli_flags += ["--max-model-len", "100"]
    cli_flags += ["--gpu-memory-utilization", "0.8"]

    with RemoteOpenAIServer(MODEL_NAME, cli_flags) as running_server:
        yield running_server
25+
26+
27+
class TestHealth:
    """Tests for the server's ``health`` endpoint.

    Every test uses the class-scoped ``server`` fixture and expects the
    endpoint to answer with HTTP 200.
    """

    # Upper bound, in seconds, for each health request so that an
    # unresponsive server fails the test instead of hanging the suite.
    _REQUEST_TIMEOUT_S = 60

    def test_health_basic(self, server: RemoteOpenAIServer):
        """Test basic health check endpoint."""
        response = requests.get(server.url_for("health"),
                                timeout=self._REQUEST_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_generate(self, server: RemoteOpenAIServer):
        """Test health check with generate parameter."""
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._REQUEST_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_running_query(self, server: RemoteOpenAIServer):
        """Test health check with generate parameter during a completion.

        A background thread issues a completion request; the health endpoint
        is queried while that thread is running. Any exception raised by the
        completion request is re-raised in the test thread at the end.
        """
        generation_errors: list[Exception] = []
        start_event = threading.Event()
        done_event = threading.Event()

        def _run_generate() -> None:
            """Send one completion request and record any failure."""
            try:
                client = server.get_client()
                start_event.set()
                client.completions.create(
                    model=MODEL_NAME,
                    prompt="Ping health endpoint",
                    max_tokens=50,
                    temperature=0.0,
                )
            except Exception as e:
                # Exceptions in a worker thread do not fail the test on their
                # own; keep them so the test thread can re-raise.
                generation_errors.append(e)
            finally:
                done_event.set()

        generate_thread = threading.Thread(target=_run_generate, daemon=True)
        generate_thread.start()

        # Verify the worker actually got going *before* probing the health
        # endpoint; otherwise the probe could run with no query in flight.
        assert start_event.wait(
            timeout=10), "Generation thread failed to start"

        # start_event is set just before the completion request is sent, so
        # allow a short moment for the request to reach the server.
        time.sleep(1)
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._REQUEST_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

        assert done_event.wait(timeout=300), "Generation thread did not finish"
        # done_event is set in the worker's ``finally`` clause, so the thread
        # is about to exit; give it a bounded moment to do so.
        generate_thread.join(timeout=5)
        if generation_errors:
            raise generation_errors[0]

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
import pydantic
2626
import regex as re
2727
import uvloop
28-
from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
28+
from fastapi import (APIRouter, Depends, FastAPI, Form, HTTPException, Query,
29+
Request)
2930
from fastapi.exceptions import RequestValidationError
3031
from fastapi.middleware.cors import CORSMiddleware
3132
from fastapi.responses import JSONResponse, Response, StreamingResponse

0 commit comments

Comments
 (0)