1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
+
4
+ import threading
5
+ import time
6
+ from http import HTTPStatus
7
+
8
+ import pytest
9
+ import requests
10
+
11
+ from tests .utils import RemoteOpenAIServer
12
+
13
+ MODEL_NAME = "Qwen/Qwen3-0.6B"
14
+
15
+
16
@pytest.fixture(scope="class")
def server():
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``, shared per class.

    The server is created inside a ``with`` block, so the context manager
    is exited once the tests of the requesting class are done with it.
    """
    # Command-line arguments handed to the server, grouped flag-by-flag.
    cli_args = ["--enforce-eager"]
    cli_args += ["--max-model-len", "100"]
    cli_args += ["--gpu-memory-utilization", "0.8"]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as running_server:
        yield running_server
25
+
26
+
27
class TestHealth:
    """Checks of the server's ``health`` endpoint, with and without load."""

    # Upper bound (seconds) for a single health probe, so that an
    # unresponsive server fails the test instead of hanging the whole run.
    _HEALTH_TIMEOUT_S = 60

    def test_health_basic(self, server: RemoteOpenAIServer):
        """Test basic health check endpoint."""
        response = requests.get(server.url_for("health"),
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_generate(self, server: RemoteOpenAIServer):
        """Test health check with generate parameter."""
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_running_query(self, server: RemoteOpenAIServer):
        """Test the generate health check while a completion is requested.

        A background thread issues a completion request; the main thread
        probes the health endpoint once that thread has signalled that it
        is about to send the request. Any exception raised in the
        background thread is re-raised here so it fails the test.
        """
        generation_errors: list[Exception] = []
        start_event = threading.Event()
        done_event = threading.Event()

        def _run_generate() -> None:
            """Send one completion request, recording any failure."""
            try:
                client = server.get_client()
                start_event.set()
                client.completions.create(
                    model=MODEL_NAME,
                    prompt="Ping health endpoint",
                    max_tokens=50,
                    temperature=0.0,
                )
            except Exception as e:
                # Exceptions do not propagate out of a thread on their own;
                # keep them so the main thread can re-raise.
                generation_errors.append(e)
            finally:
                done_event.set()

        generate_thread = threading.Thread(target=_run_generate, daemon=True)
        generate_thread.start()

        # Synchronise with the worker *before* probing, so the health check
        # is not issued ahead of the completion request.
        started = start_event.wait(timeout=10)
        if generation_errors:
            # Surface the real failure (e.g. client creation) rather than a
            # generic "failed to start" message.
            raise generation_errors[0]
        assert started, "Generation thread failed to start"

        # The event fires just before the request is sent; give the request
        # a moment to reach the server.
        time.sleep(1)
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

        assert done_event.wait(timeout=300), "Generation thread did not finish"
        # done_event is set in the worker's ``finally``; allow it a moment
        # to actually exit.
        generate_thread.join(timeout=10)
        if generation_errors:
            raise generation_errors[0]
0 commit comments