
Commit 3e75069

[DOC] Add reasoning capability to vLLM streamlit code (#19557)
1 parent ee35e96 commit 3e75069

File tree

1 file changed: +165 −43 lines changed

examples/online_serving/streamlit_openai_chatbot_webserver.py

Lines changed: 165 additions & 43 deletions
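The heart of the change is vLLM's reasoning stream: when the chat template is asked to think, the server returns thinking tokens in a separate `reasoning_content` delta field alongside the usual `content`. Below is a minimal sketch of that request pattern outside Streamlit; the base URL, API key, and model name are placeholder assumptions, not part of this diff.

from openai import OpenAI

# Placeholder endpoint/key for a local vLLM OpenAI-compatible server
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",  # placeholder; any reasoning-capable model
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
    stream=True,
    # vLLM-specific: ask the chat template to emit a thinking section
    extra_body={"chat_template_kwargs": {"enable_thinking": True}},
)

for chunk in stream:
    delta = chunk.choices[0].delta
    # Thinking tokens arrive in `reasoning_content`, answer tokens in `content`
    if getattr(delta, "reasoning_content", None):
        print(delta.reasoning_content, end="", flush=True)
    elif getattr(delta, "content", None):
        print(delta.content, end="", flush=True)

This split between the two delta fields is exactly what the diff's `get_llm_response` exploits to render the thinking process above the answer.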
@@ -11,6 +11,7 @@
 - Streaming response display
 - Configurable API endpoint
 - Real-time chat history
+- Reasoning Display: Optional thinking process visualization
 
 Requirements:
     pip install streamlit openai
@@ -51,13 +52,33 @@
 if "active_session" not in st.session_state:
     st.session_state.active_session = None
 
+# Add new session state for reasoning
+if "show_reasoning" not in st.session_state:
+    st.session_state.show_reasoning = {}
+
 # Initialize session state for API base URL
 if "api_base_url" not in st.session_state:
     st.session_state.api_base_url = openai_api_base
 
 
 def create_new_chat_session():
-    """Create a new chat session with timestamp as ID"""
+    """Create a new chat session with timestamp as unique identifier.
+
+    This function initializes a new chat session by:
+    1. Generating a timestamp-based session ID
+    2. Creating an empty message list for the new session
+    3. Setting the new session as both current and active session
+    4. Resetting the messages list for the new session
+
+    Returns:
+        None
+
+    Session State Updates:
+        - sessions: Adds new empty message list with timestamp key
+        - current_session: Sets to new session ID
+        - active_session: Sets to new session ID
+        - messages: Resets to empty list
+    """
     session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.sessions[session_id] = []
     st.session_state.current_session = session_id
@@ -66,30 +87,98 @@ def create_new_chat_session():
 
 
 def switch_to_chat_session(session_id):
-    """Switch to a different chat session"""
+    """Switch the active chat context to a different session.
+
+    Args:
+        session_id (str): The timestamp ID of the session to switch to
+
+    This function handles chat session switching by:
+    1. Setting the specified session as current
+    2. Updating the active session marker
+    3. Loading the messages history from the specified session
+
+    Session State Updates:
+        - current_session: Updated to specified session_id
+        - active_session: Updated to specified session_id
+        - messages: Loaded from sessions[session_id]
+    """
     st.session_state.current_session = session_id
     st.session_state.active_session = session_id
     st.session_state.messages = st.session_state.sessions[session_id]
 
 
-def get_llm_response(messages, model):
-    """Get streaming response from llm
+def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
+    """Generate and stream LLM response with optional reasoning process.
 
     Args:
-        messages: List of message dictionaries
-        model: Name of model
+        messages (list): List of conversation message dicts with 'role' and 'content'
+        model (str): The model identifier to use for generation
+        reason (bool): Whether to enable and display reasoning process
+        content_ph (streamlit.empty): Placeholder for streaming response content
+        reasoning_ph (streamlit.empty): Placeholder for streaming reasoning process
 
     Returns:
-        Streaming response object or error message string
+        tuple: (str, str)
+            - First string contains the complete response text
+            - Second string contains the complete reasoning text (if enabled)
+
+    Features:
+        - Streams both reasoning and response text in real-time
+        - Handles model API errors gracefully
+        - Supports live updating of thinking process
+        - Maintains separate content and reasoning displays
+
+    Raises:
+        Exception: Wrapped in error message if API call fails
+
+    Note:
+        The function uses streamlit placeholders for live updates.
+        When reason=True, the reasoning process appears above the response.
     """
+    full_text = ""
+    think_text = ""
+    live_think = None
+    # Build request parameters
+    params = {"model": model, "messages": messages, "stream": True}
+    if reason:
+        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}
+
     try:
-        response = client.chat.completions.create(
-            model=model, messages=messages, stream=True
-        )
-        return response
+        response = client.chat.completions.create(**params)
+        if isinstance(response, str):
+            if content_ph:
+                content_ph.markdown(response)
+            return response, ""
+
+        # Prepare reasoning expander above content
+        if reason and reasoning_ph:
+            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
+            live_think = exp.empty()
+
+        # Stream chunks
+        for chunk in response:
+            delta = chunk.choices[0].delta
+            # Stream reasoning first
+            if reason and hasattr(delta, "reasoning_content") and live_think:
+                rc = delta.reasoning_content
+                if rc:
+                    think_text += rc
+                    live_think.markdown(think_text + "▌")
+            # Then stream content
+            if hasattr(delta, "content") and delta.content and content_ph:
+                full_text += delta.content
+                content_ph.markdown(full_text + "▌")
+
+        # Finalize displays: reasoning remains above, content below
+        if reason and live_think:
+            live_think.markdown(think_text)
+        if content_ph:
+            content_ph.markdown(full_text)
+
+        return full_text, think_text
     except Exception as e:
         st.error(f"Error details: {str(e)}")
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}", ""
 
 
 # Sidebar - API Settings first
@@ -108,6 +197,7 @@ def get_llm_response(messages, model):
 if st.sidebar.button("New Session"):
     create_new_chat_session()
 
+
 # Display all sessions in reverse chronological order
 for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
     # Mark the active session with a pinned button
@@ -143,47 +233,79 @@ def get_llm_response(messages, model):
     create_new_chat_session()
     st.session_state.active_session = st.session_state.current_session
 
-# Display chat history for current session
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.write(message["content"])
+# Update the chat history display section
+for idx, msg in enumerate(st.session_state.messages):
+    # Render user messages normally
+    if msg["role"] == "user":
+        with st.chat_message("user"):
+            st.write(msg["content"])
+    # Render assistant messages with reasoning above
+    else:
+        # If reasoning exists for this assistant message, show it above the content
+        if idx in st.session_state.show_reasoning:
+            with st.expander("💭 Thinking Process", expanded=False):
+                st.markdown(st.session_state.show_reasoning[idx])
+        with st.chat_message("assistant"):
+            st.write(msg["content"])
+
+
+# Setup & Cache reasoning support check
+@st.cache_data(show_spinner=False)
+def server_supports_reasoning():
+    """Check if the current model supports reasoning capability.
+
+    Returns:
+        bool: True if the model supports reasoning, False otherwise
+    """
+    resp = client.chat.completions.create(
+        model=model,
+        messages=[{"role": "user", "content": "Hi"}],
+        stream=False,
+    )
+    return hasattr(resp.choices[0].message, "reasoning_content") and bool(
+        resp.choices[0].message.reasoning_content
+    )
 
-# Handle user input and generate llm response
+
+# Check support
+supports_reasoning = server_supports_reasoning()
+
+# Add reasoning toggle in sidebar if supported
+reason = False  # Default to False
+if supports_reasoning:
+    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
+else:
+    st.sidebar.markdown(
+        "<span style='color:gray;'>Reasoning unavailable for this model.</span>",
+        unsafe_allow_html=True,
+    )
+    # reason remains False
+
+# Update the input handling section
 if prompt := st.chat_input("Type your message here..."):
-    # Save user message to session
+    # Save and display user message
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.session_state.sessions[st.session_state.current_session] = (
         st.session_state.messages
     )
-
-    # Display user message
     with st.chat_message("user"):
         st.write(prompt)
 
-    # Prepare messages for llm
-    messages_for_llm = [
+    # Prepare LLM messages
+    msgs = [
        {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
     ]
 
-    # Generate and display llm response
+    # Stream assistant response
     with st.chat_message("assistant"):
-        message_placeholder = st.empty()
-        full_response = ""
-
-        # Get streaming response from llm
-        response = get_llm_response(messages_for_llm, model)
-        if isinstance(response, str):
-            message_placeholder.markdown(response)
-            full_response = response
-        else:
-            for chunk in response:
-                if hasattr(chunk.choices[0].delta, "content"):
-                    content = chunk.choices[0].delta.content
-                    if content:
-                        full_response += content
-                        message_placeholder.markdown(full_response + "▌")
-
-        message_placeholder.markdown(full_response)
-
-    # Save llm response to session history
-    st.session_state.messages.append({"role": "assistant", "content": full_response})
+        # Placeholders: reasoning above, content below
+        reason_ph = st.empty()
+        content_ph = st.empty()
+        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)
+        # Determine index for this new assistant message
+        message_index = len(st.session_state.messages)
+        # Save assistant reply
+        st.session_state.messages.append({"role": "assistant", "content": full})
+        # Persist reasoning in session state if any
+        if reason and think:
+            st.session_state.show_reasoning[message_index] = think
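For reference, the cached `server_supports_reasoning()` check added above reduces to a single cheap probe request. A standalone sketch follows; the base URL and API key are assumptions, and picking the first served model via `models.list()` mirrors how the example app selects `model`.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id  # first model served by vLLM

resp = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "Hi"}],
    stream=False,
)
msg = resp.choices[0].message
# A non-empty reasoning_content means the sidebar toggle will be shown
print(bool(getattr(msg, "reasoning_content", None)))

Caching the probe with `@st.cache_data` matters here: Streamlit reruns the whole script on every interaction, so without the cache each user action would cost an extra round-trip to the server.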
