Commit b88af58

fix: handle streams that end with no finish reason (#9195)
## Description

In some cases, the response terminates without a `finish_reason`. One case where this happens is a request that exceeds the context limit; the result is then treated as an empty response rather than an error. This is reproducible in LM Studio with a local model by setting the context length to less than 4k and issuing a chart-generation query.

This PR reports any response without a `finish_reason` as an exception. This lets the developer find the details in the server logs, and lets the user see a generic error message that they can report.

## Type of change

- [x] Bugfix
- [ ] Feature

## Checklist

- [x] I have read the contribution guide: https://vaadin.com/docs/latest/contributing/overview
- [x] I have added a description following the guideline.
- [ ] The issue is created in the corresponding repository and I have referenced it.
- [x] I have added tests to ensure my change is effective and works as intended.
- [x] New and existing tests are passing locally with my change.
- [x] I have performed self-review and corrected misspellings.
1 parent ca6879a commit b88af58

2 files changed

Lines changed: 230 additions & 2 deletions
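The change described above amounts to a small Reactor pass-through gate. As a minimal, self-contained sketch of the idiom, generalized over an arbitrary element type and terminal-marker predicate (the names here are illustrative; the actual implementation is in the diff below):

```java
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Predicate;

import reactor.core.publisher.Flux;

final class TerminalMarkerGate {

    /**
     * Re-emits every element unchanged, but fails on completion unless at
     * least one element satisfied the terminal-marker predicate.
     */
    static <T> Flux<T> failUnlessMarkerSeen(Flux<T> source,
            Predicate<T> isTerminalMarker) {
        var markerSeen = new AtomicBoolean(false);
        return source.doOnNext(element -> {
            if (!markerSeen.get() && isTerminalMarker.test(element)) {
                markerSeen.set(true);
            }
        }).concatWith(Flux.defer(() -> markerSeen.get() ? Flux.empty()
                : Flux.error(new IllegalStateException(
                        "Stream ended without a terminal marker."))));
    }
}
```

Two properties make this shape safe: `Flux.defer` postpones reading the flag until the upstream has actually completed, and `concatWith` only subscribes to its tail on normal completion, so an upstream error propagates unchanged instead of being masked by the gate.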


vaadin-ai-components-flow-parent/vaadin-ai-components-flow/src/main/java/com/vaadin/flow/component/ai/provider/SpringAILLMProvider.java

Lines changed: 49 additions & 1 deletion
```diff
@@ -20,6 +20,7 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -29,6 +30,7 @@
 import org.springframework.ai.chat.messages.AssistantMessage;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.model.ChatModel;
+import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.content.Media;
 import org.springframework.ai.tool.ToolCallback;
 import org.springframework.ai.tool.annotation.Tool;
@@ -188,12 +190,58 @@ private static org.springframework.ai.chat.messages.Message toVendorMessage(
 
     private Flux<String> executeStreamingChat(LLMRequest request) {
         try {
-            return getPromptSpec(request).stream().content();
+            var chatResponses = getPromptSpec(request).stream().chatResponse();
+            return failOnMissingFinishReason(chatResponses)
+                    .map(SpringAILLMProvider::getAssistantText)
+                    .filter(text -> !text.isEmpty());
         } catch (Exception e) {
             return Flux.error(e);
         }
     }
 
+    /**
+     * Passes the stream through unchanged, raising an
+     * {@link IllegalStateException} on completion if no chunk carried a
+     * finish_reason.
+     * <p>
+     * A compliant OpenAI-style streaming response terminates with a chunk whose
+     * finish_reason is set; any termination reason that legitimately yields
+     * empty content (TOOL_CALLS, CONTENT_FILTER, STOP with no text, etc.) still
+     * provides one. A stream that completes without ever carrying a
+     * finish_reason therefore indicates abnormal termination, typically an
+     * error that was not surfaced by the transport.
+     */
+    private static Flux<ChatResponse> failOnMissingFinishReason(
+            Flux<ChatResponse> source) {
+        var finishReasonSeen = new AtomicBoolean(false);
+        return source.doOnNext(response -> {
+            if (!finishReasonSeen.get() && hasFinishReason(response)) {
+                finishReasonSeen.set(true);
+            }
+        }).concatWith(Flux.defer(() -> finishReasonSeen.get() ? Flux.empty()
+                : Flux.error(new IllegalStateException(
+                        "LLM stream ended without a finish reason, "
+                                + "indicating abnormal termination."))));
+    }
+
+    private static boolean hasFinishReason(ChatResponse response) {
+        var result = response.getResult();
+        if (result == null) {
+            return false;
+        }
+        var reason = result.getMetadata().getFinishReason();
+        return reason != null && !reason.isEmpty();
+    }
+
+    private static String getAssistantText(ChatResponse response) {
+        var result = response.getResult();
+        if (result == null) {
+            return "";
+        }
+        var text = result.getOutput().getText();
+        return text != null ? text : "";
+    }
+
     private ChatClient.ChatClientRequestSpec getPromptSpec(LLMRequest request) {
         var promptSpec = chatClient.prompt();
         promptSpec = promptSpec.user(userSpec -> {
```
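With this change, a stream that ends without a finish_reason surfaces as an error signal instead of a silently empty response. A hypothetical consumer-side sketch of what the description calls "a generic error message that they can report" (`provider`, `request`, and `ui::appendToken` are illustrative names, not part of this PR):

```java
provider.stream(request)
        // Keep the details in the server logs for the developer...
        .onErrorResume(IllegalStateException.class, error -> {
            LoggerFactory.getLogger("LLMConsumer")
                    .error("LLM stream terminated abnormally", error);
            // ...and show the user something generic and reportable.
            return Flux.just("The assistant could not complete the response. "
                    + "Please try again or report the issue.");
        })
        .subscribe(ui::appendToken);
```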

vaadin-ai-components-flow-parent/vaadin-ai-components-flow/src/test/java/com/vaadin/flow/component/ai/provider/SpringAILLMProviderTest.java

Lines changed: 181 additions & 1 deletion
```diff
@@ -28,12 +28,15 @@
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.RegisterExtension;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.NullAndEmptySource;
 import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.chat.messages.AssistantMessage;
 import org.springframework.ai.chat.messages.SystemMessage;
 import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
 import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.model.Generation;
@@ -803,15 +806,192 @@ void setHistory_withEmptyAttachmentMap_behavesLikeTextOnly() {
         Assertions.assertTrue(userMsg.getMedia().isEmpty());
     }
 
+    // --- Streaming finish_reason / abnormal termination tests ---
+
+    @ParameterizedTest
+    @NullAndEmptySource
+    void stream_streamingWithMissingFinishReason_throwsIllegalStateException(
+            String reason) {
+        // OpenAI-compatible backends emit "" for an unset finish_reason;
+        // both "" and null must be treated as missing.
+        var request = createSimpleRequest("Hello");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(mockChatResponse("", reason)));
+
+        Assertions.assertThrows(IllegalStateException.class,
+                () -> provider.stream(request).collectList().block());
+    }
+
+    @Test
+    void stream_streamingCompletesEmptyWithNoChunks_throwsIllegalStateException() {
+        // Zero-chunk stream: doOnNext never fires; the concatWith tail raises.
+        var request = createSimpleRequest("Hello");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.empty());
+
+        Assertions.assertThrows(IllegalStateException.class,
+                () -> provider.stream(request).collectList().block());
+    }
+
+    @Test
+    void stream_streamingWithValidFinishReasonButEmptyContent_completesWithoutError() {
+        // Tool-only turns and content-filter stops produce empty text but
+        // always carry a finish_reason; not errors.
+        var request = createSimpleRequest("Hello");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(mockChatResponse("", "STOP")));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertNotNull(results);
+        Assertions.assertTrue(results.isEmpty());
+    }
+
+    @Test
+    void stream_streamingWithLengthFinishReason_emitsPartialContent() {
+        var request = createSimpleRequest("Hello");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(mockChatResponse("partial", "LENGTH")));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertEquals(List.of("partial"), results);
+    }
+
+    @Test
+    void stream_streamingWithFinishReasonOnlyOnLastChunk_completesNormally() {
+        // Real OpenAI streams set finish_reason only on the terminal chunk.
+        var request = createSimpleRequest("Hello");
+        var chunk1 = mockChatResponse("Hel", null);
+        var chunk2 = mockChatResponse("lo", null);
+        var terminal = mockChatResponse(" World", "STOP");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(chunk1, chunk2, terminal));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertEquals(List.of("Hel", "lo", " World"), results);
+    }
+
+    @Test
+    void stream_streamingWithNullGeneration_throwsIllegalStateException() {
+        // ChatResponse(emptyList()) yields getResult() == null and no
+        // finish_reason: indistinguishable from an abort.
+        var request = createSimpleRequest("Hello");
+        var responseWithNoResult = new ChatResponse(Collections.emptyList());
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(responseWithNoResult));
+
+        Assertions.assertThrows(IllegalStateException.class,
+                () -> provider.stream(request).collectList().block());
+    }
+
+    @Test
+    void stream_streamingWithNullGenerationButFollowedByFinish_completesNormally() {
+        // A null-result chunk is tolerated as long as another chunk signs
+        // the stream off with a finish_reason.
+        var request = createSimpleRequest("Hello");
+        var empty = new ChatResponse(Collections.emptyList());
+        var terminal = mockChatResponse("ok", "STOP");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(empty, terminal));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertEquals(List.of("ok"), results);
+    }
+
+    @Test
+    void stream_streamingWithNullTextInMessage_filtersOut() {
+        // AssistantMessage.getText() is @Nullable; null text is filtered
+        // rather than propagated as the empty string.
+        var request = createSimpleRequest("Hello");
+        var nullTextMessage = new AssistantMessage((String) null);
+        var response = new ChatResponse(
+                List.of(new Generation(nullTextMessage, ChatGenerationMetadata
+                        .builder().finishReason("STOP").build())));
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(response));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertNotNull(results);
+        Assertions.assertTrue(results.isEmpty());
+    }
+
+    @Test
+    void stream_streamingWithMultipleChunksAndMixedEmptyContent_emitsOnlyNonEmpty() {
+        var request = createSimpleRequest("Hello");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.just(mockChatResponse("", null),
+                        mockChatResponse("Hello", null),
+                        mockChatResponse("", null),
+                        mockChatResponse(" World", "STOP")));
+
+        var results = provider.stream(request).collectList().block();
+
+        Assertions.assertEquals(List.of("Hello", " World"), results);
+    }
+
+    @Test
+    void stream_streamingUpstreamErrorsDuringStream_propagatesOriginalError() {
+        var request = createSimpleRequest("Hello");
+        var originalError = new RuntimeException("network broken");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(Flux.error(originalError));
+
+        var thrown = Assertions.assertThrows(RuntimeException.class,
+                () -> provider.stream(request).collectList().block());
+        Assertions.assertEquals(originalError, thrown);
+    }
+
+    @Test
+    void stream_streamingUpstreamErrorsAfterFinishReason_propagatesOriginalError() {
+        // finish_reason was already seen, yet an upstream error must still
+        // win over our abort detector.
+        var request = createSimpleRequest("Hello");
+        var chunk = mockChatResponse("data", "STOP");
+        var originalError = new RuntimeException("broken after chunk");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenReturn(
+                        Flux.just(chunk).concatWith(Flux.error(originalError)));
+
+        var thrown = Assertions.assertThrows(RuntimeException.class,
+                () -> provider.stream(request).collectList().block());
+        Assertions.assertEquals(originalError, thrown);
+    }
+
+    @Test
+    void stream_streamingChatModelThrowsSynchronously_propagatesError() {
+        var request = createSimpleRequest("Hello");
+        var originalError = new RuntimeException("stream API down");
+        Mockito.when(mockChatModel.stream(Mockito.any(Prompt.class)))
+                .thenThrow(originalError);
+
+        var thrown = Assertions.assertThrows(RuntimeException.class,
+                () -> provider.stream(request).collectList().block());
+        Assertions.assertEquals(originalError, thrown);
+    }
+
     private void mockSimpleChat(String responseText) {
         var response = mockSimpleChatResponse(responseText);
         Mockito.when(mockChatModel.call(Mockito.any(Prompt.class)))
                 .thenReturn(response);
     }
 
     private ChatResponse mockSimpleChatResponse(String text) {
+        // Single-chunk responses are always terminal; tag them with STOP so
+        // the finish_reason gate is satisfied.
+        return mockChatResponse(text, "STOP");
+    }
+
+    private static ChatResponse mockChatResponse(String text,
+            String finishReason) {
         var assistantMessage = new AssistantMessage(text);
-        var generation = new Generation(assistantMessage);
+        var metadata = finishReason == null ? ChatGenerationMetadata.NULL
+                : ChatGenerationMetadata.builder().finishReason(finishReason)
+                        .build();
+        var generation = new Generation(assistantMessage, metadata);
         return new ChatResponse(List.of(generation));
     }
```
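The tests above assert by blocking on `collectList().block()`. If reactor-test were available, the same behavior could be asserted without blocking via `StepVerifier`; a sketch under that assumption, reusing the mocks from the multi-chunk and missing-finish-reason tests above:

```java
import reactor.test.StepVerifier;

// Inside a test using the chunk1/chunk2/terminal mock from above:
StepVerifier.create(provider.stream(request))
        .expectNext("Hel", "lo", " World")
        .verifyComplete();

// Inside a test whose mocked stream never carries a finish_reason:
StepVerifier.create(provider.stream(request))
        .verifyError(IllegalStateException.class);
```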
817997
