diff --git a/tests/reasoning/test_exaone4_reasoning_parser.py b/tests/reasoning/test_exaone4_reasoning_parser.py
new file mode 100644
index 000000000000..8ca7ae2e45ba
--- /dev/null
+++ b/tests/reasoning/test_exaone4_reasoning_parser.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from transformers import AutoTokenizer
+
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
+from vllm.reasoning import (
+    DeepSeekR1ReasoningParser,
+    Exaone4ReasoningParser,
+    IdentityReasoningParser,
+)
+
+REASONING_MODEL_NAME = "LGAI-EXAONE/EXAONE-4.0-1.2B"
+
+
+@pytest.fixture(scope="module")
+def tokenizer():
+    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+
+
+@pytest.mark.parametrize(
+    "enable_thinking,expected_parser_type",
+    [
+        (True, DeepSeekR1ReasoningParser),
+        (False, IdentityReasoningParser),
+    ],
+)
+def test_parser_selection(tokenizer, enable_thinking, expected_parser_type):
+    parser = Exaone4ReasoningParser(
+        tokenizer, chat_template_kwargs={"enable_thinking": enable_thinking}
+    )
+
+    assert isinstance(parser._parser, expected_parser_type)
+
+
+def test_identity_reasoning_parser_basic(tokenizer):
+    parser = IdentityReasoningParser(tokenizer)
+
+    # Test is_reasoning_end always returns True
+    input_text = "This is some output"
+    input_tokens = tokenizer.tokenize(input_text)
+    input_ids = tokenizer.convert_tokens_to_ids(input_tokens)
+    assert parser.is_reasoning_end(input_ids) is True
+
+    # Test extract_content_ids returns all input_ids
+    assert parser.extract_content_ids(input_ids) == input_ids
+
+    # Test extract_reasoning_content returns (None, model_output)
+    request = ChatCompletionRequest(model="test-model", messages=[], temperature=1.0)
+    reasoning, content = parser.extract_reasoning_content(input_text, request)
+    assert reasoning is None
+    assert content == input_text
+
+    # Test extract_reasoning_content_streaming returns DeltaMessage or None
+    result = parser.extract_reasoning_content_streaming(
+        previous_text="",
+        current_text="Hello world",
+        delta_text="Hello world",
+        previous_token_ids=[],
+        current_token_ids=input_ids,
+        delta_token_ids=input_ids,
+    )
+    assert isinstance(result, DeltaMessage)
+    assert result.content == "Hello world"
+
+    # If delta_text is empty, should return None
+    result_none = parser.extract_reasoning_content_streaming(
+        previous_text="Hello world",
+        current_text="Hello world",
+        delta_text="",
+        previous_token_ids=input_ids,
+        current_token_ids=input_ids,
+        delta_token_ids=[],
+    )
+    assert result_none is None
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 3d666882efb5..a224716adef4 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -6,6 +6,7 @@
 from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
 from .deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
 from .ernie45_reasoning_parser import Ernie45ReasoningParser
+from .exaone4_reasoning_parser import Exaone4ReasoningParser
 from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser
 from .gptoss_reasoning_parser import GptOssReasoningParser
 from .granite_reasoning_parser import GraniteReasoningParser
@@ -26,6 +27,7 @@
     "IdentityReasoningParser",
     "DeepSeekV3ReasoningParser",
     "Ernie45ReasoningParser",
+    "Exaone4ReasoningParser",
     "GraniteReasoningParser",
     "HunyuanA13BReasoningParser",
     "Qwen3ReasoningParser",
diff --git a/vllm/reasoning/exaone4_reasoning_parser.py b/vllm/reasoning/exaone4_reasoning_parser.py
new file mode 100644
index 000000000000..17191b3413c2
--- /dev/null
+++ b/vllm/reasoning/exaone4_reasoning_parser.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from transformers import PreTrainedTokenizerBase
+
+from vllm.logger import init_logger
+from .abs_reasoning_parsers import ReasoningParserManager
+from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
+from .deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
+from .identity_reasoning_parser import IdentityReasoningParser
+
+logger = init_logger(__name__)
+
+
+@ReasoningParserManager.register_module("exaone4")
+class Exaone4ReasoningParser(DeepSeekV3ReasoningParser):
+    """
+    Reasoning parser for the EXAONE 4.0 model.
+
+    EXAONE 4.0 wraps its reasoning text in <think>...</think> tokens. When
+    thinking is enabled, parsing is delegated to DeepSeekR1ReasoningParser,
+    which understands that format; otherwise an IdentityReasoningParser is
+    used so the model output passes through unchanged.
+    """
+
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+        super().__init__(tokenizer, *args, **kwargs)
+
+        # enable_thinking is consumed here; the delegate parsers must not
+        # receive chat_template_kwargs themselves.
+        chat_kwargs = kwargs.pop("chat_template_kwargs", {}) or {}
+        enable_thinking = bool(chat_kwargs.pop("enable_thinking", False))
+
+        if enable_thinking:
+            self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs)
+        else:
+            self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)