# Qwen tool calling demo

In [1]:
from pprint import pprint

In [2]:
import json
import torch

def get_current_temperature(location: str, unit: str = "celsius"):
    """Get current temperature at a location.

    This is a stub for the demo: the reading is a fixed number and the
    ``unit`` argument is echoed back without any conversion.

    Args:
        location: The location to get the temperature for, in the format "City, State, Country".
        unit: The unit to return the temperature in. Defaults to "celsius". (choices: ["celsius", "fahrenheit"])

    Returns:
        A dict with the keys "temperature", "location" and "unit", in that order.
    """
    # Keyword order fixes the key order of the resulting dict.
    reading = dict(temperature=26.1, location=location, unit=unit)
    return reading


def get_temperature_date(location: str, date: str, unit: str = "celsius"):
    """Get temperature at a location and date.

    This is a stub for the demo: the reading is a fixed number and the
    other arguments are echoed back unchanged.

    Args:
        location: The location to get the temperature for, in the format "City, State, Country".
        date: The date to get the temperature for, in the format "Year-Month-Day".
        unit: The unit to return the temperature in. Defaults to "celsius". (choices: ["celsius", "fahrenheit"])

    Returns:
        A dict with the keys "temperature", "location", "date" and "unit", in that order.
    """
    forecast = {"temperature": 25.9}
    # update() appends the keys in keyword order, preserving the original layout.
    forecast.update(location=location, date=date, unit=unit)
    return forecast


def get_function_by_name(name):
    """Look up the demo tool implementation registered under ``name``.

    Args:
        name: The tool name as it appears in a tool call.

    Returns:
        The matching function object, or None when the name is not one of
        the known tools.
    """
    resolved = None
    if name == "get_temperature_date":
        resolved = get_temperature_date
    elif name == "get_current_temperature":
        resolved = get_current_temperature
    return resolved

# Tool definitions handed to the model: one {"type": "function", "function": {...}}
# entry per tool, each with a name, a description and a JSON-schema style
# "parameters" object. The entries mirror the two Python functions defined above.
TOOLS = [
    # Schema for get_current_temperature: only "location" is required.
    {
        "type": "function",
        "function": {
            "name": "get_current_temperature",
            "description": "Get current temperature at a location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location"],
            },
        },
    },
    # Schema for get_temperature_date: "location" and "date" are required.
    {
        "type": "function",
        "function": {
            "name": "get_temperature_date",
            "description": "Get temperature at a location and date.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": 'The location to get the temperature for, in the format "City, State, Country".',
                    },
                    "date": {
                        "type": "string",
                        "description": 'The date to get the temperature for, in the format "Year-Month-Day".',
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit to return the temperature in. Defaults to "celsius".',
                    },
                },
                "required": ["location", "date"],
            },
        },
    },
]
# Initial conversation shared by both demos: a system prompt that pins the
# current date, followed by the user's two-part weather question.
MESSAGES = [
    {
        "role": "system",
        "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30",
    },
    {
        "role": "user",
        "content": "What's the temperature in San Francisco now? How about tomorrow?",
    },
]

## Using Hugging Face Transformers

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier used for the local Transformers run.
model_name_or_path = "Qwen/Qwen2.5-7B-Instruct"

# Load the matching tokenizer and the model weights (requested in bfloat16).
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
tools = TOOLS
# Work on a shallow copy so appending turns does not grow the shared MESSAGES list.
messages = list(MESSAGES)

# Render the conversation and tool schemas to a prompt string (not token ids),
# ending with the generation prompt for the assistant's next turn.
text = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    add_generation_prompt=True,
    tokenize=False,
)
print(text)

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.

Current Date: 2024-09-30

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}}}
{"type": "function", "function": {"name": "get_temperature_date", "description": "Get temperature at a location and date.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temp

In [5]:
# Show the raw chat template the tokenizer uses to turn messages and tools into a prompt.
print(tokenizer.chat_template)

{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba C

In [6]:
# Tokenize the rendered prompt and move the tensors to the model's device.
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
# generate() returns the prompt ids followed by the new ids. Drop the prompt at
# the token level before decoding instead of slicing the decoded string by
# len(text), which silently breaks whenever decoding the prompt does not
# reproduce `text` character for character. Special tokens are kept (default
# decode settings), so the trailing <|im_end|> marker is still present.
output_text = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:])[0]

In [7]:
# Show the prompt immediately followed by the model's completion.
print(text, output_text, sep="")

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.

Current Date: 2024-09-30

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}}}
{"type": "function", "function": {"name": "get_temperature_date", "description": "Get temperature at a location and date.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temp

In [8]:
import re

def try_parse_tool_calls(content: str):
    """Convert raw model output into an assistant chat message.

    The text is scanned for ``<tool_call>`` ... ``</tool_call>`` blocks whose
    JSON payload sits on a single line between the two tags. Every payload
    that decodes to an object with "name" and "arguments" keys becomes one
    tool call; "arguments" given as a JSON string is decoded into an object.
    Malformed blocks (empty, invalid JSON, not an object, missing keys,
    undecodable string arguments) are reported with ``print`` and skipped
    instead of raising or leaving a half-parsed entry in the result.

    Args:
        content: The decoded completion text, possibly ending in ``<|im_end|>``.

    Returns:
        A dict with "role" set to "assistant". If at least one tool call was
        parsed it also has "tool_calls" (a list of
        ``{"type": "function", "function": {...}}`` entries) and "content"
        holding the text before the first ``<tool_call>`` block (or "" when
        that text is blank). Otherwise "content" is the input with a trailing
        ``<|im_end|>`` removed.
    """
    tool_calls = []
    offset = 0
    for i, m in enumerate(re.finditer(r"<tool_call>\n(.+)?\n</tool_call>", content)):
        if i == 0:
            # Everything before the first block is the assistant's plain text.
            offset = m.start()
        payload = m.group(1)
        try:
            # The capture group is optional, so an empty block yields None;
            # decoding "" instead raises JSONDecodeError rather than TypeError.
            func = json.loads(payload or "")
            if not isinstance(func, dict) or "name" not in func or "arguments" not in func:
                raise ValueError('expected a JSON object with "name" and "arguments" keys')
            if isinstance(func["arguments"], str):
                func["arguments"] = json.loads(func["arguments"])
        except ValueError as e:  # json.JSONDecodeError is a subclass of ValueError
            print(f"Failed to parse tool calls: the content is {payload} and {e}")
            continue
        # Only register the call once it is fully parsed.
        tool_calls.append({"type": "function", "function": func})
    if tool_calls:
        prefix = content[:offset]
        return {
            "role": "assistant",
            "content": prefix if prefix.strip() else "",
            "tool_calls": tool_calls,
        }
    return {"role": "assistant", "content": re.sub(r"<\|im_end\|>$", "", content)}

In [9]:
# Turn the raw generation into an assistant message (with any parsed tool calls) and add it to the conversation.
messages.append(try_parse_tool_calls(output_text))

In [10]:
# Inspect the conversation, which now ends with the assistant message just appended.
print(messages)

[{'role': 'system', 'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30'}, {'role': 'user', 'content': "What's the temperature in San Francisco now? How about tomorrow?"}, {'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA'}}}, {'type': 'function', 'function': {'name': 'get_temperature_date', 'arguments': {'location': 'San Francisco, CA, USA', 'date': '2024-10-01'}}}]}]


In [11]:
# Run every tool the assistant asked for and append each JSON-encoded result
# to the conversation as a "tool" message.
tool_calls = messages[-1].get("tool_calls", None)
for tool_call in tool_calls or []:
    fn_call = tool_call.get("function")
    if not fn_call:
        continue
    fn_name: str = fn_call["name"]
    # try_parse_tool_calls already decoded the arguments into a dict.
    fn_args: dict = fn_call["arguments"]

    fn_res: str = json.dumps(get_function_by_name(fn_name)(**fn_args))

    messages.append({"role": "tool", "name": fn_name, "content": fn_res})

In [12]:
# Inspect the conversation after the tool results were appended.
pprint(messages)
# Note: the tool definitions do not appear in `messages`; apply_chat_template
# inserts them into the system prompt when the prompt is rendered.

[{'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful '
             'assistant.\n'
             '\n'
             'Current Date: 2024-09-30',
  'role': 'system'},
 {'content': "What's the temperature in San Francisco now? How about tomorrow?",
  'role': 'user'},
 {'content': '',
  'role': 'assistant',
  'tool_calls': [{'function': {'arguments': {'location': 'San Francisco, CA, '
                                                         'USA'},
                               'name': 'get_current_temperature'},
                  'type': 'function'},
                 {'function': {'arguments': {'date': '2024-10-01',
                                             'location': 'San Francisco, CA, '
                                                         'USA'},
                               'name': 'get_temperature_date'},
                  'type': 'function'}]},
 {'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", '
             '"unit": "celsius"}',
 

In [13]:
# Final response: re-render the prompt from the conversation (which now includes
# the tool results), tokenize it for generation, and print it for inspection.
text = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(text, return_tensors="pt").to(model.device)
print(text)

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.

Current Date: 2024-09-30

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}}}
{"type": "function", "function": {"name": "get_temperature_date", "description": "Get temperature at a location and date.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temp

In [14]:
outputs = model.generate(**inputs, max_new_tokens=512)
# generate() returns the prompt ids followed by the new ids. Drop the prompt at
# the token level before decoding instead of slicing the decoded string by
# len(text), which silently breaks whenever decoding the prompt does not
# reproduce `text` character for character. Special tokens are kept (default
# decode settings), so the trailing <|im_end|> marker is still present.
output_text = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:])[0]

In [15]:
# Show the model's answer; the raw decode still carries the trailing <|im_end|> marker.
print(output_text)

Currently, the temperature in San Francisco is approximately 26.1°C. Tomorrow, on October 1, 2024, the temperature is expected to be around 25.9°C.<|im_end|>


In [16]:
# Append the answer as an assistant message; when the text contains no tool
# calls, the parser returns it as plain content with the trailing <|im_end|> removed.
messages.append(try_parse_tool_calls(output_text))

In [17]:
# Inspect the complete conversation, now including the final assistant message.
pprint(messages)

[{'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful '
             'assistant.\n'
             '\n'
             'Current Date: 2024-09-30',
  'role': 'system'},
 {'content': "What's the temperature in San Francisco now? How about tomorrow?",
  'role': 'user'},
 {'content': '',
  'role': 'assistant',
  'tool_calls': [{'function': {'arguments': {'location': 'San Francisco, CA, '
                                                         'USA'},
                               'name': 'get_current_temperature'},
                  'type': 'function'},
                 {'function': {'arguments': {'date': '2024-10-01',
                                             'location': 'San Francisco, CA, '
                                                         'USA'},
                               'name': 'get_temperature_date'},
                  'type': 'function'}]},
 {'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", '
             '"unit": "celsius"}',
 

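## Using vLLM

The cells below assume a vLLM OpenAI-compatible server is already running at `http://localhost:8000/v1` with tool calling enabled, e.g. `vllm serve Qwen/Qwen2.5-7B-Instruct --enable-auto-tool-choice --tool-call-parser hermes`.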

In [18]:
# Start the second demo from a fresh copy of the initial conversation.
tools = TOOLS
messages = list(MESSAGES)

In [19]:
from openai import OpenAI

# Connection settings for the OpenAI-compatible endpoint on localhost.
# NOTE(review): "EMPTY" is a placeholder key, presumably ignored by the local
# server; confirm for your deployment.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)

# Model identifier sent with every request.
model_name = "Qwen/Qwen2.5-7B-Instruct"

In [20]:
# First request: send the conversation together with the tool definitions.
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    temperature=0.7,
    top_p=0.8,
    max_tokens=512,
    # Sent via extra_body rather than as a named argument; presumably a
    # server-specific sampling option (verify against the server's docs).
    extra_body={
        "repetition_penalty": 1.05,
    },
)

In [22]:
# Inspect the raw response; requested tool calls are under choices[0].message.tool_calls.
pprint(response)

ChatCompletion(id='chatcmpl-149b787438884e918867c88e34e534b1', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='chatcmpl-tool-7dd3c0f8f6024df9bc1981236b0be109', function=Function(arguments='{"location": "San Francisco, CA, USA"}', name='get_current_temperature'), type='function'), ChatCompletionMessageToolCall(id='chatcmpl-tool-943de79f68014d7d9fe36a66c7071642', function=Function(arguments='{"location": "San Francisco, CA, USA", "date": "2024-10-01"}', name='get_temperature_date'), type='function')], reasoning_content=None), stop_reason=None)], created=1748417080, model='Qwen/Qwen2.5-7B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=67, prompt_tokens=412, total_tokens=479, completion_tokens_details=None, prompt_tokens_details

In [23]:
# Record the assistant turn (including its tool calls) in the conversation.
messages.append(response.choices[0].message.model_dump())

# Run every requested tool and append each JSON-encoded result as a "tool"
# message that carries the id of the call it answers.
tool_calls = messages[-1].get("tool_calls", None)
for tool_call in tool_calls or []:
    call_id: str = tool_call["id"]
    fn_call = tool_call.get("function")
    if not fn_call:
        continue
    fn_name: str = fn_call["name"]
    # Here the arguments arrive as a JSON string, so decode them first.
    fn_args: dict = json.loads(fn_call["arguments"])

    fn_res: str = json.dumps(get_function_by_name(fn_name)(**fn_args))

    messages.append({"role": "tool", "content": fn_res, "tool_call_id": call_id})

In [25]:
# Inspect the conversation after the assistant turn and the tool results were appended.
pprint(messages)

[{'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful '
             'assistant.\n'
             '\n'
             'Current Date: 2024-09-30',
  'role': 'system'},
 {'content': "What's the temperature in San Francisco now? How about tomorrow?",
  'role': 'user'},
 {'annotations': None,
  'audio': None,
  'content': None,
  'function_call': None,
  'reasoning_content': None,
  'refusal': None,
  'role': 'assistant',
  'tool_calls': [{'function': {'arguments': '{"location": "San Francisco, CA, '
                                            'USA"}',
                               'name': 'get_current_temperature'},
                  'id': 'chatcmpl-tool-7dd3c0f8f6024df9bc1981236b0be109',
                  'type': 'function'},
                 {'function': {'arguments': '{"location": "San Francisco, CA, '
                                            'USA", "date": "2024-10-01"}',
                               'name': 'get_temperature_date'},
                  'id': 'chatcm

In [26]:
# Second request: send the conversation, which now includes the tool results,
# back to the model with the same sampling settings as the first request.
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    temperature=0.7,
    top_p=0.8,
    max_tokens=512,
    extra_body={
        "repetition_penalty": 1.05,
    },
)

# Record the model's reply in the conversation.
messages.append(response.choices[0].message.model_dump())

In [27]:
# Inspect the complete conversation, now including the reply to the second request.
pprint(messages)

[{'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful '
             'assistant.\n'
             '\n'
             'Current Date: 2024-09-30',
  'role': 'system'},
 {'content': "What's the temperature in San Francisco now? How about tomorrow?",
  'role': 'user'},
 {'annotations': None,
  'audio': None,
  'content': None,
  'function_call': None,
  'reasoning_content': None,
  'refusal': None,
  'role': 'assistant',
  'tool_calls': [{'function': {'arguments': '{"location": "San Francisco, CA, '
                                            'USA"}',
                               'name': 'get_current_temperature'},
                  'id': 'chatcmpl-tool-7dd3c0f8f6024df9bc1981236b0be109',
                  'type': 'function'},
                 {'function': {'arguments': '{"location": "San Francisco, CA, '
                                            'USA", "date": "2024-10-01"}',
                               'name': 'get_temperature_date'},
                  'id': 'chatcm