In [1]:
import torch
from transformers import AutoTokenizer, AutoModel
from transformers import Qwen2ForCausalLM, Qwen2TokenizerFast
from transformers import AutoModelForCausalLM, AutoTokenizer

from tools import messages

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tool catalogue offered to the model: a single function description whose
# arguments are declared as a JSON-schema-style object.
TOOLS = [
    dict(
        type="function",
        function=dict(
            name="get_current_temperature",
            description="Get current temperature at a location.",
            parameters=dict(
                type="object",
                properties=dict(
                    location=dict(
                        type="string",
                        description='The location to get the temperature for, in the format "City, State, Country".',
                    ),
                    unit=dict(
                        type="string",
                        enum=["celsius", "fahrenheit"],
                        description='The unit to return the temperature in. Defaults to "celsius".',
                    ),
                ),
                # Only the location is mandatory; the unit may be omitted.
                required=["location"],
            ),
        ),
    ),
]

# Conversation seed: a system prompt (persona plus current date) followed by
# the user's question.
MESSAGES = [
    dict(role="system", content="你是Qwen, 由阿里巴巴创建.\n\nCurrent Date: 2024-09-30"),
    dict(role="user", content="北京的气温是多少？"),
]

# 获取模板

In [4]:
# Checkpoint identifier shared by the tokenizer here and the model load later on.
model_name_or_path = "Qwen/Qwen2.5-7B-Instruct"
# Load the fast Qwen2 tokenizer for that checkpoint.
tokenizer = Qwen2TokenizerFast.from_pretrained(model_name_or_path)
# Show the chat template that the tokenizer will use to render conversations.
print(tokenizer.get_chat_template())

{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba C

# 应用模板并生成 Prompt

In [6]:
# Bind the conversation and tool list to the names used in the template call.
# NOTE: `messages` rebinds the name imported by `from tools import messages`
# in the first cell.
messages, tools = MESSAGES, TOOLS

# Render the prompt as a plain string (tokenize=False) and append the
# assistant turn header (add_generation_prompt=True).
text = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    tokenize=False,
    add_generation_prompt=True,
)
print(text)

<|im_start|>system
你是Qwen, 由阿里巴巴创建.

Current Date: 2024-09-30

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
北京的气温是多少？<|im_end|>
<|im_start|>assistant



# 运行模型并解析输出

In [7]:
# Load the causal-LM weights for the same checkpoint as the tokenizer, letting
# the library choose dtype and device placement ("auto").
model = Qwen2ForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype="auto",
    device_map="auto",
)

# Tokenize the rendered prompt and move the tensors onto the model's device.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

model.eval()
with torch.no_grad():
    # The returned sequences contain the prompt tokens followed by up to
    # 512 newly generated tokens.
    outputs = model.generate(**inputs, max_new_tokens=512)

# Strip the prompt by token count rather than by `len(text)` characters:
# character slicing is only correct when decoding the prompt tokens reproduces
# `text` exactly, whereas the number of prompt tokens is always known.
# Special tokens (e.g. <|im_end|>) are kept, as before.
output_text = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:])[0]
print(output_text)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.49it/s]


<tool_call>
{"name": "get_current_temperature", "arguments": {"location": "北京, 北京市, 中国", "unit": "celsius"}}
</tool_call><|im_end|>
