# Running and fine-tuning Qwen (Qwen-1_8B-Chat-Int4) from a ModelScope checkpoint

In [1]:
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fetch the checkpoint into a local directory; `model_dir` is that directory.
# Other checkpoints that can be substituted for the one below:
#   'qwen/Qwen-7B', 'qwen/Qwen-7B-Chat', 'qwen/Qwen-14B'
model_dir = snapshot_download('qwen/Qwen-1_8B-Chat-Int4')

# Load from the local checkpoint directory.
# trust_remote_code stays True: the model/tokenizer code is loaded from the
# local checkpoint directory rather than from the transformers package.
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True)
model = model.eval()  # evaluation mode: this notebook section only runs inference

2024-06-22 03:29:44,240 - modelscope - INFO - PyTorch version 2.3.1+cu118 Found.
2024-06-22 03:29:44,242 - modelscope - INFO - Loading ast index from /home/ubuntu/.cache/modelscope/ast_indexer
2024-06-22 03:29:44,293 - modelscope - INFO - Loading done! Current index file version is 1.15.0, with md5 1ded9c52ecd449140f691a5f467acd25 and a total number of 980 components indexed
  torch.utils._pytree._register_pytree_node(
CUDA extension not installed.
CUDA extension not installed.
Try importing flash-attention for faster inference...
Some weights of the model checkpoint at /home/ubuntu/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat-Int4 were not used when initializing QWenLMHeadModel: ['transformer.h.8.mlp.w2.bias', 'transformer.h.21.mlp.w1.bias', 'transformer.h.22.attn.c_proj.bias', 'transformer.h.12.mlp.w1.bias', 'transformer.h.12.mlp.w2.bias', 'transformer.h.4.attn.c_proj.bias', 'transformer.h.7.mlp.w1.bias', 'transformer.h.15.mlp.w1.bias', 'transformer.h.19.mlp.w1.bias', 'transformer.h.21.

In [3]:
# ModelScope model id used by the following cell to load the tokenizer, model and generation config.
model_name = "qwen/Qwen-1_8B-Chat-Int4"

In [4]:
from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig

# Model names: "qwen/Qwen-7B-Chat", "qwen/Qwen-14B-Chat"
# Note: this rebinds `tokenizer` and `model`, replacing any objects loaded earlier.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    fp16=True,
).eval()
# Generation length, top_p and other related hyperparameters can be customised here.
model.generation_config = GenerationConfig.from_pretrained(model_name, trust_remote_code=True)

# Three-turn conversation: the `history` returned by each call is passed to the
# next one, starting from None for the first turn.
history = None
for user_turn in ("你好", "浙江的省会在哪里？", "它有什么好玩的景点"):
    response, history = model.chat(tokenizer, user_turn, history=history)
    print(response)

2024-06-22 06:24:05,359 - modelscope - INFO - PyTorch version 2.3.1+cu118 Found.
2024-06-22 06:24:05,361 - modelscope - INFO - Loading ast index from /home/ubuntu/.cache/modelscope/ast_indexer
2024-06-22 06:24:05,849 - modelscope - INFO - Loading done! Current index file version is 1.15.0, with md5 1ded9c52ecd449140f691a5f467acd25 and a total number of 980 components indexed
  torch.utils._pytree._register_pytree_node(
CUDA extension not installed.
CUDA extension not installed.
Try importing flash-attention for faster inference...
Some weights of the model checkpoint at /home/ubuntu/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat-Int4 were not used when initializing QWenLMHeadModel: ['transformer.h.3.mlp.w2.bias', 'transformer.h.17.attn.c_proj.bias', 'transformer.h.19.attn.c_proj.bias', 'transformer.h.19.mlp.w2.bias', 'transformer.h.20.mlp.c_proj.bias', 'transformer.h.14.attn.c_proj.bias', 'transformer.h.0.attn.c_proj.bias', 'transformer.h.4.mlp.w2.bias', 'transformer.h.11.attn.c_proj.bias',

你好！很高兴见到你。有什么我可以帮助你的吗？
浙江省的省会是杭州。
杭州有许多有趣的地方，其中最著名的可能是西湖和灵隐寺。西湖是中国最著名的风景名胜区之一，拥有许多美丽的景点，如雷峰塔、三潭印月等。灵隐寺是中国最早的佛教寺庙之一，它有着悠久的历史，并且保存了许多古代文物。此外，杭州还有许多其他有趣的景点，例如江湾、楼外楼、断桥残雪等。


In [1]:
import os

# Local directory of the downloaded Qwen-1_8B-Chat-Int4 checkpoint.
# Built from the current user's home directory instead of a hard-coded
# '/home/ubuntu' prefix, so the notebook is not tied to one machine/account.
# For the user 'ubuntu' this resolves to the same path reported in the load log
# above (/home/ubuntu/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat-Int4).
# NOTE(review): assumes the default ModelScope cache location — adjust if the
# cache directory has been relocated.
transformers_model_id = os.path.expanduser("~/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat-Int4")

# Prompt 

In [1]:
# Sample user question used to try out the extraction prompt.
Q = "1月2号上海天气预报"

# Instruction prompt asking the model to extract the city and the date from a
# question and to answer in a fixed "城市 / 日期" layout. The single %s
# placeholder receives the user's question. Later cells reuse this template.
prompt_template = """
分析问题中出现过的城市和日期

交互格式如下：

问题：用户的原始问题，里面可能包含了城市和日期信息
回答：提炼问题中出现的城市名称和具体日期，按照格式整理返回，
    城市：值是城市的名字
    日期：值是具体日期

现在开始。

问题：%s
回答：
"""

# Fill the placeholder; the one-element tuple keeps %-formatting unambiguous.
prompt = prompt_template % (Q,)

# print() rather than a bare expression so the newlines are rendered, not shown as a repr.
print(prompt)



分析问题中出现过的城市和日期

交互格式如下：

问题：用户的原始问题，里面可能包含了城市和日期信息
回答：提炼问题中出现的城市名称和具体日期，按照格式整理返回，
    城市：值是城市的名字
    日期：值是具体日期

现在开始。

问题：1月2号上海天气预报
回答：



In [9]:
# Send the extraction prompt as a fresh conversation (history=None) and show the reply.
resp, hist = model.chat(tokenizer, prompt, history=None)
print(resp)

问题：1月2号的上海天气预报
回答：01/02/2023 14:05:00 阴转多云 
城市：上海  
日期：2023-01-02


# Finetuning

In [None]:
# Install the fine-tuning dependencies (run in a shell, or use `%pip install ...` inside the notebook):
# pip install "peft<0.8.0" deepspeed

## 准备数据

In [10]:
# Illustration of the training-data layout: a list of samples, each holding an
# "id" and a "conversations" list whose turns carry "from" (the speaker:
# "user" or "assistant") and "value" (the text of that turn).
[
  {
    "id": "identity_0",
    "conversations": [
      {
        "from": "user",
        "value": "你好"
      },
      {
        "from": "assistant",
        "value": "我是一个语言模型，我叫通义千问。"
      }
    ]
  }
]

[{'id': 'identity_0',
  'conversations': [{'from': 'user', 'value': '你好'},
   {'from': 'assistant', 'value': '我是一个语言模型，我叫通义千问。'}]}]

In [15]:
import random
import json
import time

# Build a synthetic fine-tuning set for the city/date extraction task and write
# it to disk as JSON.
#
# Each sample pairs a templated weather question (wrapped in `prompt_template`,
# defined in the prompt cell) with the expected answer "城市:<city>\n日期:<date>".
# NOTE(review): the answer uses an ASCII ':' while the format description inside
# prompt_template uses the full-width '：' — confirm this is intended.

NUM_SAMPLES = 1000                    # number of generated conversations
SEED = 42                             # fixed seed -> same data on every Restart & Run All
TRAIN_DATA_PATH = 'train_data.text'   # output file (JSON content)

# Dedicated generator: reproducible, and leaves the global `random` state untouched.
rng = random.Random(SEED)

train_data = []

# (question template, strftime format of the expected date answer).
# Templates without {year} expect a month-day answer only.
Q_list = [
    ('{city}{year}年{month}月{day}日的天气','%Y-%m-%d'),
    ('{city}{year}年{month}月{day}号的天气','%Y-%m-%d'),
    ('{city}{month}月{day}日的天气','%m-%d'),
    ('{city}{month}月{day}号的天气','%m-%d'),

    ('{year}年{month}月{day}日{city}的天气','%Y-%m-%d'),
    ('{year}年{month}月{day}号{city}的天气','%Y-%m-%d'),
    ('{month}月{day}日{city}的天气','%m-%d'),
    ('{month}月{day}号{city}的天气','%m-%d'),
]

city_list = ['上海','北京','南京','苏州','杭州','南通','盐城','南昌','西安','成都','东京','台北']

for i in range(NUM_SAMPLES):
    # Distinct names for the template and the rendered question: the global `Q`
    # from the prompt cell is no longer overwritten, and no name changes type mid-loop.
    question_tpl, date_fmt = rng.choice(Q_list)
    city = rng.choice(city_list)
    year = rng.randint(1990, 2025)
    month = rng.randint(1, 12)
    day = rng.randint(1, 28)  # capped at 28 so every month/day combination is a valid date

    # Round-trip through struct_time to get the zero-padded form the answer
    # expects (e.g. month 1, day 2 -> "01-02").
    parsed = time.strptime('{}-{}-{}'.format(year, month, day), '%Y-%m-%d')
    date_field = time.strftime(date_fmt, parsed)

    question = question_tpl.format(city=city, year=year, month=month, day=day)
    answer = '城市:%s\n日期:%s' % (city, date_field)

    train_data.append({
        'id': 'identity_{}'.format(i),
        'conversations': [
            {"from": "user", "value": prompt_template % (question,)},
            {"from": "assistant", "value": answer},
        ],
    })

# json.dump with default settings writes the same content as the previous
# fp.write(json.dumps(...)): non-ASCII characters stay \u-escaped.
with open(TRAIN_DATA_PATH, 'w', encoding='utf-8') as fp:
    json.dump(train_data, fp)

print("样本数量:",len(train_data))


样本数量: 1000


# 测试微调之后的模型

In [17]:
from peft import AutoPeftModelForCausalLM

# Path to the output directory holding the fine-tuned adapter.
# NOTE(review): absolute, machine-specific path — presumably the output directory
# of the fine-tuning run; adjust to the local setup.
ADAPTER_DIR = '/home/ubuntu/nn/Qwen/output_qwen'

# Rebinds `model`: from here on the notebook uses the model loaded from ADAPTER_DIR.
model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_DIR, device_map="auto", trust_remote_code=True).eval()

Try importing flash-attention for faster inference...
Some weights of the model checkpoint at /home/ubuntu/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat-Int4 were not used when initializing QWenLMHeadModel: ['transformer.h.3.mlp.w2.bias', 'transformer.h.17.attn.c_proj.bias', 'transformer.h.19.attn.c_proj.bias', 'transformer.h.19.mlp.w2.bias', 'transformer.h.20.mlp.c_proj.bias', 'transformer.h.14.attn.c_proj.bias', 'transformer.h.0.attn.c_proj.bias', 'transformer.h.4.mlp.w2.bias', 'transformer.h.11.attn.c_proj.bias', 'transformer.h.1.mlp.c_proj.bias', 'transformer.h.11.mlp.w2.bias', 'transformer.h.23.mlp.c_proj.bias', 'transformer.h.12.mlp.w1.bias', 'transformer.h.11.mlp.c_proj.bias', 'transformer.h.21.attn.c_proj.bias', 'transformer.h.17.mlp.w1.bias', 'transformer.h.10.attn.c_proj.bias', 'transformer.h.16.attn.c_proj.bias', 'transformer.h.2.mlp.w2.bias', 'transformer.h.2.mlp.c_proj.bias', 'transformer.h.18.attn.c_proj.bias', 'transformer.h.22.attn.c_proj.bias', 'transformer.h.14.mlp.c_proj

In [18]:
# Override the top_p sampling threshold on the currently loaded model's generation config.
# NOTE(review): presumably set to 0 so the answers in the next cell are
# (near-)deterministic — confirm this is the intent and that the installed
# transformers version accepts an integer 0 here.
model.generation_config.top_p = 0 


In [19]:
# Free-form questions used to spot-check the loaded model. Note that this
# rebinds `Q_list` to a plain list of strings.
Q_list = [
    '2024年4月15号三亚下雨吗？',
    '上海3-16号天气预报',
    '5月5号下雨吗？城市是苏州',
    '杭州2024年3月1日有雾霾',
    '我计划6月1号去南京旅游，请问天气怎么样？',
]

for Q in Q_list:
    # Same extraction prompt as before; history=None makes every question a fresh conversation.
    prompt = prompt_template % (Q,)
    resp, hist = model.chat(tokenizer, prompt, history=None)
    print('Q:%s,\nA:%s\n' % (Q, resp))

Q:2024年4月15号三亚下雨吗？,
A:城市:三亚
日期:2024-04-15

Q:上海3-16号天气预报,
A:城市:上海
日期:03-16

Q:5月5号下雨吗？城市是苏州,
A:城市:苏州
日期:05-05

Q:杭州2024年3月1日有雾霾,
A:城市:杭州
日期:2024-03-01

Q:我计划6月1号去南京旅游，请问天气怎么样？,
A:城市:南京
日期:06-01

