In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [1]:
# prepare the model input
# `prompt` is the complete instruction text sent to the model in a single user
# turn. Its sections are: role, teaching objective (target word "kate" and its
# phoneme list), dialogue history (empty), student input (one mispronounced
# phoneme, /ei/), special phoneme markers, a two-part response strategy with
# few-shot examples, the task steps, and the output constraints.
# The literal is runtime data: editing any character changes what the model sees.
prompt = """# your role
你是一个一对一教英语自然拼读课的老师
Your name is Lucy.
You are a twenty six year-old English teacher from Seattle, USA.

# teaching objective
- 目标单词：kate
- 目标单词音标列表：[/k/, /ei/, /t/]

# dialogue history


# student input
输入描述：拼读错误：一个错误
错误音素：[/ei/]

# 特殊音标标记
/l_1/:/l/后跟元音时， 使用/l_1/，如单词left、led、love、like、alligator。
/l_2/:/l/后跟辅音或在单词结尾，使用/l_2/，如单词ball、feel、help。
/r_1/:/r/后跟元音时，使用/r_1/，示例单词：red、run、road、break。

# response strategy
- part1:纠错+讲解
    - 迁移示例:
        - 目标单词:cake,错误音素:/eɪ/,拼读规则:[/k/, /eɪ/, /k/],回复示例:Remember A, space, E, says /eɪ/.
        - 目标单词:car,错误音素:/ɑːr/,拼读规则:[/k/, /ɑːr/],回复示例:Remember A, R, makes the /ɑːr/ sound.
        - 目标单词:red,错误音素:/e/,拼读规则:[/r_1/, /ed/],回复示例:Take note of letter E. It says /e/.
        - 目标单词:red,错误音素:/d/,拼读规则:[/r_1/, /ed/],回复示例:Take note of letter D. It says /d/.
        - 目标单词:mat,错误音素:/t/,拼读规则:[/m/, /æ/, /t/],回复示例:T says /t/.
        - 目标单词:red,错误音素:/r/,拼读规则:[/r_1/, /ed/],回复示例:Pay attention, R, says /r_1/.
        - 目标单词:mat,错误音素:/t/,拼读规则:[/m/, /æ/, /t/],回复示例:Keep in mind that T makes the /t/ sound.
        - 目标单词:mat,错误音素:/m/,拼读规则:[/m/, /æ/, /t/],回复示例:Please remember M says /m/.
        - 目标单词:mat,错误音素:/æ/,拼读规则:[/m/, /æ/, /t/],回复示例:Remember A says /æ/. 
        - 目标单词:cake,错误音素:/eɪ/,拼读规则:[/k/, /eɪk/],回复示例:Pay attention, A, space, E says /eɪ/.
        - 目标单词:car,错误音素:/ɑːr/,拼读规则:[/k/, /ɑːr/],回复示例:Keep in mind that A, R, makes the /ɑːr/ sound.
- part2:重读目标单词
    - 迁移示例:
        - 目标单词:car,错误音素:/k/,拼读规则:[/k/, /ɑːr/],回复示例:Say, /k/, /k/, car.
        - 目标单词:shirt,错误音素:/ɜːr/,拼读规则:[/ʃ/, /ɜːr/, /t/],回复示例:Please say, /ɜːr/, /ɜːr/, shirt.
        - 目标单词:mat,错误音素:/æ/,拼读规则:[/m/, /æ/, /t/],回复示例:Please say, /æ/, /æ/, mat.
        - 目标单词:shirt,错误音素:/ʃ/,拼读规则:[/ʃ/, /ɜːr/, /t/],回复示例:Please say, /ʃ/, /ʃ/, shirt.
        - 目标单词:red,错误音素:/r/,拼读规则:[/r_1/, /e/, /d/],回复示例:Please follow me, /r_1/, /r_1/, red.
        - 目标单词:mat,错误音素:/t/,拼读规则:[/m/, /æ/, /t/],回复示例:Repeat after me, /t/, /t/, mat.
        - 目标单词:mat,错误音素:/æ/,拼读规则:[/m/, /æt/],回复示例:Please say, /æt/, /æt/, mat.
        - 目标单词:mat,错误音素:/m/,拼读规则:[/m/, /æ/, /t/],回复示例:Follow me, /m/, /m/, mat.
        - 目标单词:red,错误音素:/d/,拼读规则:[/r_1/, /ed/],回复示例:Follow me, /ed/, /ed/, red.
        - 目标单词:cake,错误音素:/eɪ/,拼读规则:[/k/, /eɪk/],回复示例:Please say, /eɪk/, /eɪk/, cake.
        - 目标单词:red,错误音素:/d/,拼读规则:[/r_1/, /e/, /d/],回复示例:Follow me, /d/, /d/, red.
        - 目标单词:red,错误音素:/e/,拼读规则:[/r_1/, /e/, /d/],回复示例:Please follow me, /e/, /e/, red.
        - 目标单词:mat,错误音素:/t/,拼读规则:[/m/, /æ/, /t/],回复示例:Let's try once more, /t/, /t/, mat.
        - 目标单词:red,错误音素:/e/,拼读规则:[/r_1/, /ed/],回复示例:Follow me, /ed/, /ed/, red.
        - 目标单词:car,错误音素:/ɑːr/,拼读规则:[/k/, /ɑːr/],回复示例:Say, /ɑːr/, /ɑːr/, car.
        - 目标单词:mat,错误音素:/æ/,拼读规则:[/m/, /æt/],回复示例:Now say, /æt/, /æt/, mat.
        - 目标单词:cake,错误音素:/eɪ/,拼读规则:[/k/, /eɪ/, /k/],回复示例:Say, /eɪ/, /eɪ/, cake.
        - 目标单词:mat,错误音素:/t/,拼读规则:[/m/, /æt/],回复示例:Please say, /æt/, /æt/, mat.


# task
- **任务1**：输出目标单词的拼读规则
    - sub_step1: 判断<目标单词音标列表>是否是乱序的，如果是乱序的则需要调整顺序，**只能调整列表中元素的顺序，不能合并元素**
    - sub_step2: 如果<目标单词音标列表>中包含/l/或/r/，根据<特殊音标标记>的规则修改音标
    - sub_step3: 输出调整后的音标列表作为拼读规则
    - example
        - 目标单词：mane, 目标单词音标列表: [/n/, /eɪ/, /m/], 调整后: [/m/, /eɪ/, /n/]
        - 目标单词：mat, 目标单词音标列表: [/æ/, /t/, /m/], 调整后: [/m/, /æ/, /t/]
        - 目标单词：red, 目标单词音标列表: [/d/, /r/, /e/], 调整后: [/r_1/, /e/, /d/]
- **任务2**：遍历<response strategy>中每个part，把每个part生成的回复按顺序拼接到一起，作为最终的回复
    - 如果part中有给出通用示例的的，直接从通用示例选择一个，只使用一个示例
    - 如果part中有给出迁移示例的（1条迁移示例包含：目标单词、错误音素（可选）、拼读规则、回复示例），结合任务1生成的拼读规则，以及<teaching objective>中目标单词，<student input>中的错误音素（如有），从迁移示例中匹配一个最佳示例，**匹配时要优先考虑能够产生错误音素的单词拼写规则或字母组合（例如，单词shame中的/eɪ/音是由'a_e'拼写规则产生的，因此应匹配教授'A, space, E'的示例）**，只基于回复示例生成回复，不要添加多余的描述，只使用一个示例
    - 严格遵守任务1输出的拼读规则，不要修改拼读规则
    - 使用空格拼接多个part的回复

# requirement
- 最终的回复不能有换行符
- 最终的回复必须严格完全由各个part构成，确保没有其他内容

# output
- 输出且只输出任务2的回复（输出内容中不要有换行符、双引号、单引号。不要遗漏逗号、句号。）"""
# Wrap the prompt as a one-element conversation: a single user message and no
# system message. This list is what tokenizer.apply_chat_template receives.
messages = [
    {"role": "user", "content": prompt}
]

In [4]:
import json

# Directory of the Qwen3-32B checkpoint; both the tokenizer and the weights are
# read from this same location.
model_name = "/root/group-shared/models/base_models/Qwen3-32B"

# Keyword options for loading the weights: dtype and device placement are left
# to the library ("auto"), and the flash_attention_2 attention backend is requested.
_model_load_options = {
    "torch_dtype": "auto",
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
}

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)

Loading checkpoint shards:   0%|          | 0/17 [00:00<?, ?it/s]

In [5]:
# Render the conversation into a single prompt string (tokenize=False returns
# text rather than ids) with the generation prompt appended.
_chat_template_options = dict(
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
)
text = tokenizer.apply_chat_template(messages, **_chat_template_options)

# Tokenize the rendered prompt as a batch of one, then move the resulting
# PyTorch tensors onto the device the model reports.
model_inputs = tokenizer([text], return_tensors="pt")
model_inputs = model_inputs.to(model.device)

In [16]:
from __future__ import unicode_literals
from tqdm import tqdm

In [None]:
# Token id 151668 is the closing </think> tag. Everything up to and including
# its last occurrence is treated as reasoning; the remainder is the reply.
think_end_token_id = 151668
# The generated sequence starts with the prompt tokens; this many are skipped.
prompt_token_count = len(model_inputs.input_ids[0])

# Draw 16 independent samples for the same prompt and print each one.
for _ in tqdm(range(16)):
    generated_ids = model.generate(
        **model_inputs, max_new_tokens=32768, temperature=1.0
    )
    output_ids = generated_ids[0][prompt_token_count:].tolist()

    # parsing thinking content: `index` is one past the last </think> token,
    # or 0 when the tag never appears (the whole output is then the reply).
    index = max(
        (
            position + 1
            for position, token_id in enumerate(output_ids)
            if token_id == think_end_token_id
        ),
        default=0,
    )
    reasoning_ids, reply_ids = output_ids[:index], output_ids[index:]

    thinking_content = tokenizer.decode(reasoning_ids, skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(reply_ids, skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

  6%|▋         | 1/16 [00:06<01:43,  6.91s/it]

thinking content: 
content: Please remember K says /k/\. Pay attention, A says /ei/\. T says /t/\.\.\. Please say, /k/, /k/, kate\. Please say, /ei/, /ei/, kate\. Please say, /t/, /t/, kate\.


 12%|█▎        | 2/16 [00:14<01:39,  7.12s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate. Please say, /ei/, /ei/, kate. Please say, /t/, /t/, kate.


 19%|█▉        | 3/16 [00:19<01:19,  6.08s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate.


 25%|██▌       | 4/16 [00:26<01:18,  6.55s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate. Please say, /ei/, /ei/, kate. Please say, /t/, /t/, kate.


 31%|███▏      | 5/16 [00:29<01:00,  5.47s/it]

thinking content: 
content: Remember K says /k/, E says /ei/, T says /t/. Please say, /k/, /k/, kate.


 38%|███▊      | 6/16 [00:33<00:48,  4.82s/it]

thinking content: 
content: Remember K says /k/, E says /ei/, T says /t/. Please say, /k/, /k/, kate.


 44%|████▍     | 7/16 [00:40<00:50,  5.62s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate. Please say, /ei/, /ei/, kate. Please say, /t/, /t/, kate.


 50%|█████     | 8/16 [00:47<00:49,  6.14s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate. Please say, /ei/, /ei/, kate. Please say, /t/, /t/, kate.


 56%|█████▋    | 9/16 [00:54<00:44,  6.33s/it]

thinking content: 
content: Remember K says /k/. Take note of letter A. It says /ei/. Please remember T says /t/. Please say, /k/, /k/, kate. Please say, /ei/, /ei/, kate. Please say, /t/, /t/, kate.


 62%|██████▎   | 10/16 [00:59<00:35,  5.92s/it]

thinking content: 
content: Remember K, says /k/. Take note of letter A. It says /ei/. Keep in mind that T makes the /t/ sound. Please say, /k/, /k/, kate.


 69%|██████▉   | 11/16 [01:04<00:27,  5.60s/it]

thinking content: 
content: Please remember K says /k/\. Pay attention, A says /ei/\. Keep in mind that T makes the /t/ sound\. Please say, /k/, /k/, kate\.


In [7]:
import torch
import gc
# Release the memory held by the loaded model.
# NOTE: run this cell only after every generation cell has finished. Any later
# call to model.generate needs the model to be loaded again.

# 1. Drop the notebook's reference to the model.
#    The weights are no longer moved to the CPU first: that step copied the whole
#    model into host RAM immediately before discarding it.
#    The membership check makes the cell safe to re-run (no NameError once the
#    model is already gone).
if "model" in globals():
    del model

# 2. Force a garbage-collection pass so unreachable objects are freed now
gc.collect()

# 3. Empty the CUDA caching allocator
torch.cuda.empty_cache()

In [20]:
# Sample 1024 responses to the fixed prompt, score each one, and dump the
# (prompt, response, reward) records to JSON.
# Requires `model`, `tokenizer`, `model_inputs` and `prompt` to be defined; the
# model must still be loaded (do not run this after the cell that deletes it).
dataset = []

# Each of these phrases found in a response adds 1.0 to its reward; the total is
# then shifted by -1.0, so the reward ranges from -1.0 (no match) to 2.0 (all three).
reward_phrases = (
    "K says /k/, E says /ei/, T says /t/",
    "K, A, T makes the /k/, /ei/, /t/ sound",
    "/k/, /ei/, /t/, Kate",
)

# conduct text completion
for _ in tqdm(range(1024)):
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=32768,
    )
    # Keep only the newly generated tokens (strip the prompt prefix).
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

    # parsing thinking content
    try:
        # rindex finding 151668 (</think>)
        index = len(output_ids) - output_ids[::-1].index(151668)
    except ValueError:
        # No </think> token: the whole output is the visible reply.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    data = {}
    data["prompt"] = prompt
    data["response"] = content
    data["reward"] = sum(1.0 for phrase in reward_phrases if phrase in content) - 1.0
    # print(data["response"], data["reward"])
    dataset.append(data)

# ensure_ascii=False writes the Chinese prompt text as raw characters, so the
# file encoding is pinned to UTF-8 instead of depending on the platform locale.
with open("../train_1024_prompt.json", "w", encoding="utf-8") as f:
    json.dump(dataset, f, indent=4, ensure_ascii=False)

100%|██████████| 1024/1024 [59:34<00:00,  3.49s/it] 


In [21]:
from datasets import load_dataset, DatasetDict
import os
import shutil

# Directory the DatasetDict is written to.
_output_dir = "../aixue_train_1024_prompt"

# Read the generated JSON records back as a single "train" split.
dataset = load_dataset("json", data_files="../train_1024_prompt.json", split="train")
print(dataset)

# Wrap the split in a DatasetDict under the key "train".
_splits = {"train": dataset}
dataset_dict = DatasetDict(_splits)
print(dataset_dict)

# Remove any previous export first; the directory is recreated empty only in
# that case.
if os.path.exists(_output_dir):
    shutil.rmtree(_output_dir)
    os.makedirs(_output_dir, exist_ok=True)

_save_options = dict(
    max_shard_size="500MB",  # optional: controls the shard size
    num_proc=1,              # optional: number of parallel processes
)
dataset_dict.save_to_disk(dataset_dict_path=_output_dir, **_save_options)

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['prompt', 'response', 'reward'],
    num_rows: 1024
})
DatasetDict({
    train: Dataset({
        features: ['prompt', 'response', 'reward'],
        num_rows: 1024
    })
})


Saving the dataset (0/1 shards):   0%|          | 0/1024 [00:00<?, ? examples/s]