In [1]:
import os
import json
import random
import time
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Config():
    num_few_shot = 3
    model_path = 'Qwen/Qwen1.5-14B-Chat'
    excel_path = '/data/yinxiaoln/datasets/dota2eval/Dota2_eval.xlsx'
    dota2_eval_path = '/data/yinxiaoln/datasets/dota2eval/Dota2_eval.json'

In [None]:
df = pd.read_excel(Config.excel_path)
exams = []
for _, row in df.iterrows():
    question = row['题目'].strip().replace('\n', '').replace('\t', '')
    A = row['选项A']
    A = str(A).strip().replace('\n', '').replace('\t', '')
    B = row['选项B']
    B = str(B).strip().replace('\n', '').replace('\t', '')
    C = row['选项C']
    if not pd.isna(C):
        C = str(C).strip().replace('\n', '').replace('\t', '')
    else:
        C = None
    D = row['选项D']
    if not pd.isna(D):
        D = str(D).strip().replace('\n', '').replace('\t', '')
    else:
        D = None
    
    answer = row['答案']
    answer = str(answer).strip().replace('\n', '').replace('\t', '')
    if answer == 'A':
        answer = A
    elif answer == 'B':
        answer = B
    elif answer == 'C':
        answer = C
    elif answer == 'D':
        answer = D
    else:
        assert False
    eval = {}
    eval['question'] = question
    choices = [A, B, C, D]
    choices = list(filter(lambda x: x is not None, choices))
    random.shuffle(choices)
    answer = choices.index(answer)
    answer = 'ABCDEF'[answer]
    eval['choices'] = choices
    eval['answer'] = answer
    exams.append(eval)

with open(Config.dota2_eval_path, 'w', encoding='utf-8') as f:
    json.dump(exams, f, ensure_ascii=False, indent=4)

推理貌似要用单卡，多卡会报错

In [3]:
device = 'cuda'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
model_path = Config.model_path
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='auto',
    torch_dtype='auto',
    trust_remote_code=True,
    mirror='tuna',
    resume_download=True
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, mirror='tuna')

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.59s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
def format_example(exam, include_answer=True, cot=False, add_prompt=''):
    chat = []
    CHOICES = 'ABCDEFGHIJK'
    example = add_prompt + exam['question']
    for i, choice in enumerate(exam['choices']):
        example += f'\n{CHOICES[i]}. {choice}'

    example += '\n答案：\n\n'
    chat.append({"role": "user", "content": example})
    if include_answer:
        if cot:
            ans = "让我们一步一步思考，\n" + \
                exam["explanation"] + f"\n所以答案是{exam['answer']}。"
        else:
            ans = exam["answer"]
        chat.append({"role": "assistant", "content": f'\n{ans}'})
    return chat


def generate_few_shot_prompt(few_shot=[], cot=False):
    chat = [
        {"role": "system", "content": "以下是关于游戏Dota2知识考试的单项选择题，请选出其中正确的答案。\n\n"}
    ]
    for exam in few_shot:
        chat.extend(format_example(exam, cot=cot))
    return chat


def judge(answer: str, outputs: str):
    outputs = ''.join(ch for ch in outputs if ch.isupper())
    print(f'ans={answer}, output={outputs}')
    return outputs == answer

In [5]:

def dota2_eval(file, model, tokenizer):
    with open(file, 'r') as f:
        exams = json.load(f)
    few_shot = exams[0: 3]
    chat = generate_few_shot_prompt(few_shot, cot=False)
    right = 0
    for i in range(3, len(exams)):
        chat_one = format_example(exams[i], include_answer=False, cot=False)
        chat_tmp = chat
        chat_tmp.extend(chat_one)
        formatted_chat = tokenizer.apply_chat_template(
            chat_tmp, tokenize=False, add_generation_prompt=True)
        #print(formatted_chat)

        model_inputs = tokenizer([formatted_chat], return_tensors="pt").to(device)
        generated_ids = model.generate(
            model_inputs.input_ids,
            max_new_tokens=128
        )
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        #print(f'{i}:{response}')
        if judge(exams[i]['answer'], response):
            right += 1
    print("acc=", right / (len(exams) - 3))


dota2_eval(Config.dota2_eval_path, model, tokenizer)

ans=D, output=A
ans=C, output=C
ans=C, output=A
ans=B, output=B
ans=B, output=D
ans=A, output=B
ans=A, output=B
ans=B, output=B
ans=C, output=B
ans=A, output=D
ans=B, output=C
ans=C, output=A
ans=A, output=A
ans=B, output=B
ans=C, output=D
ans=B, output=B
ans=B, output=C
ans=D, output=D
ans=D, output=A
ans=C, output=C
ans=D, output=D
ans=C, output=C
ans=B, output=C
ans=B, output=A
ans=B, output=B
ans=B, output=B
ans=C, output=D
ans=D, output=C
ans=B, output=B
ans=B, output=B
ans=B, output=C
ans=C, output=B
ans=C, output=A
ans=C, output=B
ans=D, output=D
ans=D, output=D
ans=B, output=D
ans=B, output=A
ans=D, output=B
ans=A, output=B
ans=B, output=D
ans=B, output=B
ans=B, output=A
ans=D, output=D
ans=C, output=C
ans=A, output=A
ans=C, output=B
ans=C, output=C
ans=B, output=C
ans=A, output=C
ans=B, output=B
ans=A, output=C
ans=A, output=D
ans=B, output=B
ans=A, output=D
ans=D, output=C
ans=C, output=C
ans=D, output=B
ans=B, output=D
ans=C, output=B
ans=C, output=D
ans=D, output=D
ans=A, o

|模型|正确率|
|-|-|
|Qwen-14B（Baseline）|0.3739|

In [None]:
!CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
    --model_name_or_path Qwen/Qwen1.5-14B-Chat \
    --adapter_name_or_path /data/yinxiaoln/code/Qwen1.5/examples/sft/output_qwen \
    --template default \
    --finetuning_type lora \
    --export_dir /tmp/lora \
    --export_size 2 \
    --export_legacy_format False