In [1]:
try:
    import peft
    LOCAL = True
    MODEL_PATH = "deepseek-ai/deepseek-math-7b-rl"
    from functions import *
except:
    LOCAL = False
    MODEL_PATH = "/kaggle/input/deepseek-math"
    from functions_math import *
    import gc

import sys
import subprocess
import torch
if not LOCAL:torch.backends.cuda.enable_mem_efficient_sdp(False)
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map = "auto",
    torch_dtype="auto",
    trust_remote_code = True,
    use_flash_attention_2=LOCAL,
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The model was loaded with use_flash_attention_2=True, which is deprecated and may be removed in a future release. Please use `attn_implementation="flash_attention_2"` instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
if LOCAL:
    import json
    with open('../Data/AMC/aime_normal.json', 'r') as file:
        data = json.load(file)
    # to have consistent format as in Kaggle
    data = pd.DataFrame(data)
    data.rename(columns={'question': 'problem'}, inplace=True)
else:
    data = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/test.csv')
    if len(data) < 5:
        data = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
        PRIVATE = False
    else:
        PRIVATE = True

In [3]:
outs = []
no_repeat_processor = [NoRepeatTokenLogitsProcessor()]
# digits_processor = [DigitsOnlyLogitsProcessor(tokenizer)]
token2answer = tokenizer.encode("\nthe answer is:", return_tensors="pt",add_special_tokens=False).to('cuda')
for index, row in data.iterrows():
    problem = row['problem']

    # word answer
    query_prompt = gen_prompt(problem)
    messages = [{"role": "user","content": query_prompt}]
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    with torch.no_grad():
        encoded_output = model.generate(inputs, max_new_tokens=1500, do_sample=False, pad_token_id=tokenizer.eos_token_id,\
                                        logits_processor=no_repeat_processor)

    word_decoded_output = tokenizer.decode(encoded_output[0], skip_special_tokens=True).replace(query_prompt, '')
    try:
        word_answer = word_decoded_output.split('\n')[-1]
        word_answer = naive_parse(word_answer) % 1000
    except:
        word_answer = 'parsing error'
    
    # code answer
    query_prompt = gen_code(problem,word_decoded_output)
    messages = [{"role": "user","content": query_prompt}]
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    with torch.no_grad():
        encoded_output = model.generate(inputs, max_new_tokens=1500, do_sample=False, pad_token_id=tokenizer.eos_token_id)
    code_decoded_output = tokenizer.decode(encoded_output[0], skip_special_tokens=True).replace(query_prompt, '')
    try:
        code = code_decoded_output.split('```')[1][7:]
        with open('code.py', 'w') as fout:
            fout.write(code)
        batcmd = 'timeout 7 ' + sys.executable + ' code.py'
        shell_output = subprocess.check_output(batcmd, shell=True).decode('utf8')
        try:
            code_answer = round(float(eval(shell_output))) % 1000
        except:
            code_answer = naive_parse(shell_output) % 1000
    except Exception as e:
        code_answer = 'parsing error'

    final_answer = aggregate([word_answer,code_answer])
    if LOCAL:
        outs.append((problem,word_decoded_output,word_answer,code_decoded_output,code_answer,int(row['final_answer'][0]),final_answer))
    else:
        outs.append(final_answer)
        torch.cuda.empty_cache()
        gc.collect()
        if not PRIVATE:
            print(word_decoded_output)
            print(f'\nword answer is {word_answer}\n')
            print(code_decoded_output)
            print(f'\ncode answer is {code_answer}\n')

In [None]:
if LOCAL:
    outs_df = pd.DataFrame(outs,columns=['problem','word_output','word_answer','code_output','code_answer','yhat','y'])
    print(f"correct: {sum(outs_df.yhat == outs_df.y)}")
    print(f"parse error: {sum(outs_df.yhat =='parsing error')}")
    out_path = create_next_model_folder('../llmOutputs')
    print(out_path) # ../llmOutputs/model1
    outs_df.to_csv(out_path+'/generations.csv', header=True, index=False)
else:
    if not PRIVATE:
        answers = data.answer.tolist()
        correct = sum([y==yhat for y,yhat in zip(answers,outs)])
        print(f'{correct} correct answers')    
    data['answer'] = outs
    data[['id','answer']].to_csv("submission.csv", header=True, index=False)

51
../llmOutputs/model1
