1. Use model.generate instead of pipeline to accommodate PEFT.
2. Changed total_answers so the most common answer is counted across both the LLM answer and the code answer.
3. Use a PEFT LoRA model.

In [None]:
# Install pinned wheels from local files under /kaggle/input (bitsandbytes 0.42.0,
# peft 0.10.0, sentence_transformers 2.6.1); -qq keeps pip output quiet.
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq
!pip install -U /kaggle/input/hms-package/peft-0.10.0-py3-none-any.whl -qq
!pip install -U /kaggle/input/hms-package/sentence_transformers-2.6.1-py3-none-any.whl -qq

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    AutoConfig,
    set_seed
)

# Fix the RNG seed up front so sampled generations are repeatable.
set_seed(42)

# Locations of the base checkpoint and of the adapter applied on top of it.
MODEL_PATH = "/kaggle/input/deepseek-math"
peft_model_id = "/kaggle/input/adapter-math"

# 4-bit NF4 weights with double quantization; matmuls are computed in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load the quantized base model; device placement and dtype are left to the
# library ("auto"), and custom modelling code shipped with the checkpoint is
# allowed to run (trust_remote_code).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=quantization_config,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM, 
)
# Wrap the already-loaded base model with the adapter stored at peft_model_id;
# `model` is rebound to the resulting PeftModel.
model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
import pandas as pd
from tqdm import tqdm
# Start by assuming a private (scored) run; a later cell flips this flag to
# False when the loaded test set turns out to be tiny.
PRIVATE = True

# Problems to solve in this run.
df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/test.csv')
df.head()

In [None]:
# With fewer than 5 test rows, work on train.csv instead and mark the run as
# non-private (presumably the small file is a public placeholder — verify).
if len(df) < 5:
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    PRIVATE = False
df.head()

In [None]:
import gc
# Target device name. NOTE(review): the inference cell visible in this notebook
# passes the literal "cuda" instead of reading this variable.
device = 'cuda'

In [None]:
def naive_parse(answer):
    """Return the last run of consecutive ASCII digits found in ``answer``.

    The text is scanned from its end: trailing non-digit characters are
    skipped, then digits are taken until the first non-digit (or the start of
    the text) is reached.

    Args:
        answer: Text to search, e.g. a model's free-form reply.

    Returns:
        The digit run as a string, or ``''`` when ``answer`` has no digits.
    """
    digits = '0123456789'

    # Walk left past everything that follows the final digit.
    stop = len(answer)
    while stop > 0 and answer[stop - 1] not in digits:
        stop -= 1

    # Keep walking left for as long as the characters are digits.
    begin = stop
    while begin > 0 and answer[begin - 1] in digits:
        begin -= 1

    return answer[begin:stop]

In [None]:
import transformers

In [None]:
# Record the installed transformers version in the notebook output.
print(f"Transformers Version: {transformers.__version__}")

In [None]:
import torch

# Turn off the memory-efficient scaled-dot-product-attention backend.
# NOTE(review): the reason is not stated in this notebook — confirm it is still needed.
torch.backends.cuda.enable_mem_efficient_sdp(False)

In [None]:
import re
import sys
import subprocess


def process_output(output):
    """Extract two candidate answers from one model generation.

    Two independent attempts are made:

    * **Code answer** - the first triple-backtick fenced block is written to
      ``code.py`` and executed with the current interpreter (7 second limit);
      whatever it prints is evaluated as a number.
    * **Text answer** - the last ``\\boxed{...}`` expression in the text, or,
      if there is none, the last run of digits found by ``naive_parse``.

    Each numeric value is rounded and reduced modulo 1000.

    Args:
        output: Text generated by the model.

    Returns:
        A tuple ``(result_output, code_output)``. Each element is an ``int``
        in ``[0, 999]``, or ``-1`` when that attempt failed.
    """
    result = output

    try:
        # First fenced block; [7:] drops the 7 characters after the fence
        # (assumes the fence is opened as ```python followed by a newline).
        code = output.split('```')[1][7:]

        with open('code.py', 'w', encoding='utf-8') as fout:
            fout.write(code)

        try:
            # An argv list (no shell) avoids quoting problems, and the
            # built-in timeout replaces the external `timeout` command.
            shell_output = subprocess.check_output(
                [sys.executable, 'code.py'], timeout=7
            ).decode('utf8')
            print(shell_output)
            # NOTE(security): eval() runs on the stdout of model-written code.
            # Tolerable only inside a disposable sandbox.
            code_output = round(float(eval(shell_output))) % 1000
        except Exception:
            # Non-zero exit, timeout, or unparsable output: no code answer.
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
            code_output = -1

        print('CODE RESULTS', code_output)

    except Exception as e:
        print(e)
        print('ERROR PARSING')
        code_output = -1

    try:
        # Non-greedy so that several \boxed{...} on one line are captured
        # separately instead of being merged into a single bogus match.
        result_output = re.findall(r'\\boxed\{(.*?)\}', result)

        print('BOXED', result_output)
        if not len(result_output):
            result_output = naive_parse(result)
        else:
            result_output = result_output[-1]

        print('BOXED', result_output)
        if not len(result_output):
            result_output = -1

        else:
            # NOTE(security): eval() on model-generated text; see note above.
            result_output = round(float(eval(result_output))) % 1000

    except Exception as e:
        print(e)
        print('ERROR PARSING')
        result_output = -1

    return result_output, code_output

In [None]:
import re
from collections import defaultdict


# Suffix appended to every problem statement to steer the answer format.
tool_instruction = " The answer should be given as a non-negative modulo 1000."
tool_instruction += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'


# Number of sampled generations per problem (fewer when not a private run).
n_repetitions = 5 if PRIVATE else 2
# One list per problem holding, for every generation, the code answer followed
# by the text answer (-1 marks a failed attempt).
total_answers = []

for problem in tqdm(df['problem']):
    messages = [
        {
            "role": "user",
            "content": problem + tool_instruction
        }
    ]

    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    # generate() returns prompt + continuation for decoder-only models. The
    # prompt itself contains "\boxed{}", which would always match the answer
    # regex with an empty string, so only the new tokens are decoded below.
    prompt_length = inputs.shape[-1]

    answers = []

    for _ in tqdm(range(n_repetitions)):
        try:
            with torch.no_grad():
                encoded_output = model.generate(
                    inputs,
                    max_new_tokens=2048,
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=tokenizer.eos_token_id,
                )
            raw_output = tokenizer.decode(
                encoded_output[0][prompt_length:], skip_special_tokens=True
            ).strip()

            result_output, code_output = process_output(raw_output)

        except Exception as e:
            print(e)
            result_output, code_output = -1, -1

        finally:
            # Release memory after failures too (e.g. CUDA OOM), which is when
            # it matters most. Collect Python garbage first so its GPU blocks
            # can be returned to the driver.
            gc.collect()
            torch.cuda.empty_cache()

        answers.append(code_output)
        answers.append(result_output)

    total_answers.append(answers)

In [None]:
import numpy as np
from collections import Counter

# Submitted when every attempt for a problem failed.
DEFAULT_ANSWER = 37

# One final answer per problem: the most frequent successful candidate.
final_answers = []

for a in total_answers:
    # Negative values are failure sentinels (-1) and must not win the vote.
    valid = [candidate for candidate in a if candidate >= 0]
    if valid:
        # Ties go to the candidate that appeared first (Counter keeps insertion order).
        final_answers.append(Counter(valid).most_common(1)[0][0])
    else:
        # No usable candidate, including the case of an empty list.
        final_answers.append(DEFAULT_ANSWER)

In [None]:
# Store the voted predictions; this creates or overwrites the 'answer' column.
df['answer'] = final_answers

In [None]:
# Show the frame with the predictions attached.
df

In [None]:
# Write the id/answer pairs to submission.csv with a header row and no index column.
df[['id','answer']].to_csv("submission.csv", header=True, index=False)

In [None]:
# Preview the first rows of what was written.
df[['id','answer']].head()

In [None]:
# Local validation only: compare the predictions with the labels in train.csv.
if not PRIVATE:
    # Read the labelled data afresh so 'answer' holds the reference values.
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    df['model_answer'] = final_answers
    df['match'] = df['answer'].eq(df['model_answer'])
    print(f'{df.match.sum()} matches in {len(df)} examples')