In [1]:
import torch
from torch.nn.utils import clip_grad_value_
import torch.optim as optim
import numpy as np
import pandas as pd
import pickle
import random
import re
from transformers import AutoTokenizer
import os

# Set TOKENIZERS_PARALLELISM to "false" before any tokenizer is created
# (presumably to silence/disable the Hugging Face tokenizers parallelism — confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Maximum number of token ids kept per training sample; from_gen truncates
# every token list to this length before building the input tensor.
MAX_LEN = 1200

In [1]:

# Version number embedded in the paths of the checkpoint and data this run reads.
version = "4"
# Directory of the merged model that is loaded as the starting point for training.
MODEL_PATH = f"../Model/PRM_LORA{version}_merged_code_policy_01SFT"
# Version number embedded in the paths of the adapter / merged model this run writes.
next_version = str(int(version) + 1)

#### Data: {0,1}-labelled completed paths + SFT examples

In [3]:
# Weight given to every SFT example: it is appended to `ys` once per SFT row
# and multiplied into that sample's loss in the training loop.
SFT_weight = 1

In [4]:
# {0,1}
# Load the completed paths for this version. Each record unpacks to
# (y, score, text, code, prob_i, exit_i); only `text` and `y` are used here.
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by a trusted pipeline.
with open(f"../llmOutputs/PRM/completed_paths_y_code{version}.pickle", "rb") as f:
    completed_paths_y = pickle.load(f)
# Strip the chat prefix from every text and pair it with its label.
data = [[text.replace("<｜begin▁of▁sentence｜>User: ",""),y]
        for y,score,text,code,prob_i,exit_i in completed_paths_y]
texts,ys = zip(*data)

# Standardise the labels to zero mean / unit variance so they can be used as
# signed per-sample loss weights.
ys = np.array(ys)
ys_std = ys.std()
ys = ys - ys.mean()
if ys_std > 0:
    ys = ys / ys_std
# else: every label is identical; dividing by a zero std would turn all weights
# into NaN (0/0), so the centred (all-zero) weights are kept instead.

# Split each text into question and solution so that only the solution is
# trained on. Every entry marks the END of a prompt; search_patterns compiles
# these as regular expressions, so the unescaped "." matches any character.
patterns = [
    r"``` and should only print the final answer.",
    r"print the final result.\nApproach:",
    r"print the final output, as an integer not other python object such as list or tuple.",
]

def search_patterns(text, patterns):
    """Return the end offset of the first match of the first pattern found in ``text``.

    Patterns are tried in the given order and each one is interpreted as a
    regular expression (so an unescaped ``.`` matches any character).

    Args:
        text: string to search.
        patterns: iterable of regular-expression strings.

    Returns:
        The end index (int) of the leftmost match of the first pattern that
        matches, or None when no pattern matches at all. Callers must handle
        None explicitly: slicing with it (``text[:None]``) selects the whole text.
    """
    for pattern in patterns:
        # re.search stops at the leftmost match instead of collecting every
        # match in the text just to read the first one.
        match = re.search(pattern, text)
        if match is not None:
            return match.end()
    return None
    
# Tokenise every completed path as question ids + solution ids and record the
# question length, so the training loop can restrict the loss to the solution.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-math-7b-rl")
input_ids = []
lengths = []
for text in texts:
    idx = search_patterns(text,patterns)
    if idx is None:
        # No prompt marker was found. Slicing with None would make BOTH
        # text[:idx] and text[idx:] the whole text, duplicating the sample and
        # training on its prompt. Treat the whole text as the question instead:
        # the entry stays aligned with `ys` and has no solution tokens left.
        idx = len(text)
    question = tokenizer.encode(text[:idx],add_special_tokens=True)
    answer = tokenizer.encode(text[idx:],add_special_tokens=False)
    lengths.append(len(question))
    input_ids.append(question+answer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
# SFT
def gen_prompt_codeIn1(problem):
    """Wrap ``problem`` in the step-by-step prompt that ends with "Approach:"."""
    instructions = (
        "To accomplish this, first determine a python-based approach for solving the problem by listing each step to take and what functions need to be called in each step. Be clear so even an idiot can follow your instructions, and your final answer should be integer, not expression, list, tuple or dictionary!\n"
        "Write the entire script covering all the steps (use comments and document it well) and print the final result.\n"
        "Approach:"
    )
    # The problem statement is followed by one blank line, then the instructions.
    return "Problem: {}\n\n".format(problem) + instructions

def gen_prompt_codeIn2(problem):
    """Wrap ``problem`` in the "expert" prompt that asks for commented Python reasoning."""
    instructions = "You are an expert at solving math problem. Analyze this problem and think step by step to develop a python solution. Your solution should include reasoning steps in Python comments, explaining your thought process and the mathematical principles you applied. print the final output, as an integer not other python object such as list or tuple."
    # The problem statement is followed by one blank line, then the instructions.
    return "Problem: {}\n\n".format(problem) + instructions

def add_prompt(problem):
    """Wrap ``problem`` in one of the two prompt templates, chosen by a single uniform draw.

    A draw below 0.5 selects gen_prompt_codeIn1, otherwise gen_prompt_codeIn2.
    The draw comes from NumPy's global random state.
    """
    build_prompt = gen_prompt_codeIn1 if np.random.rand() < 0.5 else gen_prompt_codeIn2
    return build_prompt(problem)
    
# SFT examples: keep only the rows whose `boxed_number` equals `parsed`.
sft = pd.read_csv("../Data/MATH/math.csv")
# .copy() detaches the filtered rows from the frame that was read, so the
# column assignment below writes to an independent frame instead of a slice
# (avoids pandas' SettingWithCopyWarning / ambiguous write).
sft = sft.loc[sft.boxed_number == sft.parsed].copy()
sft['prob_wPrompt'] = sft.problem.apply(add_prompt)
# Tokenise prompt and code solution separately and record the prompt length,
# mirroring how the completed paths were tokenised.
for q,a in zip(sft.prob_wPrompt.tolist(),sft.code_solution.tolist()):
    question = tokenizer.encode(q,add_special_tokens=True)
    answer = tokenizer.encode(a,add_special_tokens=False)
    lengths.append(len(question))
    input_ids.append(question+answer)

# Every SFT example gets the constant weight SFT_weight, appended after the
# standardised weights of the completed paths (same order as input_ids).
ys = ys.tolist() + [SFT_weight] * sft.shape[0]

In [6]:
def from_gen(texts,ys,lengths,device='cuda',max_len=None):
    """Yield the training samples one at a time, in random order.

    Args:
        texts: sequence of token-id lists, one per sample.
        ys: per-sample loss weights, aligned with ``texts``.
        lengths: per-sample prompt lengths in tokens, aligned with ``texts``.
        device: device on which each token tensor is created. Defaults to
            'cuda', which is what was previously hard-coded.
        max_len: keep at most this many token ids per sample. None (default)
            falls back to the module-level MAX_LEN.

    Yields:
        (tokens, y, l) where ``tokens`` is a tensor with a leading batch
        dimension of size 1 holding the (truncated) token ids, and ``y`` / ``l``
        are the sample's weight and prompt length, unchanged.
    """
    limit = MAX_LEN if max_len is None else max_len
    data = list(zip(texts,ys,lengths))
    random.shuffle(data)
    for text,y,l in data:
        # [None] adds the batch dimension expected by the model.
        yield torch.tensor(text[:limit],device=device)[None],y,l

In [7]:
# Number of passes over the (re-shuffled) training samples.
epochs = 1
# The optimizer steps once every `accumulation_steps` samples; each sample is
# its own forward/backward pass, so gradients accumulate in between.
accumulation_steps = 64
# The running mean training loss is printed (and reset) every `verbose` samples.
verbose = 1024
# Learning rate passed to AdamW.
lr = 2e-5
# Value passed to clip_grad_value_ before every optimizer step
# (element-wise gradient clamp to [-clip, clip]).
clip = 2e-3

In [8]:
from transformers import AutoModelForCausalLM,BitsAndBytesConfig
import torch
from peft import (
    get_peft_model,
    PeftType,
    LoraConfig)

In [9]:
# 4-bit NF4 quantisation with double quantisation; compute dtype is bfloat16.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    load_in_4bit=True,
)

# Load the checkpoint at MODEL_PATH with the quantisation settings above.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)
model.gradient_checkpointing_enable()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Names of the projection modules that receive a LoRA adapter.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
peft_config = LoraConfig(
    r=8,               # low rank
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",       # 'none', 'all' or 'lora_only'
    target_modules=lora_target_modules,
    # use_dora=True,
)
# Wrap the loaded model with the adapters, then report how many parameters train.
model = get_peft_model(model, peft_config)
model.gradient_checkpointing_enable()
# model.config.pad_token_id = tokenizer.pad_token_id
model.print_trainable_parameters()

trainable params: 18,739,200 || all params: 6,929,104,896 || trainable%: 0.2704


In [11]:
# Optimise only the parameters that require gradients.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.AdamW(trainable_params, lr=lr)

In [12]:
def empty_cache():
    """Run two rounds of garbage collection followed by a CUDA cache release."""
    passes_left = 2
    while passes_left > 0:
        gc.collect()
        torch.cuda.empty_cache()
        passes_left -= 1

import math
import gc
# Cross entropy over the solution tokens of one sample (default mean reduction).
loss_fn = torch.nn.CrossEntropyLoss()
# Running sum / count of the losses since the last report.
train_loss = 0
count_loss = 0

for epoch in range(epochs):
    for i,(text,y,l) in enumerate(from_gen(input_ids,ys,lengths)):
        # Drop the previous sample's tensors before the next forward pass.
        # Rebinding to None (instead of `del outs,loss`) cannot raise NameError
        # when the previous sample was skipped before `loss` was ever assigned.
        outs = loss = None
        if i > 0:
            empty_cache()
        outs = model(text)[0] # 1,l,C
        if not torch.any(torch.isnan(outs)):
            # Logits at positions l..T-2 are scored against tokens l+1..T-1 and
            # the result is scaled by the sample weight y.
            # NOTE(review): token index l (the first solution token) is never a
            # target here — confirm this offset is intended.
            loss = loss_fn(outs[0,l:-1],text[0,l+1:]) * y # (l,C), (l,)
            loss_value = loss.item()
            # Samples with a non-finite loss contribute neither gradients nor statistics.
            if not (math.isinf(loss_value) or math.isnan(loss_value)):
                loss.backward()
                train_loss += loss_value
                count_loss += 1

        # These checks run for EVERY sample index, including skipped samples, so
        # a skip that lands on a boundary no longer cancels the optimizer step
        # (or the report) for the whole window.
        if (i + 1) % accumulation_steps == 0:
            clip_grad_value_(trainable_params,clip)
            optimizer.step()
            optimizer.zero_grad()

        if (i + 1) % verbose == 0:
            # max(..., 1) avoids a ZeroDivisionError when every sample in the window was skipped.
            print(f"iter: {i}, \n train loss: {train_loss/max(count_loss,1)}")
            train_loss = 0
            count_loss = 0

iter: 1023, 
 train loss: 0.22845937357486432
iter: 2047, 
 train loss: 0.22709013245003007
iter: 3071, 
 train loss: 0.16949657605709945
iter: 4095, 
 train loss: 0.15740643704248214
iter: 5119, 
 train loss: 0.15382543560500594
iter: 6143, 
 train loss: 0.1417113057940965
iter: 7167, 
 train loss: 0.13674124789564576
iter: 8191, 
 train loss: 0.12829471197623837
iter: 9215, 
 train loss: 0.1319313308356998
iter: 10239, 
 train loss: 0.13371955967696705
iter: 11263, 
 train loss: 0.130279319531094
iter: 12287, 
 train loss: 0.11205021923959604
iter: 13311, 
 train loss: 0.11093914888715517
iter: 14335, 
 train loss: 0.11810632804008492
iter: 15359, 
 train loss: 0.09979119158015237
iter: 16383, 
 train loss: 0.11319396197741245
iter: 17407, 
 train loss: 0.08393106021321728
iter: 18431, 
 train loss: 0.08963856200911761
iter: 19455, 
 train loss: 0.08332259961397534


In [15]:
# Save the PEFT-wrapped model under the path for the next version number.
next_version = f"{int(version) + 1}"
peft_model_id = f"../Model/PRM_LORA{next_version}_code_policy_01SFT"
model.save_pretrained(peft_model_id)

In [20]:
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Release the training-time objects before loading the model again.
del model,texts,outs
gc.collect()
torch.cuda.empty_cache()

# Reload the starting checkpoint, this time without a quantization_config.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    attn_implementation="flash_attention_2",
    torch_dtype="auto",
    device_map="auto",
)

# Attach the adapter saved for `next_version`, merge it into the reloaded
# model and save the merged result.
peft_model_id = f"../Model/PRM_LORA{next_version}_code_policy_01SFT"
base_model = PeftModel.from_pretrained(model, peft_model_id)
base_model2 = base_model.merge_and_unload()
base_model2.save_pretrained(f"../Model/PRM_LORA{next_version}_merged_code_policy_01SFT")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

mkdir: cannot create directory ‘f../Model/PRM_LORA5_merged_code_policy_01SFT’: No such file or directory


In [2]:
from transformers import AutoTokenizer

# Store the tokenizer files in the same directory as the merged model.
merged_model_dir = f"../Model/PRM_LORA{next_version}_merged_code_policy_01SFT"
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-math-7b-rl")
tokenizer.save_pretrained(merged_model_dir)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


('../Model/PRM_LORA5_merged_code_policy_01SFT/tokenizer_config.json',
 '../Model/PRM_LORA5_merged_code_policy_01SFT/special_tokens_map.json',
 '../Model/PRM_LORA5_merged_code_policy_01SFT/tokenizer.json')