In [1]:
import os
import gc
import torch
import tqdm as notebook_tqdm
import argparse
import pandas as pd

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig,AutoConfig
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer, SFTTrainer
import bitsandbytes as bnb
from trl import DataCollatorForCompletionOnlyLM

from datasets import load_dataset, Dataset
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from transformers import TrainerCallback, TrainerState, TrainerControl, Trainer
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR


os.environ["TOKENIZERS_PARALLELISM"] = "false"

#check gpu
if torch.cuda.is_available():
    device = 'cuda'
    print("GPU Available")
else:
    device = 'cpu'

  from .autonotebook import tqdm as notebook_tqdm
2024-05-18 00:56:53.377556: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPU Available


In [2]:
from tqdm import tqdm

In [3]:
output_dir = 'SAVED_MISTRAL_multitool/sft'

In [4]:
from vllm import LLM, SamplingParams
llm = LLM(model=output_dir)

2024-05-18 00:57:07,481	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 05-18 00:57:07 llm_engine.py:74] Initializing an LLM engine (v0.4.0.post1) with config: model='SAVED_MISTRAL_multitool/sft', tokenizer='SAVED_MISTRAL_multitool/sft', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, seed=0)
INFO 05-18 00:57:07 selector.py:40] Cannot use FlashAttention backend for Volta and Turing GPUs.
INFO 05-18 00:57:07 selector.py:25] Using XFormers backend.
INFO 05-18 00:57:38 model_runner.py:104] Loading model weights took 13.4966 GB
INFO 05-18 00:57:45 gpu_executor.py:94] # GPU blocks: 12279, # CPU blocks: 2048
INFO 05-18 00:57:48 model_runner.py:791] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' o

## GEMMA

In [36]:
test_gsm8k = 'test_data/test_gsm8k100.pkl'

In [37]:
# Define the generate_prompt function
def generate_prompt(data_point):
    # Generate prompt
    instruction = "Please generate the tool planner answer for the question using tool calls in square brackets\n"
    prefix_text = 'Generate the Tool Planner Answer for the given question.\n\n'
    text = f"""<start_of_turn>user\n{prefix_text}{instruction}here is the question\n{data_point["Q"]} <end_of_turn>\n<start_of_turn>model\n"""
    return text

train=pd.read_pickle(test_gsm8k)
train_dataset =  Dataset.from_pandas(train)

# Apply the generate_prompt function and add the "prompt" column to the dataset
text_column = [generate_prompt(data_point) for data_point in train_dataset]
train_dataset = train_dataset.add_column("test_prompt", text_column)

print("Train Dataset : ", train_dataset)


Train Dataset :  Dataset({
    features: ['Q', 'answer', 'A', 'test_prompt'],
    num_rows: 100
})


In [38]:
print(train_dataset['test_prompt'][0])

<start_of_turn>user
Generate the Tool Planner Answer for the given question.

Please generate the tool planner answer for the question using tool calls in square brackets
here is the question
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? <end_of_turn>
<start_of_turn>model



In [39]:
print(train_dataset['A'][0])

72


In [51]:
#try1
sampling_params = SamplingParams(min_tokens=100,max_tokens =1000,top_k=50, top_p=0.95,temperature=0.7,stop="<end_of_turn>", repetition_penalty=1.5)
test_instance=train_dataset['test_prompt'][0]#+"\n<response>Correct Option"
outputs = llm.generate(test_instance, sampling_params)

Processed prompts: 100%|██████████| 1/1 [00:12<00:00, 12.53s/it]


In [52]:
for output in outputs:
    print(output.outputs[0].text)

In April, Natalie sold 24 clips to her friends. In May, she sold half as many clips as she sold in April, which was 12 clips. So, Natalie sold a total of [24 + 12 = y16 clips]]. answer is y16 . The answer is y16. . . . . . 48+12=y16. answer is y 16. . . 48+24+12=y16. answer is y 36. . . 48+24+12=y16. answer is y 36. The answer is y 36. . . 48+12=y16 answer is y 36. . 48+24+12=y16 answer is y 96. answer is y 96. . 48+24+12=y16 answer is y 96. answer is y 96. .  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96. answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96. answer is y 96.  answer is y 96.  answer is y 96.  answer is y 76. answer is y 96.  answer is y 96.  answer is y 56. answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 096. answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 96.  answer is y 

In [30]:
# Define the generate_prompt function
def generate_prompt(data_point):
    # Generate prompt
    instruction = "Please generate the tool planner answer for the question using tool calls in square brackets\n"
    prefix_text = 'Generate the Tool Planner Answer for the given question.\n\n'
    text = f"""<start_of_turn>user\n{prefix_text}{instruction}here is the question\n{data_point["Q"]} <end_of_turn>\n<start_of_turn>model\n{data_point["C"]} <end_of_turn>"""
    return text

train=pd.read_pickle("all_data_finals/tool_train.pkl")
eval=pd.read_pickle("all_data_finals/tool_val.pkl")

train_dataset =  Dataset.from_pandas(train)
eval_dataset = Dataset.from_pandas(eval)

# Apply the generate_prompt function and add the "prompt" column to the dataset
text_column = [generate_prompt(data_point) for data_point in train_dataset]
train_dataset = train_dataset.add_column("prompt", text_column)

In [33]:
print(train_dataset[0]['prompt'])

<start_of_turn>user
Generate the Tool Planner Answer for the given question.

Please generate the tool planner answer for the question using tool calls in square brackets
here is the question
What sport did Jack Sock and Raffaella Reggi play? <end_of_turn>
<start_of_turn>model
First find [Jack Sock -Wiki-> y1]. Then determine [y1 -QA(What sport did Jack Sock play?)-> y2]. Then find [Raffaella Reggi -Wiki-> y3]. Then determine [y3 -QA(What sport did Raffaella Reggi play?)-> y4]. The answer is y2. <end_of_turn>


## MISTRAL

In [5]:
test_gsm8k = 'test_data/test_commonsense.pkl'

In [7]:
from langchain.prompts import PromptTemplate

prompt_template_common = PromptTemplate.from_template("""### INSTRUCTION
Your task is to generate a chain of abstractions (C) for the given question (Q) using the available tools: Wiki, QA, and Mathematical. You can use a single tool or a combination of tools to derive the answer (C). Follow the rules and formats provided for each tool:                               
**Tools:** 
1. **Wiki Tool:** Retrieves relevant articles from Wikipedia. * **Format:** `[search query -Wiki-> search query output]` 
2. **QA Tool:** Extracts focused answers from Wikipedia articles. * **Format:** `[input context -QA(question)-> output]` 
3. **Mathematical Tool:** Solves mathematical computations based on information returned from the QA tool. * **Format:** `[polynomial expression]` (e.g., `[y1 + 20 = y2]`)

See examples below on how to decide which tools to use and their usage to generate C.
### EXAMPLES
 
Example 1 : Only Math tool used                                            
Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees will the grove workers plant today?
C: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been [21 - 15 = y1]. The answer is y1.

Example 2 : Wiki tool and QA tool used                                            
Q: Fritz von Brodowski was killed during what global war that lasted from 1939 to 1945?
C: Find the [war in which Fritz von Brodowski was killed -Wiki-> y1]. Fritz von Brodowski was killed in [y1 -QA(Fritz von Brodowski was killed in which war?)-> y2]. The answer is y2.

Example 3 : Wiki tool ,QA tool and Math tool used                                           
Q: What would be the length of the track where the 2013 Liqui Moly Bathurst 12 Hour was staged, if it would have been 4km longer?
C: First search for [Mount Panorama Circuit -Wiki-> y1]. Length of circuit is [y1 -QA(what is the length of Mount Panorama Circuit ?)-> y2]. Length after adding 4km will be [4 + y2 = y3]. The answer is y3.

Now Generate C for the following Q. Respond in following format
C: <chain of abstractions for Q>
                                         
### QUESTION
Q: {prompt_q}
                                                                                               
### RESPONSE
""")

prompt_template_multi = PromptTemplate.from_template("""### INSTRUCTION
Your task is to generate a chain of abstractions (C) for the given question (Q) using the available tools: Wiki, QA, and Mathematical. You can use a single tool or a combination of tools to derive the answer (C). Follow the rules and formats provided for each tool:                               
**Tools:** 
1. **Wiki Tool:** Retrieves relevant articles from Wikipedia. * **Format:** `[search query -Wiki-> search query output]` 
2. **QA Tool:** Extracts focused answers from Wikipedia articles. * **Format:** `[input context -QA(question)-> output]` 
3. **Mathematical Tool:** Solves mathematical computations based on information returned from the QA tool. * **Format:** `[polynomial expression]` (e.g., `[y1 + 20 = y2]`)

See examples below on how to decide which tools to use and their usage to generate C.
### EXAMPLES
 
Example 1 :                                            
Q: An garage has 14 bike wheels. How many bikes he can assemble using 14 wheels?
C: First search [number of wheels a bike has -Wiki-> y1].The number of wheels on a single bike are [y1 -QA(How many wheels does a bike have?)-> y2].So, [14/y2 = y3] bikes will be assembled. The answer is y3.

Example 2 :                                            
Q: Leonardo wants to buy a chocolate.The cost of chocolate is 5 dollars and Leonardo has only 4 dollars in his pocket.He borrowed 59 cents from his friend. How many more cents Leonardo needs now to purchase a chocolate?
C: First search [cents in a dollar -Wiki-> y1]. The number of cents in a dollar are[y1 -QA(How many cents in a dollar?)-> y2]. So, choclate costs [5*y2 = y3] cents. He has[4*y2 +59 = y4] cents. Leonardo needs [y3-y4 = y5] cents. The answer is y5.

                                               
Example 3 :                                      
Q: What would be the length of the track where the 2013 Liqui Moly Bathurst 12 Hour was staged, if it would have been 4km longer?
C: First search for [Mount Panorama Circuit -Wiki-> y1]. Length of circuit is [y1 -QA(what is the length of Mount Panorama Circuit ?)-> y2]. Length after adding 4km will be [4 + y2 = y3]. The answer is y3.

Now Generate C for the following Q. Respond in following format
C: <chain of abstractions for Q>
                                         
### QUESTION
Q: {prompt_q}
                                                                                               
### RESPONSE
""")
def create_training_prompt(sample):
    prompt_q = sample['Q']
    prompt = prompt_template_multi.format(prompt_q=prompt_q)
    sample['test_prompt'] = prompt
    return sample


train=pd.read_pickle(test_gsm8k)
train['A'] = train['A'].astype(str)  # Assuming 'A' is the problematic column

train_dataset =  Dataset.from_pandas(train).map(create_training_prompt)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map: 100%|██████████| 200/200 [00:00<00:00, 6768.77 examples/s]


In [8]:
print(train_dataset['test_prompt'][0])

### INSTRUCTION
Your task is to generate a chain of abstractions (C) for the given question (Q) using the available tools: Wiki, QA, and Mathematical. You can use a single tool or a combination of tools to derive the answer (C). Follow the rules and formats provided for each tool:                               
**Tools:** 
1. **Wiki Tool:** Retrieves relevant articles from Wikipedia. * **Format:** `[search query -Wiki-> search query output]` 
2. **QA Tool:** Extracts focused answers from Wikipedia articles. * **Format:** `[input context -QA(question)-> output]` 
3. **Mathematical Tool:** Solves mathematical computations based on information returned from the QA tool. * **Format:** `[polynomial expression]` (e.g., `[y1 + 20 = y2]`)

See examples below on how to decide which tools to use and their usage to generate C.
### EXAMPLES
 
Example 1 :                                            
Q: An garage has 14 bike wheels. How many bikes he can assemble using 14 wheels?
C: First search [num

In [9]:
print(train_dataset['A'][0])

7


In [10]:
#try1
sampling_params = SamplingParams(min_tokens=50,max_tokens =400,top_k=30, top_p=0.95,temperature=0.3, stop="\n")
test_instance=train_dataset['test_prompt'][0]#+"\n<response>Correct Option"
outputs = llm.generate(test_instance, sampling_params)

Processed prompts: 100%|██████████| 1/1 [00:04<00:00,  4.52s/it]


In [11]:
for output in outputs:
    print(output.outputs[0].text)

C: First search [total number of legs in a horse -Wiki-> y1]. A horse has [y1 -QA(How many legs does a horse have?) -> y2] legs. A duck has [y3 -Wiki-> y4] legs. The total number of legs in the farm is 30. The number of legs in 11 animals is 11*y2. So, the total number of legs in ducks and horses is 30 = 11*y2 + 11*y4. Solving the equation for y4, y4 = [(30 - 11*y2)/11 -QA-> y5]. The answer is y5.


In [13]:
sampling_params = SamplingParams(min_tokens=50,max_tokens =400,top_k=30, top_p=0.95,temperature=0.2, stop="\n")

eval_dataset=train_dataset
df=pd.DataFrame(eval_dataset)

df['sft_response']='NA'

print(df.columns)
c=0
for i in tqdm(range(len(eval_dataset))):
    test_instance=eval_dataset['test_prompt'][i]
    outputs = llm.generate(test_instance, sampling_params)
    for output in outputs:
        generated_text = output.outputs[0].text
        df.loc[i,'sft_response']=generated_text
    
    if(i%20==0):
        df.to_pickle('mistral_multi_common_test.pkl')
        torch.cuda.empty_cache()


df.to_pickle('mistral_multi_common_test.pkl')

Index(['Q', 'A', 'C', 'test_prompt', 'sft_response'], dtype='object')


  0%|          | 0/200 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s]

[A[A
[A
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.92s/it]
  0%|          | 1/200 [00:03<12:50,  3.87s/it]
[A
Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.17s/it]
  1%|          | 2/200 [00:06<09:29,  2.88s/it]
[A
Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.65s/it]
  2%|▏         | 3/200 [00:07<07:37,  2.32s/it]
[A
Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.76s/it]
  2%|▏         | 4/200 [00:11<09:27,  2.90s/it]
[A

In [25]:
df.head()

Unnamed: 0,Q,answer,A,test_prompt,sft_response
0,Natalia sold clips to 48 of her friends in Apr...,Natalia sold 48/2 = 24 clips in May. Natalia s...,72,### INSTRUCTION\nYour task is to generate a ch...,C: Natalia sold [48 * 2 = y1] clips in April. ...
1,Weng earns $12 an hour for babysitting. Yester...,Weng earns 12/60 = $0.2 per minute. Working 50...,10,### INSTRUCTION\nYour task is to generate a ch...,C: [12 * 50 / 60 = y1]. The answer is y1.\n \n...
2,Betty is saving money for a new wallet which c...,"In the beginning, Betty has only 100 / 2 = $50...",5,### INSTRUCTION\nYour task is to generate a ch...,C: Betty needs [100 - 50 = y1] more money to b...
3,"Julie is reading a 120-page book. Yesterday, s...",Maila read 12 x 2 = 24 pages today. So she was...,42,### INSTRUCTION\nYour task is to generate a ch...,C: The answer is [120 - 12 = y1]. She should r...
4,James writes a 3-page letter to 2 different fr...,He writes each friend 3*2=6 pages a week So he...,624,### INSTRUCTION\nYour task is to generate a ch...,C: First search [number of pages in a week -Wi...
