In [None]:
# %%capture
# Install the released Unsloth package first. This cell takes a while to run.
!pip install unsloth
# Then replace that release with the latest nightly build from the GitHub repository.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from unsloth import FastLanguageModel
import torch
# Settings passed to FastLanguageModel.from_pretrained in the model-loading cell.
max_seq_length = 2048 # Choose any
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Mount Google Drive at /content/drive so the saved model can be read and the
# predictions CSV written.
# NOTE(review): the model/output paths defined later are relative
# ("drive/MyDrive/..."), so they presumably rely on the working directory
# being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Path of the saved model that the model-loading cell passes to
# FastLanguageModel.from_pretrained.
model_path = "drive/MyDrive/Colab Notebooks/lora_model_2"

# Path of the CSV file that receives the predictions (columns: ID, is_correct).
output_path = 'drive/MyDrive/Colab Notebooks/test2_highr.csv'

# Prompt template with three positional `{}` slots, filled in order with the
# question, the candidate answer, and the output. At inference time the output
# slot is filled with an empty string so the model generates it.
# NOTE(review): the misspellings inside the template ("Yout", "Explainaition")
# are part of the runtime string. Presumably they match the prompt the model
# was fine-tuned with — confirm before correcting them, since changing the
# text changes what the model sees.
prompt = """You are a great mathematician and you are tasked with finding if an answer to a given maths question is correct or not. Yout response should be 'True' if correct, otherwise 'False'. Below is Question and Answer.

### Question:
{}

### Answer:
{}

### Explainaition

### Output:
{}"""



In [20]:
# Load the competition dataset; only its 'test' split is used below.

from datasets import load_dataset
dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp")

def formatting_prompts_func(examples):
    """Render the inference prompt for every example in a batch.

    Args:
        examples: A batch mapping with parallel "question" and "answer"
            sequences.

    Returns:
        A dict with a single "text" key holding one rendered prompt per
        question/answer pair, in the same order as the inputs.
    """
    pairs = zip(examples["question"], examples["answer"])
    # The output slot is left empty and no EOS token is appended: these
    # prompts are used for generation, so the model supplies what follows.
    rendered = [prompt.format(problem, candidate, "") for problem, candidate in pairs]
    return {"text": rendered}

# Apply the prompt formatter to the test split in batches; the rendered
# prompts are read back later from the "text" column.
test_dataset = dataset['test'].map(formatting_prompts_func, batched = True)

In [None]:
# Load the saved model and its tokenizer from `model_path`, using the
# sequence-length, dtype and 4-bit settings chosen earlier in the notebook.

from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)


# create pipeline for inference

from transformers import TextGenerationPipeline
class MyPipeline(TextGenerationPipeline):
    """Text-generation pipeline whose result is only the newly generated text.

    `postprocess` is overridden so that each pipeline result is a plain string
    containing the decoded continuation, with the prompt tokens removed.
    """

    def postprocess(pipeline, dict):
        """Decode the tokens generated after the prompt.

        Args:
            pipeline: The pipeline instance (this is the method's `self`; the
                original parameter name is retained).
            dict: The model output mapping for one example. It must provide
                'generated_sequence' and 'input_ids'.

        Returns:
            The decoded continuation as a string, with special tokens skipped.
        """
        generated = dict['generated_sequence']
        # The generated sequence starts with the prompt tokens, so everything
        # from the prompt length onwards is new output.
        prompt_length = len(dict['input_ids'][0])
        new_tokens = generated[0][0][prompt_length:]
        # Decode with the tokenizer owned by this pipeline instead of a
        # notebook-level global, so the class does not depend on hidden state
        # and always matches the tokenizer the pipeline was built with.
        return pipeline.tokenizer.decode(new_tokens, skip_special_tokens=True)

# Build the inference pipeline around the loaded model and tokenizer.
# NOTE(review): eos_token_id and max_new_tokens are presumably forwarded to
# generation as the stop token and the per-prompt output cap — confirm against
# the transformers pipeline documentation.
pipe = MyPipeline(
    task="text-generation",
    model = model,
    tokenizer = tokenizer,
    batch_size = 4,
    eos_token_id = model.config.eos_token_id,
    max_new_tokens = 64
)

In [None]:
# inference

# Run the pipeline over the whole test set in chunks and stream the
# predictions to `output_path` as a two-column CSV (ID, is_correct).
#
# Writing after every chunk keeps partial results on disk if a long run is
# interrupted. The csv module is used so that generated text containing a
# comma, quote or newline is quoted instead of corrupting the row layout.
import csv

chunk_size = 1000  # prompts per pipeline call; set to a small number to test run
texts = test_dataset['text']  # read the column once instead of once per chunk
total = len(texts)  # set to a small number to test run

# newline='' hands line-ending control to the csv writer.
with open(output_path, 'w', newline='', encoding='utf-8') as f:
    csv.writer(f, lineterminator='\n').writerow(['ID', 'is_correct'])

row_id = 0  # running row number, used as the ID column
for start in range(0, total, chunk_size):
    stop = min(start + chunk_size, total)
    print(f'Running on data {start} to {stop}...')
    res = pipe(texts[start:stop])
    # Append to the same file the header was written to. (Previously the
    # literal file name 'output_path' was opened here, so the real CSV only
    # ever contained the header.)
    with open(output_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        for prediction in res:
            writer.writerow([row_id, prediction])
            row_id += 1
    print(f'Data {start} to {stop} written to file.')

In [None]:
import pandas as pd

# Load the predictions written by the inference cell.
#
# `is_correct` is read as text on purpose: when every value in the column is
# True/False, read_csv infers a boolean column by itself, and the string-keyed
# mapping below would then match nothing and turn every row into NaN.
df = pd.read_csv(output_path, dtype={'ID': int, 'is_correct': str})

# Convert the generated text to booleans. Surrounding whitespace is stripped
# first; any output that is not exactly 'True' or 'False' becomes NaN, so
# malformed generations stay visible instead of being silently coerced.
df['is_correct'] = df['is_correct'].str.strip().map({'False': False, 'True': True})

In [None]:
# Sanity check: display the Python type of the first `is_correct` value to
# confirm the conversion above produced booleans.
type(df['is_correct'][0])