### Import Packages

In [1]:
import os
import torch
import pandas as pd
from vllm import LLM, SamplingParams
from transformers import  AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from peft import LoraConfig, PeftModel

# Restrict this process to GPU index 0.
# NOTE(review): this presumably has to run before any CUDA context is created
# for it to take effect — confirm nothing above initializes CUDA on import.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

### Load Fine-Tuned Model

In [None]:
# Checkpoint directory of the fine-tuned model; the same string is reused
# later to name the predictions CSV.
new_model_name = "saved_models/Mistral-7B-Instruct-v0.2/train_6163_lr5e-06_bs2/checkpoint-3082"

# Decoding settings: temperature 0 and top_p 1, up to 3072 generated tokens
# per prompt, with '!!!' as the stop string.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1,
    max_tokens=3072,
    stop=['!!!'],
)

# Build the vLLM engine from the checkpoint with tensor_parallel_size=1.
llm = LLM(
    model=new_model_name,
    tensor_parallel_size=1,
)

### Preprocess Data

In [None]:
# Data Loading and Preprocessing
test_df = pd.read_csv("data/prod/test.csv")
source_text = "input"
target_text = "output"
instruction = f"{source_text}2{target_text}: "
instruction = "annotate the products in the paragraph. "

test_df['text'] = f'<s>[INST] {instruction}' + test_df[source_text] + " [/INST]"
prompts = list(test_df['text'])
prompts[:5]

['<s>[INST] annotate the products in the paragraph. The additional ring which arises from an intramolecular HDA reaction may be useful for the synthesis of triquinanes or other polycyclic compounds. [/INST]',
 '<s>[INST] annotate the products in the paragraph. The decrease in entropy associated with tethering the two reactive components suggests that the reaction would be significantly more facile than the intermolecular reaction.25 However , this potential rate enhancement is com- promised by the dramatic decrease in rate associated with intermolecular cycloadditions with substituted norbomadienes as described previously. [/INST]',
 '<s>[INST] annotate the products in the paragraph. There were no reported examples of successful intramolecular HDA reactions in the literature prior to 1992.5d,26 In an intramolecular reaction , there are two possible modes of [ 2n + 227 + 2271 cycloaddition which have to be considered ( Scheme 3 ). [/INST]',
 '<s>[INST] annotate the products in the parag

### Inference

In [None]:
# Run generation for all prompts in one call.
# `outputs` is a list of RequestOutput objects carrying the prompt, the
# generated text, and other information.
outputs = llm.generate(prompts, sampling_params)

# Keep the first completion of every request, with surrounding whitespace removed.
predictions = [result.outputs[0].text.strip() for result in outputs]

# Echo every prompt with its raw (unstripped) completion for inspection.
for output in outputs:
    print(f"Prompt: {output.prompt},\nGenerated text: {output.outputs[0].text!r}")

### Save the Predictions

In [None]:
# Collect the predictions side by side with the reference columns of the test set.
pred_df = pd.DataFrame()
pred_df['Generated Text'] = predictions
pred_df['Actual Text'] = test_df[target_text]
pred_df['Paragraph'] = test_df[source_text]
pred_df['BIO Label'] = test_df['bio_label']

# The file name embeds the checkpoint path, with '/' replaced so it stays a
# single path component.
output_path = f"results/predictions/prediction_of_{new_model_name.replace('/', '-')}.csv"

# Create the output directory first so a missing folder cannot make the write
# fail after generation has already finished.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# index=False: the row index carries no information and is left out of the CSV.
pred_df.to_csv(output_path, index=False)
pred_df