### Import Packages

In [1]:
import os
import torch
import pandas as pd
from vllm import LLM, SamplingParams
from transformers import  AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from peft import LoraConfig, PeftModel

# Restrict this process to physical GPU 2; inside the process it is then addressed as device 0.
# NOTE(review): this runs after `import torch` / `import vllm` above, so it only takes effect
# if neither import has already initialised CUDA — confirm, or set it before those imports.
os.environ.update(CUDA_VISIBLE_DEVICES="2")

### Merge Base Model and Adaptor into New Model (Then Restart the kernel)

In [2]:
# Base checkpoint downloaded from Hugging Face
# (e.g. Meta-Llama-3-8B-Instruct, Mistral-7B-Instruct-v0.2, llama-2-13b-chat-hf ...).
# This is a machine-specific absolute path; adjust it for your environment.
base_model_name = "/home/zhangwei/pretrained_models/Meta-Llama-3-8B-Instruct"

# Directory of the fine-tuned LoRA adapter checkpoint to merge into the base model.
adaptor_name = "saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200"

# Output directory of the merged model. The adapter path (slashes included) is appended
# verbatim, so the result is a nested directory under "saved_models/merged_models-saved_models/".
new_model_name = "saved_models/merged_models-" + adaptor_name

# Passed to `from_pretrained` as `device_map`; the empty-string key assigns the whole model
# to device index 0 of the visible GPUs.
device_map = {"": 0}

In [4]:
# Reload the base model in FP16 and fold the LoRA adapter weights into it, then write the
# merged weights and the tokenizer to `new_model_name` so the result can be loaded as a
# plain (adapter-free) checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Attach the adapter, then merge its weights into the base layers and drop the PEFT wrappers.
model = PeftModel.from_pretrained(base_model, adaptor_name)
model = model.merge_and_unload()

# safe_serialization=False is very important here: it makes the *model weights* be saved
# without safetensors, which the original author found necessary for vLLM to load them.
# See https://github.com/vllm-project/vllm/issues/615
model.save_pretrained(new_model_name, safe_serialization=False)

# Save the base model's tokenizer next to the merged weights, with EOS reused as the pad
# token and right-side padding.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# `safe_serialization` only concerns model weights; the tokenizer's save_pretrained does not
# use it, so it is intentionally not passed here.
tokenizer.save_pretrained(new_model_name)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


('saved_models/merged_models-saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200/tokenizer_config.json',
 'saved_models/merged_models-saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200/special_tokens_map.json',
 'saved_models/merged_models-saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200/tokenizer.json')

### Load Merged Model

In [3]:
# Decoding settings shared by every request: temperature 0 with top_p 1, at most 3072
# generated tokens per prompt, and generation halts when the string '!!!' is produced.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1,
    max_tokens=3072,
    stop=['!!!'],
)

# Build the vLLM engine from the merged checkpoint directory.
llm = LLM(model=new_model_name)

INFO 04-22 08:42:24 llm_engine.py:79] Initializing an LLM engine with config: model='saved_models/merged_models-saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200', tokenizer='saved_models/merged_models-saved_models/Meta-Llama-3-8B-Instruct_qlora/train_599_lora_r64_lr1e-05/checkpoint-1200', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, seed=0)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 04-22 08:42:53 llm_engine.py:337] # GPU blocks: 9565, # CPU blocks: 2048
INFO 04-22 08:43:07 model_runner.py:666] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 04-22 08:43:07 model_runner.py:670] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 04-22 08:43:50 model_runner.py:738] Graph capturing finished in 43 secs.


### Preprocess Data

In [4]:
# Load the test split and wrap every paragraph in the instruction prompt.
test_df = pd.read_csv("data/role/test.csv")
source_text = "input"    # column with the paragraph fed to the model
target_text = "output"   # column with the reference annotation
instruction = "annotate the reaction roles in the paragraph. "

# NOTE(review): this is an "[INST] ... [/INST]" wrapper with a literal "<s>" prefix;
# presumably it mirrors the prompt format used during fine-tuning — confirm before changing it.
test_df['text'] = f'<s>[INST] {instruction}' + test_df[source_text] + " [/INST]"
prompts = test_df['text'].tolist()

# Preview the first few prompts.
prompts[:5]

['<s>[INST] annotate the reaction roles in the paragraph. Reaction of diphenylacetylene with complex 19A led to only <Prod*cycloheptadienone*Prod> 23A in 30 % yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone 25 was produced in 53 % yield. [/INST]',
 '<s>[INST] annotate the reaction roles in the paragraph. Reaction of diphenylacetylene with complex 19A led to only cycloheptadienone <Prod*23A*Prod> in 30 % yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone 25 was produced in 53 % yield. [/INST]',
 '<s>[INST] annotate the reaction roles in the paragraph. Reaction of diphenylacetylene with complex 19A led to only cycloheptadienone 23A in 30 % yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone <Prod*25*Prod> was produced in 53 % yield. [/INST]',
 '<s>[INST] annotate the reaction roles in the paragraph. We were excited to ﬁnd that , with 2.0 equiv of copper acetate and DMSO as the solvent , <Prod*2-(3-(methylthio)naphthal

### Inference

In [5]:
# Run generation over all prompts in one call. `llm.generate` returns one RequestOutput per
# prompt, each carrying the prompt, the generated text and other metadata.
outputs = llm.generate(prompts, sampling_params)

# Echo every prompt/completion pair and collect the whitespace-stripped completions.
predictions = []
for request_output in outputs:
    completion = request_output.outputs[0].text  # first completion of this request
    print(f"Prompt: {request_output.prompt},\nGenerated text: {completion!r}")
    predictions.append(completion.strip())

Processed prompts: 100%|██████████| 111/111 [00:10<00:00, 10.23it/s]

Prompt: <s>[INST] annotate the reaction roles in the paragraph. Reaction of diphenylacetylene with complex 19A led to only <Prod*cycloheptadienone*Prod> 23A in 30 % yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone 25 was produced in 53 % yield. [/INST],
Generated text: ' Reaction of <Reactants*diphenylacetylene*Reactants> with complex <Reactants*19A*Reactants> led to only <Prod*cycloheptadienone*Prod> 23A in <Yield*30 %*Yield> yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone 25 was produced in 53 % yield.'
Prompt: <s>[INST] annotate the reaction roles in the paragraph. Reaction of diphenylacetylene with complex 19A led to only cycloheptadienone <Prod*23A*Prod> in 30 % yield ; with (phenylcyclopropy1)- carbene complex 19B , cycloheptadienone 25 was produced in 53 % yield. [/INST],
Generated text: ' Reaction of <Reactants*diphenylacetylene*Reactants> with complex <Reactants*19A*Reactants> led to only cycloheptadienone <Prod*23A*Prod> in <




### Save the Predictions

In [6]:
# Collect predictions next to the references and write them to a CSV named after the model.

# Fail fast if `predictions` comes from a different run than `test_df` (stale kernel state);
# otherwise the columns would be silently misaligned or padded with NaN.
if len(predictions) != len(test_df):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(test_df)} test rows; "
        "re-run the preprocessing and inference cells before saving."
    )

pred_df = pd.DataFrame({
    'Generated Text': predictions,
    'Actual Text': test_df[target_text],
    'Paragraph': test_df[source_text],
    'BIO Label': test_df['bio_label'],
})

# Make sure the output directory exists; `to_csv` does not create missing directories.
output_dir = "results/predictions"
os.makedirs(output_dir, exist_ok=True)

# Slashes in the model path are replaced so the whole name becomes a single file name.
output_path = f"{output_dir}/prediction_of_{new_model_name.replace('/', '-')}.csv"
pred_df.to_csv(output_path, index=False)
pred_df

Unnamed: 0,Generated Text,Actual Text,Paragraph,BIO Label
0,Reaction of <Reactants*diphenylacetylene*React...,Reaction of <Reactants*diphenylacetylene*React...,Reaction of diphenylacetylene with complex 19A...,"['O', 'O', 'B-Reactants', 'O', 'O', 'B-Reactan..."
1,Reaction of <Reactants*diphenylacetylene*React...,Reaction of <Reactants*diphenylacetylene*React...,Reaction of diphenylacetylene with complex 19A...,"['O', 'O', 'B-Reactants', 'O', 'O', 'B-Reactan..."
2,Reaction of <Reactants*diphenylacetylene*React...,Reaction of <Reactants*diphenylacetylene*React...,Reaction of diphenylacetylene with complex 19A...,"['O', 'O', 'B-Reactants', 'O', 'O', 'O', 'O', ..."
3,"We were excited to ﬁnd that , with <Reactants*...","We were excited to ﬁnd that , with 2.0 equiv o...","We were excited to ﬁnd that , with 2.0 equiv o...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
4,"We were excited to ﬁnd that , with <Reactants*...","We were excited to ﬁnd that , with 2.0 equiv o...","We were excited to ﬁnd that , with 2.0 equiv o...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
...,...,...,...,...
106,The metal - mediated conversion of terminal al...,The metal - mediated conversion of terminal al...,The metal - mediated conversion of terminal al...,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
107,The metal - mediated conversion of terminal al...,The metal - mediated conversion of terminal al...,The metal - mediated conversion of terminal al...,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
108,"For example , enantioselective deproto- nation...","For example , <Reaction*enantioselective depro...","For example , enantioselective deproto- nation...","['O', 'O', 'O', 'B-Reaction', 'I-Reaction', 'I..."
109,"For example , enantioselective deproto- nation...","For example , <Reaction*enantioselective depro...","For example , enantioselective deproto- nation...","['O', 'O', 'O', 'B-Reaction', 'I-Reaction', 'I..."


: 