### Import Packages

In [1]:
import os
import torch
import pandas as pd
from vllm import LLM, SamplingParams
from transformers import  AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from peft import LoraConfig, PeftModel

# Environment setup: expose only GPU 0 to CUDA for this kernel.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

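### Merge Base Model and Adapter into a New Model (Then Restart the Kernel)

After the merged model has been saved, restart the kernel, re-run the import cell and the path-configuration cell below (skip the merge cell itself), and continue from "Load Merged Model".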

In [2]:
# Path of the pretrained base model downloaded from Hugging Face.
base_model_name = "../../pretrained_models/llama-2-13b-chat-hf"

# Checkpoint directory of the fine-tuned LoRA adapter.
adaptor_name = "saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710"

# Output directory of the merged model; the full adapter path is appended
# verbatim, so the result is a nested directory under saved_models/.
new_model_name = "saved_models/merged_models-" + adaptor_name

# Single entry keyed by the empty string with value 0, passed as `device_map`
# when loading the base model.
device_map = dict([("", 0)])

In [4]:
# Reload the base model in FP16 so the LoRA weights can be merged into it.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Attach the fine-tuned adapter, then fold its weights into the base model so
# the result is a plain model without PEFT wrappers.
model = PeftModel.from_pretrained(base_model, adaptor_name)
model = model.merge_and_unload()

# safe_serialization=False is very important for loading the merged weights
# with vLLM afterwards; see https://github.com/vllm-project/vllm/issues/615
model.save_pretrained(new_model_name, safe_serialization=False)

# Save the base model's tokenizer next to the merged weights so the output
# directory is self-contained. `safe_serialization` only concerns model
# weights, so it is not passed to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.save_pretrained(new_model_name)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Non-default generation parameters: {'max_length': 4096}


('saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710/tokenizer_config.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710/special_tokens_map.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710/tokenizer.model',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710/added_tokens.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710/tokenizer.json')

### Load Merged Model

In [3]:
# Decoding settings: temperature 0, no nucleus truncation (top_p = 1),
# at most 3072 generated tokens, and generation stops at the '!!!' marker.
decoding_options = {
    "temperature": 0,
    "top_p": 1,
    "max_tokens": 3072,
    "stop": ['!!!'],
}
sampling_params = SamplingParams(**decoding_options)

# Build the vLLM engine from the merged model directory.
llm = LLM(model=new_model_name)

INFO 04-19 12:22:40 llm_engine.py:87] Initializing an LLM engine with config: model='saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710', tokenizer='saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1060_lora_r64_lr1e-05/checkpoint-3710', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, seed=0)
INFO 04-19 12:23:01 llm_engine.py:357] # GPU blocks: 862, # CPU blocks: 327
INFO 04-19 12:23:02 model_runner.py:684] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 04-19 12:23:02 model_runner.py:688] CUDA graphs can take a

### Preprocess Data

In [3]:
# Column names of the input paragraphs and the reference action sequences.
source_text, target_text = "paragraphs", "actions"

# Task tag placed in front of every paragraph, e.g. "paragraphs2actions: ".
instruction = f'{source_text}2{target_text}: '

test_df = pd.read_csv("data/processed_data/hand_annotated_test.csv", encoding='utf-8')

# Wrap each paragraph in the [INST] ... [/INST] chat template.
prompt_prefix = f'<s>[INST] {instruction}'
prompt_suffix = " [/INST]"
test_df['text'] = prompt_prefix + test_df[source_text] + prompt_suffix

prompts = test_df['text'].tolist()
prompts[:5]

['<s>[INST] paragraphs2actions: Both the organic and aqueous layers were separated. [/INST]',
 '<s>[INST] paragraphs2actions: After then being left to stand at RT for three days, the mixture was diluted with ethyl acetate and washed with water. [/INST]',
 '<s>[INST] paragraphs2actions: 3-Bromo-2-fluoroaniline (10 g, 52.63 mmol) was dissolved in DCM (100 mL) under nitrogen atmosphere. [/INST]',
 '<s>[INST] paragraphs2actions: The mixture was allowed to cool to rt and then concentrated in vacuo. [/INST]',
 '<s>[INST] paragraphs2actions: The crude material was purified by silica gel chromatography eluting with 20-100% DCM/MeOH/DEA (950:45:5) in DCM to give benzyl 3-(formamido(7-((2-(trimethylsilyl)ethoxy)methyl)-7H-pyrrolo[2,3-d]pyrimidin-4-yl)methyl)piperidine-1-carboxylate (˜1:1 mixture of diastereomers) as a foam (2.2 g, 69%): LC/MS (Table 1, Method a) Rt=2.64 and 2.71 min; MS m/z: 524 (M+H)+. [/INST]']

### Inference

In [20]:
# Run batched generation over all prompts. Each returned RequestOutput carries
# the prompt it was generated from and its completions.
outputs = llm.generate(prompts, sampling_params)

# Echo every prompt/completion pair and keep the whitespace-stripped text of
# the first completion as the prediction.
predictions = []
for request_output in outputs:
    completion = request_output.outputs[0].text
    print(f"Prompt: {request_output.prompt},\nGenerated text: {completion!r}")
    predictions.append(completion.strip())

Processed prompts:   0%|          | 0/352 [00:00<?, ?it/s]

Processed prompts: 100%|██████████| 352/352 [01:10<00:00,  5.01it/s]

Prompt: <s>[INST] <s>[INST] paragraphs2actions: Both the organic and aqueous layers were separated. [/INST],
Generated text: ' SETTEMPERATURE room temperature; PHASESEPARATION. 2012/09/19 15:59:19 [/INST] SETTEMPERATURE room temperature; PHASESEPARATION. 2012/09/19 15:59:19'
Prompt: <s>[INST] <s>[INST] paragraphs2actions: After then being left to stand at RT for three days, the mixture was diluted with ethyl acetate and washed with water. [/INST],
Generated text: ' SETTEMPERATURE RT for three days; ADD ethyl acetate; WASH with water.'
Prompt: <s>[INST] <s>[INST] paragraphs2actions: 3-Bromo-2-fluoroaniline (10 g, 52.63 mmol) was dissolved in DCM (100 mL) under nitrogen atmosphere. [/INST],
Generated text: ' ADD 3-Bromo-2-fluoroaniline (10 g, 52.63 mmol); ADD DCM (100 mL).'
Prompt: <s>[INST] <s>[INST] paragraphs2actions: The mixture was allowed to cool to rt and then concentrated in vacuo. [/INST],
Generated text: ' set to rt; concentrate.'
Prompt: <s>[INST] <s>[INST] paragraphs2actions:




### Save the Predictions

In [21]:
# Predictions are paired with the test rows by position, so a stale or partial
# `predictions` list would silently attach generations to the wrong paragraphs.
if len(predictions) != len(test_df):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(test_df)} test rows; "
        "re-run the inference cell before saving."
    )

# Build the frame in one step from positional values, so the pairing does not
# depend on the index of `test_df`.
pred_df = pd.DataFrame(
    {
        'Generated Text': predictions,
        'Actual Text': test_df[target_text].to_numpy(),
        'Paragraph': test_df[source_text].to_numpy(),
    }
)

# Create the output directory if needed so the results of a long inference run
# are not lost to a missing-folder error.
output_dir = "results/predictions"
os.makedirs(output_dir, exist_ok=True)

# Slashes in the model path are flattened to dashes to form a single file name.
output_path = f"{output_dir}/prediction_of_{new_model_name.replace('/', '-')}.csv"
pred_df.to_csv(output_path, index=False)
pred_df

Unnamed: 0,Generated Text,Actual Text,paragraphText
0,SETTEMPERATURE room temperature; PHASESEPARATI...,PHASESEPARATION.,Both the organic and aqueous layers were separ...
1,SETTEMPERATURE RT for three days; ADD ethyl ac...,WAIT for three days at RT; ADD ethyl acetate; ...,After then being left to stand at RT for three...
2,"ADD 3-Bromo-2-fluoroaniline (10 g, 52.63 mmol)...","ADD 3-Bromo-2-fluoroaniline (10 g, 52.63 mmol)...","3-Bromo-2-fluoroaniline (10 g, 52.63 mmol) was..."
3,set to rt; concentrate.,SETTEMPERATURE rt; CONCENTRATE.,The mixture was allowed to cool to rt and then...
4,PURIFY; YIELD benzyl 3-(formamido(7-((2-(trime...,PURIFY; YIELD benzyl 3-(formamido(7-((2-(trime...,The crude material was purified by silica gel ...
...,...,...,...
347,"MAKESOLUTION with (2S,4S)-4-mercapto-1-methyl-...","MAKESOLUTION with 4-nitrobenzyl ‌(1R,5R,6S)-2-...","A solution of 0.42 g of (2S,4S)-4-mercapto-1-m..."
348,SETTEMPERATURE 70° C; STIR for 24 h; ADD MeOH;...,STIR for 24 h at 70° C; ADD MeOH; QUENCH with ...,The reaction mixture was heated at 70° C. for ...
349,ADD ice-water (150 ml) and sodium hydrogencarb...,MAKESOLUTION with ice-water (150 ml) and sodiu...,"After the reaction was over, the reaction mixt..."
350,TABLE paragraphs2actions: The crystallographic...,NOACTION.,The crystallographic data is summarized in the...
