### Import Packages

In [1]:
import os

# Setup environment.
# Expose only GPU index 1 to this process. CUDA reads this variable when it is
# first initialised, so it is exported *before* torch / vllm are imported to
# guarantee that no import-time CUDA call can lock in a different device set.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import torch
import pandas as pd
from vllm import LLM, SamplingParams
from transformers import  AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from peft import LoraConfig, PeftModel

### Merge Base Model and Adapter into a New Model (Then Restart the Kernel)

In [2]:
# Pretrained base checkpoint (downloaded from Hugging Face) and the fine-tuned
# adapter checkpoint that will be merged into it.
base_model_name = "/home/zhangwei/pretrained_models/llama-2-13b-chat-hf"
adaptor_name = "saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500"

# Output directory for the merged model. The adapter path is appended verbatim,
# so its "/" separators turn into nested sub-directories under saved_models/.
new_model_name = "saved_models/merged_models-" + adaptor_name

# Device placement passed to `from_pretrained`: everything on device index 0.
device_map = {"": 0}

In [3]:
# Reload the base model in FP16 so the LoRA weights can be merged into it.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Attach the adapter to the base model, then fold its weights into the base
# layers and drop the PEFT wrapper so a plain causal-LM model remains.
model = PeftModel.from_pretrained(base_model, adaptor_name).merge_and_unload()

# safe_serialization=False is very important for the *model weights*: the
# merged checkpoint is later loaded by vLLM, see
# https://github.com/vllm-project/vllm/issues/615
model.save_pretrained(new_model_name, safe_serialization=False)

# Save the tokenizer into the same directory so the merged checkpoint can be
# loaded from a single path. The EOS token doubles as the padding token and
# padding is applied on the right.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# `safe_serialization` is a model-weight option; the tokenizer writer does not
# use it, so it is intentionally not passed here.
tokenizer.save_pretrained(new_model_name)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Non-default generation parameters: {'max_length': 4096}


('saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500/tokenizer_config.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500/special_tokens_map.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500/tokenizer.model',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500/added_tokens.json',
 'saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500/tokenizer.json')

### Load Merged Model

In [3]:
# Decoding settings: temperature 0 with top_p 1, at most 3072 generated tokens
# per prompt, and generation stops early if the string "!!!" is produced.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1,
    max_tokens=3072,
    stop=["!!!"],
)

# Start a vLLM engine on the merged checkpoint directory.
llm = LLM(model=new_model_name)

INFO 04-21 12:31:07 llm_engine.py:79] Initializing an LLM engine with config: model='saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500', tokenizer='saved_models/merged_models-saved_models/llama2_13b_chat_qlora/train_1000_lora_r8_lr1e-05/checkpoint-2500', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, seed=0)
INFO 04-21 12:31:44 llm_engine.py:337] # GPU blocks: 862, # CPU blocks: 327
INFO 04-21 12:32:06 model_runner.py:666] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 04-21 12:32:06 model_runner.py:670] CUDA graphs can take add

### Preprocess Data

In [4]:
# Load the test split and name the input / reference columns.
test_df = pd.read_csv("data/test/test_1000.csv")
source_text = "Paragraph"
target_text = "Compound"

# Short task tag placed in front of every paragraph, e.g. "Paragraph2Compound: ".
instruction = source_text + "2" + target_text + ": "
# Alternative, more verbose instruction (kept for reference, not used):
# instruction = '''You are an assistant that formats chemical paragraphs to compounds.Please extract all compound names in the paragraph, the compound names should be split in " | ".'''

# Wrap each paragraph in the [INST] ... [/INST] markers and collect the
# resulting prompt strings in row order.
test_df["text"] = "<s>[INST] " + instruction + test_df[source_text] + " [/INST]"
prompts = test_df["text"].tolist()
prompts[:5]

['<s>[INST] Paragraph2Compound: 6.86 g (25 mmoles) of 5-phenylsulfinyl-benzimidazoline-2-thione and 4.9 ml (60 mmoles) of allylchloride are refluxed in 50 ml of ethanol for 24 hours. Then the reaction mixture is concentrated and the residue is crystallized from acetonitrile. The weight of the filtered endproduct is 7.68 g (87.55%). After recrystallization from ethanol it melts at 168° C. [/INST]',
 '<s>[INST] Paragraph2Compound: A mixture of [2-bromo-4-(tert-butoxycarbonylamino-methyl)-phenoxy]-acetic acid methyl ester (2.0 g, 5.34 mmol), pyridine-4-boronic acid (0.78 g, 6.41 mmol), Cs2CO3 (3.48 g, 10.68 mol), Pd(dppf)Cl2.CH2Cl2 (0.29 g, 10% mol) in dioxane/H2O (29 mL, 10/1) is heated at 80° C. for 4 h. The reaction mixture is cooled to r.t., and then concentrated in vacuo. The residue is partitioned between CH2Cl2 and H2O. The two layers are separated, and the organic layer is washed with H2O, and brine, dried over Na2SO4, filtered, and concentrated in vacuo. The crude material is pur

### Inference

In [5]:
# Generate texts from the prompts.
# The output is a list of RequestOutput objects that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)

# Keep the first completion of every request, with surrounding whitespace removed.
predictions = [output.outputs[0].text.strip() for output in outputs]

# The predictions are later paired with the test rows by position, so make
# sure nothing was dropped or duplicated before going on.
assert len(predictions) == len(prompts), "expected exactly one prediction per prompt"

# Show only a few examples; echoing every prompt and completion floods the
# notebook output and bloats the saved file.
NUM_PREVIEW = 5
for output in outputs[:NUM_PREVIEW]:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt},\nGenerated text: {generated_text!r}")

Processed prompts: 100%|██████████| 1000/1000 [03:36<00:00,  4.61it/s]

Prompt: <s>[INST] Paragraph2Compound: 6.86 g (25 mmoles) of 5-phenylsulfinyl-benzimidazoline-2-thione and 4.9 ml (60 mmoles) of allylchloride are refluxed in 50 ml of ethanol for 24 hours. Then the reaction mixture is concentrated and the residue is crystallized from acetonitrile. The weight of the filtered endproduct is 7.68 g (87.55%). After recrystallization from ethanol it melts at 168° C. [/INST],
Generated text: ' 5-phenylsulfinyl-benzimidazoline-2-thione | allylchloride | ethanol | acetonitrile | ethanol'
Prompt: <s>[INST] Paragraph2Compound: A mixture of [2-bromo-4-(tert-butoxycarbonylamino-methyl)-phenoxy]-acetic acid methyl ester (2.0 g, 5.34 mmol), pyridine-4-boronic acid (0.78 g, 6.41 mmol), Cs2CO3 (3.48 g, 10.68 mol), Pd(dppf)Cl2.CH2Cl2 (0.29 g, 10% mol) in dioxane/H2O (29 mL, 10/1) is heated at 80° C. for 4 h. The reaction mixture is cooled to r.t., and then concentrated in vacuo. The residue is partitioned between CH2Cl2 and H2O. The two layers are separated, and the org




### Save the Predictions

In [8]:
# Put predictions, reference answers and input paragraphs side by side.
# `predictions` is in prompt order, i.e. in `test_df` row order, so the columns
# are attached positionally; a length mismatch then raises instead of being
# silently filled with NaN by index alignment.
pred_df = pd.DataFrame(
    {
        "Generated Text": predictions,
        "Actual Text": list(test_df[target_text]),
        "Paragraph": list(test_df[source_text]),
    }
)

# The model path contains "/" separators; flatten them so the result is a
# single file name.
# NOTE(review): "rediction_of_" looks like a typo for "prediction_of_"; it is
# left unchanged because other code may read this exact path - confirm and rename.
output_path = f"results/predictions/rediction_of_{new_model_name.replace('/', '-')}.csv"

# Create the output directory up front so the results of a long inference run
# are not lost to a missing folder.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
pred_df.to_csv(output_path, index=False)
pred_df

Unnamed: 0,Generated Text,Actual Text,Paragraph
0,5-phenylsulfinyl-benzimidazoline-2-thione | al...,5-phenylsulfinyl-benzimidazoline-2-thione | al...,6.86 g (25 mmoles) of 5-phenylsulfinyl-benzimi...
1,[2-bromo-4-(tert-butoxycarbonylamino-methyl)-p...,[2-bromo-4-(tert-butoxycarbonylamino-methyl)-p...,A mixture of [2-bromo-4-(tert-butoxycarbonylam...
2,trimethylsilyl bromide | [4-(3-methoxypropyl)-...,trimethylsilyl bromide | [4-(3-methoxypropyl)-...,1.560 ml of trimethylsilyl bromide are added d...
3,"N-isopropyl-3-(3',4'-dimethoxyphenyl)-piperidi...","N-isopropyl-3-(3',4'-dimethoxyphenyl)-piperidi...","A solution of 4.8 g of N-isopropyl-3-(3',4'-di..."
4,thionyl chloride | 5-methoxy-1H-indole-2-carbo...,thionyl chloride | 5-methoxy-1H-indole-2-carbo...,14.5 ml of thionyl chloride are added dropwise...
...,...,...,...
995,"(E)-1-(1-hydroxy-3,3-dimethyl-1,3-dihydrobenzo...","crude compound | (E)-1-(1-hydroxy-3,3-dimethyl...",A mixture of crude compound (E)-1-(1-hydroxy-3...
996,tert-butyl 3-aminopropylcarbamate | toluene | ...,tert-butyl 3-aminopropylcarbamate | toluene | ...,To a solution of tert-butyl 3-aminopropylcarba...
997,methanesulfonate | diglycolic anhydride | 4-am...,methanesulfonate | (a) | diglycolic anhydride ...,methanesulfonate by (a) reacting diglycolic an...
998,Ethyl trifluoroacetate | tert-butyl methyl eth...,Ethyl trifluoroacetate | tert-butyl methyl eth...,Ethyl trifluoroacetate (6.32 g) was dissolved ...


: 