In [1]:
import logging
import os
import subprocess
import sys

import torch

# Comma-separated GPU indices this notebook (and anything it launches) may use.
gpus = '2,6'

# Export both settings in one step; they must be in place before the
# project modules below are imported.
# NOTE(review): MKL_THREADING_LAYER=GNU is presumably a workaround for an
# MKL / OpenMP runtime clash in spawned workers -- confirm it is still needed.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": gpus,
    'MKL_THREADING_LAYER': 'GNU',
})

# Put the parent directory on the import path so the imports that follow
# (utils, data.*, evaluation.*) can be resolved from there.
sys.path.append('../')
import utils
from data.generate_finetune_data import generate_finetune_data
from evaluation._1_overall_evaluation_vllm import overall_evaluation_vllm
from evaluation._2_each_drug_evaluation_vllm import each_drug_evaluation_vllm


# Sequential per-drug fine-tuning driver.
#
# For every drug name returned by utils.load_data() this cell:
#   1. generates the fine-tuning dataset for that drug,
#   2. launches finetune_fix_drug.py under torchrun (resuming from the previous
#      checkpoint for every drug after the first),
#   3. evaluates the result on all drugs trained so far and on the overall task,
#   4. renames the checkpoint via utils.rename_checkpoint.
# The loop stops with an exception as soon as a training run fails.

output_dir = '../output/test'

base_model = 'llama-2-7b'

# setup logging
# The log file lives inside output_dir, so the directory has to exist first.
os.makedirs(output_dir, exist_ok=True)
log_file = os.path.join(output_dir, "train.log")
logger = utils.set_logger(log_file, mode='a')

_, med_names, _ = utils.load_data()
# med_list = ['Chlorhexidine', 'Metronidazole']
med_list = med_names

# Loop-invariant launch settings: one torchrun worker per GPU visible to this
# kernel, and the same GPU restriction passed on to the child process.
num_procs = torch.cuda.device_count()
train_env = {**os.environ, "CUDA_VISIBLE_DEVICES": gpus}

for idx, med in enumerate(med_list):
    data_path, val_set_size = generate_finetune_data(med, logger)    # generate finetune data

    # data_path = '../data/finetune_data_ratio_2_fix_Chlorhexidine.json'
    # val_set_size = 300

    # The first drug starts from the base model; every later drug resumes.
    resume_from_checkpoint = 0 if idx == 0 else 1

    # logger.info('Training on drug: {}'.format(med))
    # Pass the command as an argv list (no shell), so paths containing spaces
    # are delivered to the script as a single argument.
    train_cmd = [
        "torchrun",
        f"--nproc_per_node={num_procs}",
        "--master_port=29501",
        "finetune_fix_drug.py",
        "--base_model", base_model,
        "--learning_rate", "5e-5",
        "--num_epochs", "10",
        "--early_stopping_patience", "20",
        "--eval_epochs", "16",
        "--val_set_size", str(val_set_size),
        "--batch_size", "64",
        "--micro_batch_size", "12",
        "--train_on_inputs", "0",
        "--resume_from_checkpoint", str(resume_from_checkpoint),
        "--data_path", str(data_path),
        "--output_dir", output_dir,
    ]
    # stdout/stderr of the training run go to test.out / test.err, which are
    # overwritten for each drug. check=True raises CalledProcessError on a
    # non-zero exit so a failed run is never evaluated or renamed.
    with open("test.out", "w") as train_stdout, open("test.err", "w") as train_stderr:
        subprocess.run(
            train_cmd,
            env=train_env,
            stdout=train_stdout,
            stderr=train_stderr,
            check=True,
        )

    # Use the evaluation functions this notebook actually imports.
    # NOTE(review): assumes each_drug_evaluation_vllm / overall_evaluation_vllm
    # accept the same arguments as the old drug_evaluate_vllm / evaluate_vllm
    # names that were called here -- confirm against the evaluation modules.
    each_drug_evaluation_vllm(output_dir, med_list[:idx+1], gpu_memory_utilization=0.8, data_len=-1, logger=logger)
    overall_evaluation_vllm(output_dir, gpu_memory_utilization=0.8, data_len=20, logger=logger)
    utils.rename_checkpoint(output_dir, med, idx)
    logger.info(f'----------Finished training on drug: {idx}-{med}-----------\n\n')


2023-11-19 20:53:48,702 - INFO - Generate finetune data for Acetaminophen successfully! Train Length: 9960=8193+1767, Val Length: 122=100+22


------ Single drug evaluation: Merging lora weights and saving hf model ------


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

INFO 11-19 21:47:58 llm_engine.py:72] Initializing an LLM engine with config: model='../Models/test/checkpoint-336', tokenizer='../Models/test/checkpoint-336', tokenizer_mode=auto, revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)
INFO 11-19 21:48:19 llm_engine.py:205] # GPU blocks: 6454, # CPU blocks: 512


Processed prompts: 100%|██████████| 2490/2490 [00:55<00:00, 44.57it/s]
2023-11-19 21:49:22,007 - INFO - med:Acetaminophen           , recall:0.9982  precision:0.9442  f1:0.9704  jaccard:0.9425  drug_pred:2431/2490, drug_gt:2042


INFO 11-19 21:49:22 llm_engine.py:72] Initializing an LLM engine with config: model='../Models/test/checkpoint-336', tokenizer='../Models/test/checkpoint-336', tokenizer_mode=auto, revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)
INFO 11-19 21:49:42 llm_engine.py:205] # GPU blocks: 6454, # CPU blocks: 512


Processed prompts: 100%|██████████| 3020/3020 [00:59<00:00, 50.88it/s]
2023-11-19 21:50:47,515 - INFO - jaccard: 0.1314, recall: 0.9792, precision: 0.1316, f1: 0.2261, ddi_rate: 0.1244, drug_num: 147.70/151, refuse_rate: 0.0000
2023-11-19 21:50:47,854 - INFO - ----------Finished training on drug: 0-Acetaminophen-----------


2023-11-19 21:50:49,888 - INFO - Generate finetune data for Potassium chloride successfully! Train Length: 9960=7143+2817, Val Length: 135=100+35


------ Single drug evaluation: Merging lora weights and saving hf model ------


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

INFO 11-19 22:47:00 llm_engine.py:72] Initializing an LLM engine with config: model='../Models/test/checkpoint-352', tokenizer='../Models/test/checkpoint-352', tokenizer_mode=auto, revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)
INFO 11-19 22:47:20 llm_engine.py:205] # GPU blocks: 6454, # CPU blocks: 512


Processed prompts: 100%|██████████| 2490/2490 [00:55<00:00, 44.56it/s]
2023-11-19 22:48:21,303 - INFO - med:Acetaminophen           , recall:0.9474  precision:0.9422  f1:0.9448  jaccard:0.8954  drug_pred:2197/2490, drug_gt:2042
Processed prompts: 100%|██████████| 2490/2490 [00:56<00:00, 44.24it/s]
2023-11-19 22:49:22,933 - INFO - med:Potassium chloride      , recall:0.9812  precision:0.8976  f1:0.9375  jaccard:0.8824  drug_pred:2256/2490, drug_gt:1719


INFO 11-19 22:49:23 llm_engine.py:72] Initializing an LLM engine with config: model='../Models/test/checkpoint-352', tokenizer='../Models/test/checkpoint-352', tokenizer_mode=auto, revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)
INFO 11-19 22:49:43 llm_engine.py:205] # GPU blocks: 6454, # CPU blocks: 512


Processed prompts: 100%|██████████| 3020/3020 [00:59<00:00, 50.99it/s]
2023-11-19 22:50:47,892 - INFO - jaccard: 0.1234, recall: 0.8958, precision: 0.1234, f1: 0.2113, ddi_rate: 0.1132, drug_num: 133.70/151, refuse_rate: 0.0000
2023-11-19 22:50:48,225 - INFO - ----------Finished training on drug: 1-Potassium chloride-----------


2023-11-19 22:50:50,239 - INFO - Generate finetune data for Pantoprazole successfully! Train Length: 9958=7358+2600, Val Length: 150=100+50


In [None]:
# Display the drug names returned by utils.load_data(); the training loop
# above iterates over this list in this order.
med_names

['Acetaminophen',
 'Potassium chloride',
 'Pantoprazole',
 'Metoprolol',
 'Magnesium sulfate',
 'Furosemide',
 'Vancomycin',
 'Salbutamol',
 'Oxycodone',
 'Bisacodyl',
 'Lorazepam',
 'Magnesium',
 'Ipratropium',
 'Acetylsalicylic acid',
 'Morphine',
 'Ondansetron',
 'Fentanyl',
 'Calcium gluconate',
 'D-glucose',
 'Propofol',
 'Lansoprazole',
 'Hydromorphone',
 'Levofloxacin',
 'Atorvastatin',
 'Metronidazole',
 'Midazolam',
 'Lisinopril',
 'Warfarin',
 'Lidocaine',
 'Famotidine',
 'Hydralazine',
 'Chlorhexidine',
 'Phenylephrine',
 'Metoclopramide',
 'Ciprofloxacin',
 'Spironolactone',
 'Nitroglycerin',
 'Prednisone',
 'Sodium bicarbonate',
 'Tazobactam',
 'Diphenhydramine',
 'Omeprazole',
 'Lactulose',
 'Clopidogrel',
 'Zolpidem',
 'Fluticasone propionate',
 'Levothyroxine',
 'Norepinephrine',
 'Simvastatin',
 'Monopotassium phosphate',
 'Ranitidine',
 'Haloperidol',
 'Thiamine',
 'Cefazolin',
 'Piperacillin',
 'Amiodarone',
 'Ceftriaxone',
 'Trazodone',
 'Phylloquinone',
 'Miconazol