In [2]:
from inference import *
from analysis import *
import json
import numpy as np
import textdistance
import os

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def evaluation(test_set_dict, model_path, tokenizer_path, batch_size=32, ckpt_name='', multi_gpu=True, aug_test=False):
    """Run inference on one test split and cache the per-example results as JSONL.

    Results live in ``<dirname(model_path)>/test_results_<ckpt_name>/`` under
    ``augmented_test_results.jsonl`` (``aug_test=True``) or
    ``original_test_results.jsonl`` (``aug_test=False``). When that file
    already exists it is loaded and returned and no inference is run.

    Args:
        test_set_dict: Mapping holding the test splits under the keys
            ``'original'`` and ``'augmented'``. The selected split is only
            read when inference actually has to run.
        model_path: Forwarded to ``dataset_inference``; its parent directory
            is where the results directory is created.
        tokenizer_path: Forwarded to ``dataset_inference``.
        batch_size: Forwarded to ``dataset_inference``.
        ckpt_name: Suffix of the results directory name.
        multi_gpu: Forwarded to ``dataset_inference``.
        aug_test: Selects the augmented split/file instead of the original.

    Returns:
        A list of dicts, one per example. Freshly computed records are the
        ones returned by ``dataset_inference`` with an added ``'sim'`` key:
        the normalized Levenshtein similarity between ``'target'`` and
        ``'pred'``.
    """
    if aug_test:
        split_key, file_name = 'augmented', 'augmented_test_results.jsonl'
    else:
        split_key, file_name = 'original', 'original_test_results.jsonl'

    test_dir_path = os.path.join(os.path.dirname(model_path), 'test_results_' + ckpt_name)
    os.makedirs(test_dir_path, exist_ok=True)
    results_path = os.path.join(test_dir_path, file_name)

    if os.path.exists(results_path):
        print('Test results already exist. Loading from file...')
        with open(results_path, 'r') as f:
            # Tolerate blank lines (e.g. a trailing newline added by an editor).
            return [json.loads(line) for line in f if line.strip()]

    # Only touch the test split once we know inference is required, so cached
    # results can be reloaded without supplying the data again.
    test_set = test_set_dict[split_key]
    test_results = dataset_inference(test_set, model_path, tokenizer_path, batch_size=batch_size, multi_gpu=multi_gpu)
    for record in test_results:
        record['sim'] = textdistance.levenshtein.normalized_similarity(record['target'], record['pred'])

    # Write to a temporary file and rename it into place: an interrupted write
    # must not leave a truncated file that a later call would load as a
    # complete cache.
    tmp_path = results_path + '.tmp'
    with open(tmp_path, 'w') as f:
        for record in test_results:
            f.write(json.dumps(record) + '\n')
    os.replace(tmp_path, results_path)
    return test_results

In [4]:
def metric_printer(model_name, print_aug_test=False):
    """Print BLEU and similarity-threshold accuracies for a model's cached test runs.

    Every entry of ``../../results/<model_name>`` whose name contains
    ``'test_results'`` is visited in sorted order. For each one the function
    loads ``original_test_results.jsonl`` (or ``augmented_test_results.jsonl``
    when ``print_aug_test`` is true) and prints one summary line.

    Args:
        model_name: Name of the model directory under ``../../results``.
        print_aug_test: Report the augmented results file instead of the
            original one.

    A missing or empty results file is reported with a message and skipped
    instead of raising, so one incomplete run does not hide the others.
    Each loaded record must provide the keys ``'sim'``, ``'pred'`` and
    ``'target'``.
    """
    results_root = f"../../results/{model_name}"
    if print_aug_test:
        file_name, split_label = 'augmented_test_results.jsonl', 'Augmented'
    else:
        file_name, split_label = 'original_test_results.jsonl', 'Original'

    # os.listdir order is arbitrary; sort so the report is reproducible.
    test_dir_list = sorted(item for item in os.listdir(results_root) if 'test_results' in item)
    for test_dir in test_dir_list:
        test_path = os.path.join(results_root, test_dir)
        print(f"Test Path: {test_path}")

        results_file = os.path.join(test_path, file_name)
        if not os.path.exists(results_file):
            print(f"{split_label} Test Results Not Found")
            continue
        with open(results_file) as f:
            results = [json.loads(line) for line in f]
        if not results:
            # Every ratio below divides by len(results).
            print(f"{split_label} Test Results Empty")
            continue

        sims = [item['sim'] for item in results]
        total = len(sims)
        avg_sim = round(np.mean(sims), 4)
        acc_100 = round(sum(sim == 1 for sim in sims) / total, 4)
        acc_90 = round(sum(sim >= 0.9 for sim in sims) / total, 4)
        acc_75 = round(sum(sim >= 0.75 for sim in sims) / total, 4)
        acc_50 = round(sum(sim >= 0.5 for sim in sims) / total, 4)
        pred_list = [item['pred'] for item in results]
        target_list = [item['target'] for item in results]
        bleu = round(original_bleu(pred_list, target_list), 4)
        print(f"Model: {model_name}, Bleu: {bleu}, Ave Accuracy:{avg_sim}, Accuracy 100: {acc_100}, Accuracy 90: {acc_90}, Accuracy 75: {acc_75}, Accuracy 50: {acc_50}")

# Evaluating the Results on the Pistachio Dataset

In [31]:
# Pistachio test splits, stored as JSONL (one JSON object per line).
ori_test_path = '../../dataset/Pistachio_test/Original/T5/test.jsonl'
aug_test_path = '../../dataset/Pistachio_test/Augmented/T5/test.jsonl'

test_set_dict = {}
for split_name, split_path in (('original', ori_test_path), ('augmented', aug_test_path)):
    with open(split_path) as f:
        test_set_dict[split_name] = [json.loads(line) for line in f]
ori_test_data = test_set_dict['original']
aug_test_data = test_set_dict['augmented']

# Full list of (model directory, checkpoint name) pairs; only the subset in
# `model_list` below is evaluated by this cell.
# model_list = [
#     ('molT5-base-aug-3-ignore-pad','epoch=8-step=200000'),
#     ('molT5-base-ignore-pad','epoch=32-step=140000'),
#     ('molT5-small-aug-3-ignore-pad','epoch=7-step=345000'),
#     ('molT5-small-ignore-pad','epoch=31-step=265000'),
#     ('text-chemT5-base-aug-3-ignore-pad', 'epoch=8-step=190000'),
#     ('text-chemT5-base-ignore-pad', 'epoch=25-step=110000'),
#     ('text-chemT5-small-aug-3-ignore-pad', 'epoch=9-step=450000'),
#     ('text-chemT5-small-ignore-pad', 'epoch=43-step=370000'),
#     ('T5-base-aug-3-ignore-pad','epoch=7-step=185000'),
#     ('T5-base-ignore-pad','epoch=24-step=105000')]

model_list = [
    ('molT5-base-aug-3-ignore-pad', 'epoch=8-step=200000'),
    ('molT5-base-ignore-pad', 'epoch=32-step=140000'),
]
for model_name, ckpt_name in model_list:
    # Model weights and tokenizer are read from the same exported directory.
    model_path = tokenizer_path = f"../../results/{model_name}/hf_model"
    # 'small' models get the larger batch size.
    batch_size = 64 if 'small' in model_name else 32
    test_results = evaluation(
        test_set_dict,
        model_path,
        tokenizer_path,
        batch_size=batch_size,
        ckpt_name=ckpt_name,
        multi_gpu=True,
        aug_test=True,
    )

Test results already exist. Loading from file...
Test results already exist. Loading from file...


In [30]:
# Print the cached augmented-test metrics for every model in `model_list`.
# NOTE: depends on `model_list` holding (model_name, ckpt_name) pairs, as
# defined by the Pistachio evaluation cell; run that cell first.
for model_name, _ in model_list:
    metric_printer(model_name, print_aug_test=True)

Test Path: ../../results/molT5-base-aug-3-ignore-pad/test_results_epoch=8-step=200000
Model: molT5-base-aug-3-ignore-pad, Bleu: 0.6032, Ave Accuracy:0.6387, Accuracy 100: 0.0548, Accuracy 90: 0.1283, Accuracy 75: 0.2798, Accuracy 50: 0.7372
Test Path: ../../results/molT5-base-ignore-pad/test_results_epoch=32-step=140000
Model: molT5-base-ignore-pad, Bleu: 0.5509, Ave Accuracy:0.5986, Accuracy 100: 0.021, Accuracy 90: 0.0669, Accuracy 75: 0.1953, Accuracy 50: 0.6817


In [6]:
import os
from tqdm import tqdm
import textdistance

# Score every transformer prediction file in results_txt/ against the shared
# target file: write per-example records to results_jsonl/ and print the
# aggregate metrics for each prediction file.
transformer_results_path = '../../dataset/Pistachio_Aug_1/transformer'
test_data_path = os.path.join(transformer_results_path, 'tgt-test.txt')
with open(test_data_path, 'r') as f:
    test_data = [line.strip() for line in f]
# os.listdir order is arbitrary; sort so the report order is reproducible.
pred_results_list = sorted(os.listdir(os.path.join(transformer_results_path, 'results_txt')))
os.makedirs(os.path.join(transformer_results_path, 'results_jsonl'), exist_ok=True)
for pred_file_name in pred_results_list:
    print(f"Processing {pred_file_name}")
    pred_file_path = os.path.join(transformer_results_path, 'results_txt', pred_file_name)
    with open(pred_file_path, 'r') as f:
        pred_data = [line.strip() for line in f]
    # Predictions are matched to targets by line number, so a length mismatch
    # means the files are misaligned and every metric would be meaningless.
    if len(pred_data) != len(test_data):
        raise ValueError(
            f"{pred_file_path} has {len(pred_data)} lines but {test_data_path} has {len(test_data)}"
        )
    result_path = os.path.join(transformer_results_path, 'results_jsonl', os.path.basename(pred_file_path).replace('.txt', '.jsonl'))
    result_list = []
    with open(result_path, 'w') as f:
        for target, pred in tqdm(zip(test_data, pred_data), total=len(test_data)):
            sim = textdistance.levenshtein.normalized_similarity(target, pred)
            record = {'target': target, 'pred': pred, 'sim': sim}
            f.write(json.dumps(record) + '\n')
            result_list.append(record)
    acc_100 = round(len([item for item in result_list if item['sim'] == 1]) / len(result_list), 4)
    acc_90 = round(len([item for item in result_list if item['sim'] >= 0.9]) / len(result_list), 4)
    acc_75 = round(len([item for item in result_list if item['sim'] >= 0.75]) / len(result_list), 4)
    acc_50 = round(len([item for item in result_list if item['sim'] >= 0.5]) / len(result_list), 4)
    bleu = round(original_bleu(pred_data, test_data), 4)
    print(f"Bleu: {bleu}, Accuracy 100: {acc_100}, Accuracy 90: {acc_90}, Accuracy 75: {acc_75}, Accuracy 50: {acc_50}")

Processing pred-test-1000000.txt


100%|██████████| 67638/67638 [16:45<00:00, 67.27it/s]


Bleu: 0.559, Accuracy 100: 0.0396, Accuracy 90: 0.1105, Accuracy 75: 0.2596, Accuracy 50: 0.7012
Processing pred-test-910000.txt


100%|██████████| 67638/67638 [16:53<00:00, 66.72it/s]


Bleu: 0.5598, Accuracy 100: 0.0401, Accuracy 90: 0.109, Accuracy 75: 0.2596, Accuracy 50: 0.6976
Processing pred-test-920000.txt


100%|██████████| 67638/67638 [16:38<00:00, 67.74it/s]


Bleu: 0.5579, Accuracy 100: 0.0396, Accuracy 90: 0.1087, Accuracy 75: 0.2587, Accuracy 50: 0.6985
Processing pred-test-930000.txt


100%|██████████| 67638/67638 [16:37<00:00, 67.82it/s]


Bleu: 0.5563, Accuracy 100: 0.04, Accuracy 90: 0.1089, Accuracy 75: 0.2576, Accuracy 50: 0.6982
Processing pred-test-940000.txt


100%|██████████| 67638/67638 [16:33<00:00, 68.07it/s]


Bleu: 0.557, Accuracy 100: 0.0406, Accuracy 90: 0.1104, Accuracy 75: 0.2606, Accuracy 50: 0.6932
Processing pred-test-950000.txt


100%|██████████| 67638/67638 [16:38<00:00, 67.76it/s]


Bleu: 0.5574, Accuracy 100: 0.0399, Accuracy 90: 0.1096, Accuracy 75: 0.258, Accuracy 50: 0.698
Processing pred-test-960000.txt


100%|██████████| 67638/67638 [16:36<00:00, 67.86it/s]


Bleu: 0.558, Accuracy 100: 0.04, Accuracy 90: 0.1092, Accuracy 75: 0.2605, Accuracy 50: 0.699
Processing pred-test-970000.txt


100%|██████████| 67638/67638 [16:43<00:00, 67.40it/s]


Bleu: 0.5594, Accuracy 100: 0.0405, Accuracy 90: 0.1105, Accuracy 75: 0.2604, Accuracy 50: 0.6993
Processing pred-test-980000.txt


100%|██████████| 67638/67638 [16:59<00:00, 66.34it/s]


Bleu: 0.5611, Accuracy 100: 0.0406, Accuracy 90: 0.1101, Accuracy 75: 0.2607, Accuracy 50: 0.7025
Processing pred-test-990000.txt


100%|██████████| 67638/67638 [16:50<00:00, 66.92it/s] 


Bleu: 0.5615, Accuracy 100: 0.0406, Accuracy 90: 0.1097, Accuracy 75: 0.2582, Accuracy 50: 0.7027


In [9]:
import os
from tqdm import tqdm

# Score the smiles2actions "Augmented" predictions line by line against the
# targets and store one JSON record per example in result.jsonl.
transformer_results_path = '../../yuxuan/smiles2actions/Augmented'
test_data_path = os.path.join(transformer_results_path, 'tgt-test.txt')
pred_data_path = os.path.join(transformer_results_path, 'pred-test.txt')
result_path = os.path.join(transformer_results_path, 'result.jsonl')

with open(test_data_path, 'r') as f:
    test_data = [line.strip() for line in f]

with open(pred_data_path, 'r') as f:
    pred_data = [line.strip() for line in f]

# Targets and predictions are paired by line number; refuse to score
# misaligned files instead of failing mid-write or silently truncating.
if len(pred_data) != len(test_data):
    raise ValueError(
        f"{pred_data_path} has {len(pred_data)} lines but {test_data_path} has {len(test_data)}"
    )

with open(result_path, 'w') as f:
    for target, pred in tqdm(zip(test_data, pred_data), total=len(test_data)):
        sim = textdistance.levenshtein.normalized_similarity(target, pred)
        f.write(json.dumps({'target': target, 'pred': pred, 'sim': sim}) + '\n')

100%|██████████| 67499/67499 [13:27<00:00, 83.60it/s] 


In [20]:
import os
from tqdm import tqdm

# Score the smiles2actions "transformer" predictions (pred_256.txt) line by
# line against the targets and store one JSON record per example in
# result_256.jsonl.
transformer_results_path = '../../yuxuan/smiles2actions/transformer'
test_data_path = os.path.join(transformer_results_path, 'tgt-test.txt')
pred_data_path = os.path.join(transformer_results_path, 'pred_256.txt')
result_path = os.path.join(transformer_results_path, 'result_256.jsonl')

with open(test_data_path, 'r') as f:
    test_data = [line.strip() for line in f]

with open(pred_data_path, 'r') as f:
    pred_data = [line.strip() for line in f]

# Targets and predictions are paired by line number; refuse to score
# misaligned files instead of failing mid-write or silently truncating.
if len(pred_data) != len(test_data):
    raise ValueError(
        f"{pred_data_path} has {len(pred_data)} lines but {test_data_path} has {len(test_data)}"
    )

with open(result_path, 'w') as f:
    for target, pred in tqdm(zip(test_data, pred_data), total=len(test_data)):
        sim = textdistance.levenshtein.normalized_similarity(target, pred)
        f.write(json.dumps({'target': target, 'pred': pred, 'sim': sim}) + '\n')

100%|██████████| 67499/67499 [13:43<00:00, 81.95it/s] 


In [10]:
# Aggregate metrics for the 'transformer_aug_1' run.
# The results file is named explicitly instead of reusing whatever
# `result_path` was last left in the kernel: read top to bottom, the
# preceding cell points `result_path` at transformer/result_256.jsonl, which
# would be reported under the 'transformer_aug_1' label.
# NOTE(review): the Augmented/result.jsonl path is inferred from the execution
# order (this cell ran right after the Augmented scoring cell) and from the
# label -- confirm it is the intended file.
aug_result_path = os.path.join('../../yuxuan/smiles2actions/Augmented', 'result.jsonl')
with open(aug_result_path, 'r') as f:
    results = [json.loads(line) for line in f]
model_name = 'transformer_aug_1'
sims = [item['sim'] for item in results]
acc_100 = round(sum(sim == 1 for sim in sims) / len(results), 4)
acc_90 = round(sum(sim >= 0.9 for sim in sims) / len(results), 4)
acc_75 = round(sum(sim >= 0.75 for sim in sims) / len(results), 4)
acc_50 = round(sum(sim >= 0.5 for sim in sims) / len(results), 4)
pred_list = [item['pred'] for item in results]
target_list = [item['target'] for item in results]
bleu = round(original_bleu(pred_list, target_list), 4)
print(f"Model: {model_name}, Bleu: {bleu}, Accuracy 100: {acc_100}, Accuracy 90: {acc_90}, Accuracy 75: {acc_75}, Accuracy 50: {acc_50}")

Model: transformer_aug_1, Bleu: 0.554, Accuracy 100: 0.0339, Accuracy 90: 0.1029, Accuracy 75: 0.2561, Accuracy 50: 0.6962


# Evaluating the Results on the Orgsyn Dataset

In [1]:
# Orgsyn_HC test split, stored as JSONL (one JSON object per line).
ori_test_path = '../../dataset/Orgsyn_HC/test.jsonl'

with open(ori_test_path) as f:
    ori_test_data = [json.loads(line) for line in f]

# Only the original split is provided, so evaluation() must be called with
# aug_test=False in this cell.
test_set_dict = {'original': ori_test_data}
model_list = [
    'molT5-base-orgsyn-hc-aug',
    'molT5-base-orgsyn-hc-mix-1',
    'molT5-base-orgsyn-hc-mix-2',
    'molT5-base-orgsyn-hc-mix-3',
    'molT5-base-orgsyn-hc-mix-random',
]
for model_name in model_list:
    # Checkpoint names are the directory entries containing 'ckpt', cut at
    # the first '.'.
    # NOTE(review): the k-th entry is paired with hf_model_<k>, and os.listdir
    # gives no ordering guarantee -- confirm this matches how the hf_model_*
    # directories were exported.
    ckpt_list = [
        entry.split('.')[0]
        for entry in os.listdir(f"../../results/{model_name}")
        if 'ckpt' in entry
    ]
    for model_index, ckpt_name in enumerate(ckpt_list, start=1):
        # Model weights and tokenizer are read from the same exported directory.
        model_path = tokenizer_path = f"../../results/{model_name}/hf_model_{model_index}"
        batch_size = 64 if 'small' in model_name else 32
        test_results = evaluation(
            test_set_dict,
            model_path,
            tokenizer_path,
            batch_size=batch_size,
            ckpt_name=ckpt_name,
            multi_gpu=True,
            aug_test=False,
        )
    # Augmented-test evaluation is disabled for these models:
    # test_results = evaluation(test_set_dict, model_path, tokenizer_path, batch_size=batch_size, ckpt_name=ckpt_name, multi_gpu=True, aug_test=True)

# Report the metrics of every cached test run, model by model.
for model_name in model_list:
    metric_printer(model_name)

FileNotFoundError: [Errno 2] No such file or directory: '../../dataset/Orgsyn_HC/test.jsonl'

In [17]:
# Print the cached original-test metrics for two Orgsyn models.
# Only the model names are passed on; the checkpoint name in each pair is
# ignored by the loop below.
# NOTE: this rebinds `model_list` to (model_name, ckpt_name) pairs.
model_list = [
('molT5-base-orgsyn-mix-rag-1', 'epoch=37-step=34000'),
('molT5-base-orgsyn-mix-random', 'epoch=49-step=41500')]
for model_name, _ in model_list:
    metric_printer(model_name)

Test Path: ../../results/molT5-base-orgsyn-mix-rag-1/test_results_epoch=37-step=34000
Model: molT5-base-orgsyn-mix-rag-1, Bleu: 0.3879, Ave Accuracy:0.4953, Accuracy 100: 0.0, Accuracy 90: 0.0, Accuracy 75: 0.0483, Accuracy 50: 0.4207
Test Path: ../../results/molT5-base-orgsyn-mix-random/test_results_epoch=49-step=41500
Model: molT5-base-orgsyn-mix-random, Bleu: 0.3949, Ave Accuracy:0.4883, Accuracy 100: 0.0, Accuracy 90: 0.0, Accuracy 75: 0.0207, Accuracy 50: 0.4276
