In [4]:
import numpy as np
import pickle
import seaborn as sns
import statistics
import matplotlib.pyplot as plt
import sys

sys.path.append("src")
from utils.tool import get_task_info


# Benchmark task and constraint setting analysed by this cell. Both values are
# passed to get_task_info and are also interpolated into the result-file paths
# defined further down.
task_id = 202
constraint = "loose"
# get_task_info is imported from utils.tool (its implementation is not shown
# here). As used in this cell: prop_name is iterated as a collection of
# property names, while opt_direction and threshold are indexed by those names.
# task_objective is not referenced anywhere in the visible code.
prop_name, opt_direction, task_objective, threshold = get_task_info(constraint=constraint, task_id=task_id)

def if_complete(prop_dict_list, opt_direction, threshold, prop_name):
    """Decide whether an optimisation trajectory met every property target.

    The first and last entries of ``prop_dict_list`` are compared for each
    property in ``prop_name``.

    Args:
        prop_dict_list: sequence of dicts mapping property name -> value;
            only index 0 and index -1 are read.
        opt_direction: dict mapping property name -> 'increase' or 'decrease'.
        threshold: dict mapping property name -> minimum required change.
        prop_name: iterable of the property names to check.

    Returns:
        True when, for every checked property, the change in the requested
        direction is at least the threshold; False otherwise. A property whose
        direction is neither 'increase' nor 'decrease' is not checked.
    """
    all_met = True
    for prop in prop_name:
        required = threshold[prop]
        direction = opt_direction[prop]
        if direction == 'increase':
            # Improvement is measured as last minus first.
            improvement = prop_dict_list[-1][prop] - prop_dict_list[0][prop]
        elif direction == 'decrease':
            # Improvement is measured as first minus last.
            improvement = prop_dict_list[0][prop] - prop_dict_list[-1][prop]
        else:
            # Unknown directions place no requirement on this property.
            continue
        if improvement < required:
            all_met = False
    return all_met
    
# Result pickles for the four methods compared in this cell, all for the same
# task_id / constraint. Judging by the directory names and the report labels
# printed at the end of the cell: 1 = baseline, 2 = LLM planner,
# 3 = RL planner, 4 = ChatDrug.
pickle_file_path_1 = f'results/baseline/{task_id}/deepseek_baseline_{constraint}_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl'
pickle_file_path_2 = f'results/llm_planner/{task_id}/deepseek_llm_planner_{constraint}_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl'
pickle_file_path_3 = f'results/rl_planner/{task_id}/deepseek_rl_planner_{constraint}_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl'
pickle_file_path_4 = f'results/ChatDrug/{task_id}/deepseek_chatdrug_{constraint}_C_0_seed_0_single_v1/search_tree_total_info.pkl'


# Order matters: later code indexes path_list[0..3] to look up each method's
# success count.
path_list = [pickle_file_path_1, 
             pickle_file_path_2,
             pickle_file_path_3,
             pickle_file_path_4,]

# Read every results pickle into memory, keyed by the path it was loaded from.
# NOTE: pickle.load executes arbitrary code from the file; only point this at
# result files you produced yourself.
complete_rate_dict = {}  # initialised here but never filled in the visible code
loaded_object_dict = {}
for result_path in path_list:
    with open(result_path, 'rb') as handle:
        loaded_object_dict[result_path] = pickle.load(handle)

# Sanity check: print how many entries each results file contains.
for entries in loaded_object_dict.values():
    print(len(entries))


# For each results file, keep only the molecules whose property trajectory
# satisfies if_complete, and record how many there are.
#   new_dict_dict[path][mol] -> [prop_list, sim_list]
#   complete_dict[path]      -> number of molecules kept
# (A disabled alternative trusted a stored 'complete' marker in value[2] when
# an entry had three elements; every entry is now re-checked with if_complete.)
complete_dict = {}
new_dict_dict = {}
for results_path, entries in loaded_object_dict.items():
    kept = {}
    new_dict_dict[results_path] = kept
    for molecule, record in entries.items():
        trajectory_props = record[0]
        trajectory_sims = record[1]
        met_targets = if_complete(
            prop_dict_list=trajectory_props,
            opt_direction=opt_direction,
            threshold=threshold,
            prop_name=prop_name,
        )
        if met_targets:
            kept[molecule] = [trajectory_props, trajectory_sims]
    # Keys are unique, so the size of the filtered dict is the success count.
    complete_dict[results_path] = len(kept)
        

# Collect similarity values of the successful molecules, per results file.
# sim_sum_dict[path] is a pair of lists: [all sim_list[0] values,
# all sim_list[1] values]. Molecules whose last similarity equals 1 are left
# out (presumably the molecule came back unchanged -- confirm against the
# code that produces sim_list).
sim_sum_dict = {}
for results_path, successes in new_dict_dict.items():
    sim_sum = [[], []]
    if successes:
        print(f"name: {results_path}, len: {len(successes)}")
        for record in successes.values():
            sims = record[1]
            if sims[-1] != 1:
                sim_sum[0].append(sims[0])
                sim_sum[1].append(sims[1])
        # Number of molecules that contributed similarity values.
        count = len(sim_sum[0])
    sim_sum_dict[results_path] = sim_sum


def _similarity_stats(sims, bad_thresh):
    """Summarise one method's similarity values.

    Args:
        sims: list of similarity values, one per retained molecule.
        bad_thresh: values strictly below this are counted as dissimilar.

    Returns:
        Tuple ``(n_bad, mean, std)``: the number of values below
        ``bad_thresh``, their mean, and their sample standard deviation.
        ``mean`` is NaN for an empty list and ``std`` is NaN for fewer than
        two values, so a method with few or no successes does not abort the
        whole report.
    """
    n_bad = sum(1 for s in sims if s < bad_thresh)
    # np.mean of an empty list emits a RuntimeWarning; return NaN quietly.
    mean = np.mean(sims) if len(sims) > 0 else float("nan")
    # statistics.stdev raises StatisticsError on fewer than two data points.
    std = statistics.stdev(sims) if len(sims) > 1 else float("nan")
    return n_bad, mean, std


def _dissimilar_ratio(n_bad, n_success):
    """Return n_bad / n_success, or NaN when there were no successes."""
    return n_bad / n_success if n_success else float("nan")


# A successful molecule counts as "dissimilar" when its similarity is below
# this value.
bad_thresh = 0.5

# Index 1 of each sim_sum pair holds the sim_list[1] values gathered above.
sim_1 = sim_sum_dict[pickle_file_path_1]
sim_1_bad, sim_1_mean, sim_1_std = _similarity_stats(sim_1[1], bad_thresh)

sim_2 = sim_sum_dict[pickle_file_path_2]
sim_2_bad, sim_2_mean, sim_2_std = _similarity_stats(sim_2[1], bad_thresh)

sim_3 = sim_sum_dict[pickle_file_path_3]
sim_3_bad, sim_3_mean, sim_3_std = _similarity_stats(sim_3[1], bad_thresh)

sim_4 = sim_sum_dict[pickle_file_path_4]
sim_4_bad, sim_4_mean, sim_4_std = _similarity_stats(sim_4[1], bad_thresh)

print(complete_dict)

# (line prefix, mean, std, #dissimilar, results path) for each method. The
# prefixes reproduce the original report text exactly, including the missing
# colon on the ChatDrug line, so the output stays comparable with earlier logs.
report_rows = [
    ("Baseline mean:", sim_1_mean, sim_1_std, sim_1_bad, path_list[0]),
    ("LLM Planner mean:", sim_2_mean, sim_2_std, sim_2_bad, path_list[1]),
    ("RL Planner mean:", sim_3_mean, sim_3_std, sim_3_bad, path_list[2]),
    ("ChatDrug mean", sim_4_mean, sim_4_std, sim_4_bad, path_list[3]),
]
for prefix, mean, std, n_bad, results_path in report_rows:
    n_success = complete_dict[results_path]
    ratio = _dissimilar_ratio(n_bad, n_success)
    print(f"{prefix} {mean}, std: {std}, #Dissimilar: {n_bad}, #Success: {n_success}, ratio: {ratio}")

200
200
200
200
name: results/baseline/202/deepseek_baseline_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl, len: 173
name: results/llm_planner/202/deepseek_llm_planner_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl, len: 141
name: results/rl_planner/202/deepseek_rl_planner_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl, len: 167
name: results/ChatDrug/202/deepseek_chatdrug_loose_C_0_seed_0_single_v1/search_tree_total_info.pkl, len: 139
{'results/baseline/202/deepseek_baseline_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl': 173, 'results/llm_planner/202/deepseek_llm_planner_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl': 141, 'results/rl_planner/202/deepseek_rl_planner_loose_depth_1_gen_1_keep_1_mol_200_single_v1/search_tree_total_info.pkl': 167, 'results/ChatDrug/202/deepseek_chatdrug_loose_C_0_seed_0_single_v1/search_tree_total_info.pkl': 139}
Baseline mean: 

In [None]:
Task 201 loose
Baseline mean: 0.7486479575080639, std: 0.07244069395933145, bad: 0
LLM Planner mean: 0.7381296405475005, std: 0.08570368880134244, bad: 2
RL Planner mean: 0.7149992508358064, std: 0.09332126327685263, bad: 2
ChatDrug mean 0.5604758400011548, std: 0.20194130115530004, bad: 59

Task 201 strict
Baseline mean: 0.7132068833749506, std: 0.11005028388577724, bad: 0
LLM Planner mean: 0.6396884332778855, std: 0.08326603130439096, bad: 0
RL Planner mean: 0.6834007638327335, std: 0.08310701357503054, bad: 2
ChatDrug mean 0.546894551368688, std: 0.23128717901620152, bad: 37

Task 202 loose
Baseline mean: 0.6631046874073476, std: 0.07490959762754022, bad: 0
LLM Planner mean: 0.6691456874495781, std: 0.10245481284235387, bad: 3
RL Planner mean: 0.6546404692307448, std: 0.09111666651483336, bad: 4
ChatDrug mean 0.4452511754462071, std: 0.17687099227573533, bad: 78

Task 203 loose
Baseline mean: 0.72939684595242, std: 0.08775693726267762, bad: 2
LLM Planner mean: 0.7360300889992184, std: 0.07775170651859546, bad: 1
RL Planner mean: 0.6862792459137006, std: 0.09553104538730403, bad: 3
ChatDrug mean 0.5817965962012399, std: 0.19535129301232051, bad: 55
