In [1]:
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults, applied in a single update call
plt.rcParams.update({
    'lines.linewidth': 2,
    'figure.dpi': 500,  # Resolution of figures
    'figure.autolayout': True,
    'legend.loc': 'best',
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
})

In [2]:
# Fingerprint identifiers, spelled as they appear in the results filenames
fp_types = [
    'ecfp4',
    'atom_pair',
    'mordred',
    'MAP4',
    'MACCS',
]

# Model identifiers, spelled as they appear in the results filenames
model_types = [
    'XGBoost',
    'Logistic',
    'SVM',
    'Random_forest',
]

In [3]:
# Collect the mean AUPRC of every (fingerprint, model) combination, once from the
# validation results file and once from the test results file.

# Location of one results file; only the fingerprint, model and split vary.
RESULTS_PATH_TEMPLATE = (
    '../../models/performance_results/'
    'all_AdH_{fp_type}_{model_type}_4_by_descending_MW_{split}_performance_results.json'
)


def _load_mean_auprc(fp_type, model_type, split):
    """Read one performance-results JSON file and return its 'mean AUPRC' entry.

    Args:
        fp_type: Fingerprint identifier used in the results filename.
        model_type: Model identifier used in the results filename.
        split: Filename component selecting the split ('val' or 'test').

    Returns:
        The value stored under the 'mean AUPRC' key of the JSON document.

    Raises:
        FileNotFoundError: If the results file for this combination does not exist.
        KeyError: If the JSON document has no 'mean AUPRC' key.
    """
    results_filepath = RESULTS_PATH_TEMPLATE.format(
        fp_type=fp_type, model_type=model_type, split=split)
    with open(results_filepath, 'r') as results_file:
        return json.load(results_file)['mean AUPRC']


# Initialize the lists that store one record per (fingerprint, model) pair
val_results = []
test_results = []

for fp_type in fp_types:
    for model_type in model_types:
        # Validation is read before test for each pair, so a missing file
        # surfaces at the same point as it would with two separate reads.
        for split, records in (('val', val_results), ('test', test_results)):
            records.append({
                'Fingerprint': fp_type,
                'Model': model_type,
                'AUPRC': _load_mean_auprc(fp_type, model_type, split)})

In [4]:
# Turn each list of per-combination records into a DataFrame
val_results_df, test_results_df = (
    pd.DataFrame(records) for records in (val_results, test_results)
)

In [5]:
# Display the validation results table (one row per fingerprint/model pair)
val_results_df

Unnamed: 0,Fingerprint,Model,AUPRC
0,ecfp4,XGBoost,0.967139
1,ecfp4,Logistic,0.966107
2,ecfp4,SVM,0.951401
3,ecfp4,Random_forest,0.936186
4,atom_pair,XGBoost,0.980237
5,atom_pair,Logistic,0.953715
6,atom_pair,SVM,0.91299
7,atom_pair,Random_forest,0.937891
8,mordred,XGBoost,0.908277
9,mordred,Logistic,0.894177


In [6]:
# Swap raw fingerprint identifiers for their display names;
# values without an entry in the mapping are left as they are.
fingerprint_display_names = {
    'ecfp4': 'ECFP4',
    'atom_pair': 'Atom Pair',
    'mordred': 'Mordred',
}
val_results_df['Fingerprint'] = val_results_df['Fingerprint'].replace(fingerprint_display_names)

In [7]:
# Give the random-forest rows a human-readable model label
model_display_names = {'Random_forest': 'Random Forest'}
val_results_df['Model'] = val_results_df['Model'].replace(model_display_names)

In [8]:
# Reshape the long-format results into a Model x Fingerprint matrix of AUPRC values.
# Keyword arguments are required here: positional arguments to DataFrame.pivot
# were deprecated in pandas 1.5 and raise a TypeError from pandas 2.0 onward.
val_matrix_df = val_results_df.pivot(index="Model", columns="Fingerprint", values="AUPRC")

  val_matrix_df = val_results_df.pivot("Model", "Fingerprint", "AUPRC")


In [9]:
# Display the pivoted validation matrix (rows: Model, columns: Fingerprint)
val_matrix_df

Fingerprint,Atom Pair,ECFP4,MACCS,MAP4,Mordred
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Logistic,0.953715,0.966107,0.891697,0.943227,0.894177
Random Forest,0.937891,0.936186,0.931079,0.947986,0.869626
SVM,0.91299,0.951401,0.931271,0.895276,0.814053
XGBoost,0.980237,0.967139,0.924369,0.94682,0.908277


In [None]:
# Render the validation AUPRC matrix as an annotated Seaborn heatmap
plt.figure(figsize=(10, 8))  # Adjust the size as needed
heatmap = sns.heatmap(val_matrix_df, annot=True, fmt=".3f", cmap="Blues")

# Style the axes through the Axes object that sns.heatmap returns
heatmap.set_ylabel('Architecture', fontsize=13)
plt.setp(heatmap.get_yticklabels(), rotation=0, fontsize=12)

heatmap.set_xlabel('Fingerprints', fontsize=13)
plt.setp(heatmap.get_xticklabels(), fontsize=12)
heatmap.set_title('AUPRC of Bayesian optimized alcohol \n dehydrogenase classifier on validation set', fontsize=14)

# Write the figure to disk first, then show it inline
plt.savefig('SI_fig7A.png')
plt.show()

In [None]:
# Swap raw fingerprint identifiers for their display names;
# values without an entry in the mapping are left as they are.
fingerprint_display_names = {
    'ecfp4': 'ECFP4',
    'atom_pair': 'Atom Pair',
    'mordred': 'Mordred',
}
test_results_df['Fingerprint'] = test_results_df['Fingerprint'].replace(fingerprint_display_names)

In [None]:
# Give the random-forest rows a human-readable model label
model_display_names = {'Random_forest': 'Random Forest'}
test_results_df['Model'] = test_results_df['Model'].replace(model_display_names)

In [None]:
# Reshape the long-format results into a Model x Fingerprint matrix of AUPRC values.
# Keyword arguments are required here: positional arguments to DataFrame.pivot
# were deprecated in pandas 1.5 and raise a TypeError from pandas 2.0 onward.
test_matrix_df = test_results_df.pivot(index="Model", columns="Fingerprint", values="AUPRC")

In [None]:
# Display the pivoted test matrix (rows: Model, columns: Fingerprint)
test_matrix_df

In [None]:
# Render the test AUPRC matrix as an annotated Seaborn heatmap
plt.figure(figsize=(10, 8))  # Adjust the size as needed
heatmap = sns.heatmap(test_matrix_df, annot=True, fmt=".3f", cmap="Blues")

# Style the axes through the Axes object that sns.heatmap returns
heatmap.set_ylabel('Architecture', fontsize=13)
plt.setp(heatmap.get_yticklabels(), rotation=0, fontsize=12)

heatmap.set_xlabel('Fingerprints', fontsize=13)
plt.setp(heatmap.get_xticklabels(), fontsize=12)
heatmap.set_title('AUPRC of Bayesian optimized alcohol \n dehydrogenase classifier on test set', fontsize=14)

# Write the figure to disk first, then show it inline
plt.savefig('SI_fig7B.png')
plt.show()