**Notebook - Model Results**

This notebook gathers the cross-validated results from each CV fold that a model was evaluated on and pools the predictions, then calculates performance metrics across all test cases.

In [6]:
import pandas as pd
import glob
import re
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.metrics import (accuracy_score, f1_score, roc_auc_score, precision_score, auc,
                             recall_score, average_precision_score)
from modules.viz import conf_mat
from modules.eval_funcs import amalgamate_preds

# Number of cross-validation folds the models were evaluated on.
cv_folds = 5

In [13]:
def string_to_float_list(s, method='new'):
    """
    Parse a stringified probability vector into a list of floats.

    s: the string to parse (one cell of a 'proba' column).
    method: 'old' for the bracketed, whitespace-separated layout, e.g. '[0.1 0.2\\n 0.7]';
            any other value treats `s` as plain comma-separated numbers, e.g. '0.1,0.2,0.7'.

    Returns a list of floats. Raises ValueError if a token cannot be converted to float.
    """
    if method == 'old':
        # Remove both brackets and the outer whitespace, then split on any run of
        # whitespace and/or commas. This way padding after '[' , wrapped lines, or
        # whitespace after ']' never produce an empty token for float() to choke on.
        tokens = re.split(r'[\s,]+', s.replace('[', '').replace(']', '').strip())
    else:
        tokens = s.split(',')
    return [float(tok) for tok in tokens]

# Load Data:

In [18]:
def summary_stats(df):
    """
    Calculate summary statistics using combined CV results.

    df: predictions dataframe with a 'preds' column, a 'proba' column holding one
        list of per-class probabilities per row, and the true labels in either an
        'actuals' or an 'actual' column. The dataframe is not modified.

    Returns a dict with keys 'acc', 'f1', 'precision', 'recall', 'roc_auc' and 'aupr'.
    f1, precision, recall and roc_auc are macro-averaged; aupr is the unweighted mean
    of the per-class average precision.
    """
    # Rename on a copy: an in-place rename here would silently alter the caller's dataframe.
    if 'actual' in df.columns:
        df = df.rename(columns={'actual': 'actuals'})

    acc = accuracy_score(df.actuals, df.preds)
    f1 = f1_score(df.actuals, df.preds, average='macro')
    precision = precision_score(df.actuals, df.preds, average='macro', zero_division=0)
    recall = recall_score(df.actuals, df.preds, average='macro')

    # Stack the per-row probability lists into a 2-D array (rows x classes):
    pred_arr = np.array(df['proba'].tolist())
    roc_auc = roc_auc_score(df.actuals, pred_arr, average='macro', multi_class='ovr')

    # Calculate AUPR for each class (one-vs-rest).
    # NOTE(review): assumes labels are integer class indices that line up with the
    # column order of 'proba' - confirm against the code that writes the prediction files.
    aupr_scores = [average_precision_score(df.actuals == class_index, pred_arr[:, class_index]
                                          ) for class_index in range(pred_arr.shape[1])]
    mean_aupr = np.mean(aupr_scores)

    return {'acc': acc, 'f1': f1, 'precision': precision, 'recall': recall,
           'roc_auc': roc_auc, 'aupr': mean_aupr}

In [9]:
# Locations of model results:
# return_preds looks for one 'CV{i}_eval' sub-folder per fold inside each directory and
# inserts its own '/' separator, so a trailing slash on these paths is optional.
swin_img = 'results/swinv2/model_eval/'  # SwinV2 model results
cvf_fusion = 'results/fusion/model_eval/'  # CVF model results
ibp_mlp = 'results/IBP_MLP/model_eval'  # IBP MLP model results

In [8]:
# Load Kinase Inhibitor metadata:
ki_meta = pd.read_csv('data/cwp_ki_moa.csv')
# Show the (rows, columns) shape, then preview the first two rows.
print(ki_meta.shape)
ki_meta.head(2)

(19765, 13)


Unnamed: 0,Metadata_JCP2022,Metadata_InChIKey,Metadata_InChI,Metadata_Source,Metadata_Plate,Metadata_Well,Metadata_Batch,Metadata_PlateType,moa,target,smiles,clinical_phase,moa_src
0,JCP2022_037716,IVUGFMLRJOCGAS-UHFFFAOYSA-N,InChI=1S/C28H21N7OS/c1-17-15-24(37-16-17)25-20...,source_1,UL001783,B03,Batch5_20221030,COMPOUND,AURK inhibitor,AURKA|AURKB|AURKC,Cc1csc(c1)-c1nnc(Nc2ccc(Oc3ncccc3-c3ccnc(N)n3)...,Phase 1,dr_hub
1,JCP2022_037716,IVUGFMLRJOCGAS-UHFFFAOYSA-N,InChI=1S/C28H21N7OS/c1-17-15-24(37-16-17)25-20...,source_1,UL001783,B46,Batch5_20221030,COMPOUND,AURK inhibitor,AURKA|AURKB|AURKC,Cc1csc(c1)-c1nnc(Nc2ccc(Oc3ncccc3-c3ccnc(N)n3)...,Phase 1,dr_hub


## Return Prediction Dataframes:

In [11]:
def return_preds(pred_dir, method, cv_folds=5):
    """
    Returns the combined prediction dataframes from the cross-val test datasets for the specified model.

    pred_dir: directory holding one 'CV{i}_eval' sub-directory per fold, each with a
              'cpnd*.csv' and a 'well*.csv' predictions file.
    method: the method of storing proba data changed, so this reflects how it was stored
            when each model was run ('old' or 'new'); it is passed to string_to_float_list.
    cv_folds: number of folds to read (CV0 ... CV{cv_folds - 1}).

    Returns (cpnd_comb, well_comb): the per-fold frames concatenated in fold order with a
    fresh index, and with the 'proba' column parsed from strings into lists of floats.

    Raises IndexError if a fold directory has no matching predictions file.
    """
    fold_frames = {'cpnd': [], 'well': []}

    for i in range(cv_folds):
        for level, frames in fold_frames.items():
            pattern = f'{pred_dir}/CV{i}_eval/{level}*.csv'
            # glob returns matches in arbitrary order; sort so the chosen file is reproducible.
            matches = sorted(glob.glob(pattern))
            if not matches:
                # Same exception type a bare [0] lookup raises, but naming the missing pattern.
                raise IndexError(f'No prediction file matches {pattern!r}')
            frames.append(pd.read_csv(matches[0]))

    # Create combined dataframes of predictions across folds:
    combined = []
    for frames in fold_frames.values():
        comb = pd.concat(frames, axis=0).reset_index(drop=True)
        comb['proba'] = comb['proba'].apply(string_to_float_list, method=method)
        combined.append(comb)

    cpnd_comb, well_comb = combined
    return cpnd_comb, well_comb

# Summary Statistics:

## Swin Image:

In [14]:
# Pool the per-fold SwinV2 predictions; method='new' parses 'proba' as comma-separated values.
swin_cpnd, swin_well = return_preds(swin_img, method='new')

- Performance metrics at a compound level:

In [19]:
# amalgamate_preds (modules.eval_funcs) presumably collapses the rows to one per
# 'cpnd_ids' value before scoring - verify against its definition.
swin_cpnd_df = amalgamate_preds(swin_cpnd, 'cpnd_ids')
summary_stats(swin_cpnd_df)

{'acc': 0.6666666666666666,
 'f1': 0.6958562413939988,
 'precision': 0.7223717948717948,
 'recall': 0.6891504410622058,
 'roc_auc': 0.8903891744625895,
 'aupr': 0.6743125651910769}

- Performance metrics at a well level:

In [34]:
# Metrics computed directly on the pooled well-level predictions.
summary_stats(swin_well)

{'acc': 0.5086614173228347,
 'f1': 0.5156309749533527,
 'precision': 0.5299316917556887,
 'recall': 0.5102600411926476,
 'roc_auc': 0.8293556964132762,
 'aupr': 0.5012733597895953}

## IBP MLP:

In [75]:
# Pool the per-fold IBP MLP predictions; method='old' parses the bracketed, whitespace-separated 'proba' format.
ibp_cpnd, ibp_well = return_preds(ibp_mlp, method='old')

In [76]:
# Compound-level metrics.
# NOTE(review): unlike the Swin and Fusion sections, ibp_cpnd is scored without first going
# through amalgamate_preds(..., 'cpnd_ids') - confirm these predictions are already one row per compound.
summary_stats(ibp_cpnd)

{'acc': 0.6559139784946236,
 'f1': 0.6507266544197491,
 'precision': 0.6806501831501831,
 'recall': 0.6454739704739705,
 'roc_auc': 0.8944605757519939,
 'aupr': 0.6763566642321532}

In [77]:
# Metrics computed directly on the pooled well-level predictions.
summary_stats(ibp_well)

{'acc': 0.4861111111111111,
 'f1': 0.49803206495679947,
 'precision': 0.5046762372918698,
 'recall': 0.49432053944028914,
 'roc_auc': 0.8328365544673989,
 'aupr': 0.5138306028276413}

## Cell-Vision Fusion:

In [23]:
# Pool the per-fold CVF predictions; method='old' parses the bracketed, whitespace-separated 'proba' format.
fuse_cpnd, fuse_well = return_preds(cvf_fusion, method='old')

In [25]:
# amalgamate_preds (modules.eval_funcs) presumably collapses the rows to one per
# 'cpnd_ids' value before scoring - verify against its definition.
fuse_cpnd_df = amalgamate_preds(fuse_cpnd, 'cpnd_ids')
summary_stats(fuse_cpnd_df)

{'acc': 0.6979166666666666,
 'f1': 0.7056240981240982,
 'precision': 0.733980405559353,
 'recall': 0.6986832122126241,
 'roc_auc': 0.9073359725095406,
 'aupr': 0.741223030335291}

In [26]:
# Metrics computed directly on the pooled well-level predictions.
summary_stats(fuse_well)

{'acc': 0.573228346456693,
 'f1': 0.5661536124758205,
 'precision': 0.5833149743472675,
 'recall': 0.5622480545889381,
 'roc_auc': 0.8671299891316183,
 'aupr': 0.6076237281421811}