In [None]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# re-imported before each cell executes, so edits to project code on disk take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
import ogb
from tqdm import tqdm
import hiplot as hip
from copy import deepcopy
import datetime

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch.utils.data import Subset, TensorDataset

In [None]:
# Locate the notebook's working directory and its parent, then make the parent
# importable (the `deepadr` import further down relies on this path entry).
cwd = os.getcwd()
print(cwd)
cwd_parent = os.path.abspath(os.path.join(cwd, os.pardir))
print(cwd_parent)

# Guarded so that re-running this cell does not keep stacking duplicate
# entries onto sys.path.
if cwd_parent not in sys.path:
    sys.path.append(cwd_parent)

In [None]:
import deepadr
from deepadr.dataset import *
from deepadr.utilities import *
from deepadr.run_workflow import *
from deepadr.chemfeatures import *
from deepadr.hyphelperflat import *
from deepadr.model_gnn_ogb import GNN, DeepAdr_SiameseTrf, ExpressionNN
from ogb.graphproppred import Evaluator

In [None]:
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw

In [None]:
# Relative locations used by the rest of the notebook (resolved against the
# kernel's working directory).
up_dir = '..'
rawdata_dir = '../data/raw/'
processed_dir = '../data/processed/'

In [None]:
# Count of CUDA devices reported by torch; displayed as the cell output.
n_gpu = torch.cuda.device_count()
n_gpu

In [None]:
# Device handle requested with to_gpu=False. `get_device` is not defined in
# this notebook; presumably it comes from one of the `deepadr` star imports
# (TODO confirm which module).
device_cpu = get_device(to_gpu=False)
# device_gpu = get_device(True, index=0)

In [None]:
# Record the library / interpreter versions this notebook was run with.
for label, version in (("torch:", torch.__version__), ("CUDA:", torch.version.cuda)):
    print(label, version)
print(sys.version)

### Preparing dataset 

In [None]:
# Score / threshold pair; both values are interpolated into the dataset name
# built in the next cell.
# Valid combinations:
#   'total_thresh' with 4, 3 or 2
#   'loewe_thresh', 'hsa_thresh', 'bliss_thresh', 'zip_thresh' with 1
score, score_val = 'zip_thresh', 1

In [None]:
# Version tag of the processed data folder.
data_fname = 'data_v4'  # v2 for baseline models, v3 for additive samples

# Dataset folder name derived from the selected score and threshold.
DSdataset_name = 'DrugComb_{}_{}'.format(score, score_val)

In [None]:
# Root folder for this dataset/version, followed by its three sub-folders.
# Each path is passed through `create_directory` and the returned value kept.
targetdata_dir = create_directory(os.path.join(processed_dir, DSdataset_name, data_fname))
targetdata_dir_raw, targetdata_dir_processed, targetdata_dir_exp = (
    create_directory(os.path.join(targetdata_dir, subdir))
    for subdir in ("raw", "processed", "experiments")
)
# (disabled) ReaderWriter.dump_data(dpartitions, os.path.join(targetdata_dir, 'data_partitions.pkl'))
print(targetdata_dir)

### Hyper Parameters

In [None]:
# Timestamp suffix of the run to analyse; the cells below use it to match
# `fold_*_<time_stamp>` directories and to name the `all_fold_<time_stamp>`
# output folder.
time_stamp = "2022-09-14_17-49-43"

In [None]:
import glob

# One directory per cross-validation fold of the selected run.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to keep the fold order -- and every frame concatenated from it -- reproducible.
exp_dirs = sorted(glob.glob(os.path.join(targetdata_dir_exp, "fold_*_" + time_stamp)))
len(exp_dirs)

In [None]:
# Load the per-epoch curves of every fold and tag each row with its fold id.
folds = []

for edir in exp_dirs:
    # Directory names look like "fold_<k>_<time_stamp>". Take the last path
    # component with os.path so this also works when the path contains
    # OS-native separators (splitting on "/" picks the wrong piece there).
    fold = os.path.basename(os.path.normpath(edir)).split("_")[1]
    df_curves = pd.read_csv(os.path.join(edir, "curves.csv"))
    df_curves["fold"] = fold  # scalar is broadcast to every row
    folds.append(df_curves)

In [None]:
# Stack the per-fold curve tables row-wise into a single frame.
df_folds = pd.concat(folds, axis=0)
df_folds

In [None]:
# Average the test metrics over folds for every epoch.
df_folds_epoch = df_folds.groupby("epoch")[["test_aupr", "test_auc"]].mean()

# Harmonic mean of AUPR and AUC (epsilon avoids division by zero).
# Computed column-wise with label-based access: integer keys such as x[0] on a
# label-indexed Series are deprecated positional lookups in recent pandas.
mean_aupr = df_folds_epoch["test_aupr"]
mean_auc = df_folds_epoch["test_auc"]
df_folds_epoch["Fscore"] = (2 * mean_aupr * mean_auc) / (mean_aupr + mean_auc + 1e-7)
df_folds_epoch

In [None]:
# Epoch (index label) with the highest fold-averaged Fscore, and its metrics.
best_epoch = df_folds_epoch["Fscore"].idxmax()
df_folds_epoch.loc[best_epoch]

### Predictions (Best Epoch)

In [None]:
print("best epoch:", best_epoch)

# Path of the best-epoch test predictions, relative to each fold directory.
pred_suffix = f"/predictions/epoch_{best_epoch}_predictions_test.csv"

# Read the test-set predictions of every fold at that epoch.
l_pred = []
for edir in exp_dirs:
    predictions = pd.read_csv(edir + pred_suffix)
    l_pred.append(predictions)

In [None]:
# Stack the per-fold predictions, then index them by integer sample id.
df_pred = pd.concat(l_pred, axis=0)
df_pred = df_pred.astype({"id": int}).set_index("id")
df_pred

### DATA

In [None]:
# Load the pickled pair table from this dataset's raw folder.
data_pairs_path = os.path.join(targetdata_dir_raw, 'data_pairs.pkl')
data_pairs = ReaderWriter.read_data(data_pairs_path)
data_pairs

In [None]:
# Attention weights are optional and are not produced anywhere in this
# notebook. Look the name up instead of referencing it directly, so that a
# fresh kernel (Restart & Run All) does not fail with NameError.
df_attn = globals().get("df_attn")

# Join pair metadata, (optional) attention weights and predictions column-wise;
# pd.concat aligns the frames on their index.
frames = [data_pairs, df_pred] if df_attn is None else [data_pairs, df_attn, df_pred]
df_all = pd.concat(frames, axis=1)

df_all = df_all.astype({"true_class": int})
df_all

In [None]:
# Output folder for results aggregated over all folds of this run.
all_folds_dirname = f"all_fold_{time_stamp}"
dir_fold_all = create_directory(os.path.join(targetdata_dir_exp, all_folds_dirname))
print(dir_fold_all)

In [None]:
# Persist the combined table into the all-folds output folder.
out_fname = f'drugcomb_attn_predictions_{score}_{score_val}.pkl'
ReaderWriter.dump_data(df_all, os.path.join(dir_fold_all, out_fname))