In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import numpy as np
from torch.utils.data import Dataset, DataLoader
import glob
import wandb
import os
import torch.optim as optimizers

In [None]:
import dfs_code
from torch_geometric.data import InMemoryDataset, Data
import pickle
import torch
import torch.nn as nn
import random
import tqdm
import copy
from chemprop.args import TrainArgs, PredictArgs
from chemprop.train import cross_validate, run_training, make_predictions
from chemprop.data import get_data
import pandas as pd
from sklearn.metrics import roc_auc_score, average_precision_score

In [None]:
# Open the Weights & Biases run; its config object stores the experiment settings.
wandb.init(entity='chrisxx', project='moleculenet10')
config = wandb.config

# Seed handed to the Python, torch and NumPy RNGs in the seeding cell.
config.seed = 123

# Dataset name; it is substituted into both directory paths below.
config.dataset = 'bbbp' # supported 'bbbp', 'clintox', 'hiv', 'tox21'

# Per-dataset input and output folders. Both keep their trailing '/' because
# the training cell appends '<rep>/<file>' to them directly.
config.data_dir = '../../datasets/mymoleculenet/%s/'%config.dataset
config.model_dir = '../../models/moleculenet/chemprop_mpnn/%s/'%config.dataset

In [None]:
# Seed the Python, torch and NumPy random number generators (in that order)
# from the single configured value.
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(config.seed)

In [None]:
# For each of the 10 pre-made splits: train a chemprop classifier, predict on
# that split's test set, and record the test ROC-AUC and average precision.
rocs, prcs = [], []
for rep in range(10):
    # Folders of this repetition; both end in '/' so file names can be appended.
    rep_data_dir = '%s%d/'%(config.data_dir, rep)
    rep_model_dir = '%s%d/'%(config.model_dir, rep)
    test_csv = rep_data_dir + 'test.csv'

    # Training uses the split's own train/valid/test CSVs on CPU.
    # NOTE(review): no seed option is passed here, so chemprop presumably falls
    # back to its own default seeds rather than config.seed -- confirm.
    train_args = [
        '--data_path', rep_data_dir + 'train.csv',
        '--separate_val_path', rep_data_dir + 'valid.csv',
        '--separate_test_path', test_csv,
        '--dataset_type', 'classification',
        '--save_dir', rep_model_dir,
        '--num_folds', '1',
        '--smiles_columns', 'smiles',
        '--target_columns', 'target',
        '--no_cuda',
    ]

    # Make sure the checkpoint folder exists before training writes into it.
    os.makedirs(rep_model_dir, exist_ok=True)
    parsed_train_args = TrainArgs().parse_args(train_args)
    score = cross_validate(args=parsed_train_args, train_func=run_training)

    # Prediction reloads the checkpoints saved above and also writes pred.csv
    # next to them.
    test_args = [
        '--test_path', test_csv,
        '--preds_path', rep_model_dir + 'pred.csv',
        '--checkpoint_dir', rep_model_dir,
        '--num_workers', '0',
        '--smiles_columns', 'smiles',
        '--no_cuda',
    ]
    parsed_test_args = PredictArgs().parse_args(test_args)
    preds = make_predictions(args=parsed_test_args)
    # Keep the first prediction column only (presumably the single 'target'
    # task -- confirm against the dataset).
    preds = np.asarray(preds)[:, 0]

    # Score the predictions against the labels stored in the test CSV.
    gt = pd.read_csv(test_csv)
    labels = gt['target'].to_numpy()
    roc = roc_auc_score(labels, preds)
    prc = average_precision_score(labels, preds)
    print(roc, prc)
    wandb.log({'roc_test_avg20':roc, 'prc_test_avg20':prc})
    rocs.append(roc)
    prcs.append(prc)
# Aggregate the per-repetition test metrics once. np.std is called with its
# defaults, i.e. the population standard deviation (ddof=0).
roc_mean, roc_std = np.mean(rocs), np.std(rocs)
prc_mean, prc_std = np.mean(prcs), np.std(prcs)

# Publish the aggregates as one logged step ...
wandb.log({'roc_test_avgavg': roc_mean,
           'roc_test_avgstd': roc_std,
           'prc_test_avgavg': prc_mean,
           'prc_test_avgstd': prc_std})

# ... and as entries of the run summary.
for summary_key, summary_value in (("roc_test_mean", roc_mean),
                                   ("roc_test_std", roc_std),
                                   ("prc_test_mean", prc_mean),
                                   ("prc_test_std", prc_std)):
    wandb.run.summary[summary_key] = summary_value