In [21]:
import rdkit
import os
import copy
import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import pandas as pd
from sklearn.utils import class_weight
from modules.utils import prepare_data
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import classification_report
from modules.viz import conf_mat
from modules.utils import load_cv
from modules.model_utils import EarlyStopper
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, average_precision_score
from sklearn.model_selection import RandomizedSearchCV
import itertools

rand_seed = 42
moa_dict = {'PI3K' : 0, 'p38 MAPK': 1, 'RAF': 2, 'AURK': 3, 'CDK': 4, 'EGFR': 5, 'ROCK': 6,
             'MEK': 7, 'GSK': 8, 'mTOR': 9}

Following source code contained here:

    https://github.com/pharmbio/CP-Chem-MoA/blob/main/Compound_structure_based_models/MLP.ipynb
    

# Static Variables:

# Load Data:

In [3]:
ki_ibp = pd.read_csv('data/ibp/ki_ibp.csv')
print(ki_ibp.shape)
ki_ibp.head(2)

(635, 4778)


Unnamed: 0,Metadata_Source,Metadata_Plate,Metadata_Well,Cells_AreaShape_Area,Cells_AreaShape_BoundingBoxArea,Cells_AreaShape_BoundingBoxMaximum_X,Cells_AreaShape_BoundingBoxMaximum_Y,Cells_AreaShape_BoundingBoxMinimum_X,Cells_AreaShape_BoundingBoxMinimum_Y,Cells_AreaShape_Center_X,...,smiles,clinical_phase,moa_src,Metadata_JCP2022,Metadata_InChIKey,Metadata_PlateType,blur_score,sat_score,focus_score,comp_score
0,source_3,JCPQC023,G14,3227.817708,5310.328125,589.21875,541.552083,519.484375,471.942708,553.446757,...,Nc1cc(c(cn1)-c1cc(nc(n1)N1CCOCC1)N1CCOCC1)C(F)...,Phase 3,dr_hub,JCP2022_013856,CWHUFRVAEUJCEF-UHFFFAOYSA-N,TARGET2,0.430742,0.453621,0.517562,1.401925
1,source_4,BR00121424,G14,4255.3,7338.3,572.7,554.5,488.42,470.49,530.26,...,Nc1cc(c(cn1)-c1cc(nc(n1)N1CCOCC1)N1CCOCC1)C(F)...,Phase 3,dr_hub,JCP2022_013856,CWHUFRVAEUJCEF-UHFFFAOYSA-N,TARGET2,0.436727,0.144924,0.386009,0.967661


In [4]:
# Load CV split data:
folds = 5
cv_path = 'data/cv_val_split/'
cv_data = load_cv(cv_path, folds, ki_ibp, moa_dict, norm=None)

# Convert to Fingerprints:

## Morgan Fingerprints:

In [5]:
# change SMILES to Morgan Fingerprints 
def morgan_fprints(smiles):
    molecules = Chem.MolFromSmiles(smiles) 
    fingerprints = AllChem.GetMorganFingerprintAsBitVect(molecules, 2)
    x_array = []
    arrays = np.zeros(0,)
    DataStructs.ConvertToNumpyArray(fingerprints, arrays)
    x_array.append(arrays)
    x_array = np.asarray(x_array)
    x_array = ((np.squeeze(x_array)).astype(int))
    
    return x_array

## Convert Fingerprints:

In [6]:
def convert_fprints(smiles, fprint_type):
    # Convert to cannonical smiles
    can_smiles = [Chem.MolToSmiles(Chem.MolFromSmiles(smi), True) for smi in smiles]
    
    # Convert canonical smiles to fingerprints:
    fprints = np.zeros((len(can_smiles), 2048), dtype = np.float32)
    
    for f in range(fprints.shape[0]):
        fprints[f] = morgan_fprints(can_smiles[f])

    return torch.tensor(fprints)

# Create Class Weights:

In [7]:
def get_class_weights(y_train):
    # List of unique y_values:
    y_unique = np.unique(np.array(y_train))

    # Computing class weights based on data:
    class_weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = y_unique,
                    y = np.array(y_train)) 

    # Create a dictionary of weights:
    class_weights_dict = dict(enumerate(class_weights))
    
    return class_weights_dict

# Create Model:
Adapted the code to pytorch from its original Tensorflow format.

Source: https://github.com/pharmbio/CP-Chem-MoA/blob/main/Compound_structure_based_models/MLP.ipynb

In [8]:
class MlpModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout):
        super(MlpModel, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.layer2(out)
        out = self.softmax(out)
        return out

# Model Training:

### Metric Function(s):

In [9]:
# Define the metrics (accuracy)
def calc_acc(output, target):
    _, predicted = torch.max(output, 1)
    correct = (predicted == target).sum().item()
    total = target.size(0)
    return (correct / total)*100

### Training Loop:

In [10]:
def train_mlp(model, train_loader, valid_loader, criterion, optimizer, num_epochs, 
          checkpoint_mlp, reduce_lr_loss, early_stopper, verbose, criteria):
    # Setting initial minimum val. loss to infinity:
    min_val_loss = np.Inf
    max_val_acc = 0
    
    for epoch in range(num_epochs):
        train_loss_epoch = 0
        train_acc_epoch = 0
        val_loss_epoch = 0
        val_acc_epoch = 0
        
        # ---- Set model to TRAINING mode ---- #
        model.train()
        
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            y_pred_batch = model(inputs)
            train_batch_loss = criterion(y_pred_batch, targets.to(torch.int64))
            train_batch_acc = calc_acc(y_pred_batch, targets.to(torch.int64))
            train_batch_loss.backward()
            optimizer.step()
            train_loss_epoch += train_batch_loss.item()
            train_acc_epoch += train_batch_acc
        
        model.eval()
        valid_loss = 0.0
        valid_acc = 0.0
        
        with torch.no_grad():
            for inputs, targets in valid_loader:
                val_pred_batch = model(inputs)
                val_batch_loss = criterion(val_pred_batch, targets.to(torch.int64))
                val_batch_acc = calc_acc(val_pred_batch, targets.to(torch.int64))
                
                val_loss_epoch += val_batch_loss.item()
                val_acc_epoch += val_batch_acc
        
        # ---------------------
        #  Log Results
        # ---------------------
        avg_train_loss = train_loss_epoch / len(train_loader)
        avg_train_acc = train_acc_epoch / len(train_loader)
        avg_val_loss = val_loss_epoch / len(valid_loader)
        avg_val_acc = val_acc_epoch / len(valid_loader)
        
        if verbose == 1:
            print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} |'
                  f'Train Acc: {avg_train_acc:.2f} | Valid Loss: {avg_val_loss:.4f} | '
                  f'Valid Acc: {avg_val_acc:.2f}')

        # Check if the current validation loss/accuracy is the best so far
        if criteria == 'loss':
            if avg_val_loss < min_val_loss:
                min_val_loss = avg_val_loss
                torch.save(model.state_dict(), checkpoint_mlp['file_path'])
        elif criteria == 'acc':
            if avg_val_acc > max_val_acc:
                max_val_acc = avg_val_acc
                torch.save(model.state_dict(), checkpoint_mlp['file_path'])

        # Adjust learning rate using the learning rate scheduler
        reduce_lr_loss.step(avg_val_loss)
        
        # Break loop if 
        if early_stopper.early_stop(avg_val_loss):
            print("Training stopped early due to early stopper.")
            break
    
    if criteria == 'loss':
        print('Training completed...\nLowest Val Loss: %s.' % min_val_loss)
        return min_val_loss
    elif criteria == 'acc':
        print('Training completed...\nHighest Val Acc.: %s.' % max_val_acc)
        return max_val_acc

# Create Dataset and Dataloaders:

In [11]:
class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        x = self.data[index]
        y = self.labels[index]
        return x, y

# Train Model:

In [156]:
def summary_stats(df):
    """
    Return performance stats for the model
    """
    if 'actual' in df.columns:
        df.rename(columns={'actual': 'actuals'}, inplace=True)
        
    acc = accuracy_score(df.actuals, df.preds)
    f1 = f1_score(df.actuals, df.preds, average='macro')
    precision = precision_score(df.actuals, df.preds, average='macro')
    recall = recall_score(df.actuals, df.preds, average='macro')
    
    return {'acc': acc, 'f1': f1, 'precision': precision, 'recall': recall}

## Static Variables:

In [155]:
run_dir = 'output/Struct_MLP/'
if not os.path.exists(run_dir):
        os.makedirs(run_dir)

class Args:
    def __init__(self):
        self.num_classes = 10
        self.num_epochs = 1000
        self.batch_size = 8
        self.input_size = 2048   # length of morgan fingerprints, don't change
        self.hidden_size = 64
        self.lr = 1e-3
        self.lrd_fac = 0.2
        self.lr_pat = 15
        self.lr_min = 1e-12
        self.eq_weights = False
        self.opt_wd = 1e-1
        self.dropout = 0.6
        self.early_stop = 50
        self.criteria = 'loss' # choices ['loss', 'acc']
        
args = Args()

with open('{}/args_vars.txt'.format(run_dir), 'w') as file:
    for key, value in vars(args).items():
        file.write(f"{key}: {value}\n")

## Training Loop:

In [None]:
cv_res = []
res_dicts = []

for f in range(folds):
   
    # ---------------------
    #  LOAD DATA
    # ---------------------
    # Training and validation data:
    train_meta, val_meta = cv_data[f]['train_meta'], cv_data[f]['val_meta']
    
    # Remove duplicate smiles caused by replicates:
    tm_no_dupes = train_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    vm_no_dupes = val_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    
    # Convert smiles in meta data to morgan fingerprints:
    train_x = convert_fprints(tm_no_dupes.smiles.to_list(), 'morgan')
    val_x = convert_fprints(vm_no_dupes.smiles.to_list(), 'morgan')
    
    # Return moa labels:
    y_train = np.array([moa_dict[element] for element in tm_no_dupes.moa.tolist()])
    y_val = np.array([moa_dict[element] for element in vm_no_dupes.moa.tolist()])
    
    # Return dictionary of relative class weights:
    class_weights_dict = get_class_weights(y_train)
    
    # Convert weights dict to tensor:
    class_weights = torch.tensor(list(class_weights_dict.values()), dtype=torch.float32)
    
    # ---------------------
    
    #  CREATE DATA OBJECTS
    # ---------------------
    # Create Pytorch dataset:
    train_dataset = MyDataset(train_x, y_train)
    val_dataset = MyDataset(val_x, y_val)
    
    # Create data loaders for training and validation datasets
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
    
    # ---------------------
    #  INIT. MODEL
    # ---------------------
    model = MlpModel(args.input_size, args.hidden_size, args.num_classes, args.dropout)
#     print(model)

    # Define the optimizer and loss function
    optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.opt_wd)
    if args.eq_weights:
        criterion = nn.CrossEntropyLoss(weight=class_weights)
    else:
        criterion = nn.CrossEntropyLoss()

    # Define the checkpoint path
    filepath_mlp = os.path.join(run_dir, f'CV_Fold{f}.pth')

    # Define the model checkpoint
    if args.criteria == 'loss':
        checkpoint_mlp = {
            'file_path': filepath_mlp, 'monitor': 'val_loss',
            'verbose': 0,'save_best_only': True,'mode': 'min'}
    elif args.criteria == 'acc':
        checkpoint_mlp = {
            'file_path': filepath_mlp, 'monitor': 'val_acc',
            'verbose': 0,'save_best_only': True,'mode': 'max'}

    # Define the learning rate scheduler
    reduce_lr_loss = ReduceLROnPlateau(optimizer, factor=args.lrd_fac, patience=args.lr_pat, 
                                   verbose=1, mode='min', min_lr=args.lr_min)
    
    # Define early stopping:
    early_stopper = EarlyStopper(patience=args.early_stop, min_delta=1e-5)
    
    # ---------------------
    #  TRAIN MODEL
    # ---------------------
    train_mlp(model, train_loader, valid_loader, criterion, optimizer, args.num_epochs, 
          checkpoint_mlp, reduce_lr_loss, early_stopper, verbose=1, criteria=args.criteria)
    
    # ---------------------
    #  EVALUATE MODEL
    # ---------------------
    # Load test data:
    test_meta = cv_data[f]['test_meta']
    test_no_dupes = test_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    test_x = convert_fprints(test_no_dupes.smiles.to_list(), 'morgan')
    y_test = np.array([moa_dict[element] for element in test_no_dupes.moa.tolist()])
    
    # Load Trained Model:
    eval_model = MlpModel(args.input_size, args.hidden_size, args.num_classes, args.dropout)
    eval_model.load_state_dict(torch.load(checkpoint_mlp['file_path']))

    # Evaluate Model on test set:
    eval_model.eval()
            
    with torch.no_grad():
        outputs = eval_model(test_x)
        test_acc = calc_acc(outputs, torch.tensor(y_test))
        print(f"Cpnd-level Test Accuracy = {test_acc:.2f}%")
        preds = torch.argmax(outputs, dim=1).numpy()
        
    # Saving results:
    test_proba = []
    [test_proba.append(p.detach().cpu().numpy()) for p in outputs]
    res_df = pd.DataFrame({'preds': preds, 'proba': test_proba, 'actual': y_test, 
                      'cpnd_ids': test_no_dupes.Metadata_JCP2022})
    res_df['proba'] = res_df['proba'].apply(lambda x: ', '.join(map(str, x)))
    ss_dict = summary_stats(res_df)
    
    # Log cv results:
    res_dicts.append(ss_dict)
    cv_res.append(res_df)
    cm_save_loc = os.path.join(run_dir, f"CV{f}_CM_{test_acc:.2f}.png")
    conf_mat(y_test, preds, f'Structural MLP - Fold {f}\n', cm_save_loc)

## Amalgamate Model Results (across folds):

In [27]:
# Concatenate the cross-validated results across folds:
cpnd_comb = pd.concat(cv_res, axis=0).reset_index(drop=True)
cpnd_comb.head()

Unnamed: 0,preds,proba,actuals,cpnd_ids
0,0,"0.7228253, 0.11624495, 0.06420749, 0.024424778...",2,JCP2022_006029
1,1,"0.2522545, 0.3364648, 0.022225024, 0.08587418,...",1,JCP2022_009919
2,5,"0.0035663005, 0.0069937008, 0.009755337, 0.335...",5,JCP2022_002910
3,1,"0.039721202, 0.61459315, 0.02256958, 0.0512067...",1,JCP2022_023860
4,4,"0.03488773, 0.08748526, 0.037528146, 0.1476160...",4,JCP2022_032771


In [34]:
def string_to_float_list(s):
    return [float(val) for val in s.split(',')]

# Convert probability column back into floats:
cpnd_comb['proba'] = cpnd_comb['proba'].apply(string_to_float_list)

In [35]:
# Calculate performance metrics at a compound-level:
acc = accuracy_score(cpnd_comb.actuals, cpnd_comb.preds)
f1 = f1_score(cpnd_comb.actuals, cpnd_comb.preds, average='macro')
precision = precision_score(cpnd_comb.actuals, cpnd_comb.preds, average='macro', zero_division=0)
recall = recall_score(cpnd_comb.actuals, cpnd_comb.preds, average='macro')

# Create a prediction array from probs:
proba_arr = np.array(cpnd_comb['proba'].tolist())
roc_auc = roc_auc_score(cpnd_comb.actuals, proba_arr, average='macro', multi_class='ovr')

# Calculate AUPR for each class
aupr_scores = [average_precision_score(cpnd_comb.actuals == class_index, proba_arr[:, class_index]
                                       ) for class_index in range(proba_arr.shape[1])]
mean_aupr = np.mean(aupr_scores)

# Print metrics:
print('Accuracy: {:.2f}%'.format(acc*100))
print('F1 Score: {:.2f}%'.format(f1*100))
print('Precision: {:.2f}%'.format(precision * 100))
print('Recall: {:.2f}%'.format(recall * 100))
print('ROC AUC: {:.2f}%'.format(roc_auc * 100))
print('AUPR: {:.2f}%'.format(mean_aupr * 100))

Accuracy: 58.33%
F1 Score: 56.08%
Precision: 58.19%
Recall: 55.94%
ROC AUC: 84.34%
AUPR: 60.31%


In [163]:
conf_mat(cpnd_comb.actuals, cpnd_comb.preds, 'Struct MLP - Compounds', 
         'output/Struct_MLP/cpnd_cm.png')

In [164]:
# Save results:
cpnd_comb.to_csv('output/Struct_MLP/results_df.csv', index=False)

# XGBoost:

In [131]:
# Define model:
xgb_model = XGBClassifier()

In [151]:
for f in range(folds):
    # Training and validation data:
    train_meta, val_meta = cv_data[f]['train_meta'], cv_data[f]['val_meta']
    test_meta = cv_data[f]['test_meta']
    
    # Remove duplicate smiles caused by replicates:
    tm_no_dupes = train_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    vm_no_dupes = val_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    test_no_dupes = test_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    
    # Convert smiles in meta data to morgan fingerprints:
    train_x = convert_fprints(tm_no_dupes.smiles.to_list(), 'morgan')
    val_x = convert_fprints(vm_no_dupes.smiles.to_list(), 'morgan')
    test_x = convert_fprints(test_no_dupes.smiles.to_list(), 'morgan').numpy()

    # Combine training and val. set for XGBoost:
    training_set = torch.cat((train_x, val_x), dim=0).numpy()
    
    # Return moa labels:
    y_train = np.array([moa_dict[element] for element in tm_no_dupes.moa.tolist()])
    y_val = np.array([moa_dict[element] for element in vm_no_dupes.moa.tolist()])
    training_labels = np.hstack((y_train, y_val))
    y_test = np.array([moa_dict[element] for element in test_no_dupes.moa.tolist()])
    
    # Make a deep copy of the model and fit it to the training data:
    cv_mod = copy.deepcopy(xgb_model)
    cv_mod.fit(training_set, training_labels)

    # Make predictions on the test/validation set:
    y_pred = cv_mod.predict(test_x)
    preds = [round(value) for value in y_pred]
#         y_pred_proba = cv_mod.predict_proba(X_test) # for roc_auc

    # Calculate well-level accuracy of predictions:
    acc = accuracy_score(y_test, preds)*100
    f1 = f1_score(y_test, preds, average='macro')*100
    print(f"--- CV {f}: Accuracy {acc:.2f}%")
    print(f"--- CV {f}: F1 Score {f1:.2f}%\n")

--- CV 0: Accuracy 60.00%
--- CV 0: F1 Score 54.38%

--- CV 1: Accuracy 57.89%
--- CV 1: F1 Score 40.22%

--- CV 2: Accuracy 47.37%
--- CV 2: F1 Score 32.38%

--- CV 3: Accuracy 36.84%
--- CV 3: F1 Score 40.36%

--- CV 4: Accuracy 47.37%
--- CV 4: F1 Score 33.71%



## Random Search for Parameters:

In [152]:
rs_params = {'max_depth': [3, 5, 6, 10, 15, 20],
           'learning_rate': [0.01, 0.1, 0.2, 0.3],
           'subsample': np.arange(0.5, 1.0, 0.1),
           'colsample_bytree': np.arange(0.4, 1.0, 0.1),
           'colsample_bylevel': np.arange(0.4, 1.0, 0.1),
           'n_estimators': np.arange(50, 1000, 200)}

In [158]:
for f in range(folds):
    # Define model and search:
    rand_search_XGB = XGBClassifier(random_state=rand_seed) 
    
    # Search across 100 different combinations, and use all available cores
    xgb_random = RandomizedSearchCV(estimator=rand_search_XGB, param_distributions=rs_params, 
                               n_iter=50, verbose=1, random_state=rand_seed, cv=3)
    
    # Training and validation data:
    train_meta, val_meta = cv_data[f]['train_meta'], cv_data[f]['val_meta']
    
    # Remove duplicate smiles caused by replicates:
    tm_no_dupes = train_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    vm_no_dupes = val_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    # Convert smiles in meta data to morgan fingerprints:
    train_x = convert_fprints(tm_no_dupes.smiles.to_list(), 'morgan')
    val_x = convert_fprints(vm_no_dupes.smiles.to_list(), 'morgan')
    # Combine training and val. set for XGBoost:
    training_set = torch.cat((train_x, val_x), dim=0).numpy()
    
    # Return moa labels:
    y_train = np.array([moa_dict[element] for element in tm_no_dupes.moa.tolist()])
    y_val = np.array([moa_dict[element] for element in vm_no_dupes.moa.tolist()])
    training_labels = np.hstack((y_train, y_val))
    
    # Fit Random Search Model:
    xgb_random.fit(training_set, training_labels)
    
    print("Best CV Accuracy: ", xgb_random.best_score_)
    print("Best Parameters: ", xgb_random.best_params_)
    print("--------------------------------------------")

Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best CV Accuracy:  0.5117948717948718
Best Parameters:  {'subsample': 0.8999999999999999, 'n_estimators': 450, 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.4, 'colsample_bylevel': 0.4}
--------------------------------------------
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best CV Accuracy:  0.4682051282051282
Best Parameters:  {'subsample': 0.7999999999999999, 'n_estimators': 250, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 0.6, 'colsample_bylevel': 0.6}
--------------------------------------------
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best CV Accuracy:  0.4794871794871794
Best Parameters:  {'subsample': 0.8999999999999999, 'n_estimators': 850, 'max_depth': 10, 'learning_rate': 0.01, 'colsample_bytree': 0.7, 'colsample_bylevel': 0.4}
--------------------------------------------
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best CV Accura

## Fit Model with Opt. Hyperparameters:

In [8]:
# Define hyp. searched model:
xgb_model = XGBClassifier(subsample=0.9, n_estimators=450, max_depth=5, 
                          learning_rate=0.01, colsample_bytree=0.5, colsample_bylevel=0.4)

In [25]:
fold_preds = []
fold_actuals = []
fold_probas = []

for f in range(folds):
    # Training and validation data:
    train_meta, val_meta = cv_data[f]['train_meta'], cv_data[f]['val_meta']
    test_meta = cv_data[f]['test_meta']
    
    # Remove duplicate smiles caused by replicates:
    tm_no_dupes = train_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    vm_no_dupes = val_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    test_no_dupes = test_meta.drop_duplicates(['smiles']).reset_index(drop=True)
    
    # Convert smiles in meta data to morgan fingerprints:
    train_x = convert_fprints(tm_no_dupes.smiles.to_list(), 'morgan')
    val_x = convert_fprints(vm_no_dupes.smiles.to_list(), 'morgan')
    test_x = convert_fprints(test_no_dupes.smiles.to_list(), 'morgan').numpy()

    # Combine training and val. set for XGBoost:
    training_set = torch.cat((train_x, val_x), dim=0).numpy()
    
    # Return moa labels:
    y_train = np.array([moa_dict[element] for element in tm_no_dupes.moa.tolist()])
    y_val = np.array([moa_dict[element] for element in vm_no_dupes.moa.tolist()])
    training_labels = np.hstack((y_train, y_val))
    y_test = np.array([moa_dict[element] for element in test_no_dupes.moa.tolist()])
    
    # Make a deep copy of the model and fit it to the training data:
    cv_mod = copy.deepcopy(xgb_model)
    cv_mod.fit(training_set, training_labels)

    # Make predictions on the test/validation set:
    y_pred = cv_mod.predict(test_x)
    preds = [round(value) for value in y_pred]
    y_pred_proba = cv_mod.predict_proba(test_x) # for roc_auc
    
    # Update lists outside loop:
    fold_preds.append(preds)
    fold_actuals.append(y_test)
    fold_probas.append(y_pred_proba)
    
# Concatenate the arrays within the results lists:
pred_arr = np.concatenate(fold_preds)
act_arr = np.concatenate(fold_actuals)
proba_arr = np.concatenate(fold_probas)

# Calculate performance metrics at a compound-level:
acc = accuracy_score(act_arr, pred_arr)
f1 = f1_score(act_arr, pred_arr, average='macro')
precision = precision_score(act_arr, pred_arr, average='macro', zero_division=0)
recall = recall_score(act_arr, pred_arr, average='macro')
roc_auc = roc_auc_score(act_arr, proba_arr, average='macro', multi_class='ovr')

# Calculate AUPR for each class
aupr_scores = [average_precision_score(act_arr == class_index, proba_arr[:, class_index]
                                       ) for class_index in range(proba_arr.shape[1])]
mean_aupr = np.mean(aupr_scores)

# Print metrics:
print('Accuracy: {:.2f}%'.format(acc*100))
print('F1 Score: {:.2f}%'.format(f1*100))
print('Precision: {:.2f}%'.format(precision * 100))
print('Recall: {:.2f}%'.format(recall * 100))
print('ROC AUC: {:.2f}%'.format(roc_auc * 100))
print('AUPR: {:.2f}%'.format(mean_aupr * 100))

Accuracy: 55.21%
F1 Score: 48.45%
Precision: 49.58%
Recall: 49.68%
ROC AUC: 83.25%
AUPR: 55.90%
