# __Predicting cell biological response__

In [None]:
import wandb
import pandas as pd
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import logging

#### Run other .ipynb files

In [None]:
# Execute the companion notebooks in this kernel so their definitions become
# available here. NOTE(review): the cells below use `config`,
# `MultiLayerPerceptron`, `Trainer`, `tree_based_merged`,
# `correlation_selection_merged` and `pca`, none of which are defined in this
# notebook — presumably they come from these two files; confirm.
%run NN_model.ipynb
#from NN_model import *

%run NN_dataset.ipynb

#### Dataset representation for NN model

In [None]:
class CellDataset(Dataset):
    """Dataset pairing each row of descriptors with its 'target' value.

    Predictors and response are both stored as DataFrames: every column
    except 'target' forms the predictors, and 'target' (cast to float64)
    forms the single-column response.
    """

    def __init__(self, data: pd.DataFrame, normalize: bool):
        """Split ``data`` into predictors and response.

        ``normalize`` is accepted for interface reasons but is not used
        anywhere in this class.
        """
        # Predictors: everything apart from the 'target' column
        self.cell_descriptors = data.drop('target', axis=1)

        # Response: one-column float64 DataFrame
        self.cell_response = data['target'].astype('float64').to_frame()

    def __len__(self):
        """Return the number of samples (rows)."""
        return self.cell_descriptors.shape[0]

    def __getitem__(self, idx):
        """Return ``(descriptor_values, response_values)`` at position ``idx``."""
        features = self.cell_descriptors.iloc[idx].values
        label = self.cell_response.iloc[idx].values
        return features, label

    def get_input_size(self):
        """Return the number of descriptor columns (width of one sample)."""
        return len(self.cell_descriptors.columns)

## Wandb Project

In [None]:
wandb.login()

%env "WANDB_NOTEBOOK_NAME" "NN"

"""
run = wandb.init(
    entity = "matus13579",  #dont change
    project = "NN-z1",      #dont change
    name = "relu_test"      #run name
    #settings=wandb.Settings(start_method="fork")
    #id =                   #define run with ID (used for resuming)
    #resume = True           #resume run
    )
"""

#show graphs in Jupyter Notebook
#%%wandb                   

logger = logging.getLogger("wandb")
logger.setLevel(logging.ERROR)

## Experiments

#### Load Dataset and Initialize NN model

In [None]:
def setup_experiment(current_data):
    """Create a Trainer on an 80/20 stratified split of ``current_data``.

    ``current_data`` must contain a 'target' column; it is used both for
    stratification and as the response inside ``CellDataset``. The network
    is sized from the number of descriptor columns, and the Trainer is built
    from the global ``config`` as it is at call time.

    Returns the Trainer with the training and testing datasets loaded.
    """
    # Stratify on 'target' so both parts keep the same class proportions
    train_frame, test_frame = train_test_split(
        current_data,
        test_size=0.2,
        stratify=current_data['target'],
    )

    # Wrap both parts for the NN (normalize flag off)
    train_set = CellDataset(train_frame, False)
    test_set = CellDataset(test_frame, False)

    # Network input width follows the number of descriptors
    network = MultiLayerPerceptron(train_set.get_input_size())
    experiment_trainer = Trainer(config, network)

    # Attach the datasets to the trainer
    experiment_trainer.load_dataset(train_set, test_set)
    return experiment_trainer

### Training loop

In [None]:
def run_experiment(run_name: str, trainer: Trainer, no_epochs):
    """Train for ``no_epochs`` epochs and log per-epoch metrics to wandb.

    Args:
        run_name: Name given to the wandb run.
        trainer: Trainer to drive, e.g. the one returned by
            ``setup_experiment``.
        no_epochs: Number of train/evaluate cycles to run.

    Returns:
        A snapshot of ``trainer.model`` taken at the first epoch that reached
        the highest accuracy, or ``None`` when ``no_epochs`` is 0.
    """
    import copy

    my_run = wandb.init(
        entity="matus13579",
        project="NN-z1",
        name=run_name,
    )

    best_model = None
    best_accuracy = None

    try:
        for epoch in range(no_epochs):
            # Train model
            trainer.train()

            # Get metrics
            metrics = trainer.evaluate()

            # Calculate loss for epoch
            loss_tr, loss_val = trainer.mean_loss()

            print (f"Epoch {epoch}")
            print (f"loss_training: {loss_tr} | loss_validate: {loss_val}")

            # One log call per epoch: each separate log() call advances the
            # wandb step, which would put the four metrics on different steps.
            my_run.log({
                "loss_training": loss_tr,
                "loss_validate": loss_val,
                "accuracy": metrics.accuracy,
                "f1_score": metrics.f1_score,
            })

            if (best_accuracy is None) or (best_accuracy < metrics.accuracy):
                best_accuracy = metrics.accuracy
                # Snapshot the model: keeping only a reference would track
                # the object that later epochs continue to train, so the
                # "best" model would always end up being the final one.
                best_model = copy.deepcopy(trainer.model)
    finally:
        # Close the wandb run even if an epoch raises.
        my_run.finish()

    return best_model

In [None]:
# Load the full dataset from 'bioresponse.csv' (path relative to the working
# directory). The experiment cells below derive their feature sets from `data`.
data = pd.read_csv('bioresponse.csv')

## Experiment 1 

In [None]:
# Experiment 1 setup: compare activation functions on one feature set.
# Dataset with 50 best descriptors
# NOTE(review): tree_based_merged is not defined in this notebook; presumably
# it comes from NN_dataset.ipynb and keeps the 'target' column — confirm.
current_data = tree_based_merged(data, 50)

# Number of epochs used by every run of this experiment
NO_epochs = 50

####  Sigmoid

In [None]:
# Experiment 1, sigmoid: set the activation on the shared `config` object
# (setup_experiment hands it to Trainer), then train for NO_epochs epochs
# under the wandb run name "sig".
config.activation_fn = "sigmoid"

current_trainer = setup_experiment(current_data)
run_experiment("sig", current_trainer, NO_epochs)

#### Hyperbolic Tangent

In [None]:
# Experiment 1, tanh: same procedure with a fresh split and trainer,
# logged under the wandb run name "tanh".
config.activation_fn = "tanh"

current_trainer = setup_experiment(current_data)
run_experiment("tanh", current_trainer, NO_epochs)

#### SoftSign

In [None]:
# Experiment 1, softsign: same procedure with a fresh split and trainer,
# logged under the wandb run name "softsign".
config.activation_fn = "softsign"

current_trainer = setup_experiment(current_data)
run_experiment("softsign", current_trainer, NO_epochs)

#### ReLU

In [None]:
# Experiment 1, ReLU: same procedure with a fresh split and trainer,
# logged under the wandb run name "relu".
config.activation_fn = "relu"

current_trainer = setup_experiment(current_data)
run_experiment("relu", current_trainer, NO_epochs)

## Experiment 2

In [None]:
# Experiment 2 setup: same feature selection call as Experiment 1, but the
# runs below set config.normalized_weight_init and train for more epochs.
# Dataset with 50 best descriptors
current_data = tree_based_merged(data, 50)

# Number of epochs used by every run of this experiment
NO_epochs = 500

####  Sigmoid

In [None]:
# Experiment 2, sigmoid with normalized_weight_init set; wandb run "sig-N".
# NOTE(review): the flag presumably switches the model to a normalized weight
# initialisation — its effect is defined outside this notebook; confirm.
config.activation_fn = "sigmoid"
config.normalized_weight_init = True

current_trainer = setup_experiment(current_data)
run_experiment("sig-N", current_trainer, NO_epochs)

#### Hyperbolic Tangent

In [None]:
# Experiment 2, tanh with normalized_weight_init set; wandb run "tanh-N".
config.activation_fn = "tanh"
config.normalized_weight_init = True

current_trainer = setup_experiment(current_data)
run_experiment("tanh-N", current_trainer, NO_epochs)

#### SoftSign

In [None]:
# Experiment 2, softsign with normalized_weight_init set; wandb run
# "softsign-N".
config.activation_fn = "softsign"
config.normalized_weight_init = True

current_trainer = setup_experiment(current_data)
run_experiment("softsign-N", current_trainer, NO_epochs)

## Experiment 3

In [None]:
# Experiment 3 setup: fix the model settings (ReLU, normalized_weight_init
# set) so the cells below differ only in how the input features are derived
# from `data`.
config.activation_fn = "relu"
config.normalized_weight_init = True

# Number of epochs used by every run of this experiment
NO_epochs = 500

In [None]:
# Experiment 3, feature set produced by correlation_selection_merged;
# logged under the wandb run name "linear_corr".
dataset_1 = correlation_selection_merged(data)

current_trainer = setup_experiment(dataset_1)
# run_experiment drives a Trainer (it calls trainer.train()), so pass the
# trainer built above rather than the DataFrame.
run_experiment("linear_corr", current_trainer, NO_epochs)

In [None]:
# Experiment 3, feature set produced by tree_based_merged(data, 50);
# logged under the wandb run name "tree_select".
dataset_2 = tree_based_merged(data, 50)

current_trainer = setup_experiment(dataset_2)
# run_experiment drives a Trainer (it calls trainer.train()), so pass the
# trainer built above rather than the DataFrame.
run_experiment("tree_select", current_trainer, NO_epochs)

In [None]:
# Experiment 3, feature set produced by pca(data, n_components=25);
# logged under the wandb run name "pca".
# NOTE(review): setup_experiment needs a 'target' column in its input —
# confirm that pca() keeps it.
dataset_3 = pca(data, n_components = 25)

current_trainer = setup_experiment(dataset_3)
# run_experiment drives a Trainer (it calls trainer.train()), so pass the
# trainer built above rather than the DataFrame.
run_experiment("pca", current_trainer, NO_epochs)