# __Predicting cell biological response__

In [1]:
import wandb
import pandas as pd
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import logging

#### Run other .ipybn files

In [25]:
%run NN_model.ipynb
#from NN_model import *

%run NN_dataset.ipynb

         D1        D2    D3   D4        D5        D6        D7        D8  \
0  0.000000  0.497009  0.10  0.0  0.132956  0.678031  0.273166  0.585445   
1  0.366667  0.606291  0.05  0.0  0.111209  0.803455  0.106105  0.411754   
2  0.033300  0.480124  0.00  0.0  0.209791  0.610350  0.356453  0.517720   
3  0.000000  0.538825  0.00  0.5  0.196344  0.724230  0.235606  0.288764   
4  0.100000  0.517794  0.00  0.0  0.494734  0.781422  0.154361  0.303809   

         D9       D10  ...  D1768  D1769  D1770  D1771  D1772  D1773  D1774  \
0  0.743663  0.243144  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
1  0.836582  0.106480  ...    1.0    1.0    1.0    0.0    1.0    0.0    0.0   
2  0.679051  0.352308  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
3  0.805110  0.208989  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
4  0.812646  0.125177  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

   D1775  D1776  target  
0    0.0    0.0       1  
1    1.0    0.0 

#### Dataset representation for NN model

In [3]:
class CellDataset(Dataset):
    def __init__(self, data: pd.DataFrame, normalize: bool):
        # Save predictors as DataFrame
        self.cell_descriptors = data.drop(columns=['target'])
        res = data['target']

        #if normalize:
            #res = res.apply(lambda x: -1 if x == 0 else 1)

        # Save target as DataFrame
        self.cell_response = res.astype('float64').to_frame()

    def __len__(self):
        return len(self.cell_descriptors)

    def __getitem__(self, idx):
        desc = self.cell_descriptors.iloc[idx]
        res = self.cell_response.iloc[idx]
        return desc.values, res.values
    
    def get_input_size(self):
        return self.cell_descriptors.shape[1]

## Wandb Project

In [22]:
wandb.login()

%env "WANDB_NOTEBOOK_NAME" "NN"

run = wandb.init(
    entity = "matus13579",  #dont change
    project = "NN-z1",      #dont change
    name = "relu_test"      #run name
    #settings=wandb.Settings(start_method="fork")
    #id =                   #define run with ID (used for resuming)
    #resume = True           #resume run
    )

#show graphs in Jupyter Notebook
#%%wandb                   

logger = logging.getLogger("wandb")
logger.setLevel(logging.ERROR)

env: "WANDB_NOTEBOOK_NAME"="NN"


## Experiments

In [5]:
data = pd.read_csv('bioresponse.csv')

#### Load Dataset and Initialize NN model

In [6]:
def setup_experiment(current_data):
    # Split dataset -> 20% testing, 80% training
    # Stratified split = each dataset has equal amounts of each class (saved in column 'target')
    train, test = train_test_split(current_data, test_size = 0.2, stratify = current_data['target']) 

    # Initlize dataset for NN
    train_data = CellDataset(train, False)
    test_data = CellDataset(test, False)

    # Create NN and training class
    mlp = MultiLayerPerceptron(train_data.get_input_size())
    trainer = Trainer(config, mlp)

    # Load dataset
    trainer.load_dataset(train_data, test_data)

    return trainer

### Training loop

In [7]:
def run_experiment(myrun, trainer: Trainer, no_epochs):
    best_model = None
    best_accuracy = None

    for epoch in range(no_epochs):
        # Train model
        trainer.train()

        # Get metrics
        metrics = trainer.evaluate()

        loss_tr, loss_val = trainer.mean_loss()
        
        print (f"Epoch {epoch}")
        print (f"loss_training: {loss_tr} | loss_validate: {loss_val}")

        myrun.log({"loss_training": loss_tr})
        myrun.log({"loss_validate": loss_val})
        myrun.log({"accuracy": metrics.accuracy})
        myrun.log({"f1_score": metrics.accuracy})
        
        if (best_accuracy is None) or (best_accuracy > metrics.accuracy):
            best_accuracy = metrics.accuracy
            best_model = trainer.model

    run.finish()

#### Experiment 1

In [8]:
current_data = pca(data)

config.activation_fn = "sigmoid"

In [23]:
current_trainer = setup_experiment(current_data)
run_experiment(run, current_trainer, 50)

Using cpu device for training
Epoch 0
loss_training: 3.9009411334991455 | loss_validate: 1.9582806825637817


  self.precision = tp / (tp + fp)


Epoch 1
loss_training: 1.166259765625 | loss_validate: 1.3394203186035156
Epoch 2
loss_training: 0.9043787121772766 | loss_validate: 0.8710954785346985


  self.precision = tp / (tp + fp)


Epoch 3
loss_training: 0.7530996203422546 | loss_validate: 0.7403398156166077
Epoch 4
loss_training: 0.7216560244560242 | loss_validate: 0.711334228515625


  self.precision = tp / (tp + fp)


Epoch 5
loss_training: 0.7019293308258057 | loss_validate: 0.6938424706459045
Epoch 6
loss_training: 0.6928386092185974 | loss_validate: 0.6923105120658875
Epoch 7
loss_training: 0.6904366612434387 | loss_validate: 0.6907992959022522
Epoch 8
loss_training: 0.691845715045929 | loss_validate: 0.6896920800209045
Epoch 9
loss_training: 0.6896989941596985 | loss_validate: 0.689741849899292
Epoch 10
loss_training: 0.6897615790367126 | loss_validate: 0.6896136403083801
Epoch 11
loss_training: 0.6905264258384705 | loss_validate: 0.6897098422050476
Epoch 12
loss_training: 0.6905832886695862 | loss_validate: 0.6895681023597717
Epoch 13
loss_training: 0.6905013918876648 | loss_validate: 0.6896753311157227
Epoch 14
loss_training: 0.6897185444831848 | loss_validate: 0.6896201968193054
Epoch 15
loss_training: 0.6907444596290588 | loss_validate: 0.6895473003387451
Epoch 16
loss_training: 0.6899269223213196 | loss_validate: 0.6897658705711365
Epoch 17
loss_training: 0.6899938583374023 | loss_validate:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,█▁█▁▁███████████████████████████████████
f1_score,█▁█▁▁███████████████████████████████████
loss_training,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss_validate,█▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.5419
f1_score,0.5419
loss_training,0.6907
loss_validate,0.68995
