# __Predicting cell biological response__

In [75]:
import wandb
import pandas as pd
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import logging

#### Run other .ipybn files

In [76]:
%run NN_model.ipynb
%run NN_dataset.ipynb #later import as .py

         D1        D2    D3   D4        D5        D6        D7        D8  \
0  0.000000  0.497009  0.10  0.0  0.132956  0.678031  0.273166  0.585445   
1  0.366667  0.606291  0.05  0.0  0.111209  0.803455  0.106105  0.411754   
2  0.033300  0.480124  0.00  0.0  0.209791  0.610350  0.356453  0.517720   
3  0.000000  0.538825  0.00  0.5  0.196344  0.724230  0.235606  0.288764   
4  0.100000  0.517794  0.00  0.0  0.494734  0.781422  0.154361  0.303809   

         D9       D10  ...  D1768  D1769  D1770  D1771  D1772  D1773  D1774  \
0  0.743663  0.243144  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
1  0.836582  0.106480  ...    1.0    1.0    1.0    0.0    1.0    0.0    0.0   
2  0.679051  0.352308  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
3  0.805110  0.208989  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
4  0.812646  0.125177  ...    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

   D1775  D1776  target  
0    0.0    0.0       1  
1    1.0    0.0 

#### Dataset representation for NN model

In [77]:
class CellDataset(Dataset):
    def __init__(self, data: pd.DataFrame, normalize:bool):
        # save predictros as DataFrame
        self.cell_descriptors = data.drop(columns=['target'])

        # Normalize target value to [-1, 1]
        res = data['target']
        if normalize:
            res = res.apply(lambda x: -1 if x == 0 else 1)

        # save target as DataFrame
        self.cell_response = res.astype('float64').to_frame()

    def __len__(self):
        return len(self.cell_descriptors)

    def __getitem__(self, idx):
        desc = self.cell_descriptors.iloc[idx]
        res = self.cell_response.iloc[idx]
        return desc.values, res.values
    
    def get_input_size(self):
        return self.cell_descriptors.shape[1]

#### Load Dataset a Initialize NN model

In [78]:
current_data = data

# Split dataset -> 20% testing, 80% training
# Stratified split = each dataset has equal amounts of each class (saved in column 'target')
train, test = train_test_split(current_data, test_size = 0.2, stratify = current_data['target']) 

# Initlize dataset for NN
config.activation_fn = "sigmoid"
train_data = CellDataset(train, normalize_dataset())
test_data = CellDataset(test, normalize_dataset())

# Create NN and training class
mlp = MultiLayerPerceptron(train_data.get_input_size())
trainer = Trainer(config, mlp)

# Load dataset
trainer.load_dataset(train_data, test_data)

Using cpu device for training


## Wandb Project

In [74]:
wandb.login()

%env "WANDB_NOTEBOOK_NAME" "NN-z1"

myrun = wandb.init(
    entity = "matus13579",  #dont change
    project = "NN-z1",      #dont change
    name = "test_run"       #run name
    # id =                  #define run with ID (used for resuming)
    # resume = True         #resume run
    )

#%%wandb                    #show graphs in Jupyter Notebook

logger = logging.getLogger("wandb")
logger.setLevel(logging.ERROR)



env: "WANDB_NOTEBOOK_NAME"="NN-z1"


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.5419


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

### Training loop

In [79]:
no_epochs = 20

for epoch in range(no_epochs):
    print (f"Epoch {epoch}")

    # Train model
    trainer.train()

    # Get metrics
    metrics = trainer.evaluate()

    #print (f"LOSS = {metrics.total_loss}") 
    #print (f"Accuracy = {metrics.accuracy}")
    #print (f"Precision = {metrics.precision}")
    #print (f"Recall = {metrics.recall}")
    #print (f"F1 score = {metrics.f1_score}")
    #print(" ")

    myrun.log({"loss": metrics.total_loss})
    myrun.log({"accuracy": metrics.accuracy})
    myrun.log({"f1_score": metrics.accuracy})

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19


In [80]:
# TODO Save model in wandb

# Finish run
myrun.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
f1_score,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,▂▅▆█▅▃█▆█▁▇▅█▄▄▄▄▅▆▅

0,1
accuracy,0.5419
f1_score,0.5419
loss,45.84858
