<a href="https://colab.research.google.com/github/wylhtydtm/Nematode-project/blob/master/cnn_lstm_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import tables
import numpy as np
import pandas as pd
from PIL import Image
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
import time
import copy

In [None]:
class timeseries_dataset(Dataset):
    """Map-style dataset returning one time-series array and its label per item.

    The label table of the requested split is read once at construction.
    The time-series array itself is read from the HDF5 file on every
    ``__getitem__`` call, so no file handle is kept open between calls.
    """

    def __init__(self, hdf5_filename, which_set='train', transform=None):
        """Read the label table for one split.

        Args:
            hdf5_filename: Path of the HDF5 file (opened with PyTables).
            which_set: Name of the top-level group to read from.
            transform: Optional callable applied to each float32 array.
                Its result is squeezed along axis 0 before being returned.
        """
        self.fname = hdf5_filename
        self.set_name = which_set
        # Only the label table is loaded eagerly.
        with tables.File(self.fname, 'r') as fid:
            tmp = pd.DataFrame.from_records(
                fid.get_node('/' + self.set_name)['labels'].read())
        self.label_info = tmp[
            ['imaging_plate_drug_concentration', 'MOA_group', 'ts_id']]
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.label_info)

    def __getitem__(self, index):
        """Return ``(ts, labels)`` for the row at position ``index``.

        ``ts`` is the float32 time-series data (see ``_prepare_timeseries``).
        ``labels`` is a float32 tensor holding the row's ``MOA_group`` value,
        reshaped to ``(-1, 1)``.
        """
        if torch.is_tensor(index):
            index = index.tolist()
        # Look ts_id up explicitly instead of assuming it equals the row
        # position, in case the label table has been reordered.
        label_info = self.label_info.iloc[index]
        ts_id = label_info['ts_id'].astype(int)

        # Read only the requested slice from disk.
        with tables.File(self.fname, 'r') as fid:
            timeseries_data = fid.get_node(
                '/' + self.set_name + '/tw_data')[ts_id, :, :].copy()

        ts = self._prepare_timeseries(timeseries_data)

        labels = np.array(
            label_info['MOA_group'], dtype=np.float32).reshape(-1, 1)
        return ts, torch.from_numpy(labels)

    def _prepare_timeseries(self, timeseries_data):
        """Cast raw data to float32 and apply the optional transform.

        Without a transform the float32 array is wrapped in a tensor, so the
        dataset always returns a tensor. Previously ``ts`` was left unbound
        in that case and ``__getitem__`` raised ``UnboundLocalError``.
        """
        ts = timeseries_data.astype(np.float32)
        if self.transform:
            # Presumably removes the leading channel axis that a
            # ToTensor-style transform adds to a 2-D array - TODO confirm.
            return self.transform(ts).squeeze(0)
        return torch.from_numpy(ts)

In [None]:
 # NOTE(review): both lines start with a stray space. The notebook front end
 # presumably tolerates it, but a plain Python interpreter would reject the
 # indent - confirm before exporting this cell as a .py file.
 # Folder and HDF5 file that the datasets below are built from.
 hd = Path('/content/drive/My Drive')
 fname = hd / 'Timeseries_testnewsignals__fillednanswifnegative1.hdf'

# Batch size handed to the data loaders, and the preferred compute device
# (CUDA when one is visible, CPU otherwise).
batch_size = 5
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [None]:
# Transform handed to both datasets: torchvision's array-to-tensor conversion.
tw_transform = transforms.ToTensor()

In [None]:
# Training and validation datasets come from the same HDF5 file and share
# one transform; only the split name differs.
train_data, val_data = (
    timeseries_dataset(fname, which_set=split, transform=tw_transform)
    for split in ('train', 'val')
)

In [None]:
# One shuffling loader per split, with identical batching and worker settings.
train_loader, val_loader = (
    DataLoader(split_data, shuffle=True, batch_size=batch_size, num_workers=4)
    for split_data in (train_data, val_data)
)


In [None]:
# `i1` is not defined in any visible cell, so take the inputs of one training
# batch here to make the shape check below runnable.
i1 = next(iter(train_loader))[0]
print(i1.shape)

# Whole number of epochs needed for roughly n_iters optimiser steps.
n_iters = 3000
num_epochs = int(n_iters / (len(train_data) / batch_size))

torch.Size([5, 876, 8])


In [None]:
# Shape of the batch with each sample flattened to one row. The value of this
# expression is discarded; only the last line of the cell is echoed.
i1.view(i1.size(0), -1).shape
type(i1)

torch.Tensor

In [None]:
# Model and optimiser hyper-parameters.
input_dim = 876       # size of each LSTM input vector
hidden_dim = 100      # size of the LSTM hidden state
num_layers = 1        # or 2
output_dim = 12       # size of the final linear layer's output
learning_rate = 0.1

# Width of the LSTM input: one value at a time when `per_element` is set,
# otherwise the full `input_dim` vector.
# The original read the undefined name `input_size` here.
per_element = False
lstm_input_size = 1 if per_element else input_dim

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the last time step."""

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        """Build the recurrent layer and the output layer.

        Args:
            input_dim: Number of features in each time step of the input.
            hidden_dim: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_dim: Number of values produced per sequence.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.input_dim = input_dim

        # With batch_first=True the LSTM consumes (batch, seq_len, input_dim)
        # and returns (batch, seq_len, hidden_dim).
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers,
                            batch_first=True)
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's output for the final time step.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # nn.LSTM starts from zero hidden and cell states when none are given.
        # Relying on that default keeps the state on the same device and dtype
        # as the module; the hand-built CPU float32 zeros used before did not.
        # The state is reset on every call, so nothing carries over between
        # batches.
        out, _ = self.lstm(x)

        # Only the output at the last time step feeds the output layer.
        return self.linear(out[:, -1, :])

In [None]:
# Network, loss and optimiser used by the training loop.
model = LSTM(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=output_dim,
)

# Cross-entropy because this is classification; MSE would suit regression.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# Show the module layout, then the number of parameter tensors it holds.
print(model)
print(sum(1 for _ in model.parameters()))

LSTM(
  (lstm): LSTM(876, 100, batch_first=True)
  (linear): Linear(in_features=100, out_features=12, bias=True)
)
6


In [None]:
# Print the size of each parameter tensor, in the order model.parameters()
# yields them. Iterating directly avoids rebuilding the list on every step.
for param in model.parameters():
    print(param.size())

torch.Size([400, 876])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([12, 100])
torch.Size([12])


In [None]:
seq_dim = 8          # sequence length presented to the LSTM
n_iters = 3000
n_epochs = 100
eval_every = 500     # run a validation pass every this many optimiser steps

# Named `iteration` rather than `iter`: a global `iter` shadows the builtin
# and breaks any later `iter(loader)` call in the notebook.
iteration = 0

# Send batches to whatever device the model's parameters are on.
model_device = next(model.parameters()).device

for epoch in range(n_epochs):
    for batch, targets in train_loader:
        # Assumes batches arrive as (batch, input_dim, seq_dim), as the shape
        # echoed earlier in the notebook ([5, 876, 8]) suggests - TODO confirm.
        # transpose swaps the two trailing axes. The previous view(...) call
        # only reinterpreted memory, mixing values from different rows, and
        # it read the undefined name `input_size`.
        batch = batch.transpose(1, 2).contiguous().to(model_device)
        targets = targets.view(-1).long().to(model_device)  # flat class indices

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = model(batch)

        # CrossEntropyLoss applies log-softmax to the logits itself.
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        iteration += 1
        if iteration % eval_every != 0:
            continue

        # Validation pass. Gradients are not needed, so autograd is switched
        # off, and the training batch variables are left untouched.
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for val_batch, val_targets in val_loader:
                val_batch = val_batch.transpose(1, 2).contiguous().to(model_device)
                val_targets = val_targets.view(-1).long().to(model_device)

                # Predicted class is the index of the largest logit.
                predicted = model(val_batch).argmax(dim=1)

                total += val_targets.size(0)
                correct += (predicted == val_targets).sum().item()
        model.train()

        accuracy = 100 * correct / total

        # The loss shown is that of the most recent training batch.
        print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))