## Packages

In [None]:
import random
import numpy as np
import os
import torch
import torch.nn as nn
#from pytorch_transformers import BertModel, BertTokenizer, BertConfig, WarmupLinearSchedule 
import re
import pandas as pd 
import json
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, SubsetRandomSampler
import pickle
from sklearn import metrics
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
from tqdm import tqdm_notebook, trange

def seed_everything(seed = 42):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed applied to Python's ``random``, the
            ``PYTHONHASHSEED`` environment variable, NumPy and PyTorch
            (CPU and every CUDA device).
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed every other visible GPU; this is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run,
    # which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False
# For reproducible results
seed_everything()

In [None]:
import matplotlib as mpl
# Newer Matplotlib releases renamed the bundled seaborn style to
# 'seaborn-v0_8'; prefer it when available and fall back to the old name.
mpl.style.use('seaborn-v0_8' if 'seaborn-v0_8' in mpl.style.available else 'seaborn')

In [None]:
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
%cd /content/gdrive/My Drive/seq

In [None]:
%pwd

## Data Preprocessing

In [None]:
class MyDataset(Dataset):
    """Dataset pairing an indexable collection of samples with its targets."""

    def __init__(self, X, Y):
        # Both collections are stored as given; no copy or length check is made.
        self.data, self.target = X, Y

    def __len__(self):
        # The dataset size is taken from the samples only.
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(sample, target)`` pair stored at ``index``."""
        return self.data[index], self.target[index]

### Data Encoding Type 1

In [None]:
data, label = 'data/', 'data/'

In [None]:
with open(data,'rb') as fp:
    X = pickle.load(fp)
with open(label,'rb') as fp:
    Y = pickle.load(fp)

In [None]:
X, Y = shuffle(X, Y, random_state=0)

In [None]:
Counter(Y)

In [None]:
# Fixed-position split of the shuffled Type-1 data:
# [0, 42000) -> train, [42000, 56000) -> test, [56000, end) -> validation.
_train_end, _test_end = 42000, 56000
train_X = torch.from_numpy(np.asarray(X[:_train_end], np.float32))
train_y = torch.from_numpy(np.asarray(Y[:_train_end]))
test_X = torch.from_numpy(np.asarray(X[_train_end:_test_end], np.float32))
test_y = torch.from_numpy(np.asarray(Y[_train_end:_test_end]))
valid_X = torch.from_numpy(np.asarray(X[_test_end:], np.float32))
valid_y = torch.from_numpy(np.asarray(Y[_test_end:]))

### Data Encoding Type 2

In [None]:
npzfile = np.load('data/50cut/200000peaks-InhNeuron.npz')

In [None]:
npzfile.files

In [None]:
X, y = npzfile['arr_0'], npzfile['arr_1']

In [None]:
classes = max(y) + 1

In [None]:
subX, subY = shuffle(X, y, random_state=0)

In [None]:
# 60 / 20 / 20 split of the shuffled data into train / validation / test.
_valid_start = int(len(subY)*0.6)
_test_start = int(len(subY)*0.8)
trainX, trainY = subX[:_valid_start], subY[:_valid_start]
validX, validY = subX[_valid_start:_test_start], subY[_valid_start:_test_start]
testX, testY = subX[_test_start:], subY[_test_start:]

In [None]:
Counter(subY)

#### Normalized Data

In [None]:
cap = 30000

In [None]:
normX, normY = [], []

In [None]:
# Keep every non-zero sample but at most `cap` samples of class 0.
# The loop variable is deliberately NOT named `y`: that would overwrite the
# global label array `y` and break re-running the cells that use it.
for idx, label in enumerate(subY):
    if label == 0.0:
        if cap <= 0:
            continue  # class-0 budget exhausted: drop this sample
        cap -= 1
    normY.append(label)
    normX.append(subX[idx])

In [None]:
# Same class-0 capping as the preceding cell, written with an integer zero
# (`0 == 0.0` in Python, so both cells select the same samples).
# NOTE(review): run only ONE of the two cells. `normX`/`normY` and `cap` are
# shared, so running both appends every non-zero sample a second time.
# NOTE(review): the loop variable rebinds the global name `y`.
for idx, y in enumerate(subY):
  if y == 0 and cap > 0:
    normY.append(y)
    normX.append(subX[idx])
    cap -= 1
  else:
    # class-0 sample beyond the cap: skip it
    if y == 0: continue
    normY.append(y)
    normX.append(subX[idx])

In [None]:
normX, normY = shuffle(normX, normY, random_state=0)

In [None]:
normX, normY = np.array(normX), np.array(normY)

In [None]:
# 60 / 20 / 20 split of the capped data into train / validation / test.
_valid_start = int(len(normY)*0.6)
_test_start = int(len(normY)*0.8)
trainX, trainY = normX[:_valid_start], normY[:_valid_start]
validX, validY = normX[_valid_start:_test_start], normY[_valid_start:_test_start]
testX, testY = normX[_test_start:], normY[_test_start:]

### Convert to Torch Data

In [None]:
train_X = torch.from_numpy(trainX)
train_y = torch.from_numpy(trainY)
valid_X  = torch.from_numpy(validX)
valid_y = torch.from_numpy(validY)
test_X = torch.from_numpy(testX)
test_y = torch.from_numpy(testY)

In [None]:
train_dataset = MyDataset(train_X, train_y)
valid_dataset = MyDataset(valid_X, valid_y)
test_dataset = MyDataset(test_X, test_y)

## Helper Functions

### Save Best Model

In [None]:
def bestmodel(model_name,save_model_time,valid_loss):
    """Save ``model_name`` when ``valid_loss`` is the lowest seen so far.

    The best loss is remembered between calls, per model object and per
    checkpoint folder. The previous implementation reset it to 10000 on every
    call, so every epoch overwrote the "best" model.

    Args:
        model_name: the ``nn.Module`` being trained.
        save_model_time: suffix of the ``model/model<suffix>/`` folder, which
            must already exist.
        valid_loss: validation loss of the current epoch.

    Returns:
        Always ``True`` (kept for backward compatibility).
    """
    import weakref

    # Weak keys: a discarded model does not leave a stale "best loss" behind.
    registry = bestmodel.__dict__.setdefault('_best_loss', weakref.WeakKeyDictionary())
    best_by_folder = registry.setdefault(model_name, {})
    if valid_loss < best_by_folder.get(save_model_time, float('inf')):
        best_by_folder[save_model_time] = valid_loss
        torch.save(model_name, 'model/model{save_model_time}/bestmodel.pkl'.format(save_model_time=save_model_time))
        torch.save(model_name.state_dict(), 'model/model{save_model_time}/net_params_bestmodel.pkl'.format(save_model_time=save_model_time))
    return True

### One Hot Encoding y

In [None]:
def onehot(y, num_classes=None):
    """One-hot encode integer class labels.

    Args:
        y: labels as a list, NumPy array or torch tensor (any device);
            flattened to one dimension.
        num_classes: width of the encoding. Defaults to the notebook-level
            ``classes`` variable, as before.

    Returns:
        ``float32`` NumPy array of shape ``(len(y), num_classes)``.

    Raises:
        ValueError: if a label is not an integer in ``[0, num_classes)``.
    """
    if num_classes is None:
        num_classes = classes
    num_classes = int(num_classes)

    # Tensors (possibly on the GPU) must be brought to host memory first.
    if hasattr(y, 'detach'):
        y = y.detach().cpu().numpy()
    labels = np.asarray(y).reshape(-1)

    y_onehot = np.zeros((len(labels), num_classes), dtype=np.float32)
    if labels.size == 0:
        return y_onehot

    columns = labels.astype(np.int64)
    # Explicit check: fancy indexing would silently wrap negative labels.
    if np.any(columns != labels) or columns.min() < 0 or columns.max() >= num_classes:
        raise ValueError('labels must be integers in [0, {})'.format(num_classes))

    y_onehot[np.arange(len(labels)), columns] = 1
    return y_onehot

## Training and Validating

### Define Model Saving Path

CNN - 0 <br/>
VGG - 1 <br/>
Basset - 2 <br/>
DeepSEA - 3

In [None]:
save_model_time = '0'
mkpath = 'model/model%s'% save_model_time
# Every torch.save() below writes into this folder, so make sure it exists.
os.makedirs(mkpath, exist_ok=True)

### Trainer for Categorical Data

In [None]:
class TrainHelper():
    '''
    Training/validation driver for classification models.

    Wraps an Adam optimizer, a loss chosen through ``opts['loss_fxn']`` and
    two DataLoaders, and runs the train -> checkpoint -> validate loop.

    Relies on notebook globals: ``save_model_time`` (checkpoint folder),
    ``train_loss`` / ``test_loss`` (per-epoch history lists appended to in
    ``test``), ``bestmodel``, ``onehot`` and ``tqdm_notebook``.
    '''

    def __init__(self,model,train_set,test_set,opts):
        '''
        Args:
            model: ``nn.Module`` to train; moved to CUDA when available.
            train_set: dataset used for optimisation.
            test_set: dataset evaluated after every epoch (validation).
            opts: dict with keys 'epochs', 'lr', 'batch_size' and 'loss_fxn'
                ('c' -> CrossEntropyLoss, otherwise BCEWithLogitsLoss on
                one-hot targets).
        '''
        self.model = model  # neural net
        self.opts = opts

        # device agnostic code snippet
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        self.epochs = opts['epochs']
        self.optimizer = torch.optim.Adam(model.parameters(), opts['lr'])
        # BCEWithLogitsLoss needs one-hot float targets, CrossEntropyLoss class indices.
        self.use_onehot = opts['loss_fxn'] != 'c'
        if self.use_onehot:
            self.criterion = torch.nn.BCEWithLogitsLoss()
        else:
            self.criterion = torch.nn.CrossEntropyLoss()

        # Use the datasets handed in by the caller instead of notebook globals.
        self.train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                                        batch_size=opts['batch_size'],
                                                        shuffle=True)
        self.valid_loader = torch.utils.data.DataLoader(dataset=test_set,
                                                        batch_size=opts['batch_size'],
                                                        shuffle=True)

    def _loss_targets(self, labels):
        '''Return ``labels`` in the form expected by ``self.criterion``.'''
        if self.use_onehot:
            return torch.from_numpy(onehot(labels)).to(self.device)
        return labels

    def train(self):
        '''Train for ``self.epochs`` epochs, validating after each one.'''
        for epoch in range(self.epochs):
            # test() switches the model to eval mode, so training mode has to
            # be re-enabled at the start of EVERY epoch (dropout / batch norm).
            self.model.train()
            self.tr_loss = []
            for i, (data,labels) in tqdm_notebook(enumerate(self.train_loader),
                                                  total = len(self.train_loader)):

                data, labels = data.to(self.device),labels.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(data)
                loss = self.criterion(outputs, self._loss_targets(labels))
                loss.backward()
                self.optimizer.step()
                self.tr_loss.append(loss.item())

            # Checkpoint after the first epoch and after every fifth epoch.
            if (epoch+1) % 5 == 0 or epoch == 0:
                tag = int((epoch+1)/5)
                torch.save(self.model, 'model/model{save_model_time}/net_{epoch}.pkl'.format(save_model_time=save_model_time,epoch=tag))
                torch.save(self.model.state_dict(), 'model/model{save_model_time}/net_params_{epoch}.pkl'.format(save_model_time=save_model_time,epoch=tag))

            self.test(epoch) # run through the validation set

    def test(self,epoch):
        '''Evaluate on the validation loader and record/print epoch metrics.

        Appends the mean validation and training loss of this epoch to the
        global ``test_loss`` / ``train_loss`` lists.
        '''
        self.model.eval()    # puts model in eval mode
        self.test_loss = []
        self.test_accuracy = []

        # No gradients are needed for validation.
        with torch.no_grad():
            for data, labels in self.valid_loader:
                data, labels = data.to(self.device),labels.to(self.device)
                outputs = self.model(data)
                _, predicted = torch.max(outputs, 1)

                loss = self.criterion(outputs, self._loss_targets(labels))
                self.test_loss.append(loss.item())
                self.test_accuracy.append((predicted == labels).sum().item() / predicted.size(0))

        test_loss.append(np.mean(self.test_loss))
        train_loss.append(np.mean(self.tr_loss))
        bestmodel(self.model,save_model_time,np.mean(self.test_loss)) # find best model
        print('epoch: {}, train loss: {}, test loss: {}, test accuracy: {}'.format(
              epoch+1, np.mean(self.tr_loss), np.mean(self.test_loss), np.mean(self.test_accuracy)))

### Trainer for Continuous Data

In [None]:
class TrainHelper():
    '''
    Training/validation driver for regression models (MSE loss).

    Relies on notebook globals: ``save_model_time`` (checkpoint folder),
    ``train_loss`` / ``test_loss`` (per-epoch history lists appended to in
    ``test``), ``bestmodel`` and ``tqdm_notebook``.
    '''

    def __init__(self,model,train_set,test_set,opts):
        '''
        Args:
            model: ``nn.Module`` to train; moved to CUDA when available.
            train_set: dataset used for optimisation.
            test_set: dataset evaluated after every epoch (validation).
            opts: dict with keys 'epochs', 'lr' and 'batch_size'.
        '''
        self.model = model  # neural net
        self.opts = opts

        # device agnostic code snippet
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        self.epochs = opts['epochs']
        self.optimizer = torch.optim.Adam(model.parameters(), opts['lr'])
        self.criterion = torch.nn.MSELoss()

        # Use the datasets handed in by the caller instead of notebook globals.
        self.train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                                        batch_size=opts['batch_size'],
                                                        shuffle=True)
        self.valid_loader = torch.utils.data.DataLoader(dataset=test_set,
                                                        batch_size=opts['batch_size'],
                                                        shuffle=True)

    def train(self):
        '''Train for ``self.epochs`` epochs, validating after each one.'''
        for epoch in range(self.epochs):
            # test() switches the model to eval mode, so training mode has to
            # be re-enabled at the start of EVERY epoch (dropout / batch norm).
            self.model.train()
            self.tr_loss = []
            for i, (data,labels) in tqdm_notebook(enumerate(self.train_loader),
                                                  total = len(self.train_loader)):

                data, labels = data.to(self.device),labels.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(data)
                # Add a trailing dimension to the targets before the loss.
                # NOTE(review): assumes the model emits (batch, 1) - confirm.
                labels = labels.unsqueeze(1)
                loss = self.criterion(outputs.float(), labels.float())
                loss.backward()
                self.optimizer.step()
                self.tr_loss.append(loss.item())

            # Checkpoint on epochs 0, 5, 10, ... (0-based).
            if epoch % 5 == 0:
                tag = int(epoch/5)
                torch.save(self.model, 'model/model{save_model_time}/net_{epoch}.pkl'.format(save_model_time=save_model_time,epoch=tag))
                torch.save(self.model.state_dict(), 'model/model{save_model_time}/net_params_{epoch}.pkl'.format(save_model_time=save_model_time,epoch=tag))

            self.test(epoch) # run through the validation set

    def test(self,epoch):
        '''Evaluate on the validation loader and record/print epoch losses.

        Appends the mean validation and training loss of this epoch to the
        global ``test_loss`` / ``train_loss`` lists.
        '''
        self.model.eval()    # puts model in eval mode
        self.test_loss = []
        self.test_accuracy = []  # unused for regression; kept for callers

        # No gradients are needed for validation.
        with torch.no_grad():
            for data, labels in self.valid_loader:
                data, labels = data.to(self.device),labels.to(self.device)
                outputs = self.model(data)
                labels = labels.unsqueeze(1)
                # Same float casts as in train(); without them MSELoss can
                # receive mismatched dtypes (e.g. float32 vs float64/int).
                loss = self.criterion(outputs.float(), labels.float())
                self.test_loss.append(loss.item())

        test_loss.append(np.mean(self.test_loss))
        train_loss.append(np.mean(self.tr_loss))
        bestmodel(self.model,save_model_time,np.mean(self.test_loss)) # find best model
        print('epoch: {}, train loss: {}, test loss: {}'.format(
        epoch+1, np.mean(self.tr_loss), np.mean(self.test_loss)))

## Testing

### Accuracy

Choosing test_dataset or sub_dataset

In [None]:
# Rebind train_X / train_y to the result of sklearn's shuffle (fixed
# random_state) and keep the first 2000 samples as a cheap proxy for
# training-set accuracy.
train_X, train_y = shuffle(train_X, train_y, random_state=0) 
train_X_sub = train_X[:2000]
train_y_sub = train_y[:2000]
sub_dataset = MyDataset(train_X_sub, train_y_sub)
# Module-level loader read by confusion(), get_list_cat() and get_list_con().
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=True)

In [None]:
def test_result(model, datatype):
    """Print and return the classification accuracy of ``model``.

    Args:
        model: classifier whose outputs are per-class scores.
        datatype: ``'sub'`` evaluates the global ``sub_dataset`` (training
            subset); any other value evaluates the global ``test_dataset``.

    Returns:
        Fraction of correctly classified samples (``nan`` for an empty set).
    """
    is_subset = datatype == 'sub'
    dataset = sub_dataset if is_subset else test_dataset
    # Accuracy does not depend on sample order, so no shuffling is needed.
    loader = torch.utils.data.DataLoader(dataset, batch_size=100, shuffle=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Evaluate with dropout / batch-norm in inference mode, then restore the
    # caller's mode.
    was_training = model.training
    model.eval()
    correct, total = 0, 0
    try:
        with torch.no_grad():
            for data, labels in loader:
                data, labels = data.to(device),labels.to(device)
                _, predicted = torch.max(model(data), 1)
                correct += (predicted == labels).sum().item()
                total += predicted.size(0)
    finally:
        model.train(was_training)

    # Count over all samples: averaging per-batch accuracies over-weights a
    # short final batch.
    accuracy = correct / total if total else float('nan')
    if is_subset:
        print('Training accuracy', accuracy)
    else:
        print('Testing accuracy', accuracy)
    return accuracy

### Confusion Matrix

In [None]:
def confusion(test_data, classifier):
    """Compute the confusion matrix of ``classifier`` over ``test_data``.

    Args:
        test_data: iterable of ``(data, labels)`` batches, e.g. a DataLoader.
            It was previously ignored in favour of the global ``test_loader``.
        classifier: model whose outputs are per-class scores.

    Returns:
        ``(classes, classes)`` array from ``sklearn.metrics.confusion_matrix``
        with rows = true class, columns = predicted class.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    predict, label = [], []

    # Evaluate in inference mode, then restore the caller's mode.
    was_training = classifier.training
    classifier.eval()
    try:
        with torch.no_grad():
            for data, labels in test_data:
                data, labels = data.to(device),labels.to(device)
                _, predicted = torch.max(classifier(data), 1)
                label.extend(labels.tolist())
                predict.extend(predicted.tolist())
    finally:
        classifier.train(was_training)

    # Fix the label set so the matrix always has one row/column per class.
    return confusion_matrix(label, predict, labels = list(range(classes)))

def visualize_confusion(M):
    """Draw confusion matrix ``M`` on a new 10x10 inch figure."""
    class_ids = list(range(classes))
    figure = plt.figure(figsize = (10, 10))
    axes = figure.add_subplot(1, 1, 1)
    display = ConfusionMatrixDisplay(M, display_labels = class_ids)
    # Integer cell format, blue colour map, drawn on the axes created above.
    display.plot(values_format = 'd', cmap = 'Blues', ax = axes)
    # plt.xticks(rotation = 20)

### Get Values

#### Categorical

In [None]:
def get_list_cat(model):
    """Collect true and predicted class indices over the global ``test_loader``.

    Args:
        model: classifier whose outputs are per-class scores.

    Returns:
        ``(true, pred)``: two lists of class indices in loader order.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pred, true = [], []

    # Evaluate in inference mode, then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(device),labels.to(device)
                _, predicted = torch.max(model(data), 1)
                pred.extend(predicted.tolist())
                true.extend(labels.tolist())
    finally:
        model.train(was_training)
    return true, pred

#### Continuous

In [None]:
def get_list_con(model):
    """Collect targets and scalar predictions over the global ``test_loader``.

    Args:
        model: regressor; the first output value of each sample is used as
            its prediction.

    Returns:
        ``(true, pred)``: two lists in loader order.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pred, true = [], []

    # Evaluate in inference mode, then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(device),labels.to(device)
                outputs = model(data)
                # Keep only the first output value of every sample.
                pred.extend(row[0] for row in outputs.tolist())
                true.extend(labels.tolist())
    finally:
        model.train(was_training)
    return true, pred

### AUC

In [None]:
def getAUC(model):
    """Return the weighted ROC-AUC of ``model`` on the global test loader.

    NOTE(review): the score is computed from hard class predictions (the
    output of ``get_list_cat``), not from probabilities - confirm this is
    intended.
    """
    y_true, y_pred = get_list_cat(model)
    return metrics.roc_auc_score(y_true, y_pred, average='weighted')

### Pearson R

In [None]:
from scipy import stats

In [None]:
def getR(model):
    """Return Pearson's r between true and predicted class indices."""
    y_true, y_pred = get_list_cat(model)
    # pearsonr also returns a p-value, which is not needed here.
    r_value, _p_value = stats.pearsonr(y_true, y_pred)
    return r_value

### Plot Train Versus Test Accuracy

In [None]:
 def pltacc(train_acc, test_acc, epoch):
     """Plot training and testing accuracy against epochs 5, 10, ..., ``epoch``.

     ``train_acc`` and ``test_acc`` must hold one value per plotted epoch.
     """
     epochs = list(range(5, epoch + 1, 5))
     plt.figure()
     for series, colour, legend_text in ((train_acc, 'g', 'Training accuracy'),
                                         (test_acc, 'b', 'Testing accuracy')):
         plt.plot(epochs, series, colour, label=legend_text)
     plt.title('Training and Testing accuracy')
     plt.xlabel('Epochs')
     plt.ylabel('Accuracy')
     plt.legend()
     plt.show()

### Plot Train Versus Test Loss

In [None]:
def pltloss(train_loss, test_loss, epoch):
    """Plot per-epoch training and testing loss for epochs 0 .. ``epoch`` - 1."""
    epochs = list(range(epoch))
    plt.figure()
    for series, colour, legend_text in ((train_loss, 'g', 'Training loss'),
                                        (test_loss, 'b', 'Testing loss')):
        plt.plot(epochs, series, colour, label=legend_text)
    plt.title('Training and Testing Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

### Plot Predicted Versus Label

#### Categorical

In [None]:
def plotcomp(model):
    """Scatter 50 randomly chosen (predicted class, true class) test pairs."""
    labels, predicts = get_list_cat(model)
    # Draw 50 distinct sample positions without replacement.
    chosen = random.sample(list(range(len(labels))), 50)
    plt.figure()
    label_sele = [labels[pos] for pos in chosen]
    pred_sele = [predicts[pos] for pos in chosen]
    plt.scatter(pred_sele, label_sele, c='g', marker='x')
    plt.title('Actual Values vs Predicated Values')
    plt.xlabel('Predicated Values')
    plt.ylabel('Actual Values')
    # Both axes start at zero and end at the largest sampled value.
    plt.xlim(0, max(pred_sele))
    plt.ylim(0, max(label_sele))
    # plt.legend()
    plt.show()

#### Continuous

In [None]:
def plotcomp(model):
    """Scatter up to 50 randomly chosen (prediction, target) test pairs.

    Args:
        model: regressor evaluated through ``get_list_con``.

    Raises:
        ValueError: if the test loader yields no samples.
    """
    labels, predicts = get_list_con(model)
    if not labels:
        raise ValueError('no samples available to plot')

    # Never ask for more samples than exist (random.sample would raise).
    chosen = random.sample(range(len(labels)), min(50, len(labels)))
    label_sele = [labels[pos] for pos in chosen]
    pred_sele = [predicts[pos] for pos in chosen]

    plt.figure()
    # The label gives plt.legend() an artist to list; without it the call
    # only produced a "no labelled artists" warning.
    plt.scatter(pred_sele, label_sele, c='b', marker='+', label='Sampled test points')
    plt.title('Actual Values vs Predicated Values')
    plt.xlabel('Predicated Values')
    plt.ylabel('Actual Values')
    plt.xlim(0, max(pred_sele))
    plt.ylim(0, max(label_sele))
    plt.legend()
    plt.show()

## Models

### BERT (not in use)

In [None]:
# class BertClassifier(nn.Module):
#   def __init__(self, config):
#     super(BertClassifier, self).__init__()
    
#     self.num_labels = config.num_labels
#     # Pre-trained BERT model
#     self.bert = BertModel(config)
#     # Dropout to avoid overfitting
#     self.dropout = nn.Dropout(config.hidden_dropout_prob)
#     # A single layer classifier added on top of BERT to fine tune for regression
#     self.predict = torch.nn.Linear(config.hidden_size, config.output)
#     # Weight initialization 
#     torch.nn.init.xavier_normal_(self.classifier.weight)
  
#   def forward(self, input_ids, token_type_ids=None, attention_mask=None, position_ids=None, head_mask=None):
#     # Forward pass through pre-trained BERT
#     outputs = self.bert(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
#                           attention_mask=attention_mask, head_mask=head_mask)
#     # Last layer output (Total 12 layers)
#     pooled_output = outputs[-1]
#     pooled_output = self.dropout(pooled_output) 
#     return self.classifier(pooled_output)

### CNN Naive Model

In [None]:
class CNN(nn.Module):
    """Small two-stage 1-D convolutional classifier.

    NOTE(review): the forward pass ends in a Sigmoid while the trainer in
    this notebook uses CrossEntropyLoss / BCEWithLogitsLoss, both of which
    expect raw logits - confirm the Sigmoid is intended.
    """

    def __init__(self, input_size, num_classes):
        """
        Build the convolution, pooling and output layers.

        Args:
            input_size: per-sample shape ``(channels, length)``, e.g.
                ``train_X.shape[1:]``. When the length is missing, the
                original hard-coded feature count (2304, which matches
                600-long inputs) is used.
            num_classes: number of output units.

        Raises:
            ValueError: if the sequence is too short for both conv/pool stages.
        """
        super(CNN, self).__init__()

        self.conv1 = torch.nn.Conv1d(input_size[0], 32, 3)
        self.relu = torch.nn.ReLU()
        self.conv2 = torch.nn.Conv1d(32, 64, 3)
        self.pool = torch.nn.MaxPool1d(4)
        self.fc1 = torch.nn.Linear(self._flat_features(input_size), num_classes)
        self.sig = nn.Sigmoid()

    @staticmethod
    def _flat_features(input_size):
        """Return the number of features reaching ``fc1`` after flattening."""
        if len(input_size) < 2:
            return 2304
        length = int(input_size[1])
        for _ in range(2):
            # Conv1d(kernel 3, no padding) removes 2 positions, then
            # MaxPool1d(4) keeps floor(length / 4).
            length = (length - 2) // 4
        if length <= 0:
            raise ValueError('input length {} is too short for CNN'.format(input_size[1]))
        return 64 * length

    def forward(self, x):
        """
        Map a batch of inputs to per-class sigmoid scores.

        Args:
            x: tensor of shape ``(batch, channels, length)``.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.sig(x)

        return x

In [None]:
model = CNN(train_X.shape[1:], classes)
model

In [None]:
opts = {
    'lr': 5e-4,
    'epochs': 50,
    'batch_size': 100,
    'loss_fxn': 'c'
}

In [None]:
test_loss, train_loss = [], []
CNNTrainer = TrainHelper(model = model,
                      train_set = train_dataset,
                      test_set = valid_dataset, opts = opts)

In [None]:
CNNTrainer.train()

#### Check for Output

In [None]:
test_result(model,'test')

Get accuracy:<br>'sub' -> subset of training dataset <br/>'test' -> test dataset

In [None]:
train_acc, test_acc = [], []

In [None]:
# Evaluate each saved checkpoint on the training subset and the test set.
# NOTE(review): checkpoint index `num` may not line up with the epochs that
# pltacc() puts on the x-axis (5, 10, ...) - confirm against the trainer.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    train_acc.append(test_result(model, 'sub'))
    test_acc.append(test_result(model, 'test'))

In [None]:
# Print Pearson's r for each saved checkpoint.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    print(getR(model))

#### Visualization

In [None]:
M = confusion(test_loader, model)
visualize_confusion(M)

In [None]:
pltacc(train_acc, test_acc, opts['epochs'])

In [None]:
pltloss(train_loss, test_loss, opts['epochs'])

### DeepSEA

In [None]:
class DeepSEA(nn.Module):
    """DeepSEA-style network: three conv stages followed by two dense layers."""

    def __init__(self, input_size, num_classes):
        """
        Args:
            input_size: per-sample shape ``(channels, length)``. When the
                length is missing, the original hard-coded feature count
                (7680, which matches 600-long inputs) is used.
            num_classes: number of output logits.

        Raises:
            ValueError: if the sequence is too short for the conv/pool stack.
        """
        super(DeepSEA, self).__init__()
        self.Conv1 = nn.Conv1d(in_channels=input_size[0], out_channels=320, kernel_size=8)
        self.Conv2 = nn.Conv1d(in_channels=320, out_channels=480, kernel_size=8)
        self.Conv3 = nn.Conv1d(in_channels=480, out_channels=960, kernel_size=8)
        self.Maxpool = nn.MaxPool1d(kernel_size=6, stride=6)
        self.Drop1 = nn.Dropout(p=0.2)
        self.Drop2 = nn.Dropout(p=0.5)
        self.Linear1 = nn.Linear(self._flat_features(input_size), 2000)
        self.Linear2 = nn.Linear(2000, num_classes)
        # Not used in forward(); kept so existing state dicts still load.
        self.Linear3 = nn.Linear(1000, 2)
        self.relu = torch.nn.ReLU()

    @staticmethod
    def _flat_features(input_size):
        """Return the number of features reaching ``Linear1`` after flattening."""
        if len(input_size) < 2:
            return 7680
        length = int(input_size[1])
        # Conv (kernel 8) removes 7 positions; MaxPool(6, stride 6) keeps floor(length / 6).
        length = (length - 7) // 6   # Conv1 + pool
        length = (length - 7) // 6   # Conv2 + pool
        length = length - 7          # Conv3 (no pooling afterwards)
        if length <= 0:
            raise ValueError('input length {} is too short for DeepSEA'.format(input_size[1]))
        return 960 * length

    def forward(self, x):
        """Map ``(batch, channels, length)`` inputs to ``num_classes`` logits."""
        x = self.Drop1(self.Maxpool(self.relu(self.Conv1(x))))
        x = self.Drop1(self.Maxpool(self.relu(self.Conv2(x))))
        x = self.Drop2(self.relu(self.Conv3(x)))
        x = torch.flatten(x, 1)
        x = self.relu(self.Linear1(x))
        x = self.Linear2(x)

        return x

In [None]:
opts = {
    'lr': 1e-3,
    'epochs': 1,
    'batch_size': 100,
    'loss_fxn': 'c'
}

In [None]:
deepsea = DeepSEA(train_X.shape[1:], classes)
deepsea

In [None]:
test_loss, train_loss = [], []
deepseaTrainer = TrainHelper(model = deepsea,
                      train_set = train_dataset,
                      test_set = valid_dataset,opts = opts)

In [None]:
deepseaTrainer.train()

#### Check for Output

In [None]:
test_result(deepsea, 'sub')

In [None]:
train_acc, test_acc = [], []

In [None]:
# Evaluate each saved DeepSEA checkpoint on the training subset and the test set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    deepsea.load_state_dict(torch.load(ckpt_path, map_location=device))
    deepsea.to(device)
    train_acc.append(test_result(deepsea, 'sub'))
    test_acc.append(test_result(deepsea, 'test'))

#### Visualization

In [None]:
M = confusion(test_loader, deepsea)
visualize_confusion(M)

In [None]:
pltacc(train_acc, test_acc, opts['epochs'])

In [None]:
pltloss(train_loss, test_loss, opts['epochs'])

In [None]:
# Plot predicted vs. actual values for each saved DeepSEA checkpoint.
# (The module-level pred_sele / label_sele lists were never read: plotcomp
# builds its own, so they are no longer created here.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    deepsea.load_state_dict(torch.load(ckpt_path, map_location=device))
    deepsea.to(device)
    plotcomp(deepsea)

### Basset

In [None]:
class Basset(nn.Module):
    """Basset-style network: three conv stages followed by three dense layers.

    NOTE(review): ``batch2`` is applied after both ``conv2`` and ``conv3``,
    so the two stages share one set of batch-norm parameters and running
    statistics. A dedicated layer would change the state-dict keys, so it is
    left as is - confirm whether the sharing is intended.
    NOTE(review): the output passes through a Sigmoid although the trainer's
    losses expect raw logits - confirm.
    """

    def __init__(self, input_size, num_class):
        """
        Args:
            input_size: per-sample shape ``(channels, length)``. When the
                length is missing, the original hard-coded feature count
                (2000, which matches 600-long inputs) is used.
            num_class: number of output units.

        Raises:
            ValueError: if the sequence is too short for the conv/pool stack.
        """
        super(Basset, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_size[0], out_channels=300, kernel_size=19)
        self.batch1 = nn.BatchNorm1d(num_features=300)
        self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=300, out_channels=200, kernel_size=11)
        self.batch2 = nn.BatchNorm1d(num_features=200)
        self.pool2 = nn.MaxPool1d(kernel_size=4)
        self.conv3 = nn.Conv1d(in_channels=200, out_channels=200, kernel_size=7)

        self.fc1 = nn.Linear(in_features=self._flat_features(input_size), out_features=1000)
        self.relu4 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.3)

        self.fc2 = nn.Linear(in_features=1000, out_features=1000)
        self.relu5 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.3)

        self.fc3 = nn.Linear(in_features=1000, out_features=num_class)
        # Not used in forward(); kept so existing state dicts still load.
        self.fc4 = nn.Linear(in_features=164, out_features=2)
        self.sig3 = nn.Sigmoid()

    @staticmethod
    def _flat_features(input_size):
        """Return the number of features reaching ``fc1`` after flattening."""
        if len(input_size) < 2:
            return 2000
        length = int(input_size[1])
        length = (length - 18) // 3   # conv1 (kernel 19) + pool1 (3)
        length = (length - 10) // 4   # conv2 (kernel 11) + pool2 (4)
        length = (length - 6) // 4    # conv3 (kernel 7)  + pool2 (4)
        if length <= 0:
            raise ValueError('input length {} is too short for Basset'.format(input_size[1]))
        return 200 * length

    def forward(self, inputs):
        """Map ``(batch, channels, length)`` inputs to per-class sigmoid scores."""
        output = self.pool1(self.relu(self.batch1(self.conv1(inputs))))
        output = self.pool2(self.relu(self.batch2(self.conv2(output))))
        output = self.pool2(self.relu(self.batch2(self.conv3(output))))

        output = torch.flatten(output, 1)

        output = self.dropout1(self.relu4(self.fc1(output)))
        output = self.dropout2(self.relu5(self.fc2(output)))

        output = self.fc3(output)
        output = self.sig3(output)

        return output


In [None]:
opts = {
    'lr': 1e-4,
    'epochs': 5,
    'batch_size': 100,
    'loss_fxn': 'c'
}

In [None]:
# Size the output layer from the data, like the other models; a hard-coded 2
# only works while the labels are exactly {0, 1}.
basset = Basset(train_X.shape[1:], classes)
basset

In [None]:
test_loss, train_loss = [], []
bassetTrainer = TrainHelper(model = basset,
                      train_set = train_dataset,
                      test_set = valid_dataset,opts = opts)

In [None]:
bassetTrainer.train()

#### Check for Output

In [None]:
test_result(basset, 'sub')

In [None]:
train_acc, test_acc = [], []

In [None]:
# Evaluate each saved Basset checkpoint on the training subset and the test set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    basset.load_state_dict(torch.load(ckpt_path, map_location=device))
    basset.to(device)
    train_acc.append(test_result(basset, 'sub'))
    test_acc.append(test_result(basset, 'test'))

Pearson R

In [None]:
# Print Pearson's r for each saved Basset checkpoint.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    basset.load_state_dict(torch.load(ckpt_path, map_location=device))
    basset.to(device)
    print(getR(basset))

AUC

In [None]:
# Print the ROC-AUC for each saved Basset checkpoint.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    basset.load_state_dict(torch.load(ckpt_path, map_location=device))
    basset.to(device)
    print(getAUC(basset))

#### Visualization

Matrix

In [None]:
M = confusion(test_loader, basset)
visualize_confusion(M)

Accuracy

In [None]:
pltacc(train_acc, test_acc, opts['epochs'])

Loss

In [None]:
pltloss(train_loss, test_loss, opts['epochs'])

Others

In [None]:
plotcomp(basset)

### VGG

In [None]:
class VGG16(nn.Module):
    """1-D VGG16: five blocks of padded 3-wide convolutions, then three dense layers."""

    def __init__(self, input_size, num_classes):
        """
        Args:
            input_size: per-sample shape ``(channels, length)``. When the
                length is missing, the original hard-coded feature count
                (512*18, which matches 600-long inputs) is used.
            num_classes: number of output logits.

        Raises:
            ValueError: if the sequence is shorter than 32 positions.
        """
        super(VGG16, self).__init__()
        self.conv1_1 = nn.Conv1d(input_size[0], 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv1d(64, 64, kernel_size=3, padding=1)

        self.conv2_1 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv1d(128, 128, kernel_size=3, padding=1)

        self.conv3_1 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv1d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv1d(256, 256, kernel_size=3, padding=1)

        self.conv4_1 = nn.Conv1d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv1d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv1d(512, 512, kernel_size=3, padding=1)

        self.conv5_1 = nn.Conv1d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv1d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv1d(512, 512, kernel_size=3, padding=1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        # max pooling (kernel_size, stride)
        self.pool = nn.MaxPool1d(2, 2)

        # fully connected layers
        self.fc6 = nn.Linear(self._flat_features(input_size), 1000)
        self.fc7 = nn.Linear(1000, 100)
        self.fc8 = nn.Linear(100, num_classes)

    @staticmethod
    def _flat_features(input_size):
        """Return the number of features reaching ``fc6`` after flattening."""
        if len(input_size) < 2:
            return 512*18
        # The padded convolutions keep the length; each of the five pools halves it.
        length = int(input_size[1])
        for _ in range(5):
            length //= 2
        if length <= 0:
            raise ValueError('input length {} is too short for VGG16'.format(input_size[1]))
        return 512 * length

    def forward(self, x):
        """Map ``(batch, channels, length)`` inputs to ``num_classes`` logits."""
        conv_blocks = (
            (self.conv1_1, self.conv1_2),
            (self.conv2_1, self.conv2_2),
            (self.conv3_1, self.conv3_2, self.conv3_3),
            (self.conv4_1, self.conv4_2, self.conv4_3),
            (self.conv5_1, self.conv5_2, self.conv5_3),
        )
        # Each block: conv + ReLU for every layer, then one max-pool.
        for block in conv_blocks:
            for conv in block:
                x = self.relu(conv(x))
            x = self.pool(x)

        x = torch.flatten(x, 1)
        x = self.dropout(self.relu(self.fc6(x)))
        x = self.dropout(self.relu(self.fc7(x)))
        x = self.fc8(x)

        return x

In [None]:
opts = {
    'lr': 1e-4,
    'epochs': 5,
    'batch_size': 100,
    'loss_fxn': 'c'
}

In [None]:
vgg = VGG16(train_X.shape[1:], classes)
vgg

In [None]:
test_loss, train_loss = [], []
vggTrainer = TrainHelper(model = vgg,
                      train_set = train_dataset,
                      test_set = valid_dataset,opts = opts)

In [None]:
vggTrainer.train()

#### Check for Output

In [None]:
test_result(vgg, 'sub')

In [None]:
train_acc, test_acc = [], []

In [None]:
# Evaluate each saved VGG checkpoint on the training subset and the test set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    vgg.load_state_dict(torch.load(ckpt_path, map_location=device))
    vgg.to(device)
    train_acc.append(test_result(vgg, 'sub'))
    test_acc.append(test_result(vgg, 'test'))

#### Visualization

In [None]:
M = confusion(test_loader, vgg)
visualize_confusion(M)

In [None]:
pltacc(train_acc, test_acc, opts['epochs'])

In [None]:
pltloss(train_loss, test_loss, opts['epochs'])

### Linear Perceptron

In [None]:
# Define Single Layer Perceptron network
class SLP(nn.Module):
    """Single-layer perceptron: one linear map from flattened inputs to class scores."""

    def __init__(self, in_features, classes):
        super(SLP, self).__init__()
        # The only trainable layer of the model.
        self.layer1 = nn.Linear(in_features,classes)

    def forward(self, x):
        """Flatten everything but the batch dimension and apply the linear layer."""
        flat = torch.flatten(x, 1)
        return self.layer1(flat)

In [None]:
# SLP takes two arguments (in_features, classes). The previous call wrapped
# both in one tuple, which raised TypeError for the missing `classes`.
slp = SLP(train_X.shape[1] * train_X.shape[2], classes)
slp

In [None]:
opts = {
    'lr': 1e-4,
    'epochs': 5,
    'batch_size': 100,
    'loss_fxn': 'c'
}
test_loss, train_loss = [], []
SLPTrainer = TrainHelper(model = slp,
                      train_set = train_dataset,
                      test_set = valid_dataset,opts = opts)

In [None]:
SLPTrainer.train()

In [None]:
test_result(slp, 'test')

In [None]:
M = confusion(test_loader, slp)
visualize_confusion(M)

### LeNet

In [None]:
class LeNet(nn.Module):
    """1-D LeNet-style classifier: three conv stages followed by two dense layers.

    NOTE(review): ``fc1`` feeds ``fc2`` without an activation in between -
    confirm this is intended.
    """

    def __init__(self, input_size, num_classes):
        """
        Args:
            input_size: per-sample shape ``(channels, length)``. When the
                length is missing, the original hard-coded feature count
                (8280, which matches 600-long inputs) is used.
            num_classes: number of output logits.

        Raises:
            ValueError: if the sequence is too short for the conv/pool stack.
        """
        super(LeNet, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=input_size[0], out_channels=6, kernel_size=5)
        self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.MaxPool1d(kernel_size=4)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=120, kernel_size=5)

        self.fc1 = nn.Linear(in_features=self._flat_features(input_size), out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=num_classes)

    @staticmethod
    def _flat_features(input_size):
        """Return the number of features reaching ``fc1`` after flattening."""
        if len(input_size) < 2:
            return 8280
        length = int(input_size[1])
        length = (length - 4) // 2   # conv1 (kernel 5) + pool1 (2)
        length = (length - 4) // 4   # conv2 (kernel 5) + pool2 (4)
        length = length - 4          # conv3 (kernel 5), no pooling afterwards
        if length <= 0:
            raise ValueError('input length {} is too short for LeNet'.format(input_size[1]))
        return 120 * length

    def forward(self, x):
        """Map ``(batch, channels, length)`` inputs to ``num_classes`` logits."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)

        return x

In [None]:
lenet = LeNet(train_X.shape[1:], classes)
lenet

In [None]:
opts = {
    'lr': 1e-3,
    'epochs': 25,
    'batch_size': 100,
    'loss_fxn': 'c'
}
test_loss, train_loss = [], []
LeNetTrainer = TrainHelper(model = lenet,
                      train_set = train_dataset,
                      test_set = valid_dataset, opts = opts)

In [None]:
LeNetTrainer.train()

#### Check for Output

In [None]:
train_acc, test_acc = [], []

In [None]:
# Evaluate each saved LeNet checkpoint on the training subset and the test set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for num in range(opts['epochs']//5):
    ckpt_path = 'model/model'+save_model_time+'/net_params_'+str(num)+'.pkl'
    # map_location + .to(device) keep this cell usable on CPU-only machines.
    lenet.load_state_dict(torch.load(ckpt_path, map_location=device))
    lenet.to(device)
    train_acc.append(test_result(lenet, 'sub'))
    test_acc.append(test_result(lenet, 'test'))

#### Visualization

In [None]:
pltacc(train_acc, test_acc, opts['epochs'])

In [None]:
pltloss(train_loss, test_loss, opts['epochs'])

In [None]:
M = confusion(test_loader, lenet)
visualize_confusion(M)