In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import math as m
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root directory holding the NanoporeTERs .npy inputs (results are written here too).
# Set the NANOPORE_DATA_DIR environment variable to run on another machine;
# when it is unset the original hard-coded location is used.
data_dir = os.environ.get("NANOPORE_DATA_DIR", "/disk1/pore_data/NanoporeTERs/")

In [3]:
'''
Loading Data
'''
# Build both file paths first, then read the label array followed by the
# raw-signal array.
labels_path = os.path.join(
    data_dir,
    'Y00_Y08_LBset&set1set2set3_MinIONNoise_LBNoise_EcoliNoise_classes_05142019.npy')
signals_path = os.path.join(
    data_dir,
    'Y00_Y08_LBset&set1set2set3_MinIONNoise_LBNoise_EcoliNoise_raw20000_05142019.npy')

Y_file = np.load(labels_path)
X_file = np.load(signals_path)
print('Data loaded successfully')

# Collapse the labels into a one-dimensional array.
Y_file = Y_file.flatten()

# Keep only the first `window_length` columns of every row.
# 19881 == 141 * 141, the side length used when the data is reshaped later.
window_length = 19881
X_file = X_file[:, 0:window_length]

Data loaded successfully


In [4]:
'''
Train, test split
'''
# Fixed seed so the split (and therefore every accuracy reported below) is
# reproducible after a kernel restart; the original calls were unseeded.
split_seed = 0

# Append a trailing axis of length 1: (n, window) -> (n, window, 1).
X_train = X_file.reshape(len(X_file), X_file.shape[1], 1)
labels_train = Y_file

# 80 % of the samples go to training, stratified by class label.
X_tr, X_vld, lab_tr, lab_vld = train_test_split(
    X_train, labels_train, stratify=labels_train, train_size=0.8,
    random_state=split_seed)

# Divide the held-out 20 % into validation and test. test_size=0.25 is the
# scikit-learn default the original relied on implicitly, so the proportions
# are unchanged: 15 % validation / 5 % test of the full data set.
X_vld, X_test, lab_vld, lab_test = train_test_split(
    X_vld, lab_vld, stratify=lab_vld, test_size=0.25,
    random_state=split_seed)

# Integer class labels for each split.
y_tr = lab_tr.astype(int)
y_vld = lab_vld.astype(int)
y_test = lab_test.astype(int)

print('Data split done')

Data split done


In [5]:
'''
If gpu is available we will use it
'''
# Derive the flag from the runtime instead of hard-coding True, so it really
# means "a GPU is available". Assign False here to force CPU execution.
use_cuda = torch.cuda.is_available()

In [6]:
'''
Reshaping data
'''
# Side length of the square image each sample is folded into.
reshape = 141

# Turn every split into shape (n_samples, 1, reshape, reshape).
X_tr, X_vld, X_test = (
    split.reshape(len(split), 1, reshape, reshape)
    for split in (X_tr, X_vld, X_test)
)
print('Data reshaping done')

Data reshaping done


In [7]:
'''
Zipping data together and storing in trainloader objects
'''
# Materialise each split as a list of (sample, label) tuples.
train_set = [*zip(X_tr, y_tr)]
val_set = [*zip(X_vld, y_vld)]
test_set = [*zip(X_test, y_test)]
print('Done zipping and converting')

Done zipping and converting


In [8]:
'''
Creating the neural net
'''


class CNN(nn.Module):
    """Four-stage convolutional classifier for square, single-channel inputs.

    Every stage is Conv2d -> ReLU -> MaxPool2d. The flattened feature map is
    fed to a hidden linear layer with a log-sigmoid activation and then to a
    final linear layer that returns raw class scores (logits).

    Args:
        input_size: Side length of the square input image. When ``None``
            (the default) the notebook-level ``reshape`` value is used, which
            is what the original implementation always did.
        num_classes: Number of output logits. Defaults to 10.
        dropout: Dropout probability applied after the hidden layer. The
            original code passed ``nn.Dropout(0.2)`` as the third positional
            argument of ``nn.Linear``; that argument is ``bias``, so it only
            enabled the bias and dropout was never applied. The default of
            0.0 keeps that effective behaviour. If a value > 0 is used, the
            caller must switch between ``net.train()`` and ``net.eval()``.

    Raises:
        ValueError: If ``input_size`` is too small to survive the four
            convolution / pooling stages.
    """

    def __init__(self, input_size=None, num_classes=10, dropout=0.0):
        # nn.Module must be initialised before attributes are attached to it.
        super(CNN, self).__init__()

        if input_size is None:
            input_size = reshape  # notebook global defined in the reshaping cell

        # Output channels of the four convolution stages.
        self.O_1 = 17
        self.O_2 = 18
        self.O_3 = 32
        self.O_4 = 37

        # Convolution kernel sizes.
        self.K_1 = 3
        self.K_2 = 1
        self.K_3 = 4
        self.K_4 = 2

        # Max-pool kernel sizes.
        self.KP_1 = 4
        self.KP_2 = 4
        self.KP_3 = 1
        self.KP_4 = 1

        # Track the spatial side length through the stages: an unpadded,
        # stride-1 convolution removes (kernel - 1) pixels and the pooling
        # floor-divides by its kernel size.
        side = input_size
        for kernel, pool in ((self.K_1, self.KP_1), (self.K_2, self.KP_2),
                             (self.K_3, self.KP_3), (self.K_4, self.KP_4)):
            side = (side - kernel + 1) // pool
        if side < 1:
            raise ValueError(
                'input_size=%r is too small for this architecture' % (input_size,))

        # Number of features entering the first fully connected layer.
        self.conv_linear_out = int(side ** 2 * self.O_4)

        # Width of the hidden fully connected layer.
        self.FN_1 = 148

        self.conv1 = nn.Sequential(nn.Conv2d(1, self.O_1, self.K_1), nn.ReLU(),
                                   nn.MaxPool2d(self.KP_1))
        self.conv2 = nn.Sequential(nn.Conv2d(self.O_1, self.O_2, self.K_2), nn.ReLU(),
                                   nn.MaxPool2d(self.KP_2))
        self.conv3 = nn.Sequential(nn.Conv2d(self.O_2, self.O_3, self.K_3), nn.ReLU(),
                                   nn.MaxPool2d(self.KP_3))
        self.conv4 = nn.Sequential(nn.Conv2d(self.O_3, self.O_4, self.K_4), nn.ReLU(),
                                   nn.MaxPool2d(self.KP_4))
        self.fc1 = nn.Linear(self.conv_linear_out, self.FN_1)
        # Parameter-free, so state_dict keys are the same as before.
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(self.FN_1, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch ``x``."""
        x = x.float()
        # Each stage already ends in ReLU + max-pool, so its output is
        # non-negative; the additional leaky_relu the original applied here
        # was an identity and has been dropped.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.view(len(x), -1)
        x = F.logsigmoid(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [9]:
'''
Train
'''

# Kept from the original cell; nothing in this cell updates them.
best_accuracy = -float('Inf')
best_params = []

batch_size = 30

trainloader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, num_workers=2)
# Accuracy does not depend on sample order, so the held-out loaders are not
# shuffled.
vldloader = torch.utils.data.DataLoader(
    val_set, batch_size=batch_size, shuffle=False, num_workers=2)
testloader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=False, num_workers=2)

lr = 0.001
epochs = 250
momentum = 0.7557312793639288

# One device decision shared by the model, the training batches and the
# validation batches. The original moved validation batches to CUDA
# unconditionally, which fails on a CPU-only machine.
device = torch.device(
    'cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

net = CNN()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

# Use the `epochs` setting; the original loop hard-coded 250 and ignored it.
for epoch in range(epochs):  # loop over the dataset multiple times

    net.train()
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        # The loss is evaluated in float64, as in the original cell.
        outputs = net(inputs).to(dtype=torch.float64)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print('Finished epoch number ' + str(epoch))

    # Validation pass: no gradients, model in evaluation mode.
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for inputs, labels in vldloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = net(inputs).argmax(dim=1)
            total += len(labels)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the validation set: %d %%'
          % (100 * correct / total))

Finished epoch number 0
Accuracy of the network on the validation set: 10 %
Finished epoch number 1
Accuracy of the network on the validation set: 10 %
Finished epoch number 2
Accuracy of the network on the validation set: 10 %
Finished epoch number 3
Accuracy of the network on the validation set: 10 %
Finished epoch number 4
Accuracy of the network on the validation set: 28 %
Finished epoch number 5
Accuracy of the network on the validation set: 41 %
Finished epoch number 6
Accuracy of the network on the validation set: 53 %
Finished epoch number 7
Accuracy of the network on the validation set: 60 %
Finished epoch number 8
Accuracy of the network on the validation set: 63 %
Finished epoch number 9
Accuracy of the network on the validation set: 65 %
Finished epoch number 10
Accuracy of the network on the validation set: 66 %
Finished epoch number 11
Accuracy of the network on the validation set: 67 %
Finished epoch number 12
Accuracy of the network on the validation set: 68 %
Finished 

Finished epoch number 107
Accuracy of the network on the validation set: 80 %
Finished epoch number 108
Accuracy of the network on the validation set: 80 %
Finished epoch number 109
Accuracy of the network on the validation set: 80 %
Finished epoch number 110
Accuracy of the network on the validation set: 80 %
Finished epoch number 111
Accuracy of the network on the validation set: 80 %
Finished epoch number 112
Accuracy of the network on the validation set: 80 %
Finished epoch number 113
Accuracy of the network on the validation set: 81 %
Finished epoch number 114
Accuracy of the network on the validation set: 80 %
Finished epoch number 115
Accuracy of the network on the validation set: 81 %
Finished epoch number 116
Accuracy of the network on the validation set: 80 %
Finished epoch number 117
Accuracy of the network on the validation set: 80 %
Finished epoch number 118
Accuracy of the network on the validation set: 80 %
Finished epoch number 119
Accuracy of the network on the validat

Accuracy of the network on the validation set: 81 %
Finished epoch number 216
Accuracy of the network on the validation set: 81 %
Finished epoch number 217
Accuracy of the network on the validation set: 81 %
Finished epoch number 218
Accuracy of the network on the validation set: 80 %
Finished epoch number 219
Accuracy of the network on the validation set: 81 %
Finished epoch number 220
Accuracy of the network on the validation set: 80 %
Finished epoch number 221
Accuracy of the network on the validation set: 81 %
Finished epoch number 222
Accuracy of the network on the validation set: 80 %
Finished epoch number 223
Accuracy of the network on the validation set: 80 %
Finished epoch number 224
Accuracy of the network on the validation set: 81 %
Finished epoch number 225
Accuracy of the network on the validation set: 79 %
Finished epoch number 226
Accuracy of the network on the validation set: 80 %
Finished epoch number 227
Accuracy of the network on the validation set: 81 %
Finished epo

In [10]:
'''
Test
'''
# Evaluate on whichever device the trained model already lives on. The
# original called .cuda() unconditionally, which fails without a GPU.
device = next(net.parameters()).device
net.eval()

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        predicted = net(inputs).argmax(dim=1)
        total += len(labels)
        correct += (predicted == labels).sum().item()

test_summary = "Accuracy of the network on the test set: %d %%" % (
    100 * correct / total)
print(test_summary)

# The context manager closes the results file even if the write raises.
with open(os.path.join(data_dir, "NTERs_trained_cnn_results_05152019.txt"), "w") as text_file:
    text_file.write(test_summary)

Accuracy of the network on the test set: 80 %


In [11]:
	
'''
Saving the trained net
'''
	
# Saving is currently disabled: the torch.save call below is commented out,
# so this cell only evaluates the string above and writes nothing to disk.
#torch.save(net, '/disk1/pore_data/jeff_saved/NTERs_trained_cnn_05152019.pt')

'\nSaving the trained net\n'