In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import math as m
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Directory holding the two training .npy files read by the loading cell.
# It is passed to os.path.join, so the empty string makes the file names
# resolve relative to the current working directory.
data_dir = "" # directory containing training data

In [2]:
'''
Loading Data
'''
# Locate the label file and the raw-window file inside data_dir.
classes_path = os.path.join(data_dir, 'final_10_orthogonal_barcodes_classes.npy')
windows_path = os.path.join(data_dir, 'final_10_orthogonal_barcodes_raw_windows.npy')

Y_file = np.load(classes_path)
X_file = np.load(windows_path)
print('Data loaded successfully')

# Collapse the labels into a 1-D array.
Y_file = Y_file.flatten()

# Keep only the first window_length columns of every window.
# 19881 == 141 * 141, the square side used by the later reshaping cell.
window_length = 19881
X_file = X_file[:, :window_length]

Data loaded successfully


In [None]:
'''
Train, test split
'''
# Fixed seed so that the train / validation / test membership is identical on
# every run; without it each execution evaluates on a different random subset.
SPLIT_SEED = 42

# Add a trailing singleton axis: (n_windows, window_length, 1).
X_train = X_file.reshape(len(X_file), X_file.shape[1], 1)
labels_train = Y_file

# First split: 80% of the windows for training, stratified by class label.
X_tr, X_vld, lab_tr, lab_vld = train_test_split(
    X_train, labels_train, stratify=labels_train, train_size=0.8,
    random_state=SPLIT_SEED)

# Second split: divide the held-out 20% into validation and test parts,
# again stratified. No size is passed, so scikit-learn's default proportion
# applies (test_size=0.25 of the held-out data).
X_vld, X_test, lab_vld, lab_test = train_test_split(
    X_vld, lab_vld, stratify=lab_vld, random_state=SPLIT_SEED)

# Integer class indices for each split.
y_tr = lab_tr.astype(int)
y_vld = lab_vld.astype(int)
y_test = lab_test.astype(int)

print('Data split done')

In [None]:
'''
If gpu is available we will use it
'''
# Request GPU execution. The model and the training batches are moved to the
# GPU only when this flag is True AND torch.cuda.is_available() reports a device.
use_cuda = True

In [None]:
'''
Reshaping data
'''
# Side length of the square "image" each window is folded into
# (141 * 141 == 19881 values per window).
reshape = 141

# Give every split the layout (n_samples, 1 channel, reshape, reshape).
X_tr, X_vld, X_test = [
    split.reshape(len(split), 1, reshape, reshape)
    for split in (X_tr, X_vld, X_test)
]
print('Data reshaping done')

In [3]:
'''
Zipping data together and storing in trainloader objects
'''
# Pair every sample with its integer label; each dataset is a plain list of
# (sample, label) tuples that a DataLoader can index.
train_set = [(sample, label) for sample, label in zip(X_tr, y_tr)]
val_set = [(sample, label) for sample, label in zip(X_vld, y_vld)]
test_set = [(sample, label) for sample, label in zip(X_test, y_test)]
print('Done zipping and converting')

Data split done
Data reshaping done
Done zipping and converting


In [4]:
'''
Creating the neural net
'''
# Book-keeping slots for the best result found (not updated in the cells shown here).
best_accuracy = float('-inf')
best_params = list()

batch_size = 30

# All three loaders share the same batching options.
loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=2)
trainloader = torch.utils.data.DataLoader(train_set, **loader_options)
vldloader = torch.utils.data.DataLoader(val_set, **loader_options)
testloader = torch.utils.data.DataLoader(test_set, **loader_options)

# Optimiser settings.
lr = 0.001
epochs = 250
momentum = 0.7557312793639288

# Output channels of the four convolution stages.
O_1, O_2, O_3, O_4 = 17, 18, 32, 37

# Convolution kernel sizes of the four stages.
K_1, K_2, K_3, K_4 = 3, 1, 4, 2

# Max-pooling window sizes of the four stages.
KP_1, KP_2, KP_3, KP_4 = 4, 4, 1, 1

# Number of features entering the first fully connected layer. Each stage
# shrinks the square feature map by an unpadded convolution
# (side - kernel + 1) followed by a pooling step (floor division by the pool
# size); the final map has side**2 positions in each of the O_4 channels.
feature_map_side = reshape
for kernel_size, pool_size in ((K_1, KP_1), (K_2, KP_2), (K_3, KP_3), (K_4, KP_4)):
    feature_map_side = (feature_map_side - kernel_size + 1) // pool_size
conv_linear_out = feature_map_side ** 2 * O_4

# Width of the hidden fully connected layer.
FN_1 = 148

class CNN(nn.Module):
    """Four-stage convolutional classifier followed by two linear layers.

    Layer sizes are read from module-level hyperparameters: O_1..O_4 (output
    channels), K_1..K_4 (convolution kernel sizes), KP_1..KP_4 (max-pool
    sizes), conv_linear_out (flattened feature count fed to fc1) and FN_1
    (hidden width). The network emits 11 raw class scores per sample.

    Args:
        dropout_p: Probability used by the dropout layer placed between the
            two fully connected layers. The default of 0.0 disables dropout,
            which matches how this network has effectively always behaved
            (see the note in __init__), so ``CNN()`` is unchanged and existing
            state dicts still load. Pass e.g. 0.2 to actually regularise;
            in that case switch between ``train()`` and ``eval()`` modes
            around training and evaluation.
    """

    def __init__(self, dropout_p=0.0):

        super(CNN, self).__init__()

        # Each stage: unpadded convolution -> ReLU -> max pooling.
        self.conv1 = nn.Sequential(nn.Conv2d(1,O_1,K_1),nn.ReLU(), 
                                   nn.MaxPool2d(KP_1))

        self.conv2 = nn.Sequential(nn.Conv2d(O_1,O_2,K_2),nn.ReLU(),
                                   nn.MaxPool2d(KP_2))

        self.conv3 = nn.Sequential(nn.Conv2d(O_2,O_3,K_3),nn.ReLU(),
                                   nn.MaxPool2d(KP_3))

        self.conv4 = nn.Sequential(nn.Conv2d(O_3,O_4,K_4),nn.ReLU(),
                                   nn.MaxPool2d(KP_4))

        # nn.Linear's third positional parameter is `bias`. Passing
        # nn.Dropout(0.2) there only switched the bias on (a module is truthy)
        # and never applied any dropout, so dropout is now a real layer.
        self.fc1 = nn.Linear(conv_linear_out, FN_1)
        self.dropout = nn.Dropout(dropout_p)

        # 11 output classes -- presumably the 10 barcodes plus one extra
        # class; confirm against the label file.
        self.fc2 = nn.Linear(FN_1, 11)


    def forward(self, x):
        """Return the raw class scores for a batch shaped (batch, 1, H, W)."""
        x = x.float()
        # The conv stages already end in ReLU + max pooling, so their outputs
        # are non-negative and leaky_relu leaves them unchanged.
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = F.leaky_relu(self.conv3(x))
        x = F.leaky_relu(self.conv4(x))
        # Flatten everything except the batch axis.
        x = x.view(len(x), -1)
        x = F.logsigmoid(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Build the network and move it to the GPU when one was requested and exists.
net = CNN()
gpu_requested_and_present = use_cuda and torch.cuda.is_available()
if gpu_requested_and_present:
    net = net.cuda()

In [5]:
'''
Train
'''
# Optimise the network with SGD on the cross-entropy loss and report the
# validation accuracy after every epoch.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

# Send batches to wherever the model's weights live, so the same code runs on
# GPU and on CPU-only machines (validation previously called .cuda()
# unconditionally and crashed without a GPU).
device = next(net.parameters()).device

# Honour the configured number of epochs instead of a hard-coded literal.
for epoch in range(epochs):

    # Training mode, so that mode-dependent layers (if the model has any)
    # behave correctly during optimisation.
    net.train()
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        # CrossEntropyLoss expects integer (long) class indices.
        labels = labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = net(inputs)
        # The loss is evaluated in double precision.
        outputs = outputs.to(dtype=torch.float64)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print('Finished epoch number ' + str(epoch))

    # Validation pass: evaluation mode, no gradient tracking.
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for inputs, labels in vldloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            # The predicted class is the index of the largest score.
            _, predicted = torch.max(outputs, 1)
            total += len(labels)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the validation set: %d %%'
          % (100 * correct / total))

Finished epoch number 0
Accuracy of the network on the validation set: 56 %
Finished epoch number 1
Accuracy of the network on the validation set: 58 %
Finished epoch number 2
Accuracy of the network on the validation set: 57 %
Finished epoch number 3
Accuracy of the network on the validation set: 62 %
Finished epoch number 4
Accuracy of the network on the validation set: 61 %
Finished epoch number 5
Accuracy of the network on the validation set: 61 %
Finished epoch number 6
Accuracy of the network on the validation set: 63 %
Finished epoch number 7
Accuracy of the network on the validation set: 61 %
Finished epoch number 8
Accuracy of the network on the validation set: 61 %
Finished epoch number 9
Accuracy of the network on the validation set: 62 %
Finished epoch number 10
Accuracy of the network on the validation set: 63 %
Finished epoch number 11
Accuracy of the network on the validation set: 65 %
Finished epoch number 12
Accuracy of the network on the validation set: 66 %
Finished 

Finished epoch number 107
Accuracy of the network on the validation set: 78 %
Finished epoch number 108
Accuracy of the network on the validation set: 77 %
Finished epoch number 109
Accuracy of the network on the validation set: 77 %
Finished epoch number 110
Accuracy of the network on the validation set: 78 %
Finished epoch number 111
Accuracy of the network on the validation set: 78 %
Finished epoch number 112
Accuracy of the network on the validation set: 79 %
Finished epoch number 113
Accuracy of the network on the validation set: 77 %
Finished epoch number 114
Accuracy of the network on the validation set: 79 %
Finished epoch number 115
Accuracy of the network on the validation set: 79 %
Finished epoch number 116
Accuracy of the network on the validation set: 78 %
Finished epoch number 117
Accuracy of the network on the validation set: 79 %
Finished epoch number 118
Accuracy of the network on the validation set: 79 %
Finished epoch number 119
Accuracy of the network on the validat

Accuracy of the network on the validation set: 81 %
Finished epoch number 213
Accuracy of the network on the validation set: 80 %
Finished epoch number 214
Accuracy of the network on the validation set: 81 %
Finished epoch number 215
Accuracy of the network on the validation set: 80 %
Finished epoch number 216
Accuracy of the network on the validation set: 80 %
Finished epoch number 217
Accuracy of the network on the validation set: 77 %
Finished epoch number 218
Accuracy of the network on the validation set: 80 %
Finished epoch number 219
Accuracy of the network on the validation set: 81 %
Finished epoch number 220
Accuracy of the network on the validation set: 81 %
Finished epoch number 221
Accuracy of the network on the validation set: 81 %
Finished epoch number 222
Accuracy of the network on the validation set: 81 %
Finished epoch number 223
Accuracy of the network on the validation set: 81 %
Finished epoch number 224
Accuracy of the network on the validation set: 81 %
Finished epo

In [6]:
'''
Test
'''
# Measure accuracy on the held-out test split, collect the true and predicted
# class of every test sample, and write the accuracy line to a text file.
correct = 0
total = 0
all_true = []
all_pred = []

# Send batches to wherever the model's weights live, so the evaluation also
# runs on CPU-only machines instead of calling .cuda() unconditionally.
device = next(net.parameters()).device

# Evaluate in eval mode and restore the previous mode afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = net(inputs)
        # The predicted class is the index of the largest score.
        _, predicted = torch.max(outputs, 1)
        total += len(labels)
        correct += (predicted == labels).sum().item()
        # tolist() yields plain Python ints, so no later conversion is needed.
        all_true.extend(labels.tolist())
        all_pred.extend(predicted.tolist())
net.train(was_training)

test_summary = "Accuracy of the network on the test set: %d %%" % (100 * correct / total)
print(test_summary)

# The context manager closes the file even if the write fails.
with open("final_10_orthogonal_barcodes_cnn_results_20210330.txt", "w") as text_file:
    text_file.write(test_summary)

Accuracy of the network on the test set: 82 %


In [10]:
'''
Saving the trained net
'''
# Persist only the learned parameters (the state dict), not the whole module.
# NOTE(review): the destination is an absolute, machine-specific path and does
# not use data_dir -- adjust before running elsewhere.
trained_net_path = "/disk1/pore_data/karen_data/final_10_orthogonal_barcodes_trained_cnn_20210330.pt"
trained_weights = net.state_dict()
torch.save(trained_weights, trained_net_path)