# Jupyter Notebook Version of DL

The project is first developed in Jupyter Notebook for easy testing/verification but could be moved to a formal Python Script in the future (if I have time). Contrary to the Xtract-Sampler we won't be implementing any byte extraction but rather right now assume we have the data.
***
Training and Developing a model

### Import Statements

In [53]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [90]:
import pickle
import numpy as np
import torch, os
from torch import nn
from torch.utils.data import DataLoader
from ByteVectorDataset import ByteVectorDataset
from model import SimpleCNN
from time import time

%autoreload 2

In [2]:
BYTE_BLOCK_SIZE = 512

### Loading Files/Data Processing

Mostly for testing purposes

In [3]:
print("loading files now...")

with open('CDIACFileData/ByteVectors/byte_vector_dict_512B_one_gram.pkl', "rb") as fp1:
    one_gram = pickle.load(fp1)
with open('CDIACFileData/ByteVectors/byte_vector_dict_512B_two_gram.pkl', "rb") as fp2:
    two_gram = pickle.load(fp2)

print("loading files done!")

loading files now...
loading files done!


In [4]:
label_path = "CDIACFileData/labels/cdiac_naivetruth_processed.csv"
dataset_one_gram = ByteVectorDataset(label_path, one_gram)
dataset_two_gram = ByteVectorDataset(label_path, two_gram)

In [5]:
reccomended_num_workers = 4 * torch.cuda.device_count()
# ^ from https://discuss.pytorch.org/t/guidelines-for-assigning-num-workers-to-dataloader/813/3

In [6]:
dataloader_one_gram = DataLoader(dataset_one_gram, batch_size=1,
                        shuffle=True, num_workers=reccomended_num_workers)
dataloader_two_gram = DataLoader(dataset_two_gram, batch_size=1,
                        shuffle=True, num_workers=reccomended_num_workers)

In [7]:
for i_batch, sample_batched in enumerate(dataloader_one_gram):
    print(i_batch, sample_batched)
    # observe 3rd batch and stop.
    if i_batch == 2:
        break

0 [tensor([[67, 82, 85, 73, 83, 69, 44, 68, 65, 84, 69, 44, 84, 73, 77, 69, 44, 76,
         65, 84, 73, 84, 85, 68, 69, 44, 76, 79, 78, 71, 73, 84, 85, 68, 69, 44,
         65, 73, 82, 95, 84, 77, 80, 44, 72, 85, 77, 73, 68, 73, 84, 89, 44, 83,
         79, 76, 65, 82, 95, 82, 65, 68, 44, 82, 69, 76, 95, 87, 73, 78, 68, 95,
         83, 80, 69, 69, 68, 44, 83, 72, 73, 80, 95, 83, 80, 69, 69, 68, 44, 65,
         66, 83, 79, 76, 85, 84, 69, 95, 87, 73, 78, 68, 95, 83, 80, 69, 69, 68,
         44, 65, 84, 77, 95, 80, 82, 69, 44, 88, 67, 79, 50, 95, 65, 73, 82, 44,
         83, 68, 44, 88, 67, 79, 50, 95, 65, 73, 82, 95, 70, 76, 65, 71, 44, 70,
         67, 79, 50, 95, 65, 73, 82, 44, 83, 68, 44, 65, 73, 82, 95, 70, 67, 79,
         50, 95, 70, 76, 65, 71, 44, 80, 67, 79, 50, 95, 65, 73, 82, 44, 83, 68,
         44, 80, 67, 79, 50, 95, 65, 73, 82, 95, 70, 76, 65, 71, 44, 69, 81, 95,
         84, 77, 80, 44, 83, 83, 84, 44, 83, 68, 44, 88, 67, 79, 50, 95, 69, 81,
         44, 83, 68, 44, 

From here on out we will just be using one grams but the two grams process is identical

In [8]:
TRAIN_TEST_SPLIT = .8 # 80% for training 20% for testing
train_set_size = int(TRAIN_TEST_SPLIT * len(dataset_one_gram))
val_set_size = len(dataset_one_gram) - train_set_size
train_set, val_set = torch.utils.data.random_split(dataset_one_gram, [train_set_size, val_set_size])
print(len(train_set))
print(len(val_set))

11474
2869


In [9]:
train_loader = DataLoader(train_set, batch_size=32,
                        shuffle=True, num_workers=reccomended_num_workers)
val_loader = DataLoader(val_set, batch_size=32,
                        shuffle=True, num_workers=reccomended_num_workers)

In [10]:
dataiter = iter(train_loader)
images, labels = dataiter.next()
print(type(images))
print(images.shape)
print(labels)

<class 'torch.Tensor'>
torch.Size([32, 512])
tensor([5, 1, 0, 2, 2, 1, 1, 2, 2, 2, 2, 3, 4, 2, 3, 2, 3, 3, 2, 3, 1, 2, 4, 1,
        1, 2, 2, 0, 2, 1, 2, 2])


### Training

In [99]:
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(1))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(1))

True
0
<torch.cuda.device object at 0x7f4ec87aaa00>
4
Tesla V100-SXM2-32GB


In [96]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
torch.rand(1).cuda()
print(torch.version.cuda)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [86]:
%autoreload 2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN(BYTE_BLOCK_SIZE)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
model.to(device)

loss_function = nn.NLLLoss() # This is a convex loss function
optimizer = torch.optim.Adam(model.parameters(), lr=.03) # From my CS361 class SGD is shown to do well on convex functions

Let's use 4 GPUs!


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [80]:
%autoreload 2
time0 = time()
epochs = 10
for epoch in range(epochs):
    running_loss = 0
    for byte_vector, labels in train_loader:
        #print(byte_vector.shape)
        
        labels = labels.to(device)
        
        #Training Pass
        optimizer.zero_grad()
       
        output = model(byte_vector).to(device)
        #print(output.shape)
        loss = loss_function(output, labels)
        
        #backpropagation
        loss.backward()
        
        #optimization
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(epoch, running_loss/len(train_loader)))


print("\nTraining Time (in minutes) = ", (time()-time0)/60)

Epoch 0 - Training loss: -7.695251252631387e+16
Epoch 1 - Training loss: -8.12736058212467e+16
Epoch 2 - Training loss: -8.574409860843882e+16
Epoch 3 - Training loss: -9.036789000037315e+16
Epoch 4 - Training loss: -9.512069710138062e+16
Epoch 5 - Training loss: -1.0003913434016786e+17
Epoch 6 - Training loss: -1.0515376202414504e+17
Epoch 7 - Training loss: -1.1038894388896542e+17
Epoch 8 - Training loss: -1.1582011860655275e+17
Epoch 9 - Training loss: -1.2135831933902405e+17

Training Time (in minutes) =  0.3542299469312032


### Testing

In [None]:
correct_count, all_count = 0
for byte_vector, labels in val_loader:
    for i in range(len(labels)):
        byte_vector.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            output = model(byte_vector)

        probabilities = torch.exp(output)
        probab = list(probabilities.cpu().numpy()[0])
        pred_label = probab.index(max(probab))
        true_label = labels.cpu.numpy()[i]
        if true_label == pred_label:
            correct_count += 1
        all_count += 1

print("Number of Images Tested =", all_count)
print("\n Model Accuracy =", (correct_count/all_count))
