In [1]:
import torch
import pandas as pd
from torch.utils.data import Dataset, TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
import random
import os
import csv
import numpy as np
from tqdm import tqdm
import dask.dataframe as dd

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [24]:
work_dir = "D:\\UIUC\\Fall 2021\\Research"
os.chdir(work_dir)

# Reads labels into dict of (filename: value)
training_data_labels = {}
with open("training_classifier.txt") as f:
    for line in f:
        key, val = line.split()
        training_data_labels[key] = int(val)
        
test_data_labels = {}
with open("test_classifier.txt") as f:
    for line in f:
        key, val = line.split()
        test_data_labels[key] = int(val)

In [None]:
# intersect = 0
# for i in training_data_labels:
#     if i in test_data_labels:
#         intersect += 1
        
# print(len(training_data_labels), len(test_data_labels))

In [33]:
# Change directory to data
os.chdir("D:\\UIUC\\Fall 2021\\Research\\rf_without_tgc")

# Iterate thru data to create lists of tensors
training_data = []
training_labels = []
for file in tqdm(os.listdir()):
    if file not in training_data_labels:
        continue
        
    file_data = pd.read_csv(file, header=None).T
    
    x_tensor = file_data.to_numpy().astype(np.float32)
    y_tensor = int(training_data_labels[file])
    
    for x in x_tensor:
        training_data.append(x)
        training_labels.append(y_tensor)
    
test_data = []
test_labels = []
for file in tqdm(os.listdir()):
    if file not in test_data_labels:
        continue
        
    file_data = pd.read_csv(file, header=None).T
    
    x_tensor = file_data.to_numpy().astype(np.float32)
    y_tensor = int(test_data_labels[file])

    for x in x_tensor:
        test_data.append(x)
        test_labels.append(y_tensor)

100%|██████████████████████████████████████████████████████████████████████████████| 2040/2040 [01:27<00:00, 23.33it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2040/2040 [01:25<00:00, 23.95it/s]


In [34]:
# # Create class for DataLoader compatability
# class Data():
#     def __init__(self, x, y):
#         self.x = x
#         self.y = y
    
#     def __len__(self):
#         return len(self.x)

#     def __getitem__(self, idx):
#         X = self.x[idx]
#         y =  self.y[idx]

#         return X, y

training_data = np.array(training_data)
training_labels = np.array(training_labels)

test_data = np.array(test_data)
test_labels = np.array(test_labels)

print(training_data.shape)
print(test_data.shape)
    
# training_data = np.swapaxes(training_data, 1, 2)
# test_data = np.swapaxes(test_data, 1, 2)

# Create data tensors
training_data = torch.Tensor(training_data)
training_labels = torch.Tensor(training_labels)

test_data = torch.Tensor(test_data)
test_labels = torch.Tensor(test_labels)

# training_data = (training_data - torch.mean(training_data)) / torch.std(training_data)
# test_data = (test_data - torch.mean(test_data)) / torch.std(test_data)

train_dataset = TensorDataset(training_data, training_labels)
test_dataset = TensorDataset(test_data, test_labels)

# # Load tensors into class for torch DataLoaders
# train_data = Data(training_data, training_labels)
# test_data = Data(test_data, test_labels)

(261120, 1024)
(261120, 1024)


In [None]:
# # https://shashikachamod4u.medium.com/excel-csv-to-pytorch-dataset-def496b6bcc1
# class FeatureDataset(Dataset):
#     def __init__(self, file_name):
#         x = pd.read_csv(file_name).iloc[1:700]
#         y = [random.randint(0, 1) for i in range(x.shape[0])]

#         sc = StandardScaler()
#         x_train = sc.fit_transform(x)
#         y_train = y
        
#         self.x_train = torch.tensor(x_train, dtype=torch.float32)
#         self.y_train = torch.tensor(y_train)
        
#     def __len__(self):
#         return len(self.y_train)
    
#     def __getitem__(self, idx):
#         return self.x_train[idx], self.y_train[idx]

In [None]:
# # https://shashikachamod4u.medium.com/excel-csv-to-pytorch-dataset-def496b6bcc1
# class TestDataset(Dataset):
#     def __init__(self, file_name):
#         x = pd.read_csv(file_name).iloc[700:]
#         y = [random.randint(0, 1) for i in range(x.shape[0])]

#         sc = StandardScaler()
#         x_train = sc.fit_transform(x)
#         y_train = y
        
#         self.x_train = torch.tensor(x_train, dtype=torch.float32)
#         self.y_train = torch.tensor(y_train)
        
#     def __len__(self):
#         return len(self.y_train)
    
#     def __getitem__(self, idx):
#         return self.x_train[idx], self.y_train[idx]

In [35]:
loader_params = {
    "batch_size":  32, 
    "shuffle":     True,
    "num_workers": 0
}

loader = DataLoader(train_dataset, **loader_params)

In [None]:
# from torch.utils.data import DataLoader

# loaders = {
#     'train': torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True, num_workers=0),
#     'test': torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True, num_workers=0),
# }

In [36]:
import torch.nn as nn

pool = 4

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 8, 8, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv1d(8, 8, 4, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv1d(8, 16, 4, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.fc1 = nn.Sequential(
            nn.Linear(256, 32),
            nn.Tanh()
        )
        
        self.fc2 = nn.Linear(32, 2)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# import torch.nn as nn

# class CNN(nn.Module):
#     def __init__(self):
#         super(CNN, self).__init__()
        
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(1, 8, 16, 1, 'same'),
#             nn.Tanh(),
#             nn.MaxPool2d(4, stride=4, padding=0)
#         )
        
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(8, 8, 8, 1, padding='same'),
#             nn.Tanh(),
#             nn.MaxPool2d(4, stride=4, padding=0)
#         )
        
#         self.conv3 = nn.Sequential(
#             nn.Conv2d(8, 16, 8, 1, padding='same'),
#             nn.Tanh(),
#             nn.MaxPool2d(4, stride=4, padding=0)
#         )
        
#         self.fc1 = nn.Sequential(
#             nn.Flatten(),
#             nn.Linear(960, 32),
#             nn.Tanh()
#         )
        
#         self.fc2 = nn.Linear(32, 2)
    
#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.conv2(x)
#         x = self.conv3(x)
#         x = self.fc1(x)
#         x = self.fc2(x)
#         return x

In [37]:
cnn = CNN()
cnn.to(device)

CNN(
  (conv1): Sequential(
    (0): Conv1d(1, 8, kernel_size=(8,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv1d(8, 8, kernel_size=(4,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv1d(8, 16, kernel_size=(4,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=256, out_features=32, bias=True)
    (1): Tanh()
  )
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [38]:
loss_func = nn.CrossEntropyLoss()

In [39]:
from torch import optim

optimiser = optim.Adagrad(cnn.parameters(), lr=.005)

In [42]:
from torch.autograd import Variable
import time

num_epochs = 50

def train(num_epochs, cnn, loader):
    cnn.train()
    
    accum = 8
    
    total_step = len(loader)

    for epoch in range(num_epochs):        
        for i, (signal, label) in enumerate(loader):
            # Send data to device
            signal, label = signal.to(device), label.to(device)
            signal = torch.unsqueeze(signal, 1)
            
            with torch.set_grad_enabled(True):
                # Pass inputs through net and obtain outputs
                output = cnn(signal)

                # Cast type to float and flatten results
                output = output.to(torch.float)
                output = torch.squeeze(output)
                label = label.to(torch.int64)
                
#                 print(output.shape)
#                 print(label.shape)
            
                # Call loss function on net outputs
                loss = loss_func(output, label)

    #             a = list(cnn.parameters())[0].clone()

                # Computes gradients for weights
                loss.backward()            

                if ((i + 1) % accum == 0) or (i + 1 == len(loader)):
                    # Apply gradients using optimiser policy
                    optimiser.step()

                    # Zero network gradients
                    optimiser.zero_grad()

#                 print(list(cnn.parameters())[0].grad)
    #             b = list(cnn.parameters())[0].clone()

    #             print(torch.equal(a.data, b.data))

            
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

start = time.time()

train(num_epochs, cnn, loader)

end = time.time()
print(end - start)

Epoch [1/50], Loss: 0.1811
Epoch [2/50], Loss: 0.1530
Epoch [3/50], Loss: 0.0802
Epoch [4/50], Loss: 0.0523
Epoch [5/50], Loss: 0.0968
Epoch [6/50], Loss: 0.2381
Epoch [7/50], Loss: 0.2489
Epoch [8/50], Loss: 0.0804
Epoch [9/50], Loss: 0.0809
Epoch [10/50], Loss: 0.3418
Epoch [11/50], Loss: 0.0791
Epoch [12/50], Loss: 0.1619
Epoch [13/50], Loss: 0.0641
Epoch [14/50], Loss: 0.1042
Epoch [15/50], Loss: 0.1323
Epoch [16/50], Loss: 0.2571
Epoch [17/50], Loss: 0.2621
Epoch [18/50], Loss: 0.0892
Epoch [19/50], Loss: 0.0474
Epoch [20/50], Loss: 0.2253
Epoch [21/50], Loss: 0.0553
Epoch [22/50], Loss: 0.0641
Epoch [23/50], Loss: 0.1858
Epoch [24/50], Loss: 0.0492
Epoch [25/50], Loss: 0.1079
Epoch [26/50], Loss: 0.0704
Epoch [27/50], Loss: 0.0930
Epoch [28/50], Loss: 0.0775
Epoch [29/50], Loss: 0.0927
Epoch [30/50], Loss: 0.2011
Epoch [31/50], Loss: 0.0222
Epoch [32/50], Loss: 0.2666
Epoch [33/50], Loss: 0.0383
Epoch [34/50], Loss: 0.0787
Epoch [35/50], Loss: 0.1095
Epoch [36/50], Loss: 0.0836
E

In [46]:
loader_params = {
    "batch_size":  1, 
    "shuffle":     False,
    "num_workers": 0
}

test_loader = DataLoader(test_dataset, **loader_params)

def test():
    cnn.eval()
    
    correct = 0
    total = 0
    predictions = []
    labels = []
    with torch.no_grad():
        for i, (signal, label) in enumerate(test_loader):
            signal = torch.Tensor(signal).to(device)
            signal = torch.unsqueeze(signal, 1)
            output = cnn(signal).cpu()
            
            labels.append(label.numpy())

            pred_y = torch.max(output, 1)[1].data.squeeze().detach().numpy()
            predictions.append(pred_y)
        
    averaged_predictions = []
    
    for i in predictions:
        if np.mean(i) > .5:
            averaged_predictions.append(1.)
        else:
            averaged_predictions.append(0.)
        
    averaged_labels = []
    for i in labels:
        averaged_labels.append(np.mean(i))

    for i in range(len(averaged_labels)):
        total += 1
        if averaged_labels[i] == averaged_predictions[i]:
            correct += 1
#             for i in range(len(label)):
#                 if pred_y[i] == label[i]:
#                     correct += 1
#                     total += 1
#                 else:
#                     total += 1
            
    print('Test Accuracy of the model on the %i test signals: %.2f' % (total, (correct / total)))
    
test()

Test Accuracy of the model on the 261120 test signals: 0.89
