In [None]:
!pip uninstall torch torchvision torchaudio

In [25]:
import torch
import pandas as pd
from torch.utils.data import Dataset, TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
import random
import os
import csv
import numpy as np
from tqdm import tqdm
import dask.dataframe as dd

In [21]:
# Choose device for torch computing
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(torch.__version__)

device

1.10.2+cpu


device(type='cpu')

In [3]:
# Change working directory to labels
work_dir = "D:\\UIUC\\Fall 2021\\Research"
os.chdir(work_dir)

# Reads labels into dict of (filename: ff_value)
training_data_labels = {}
with open("training_ff.txt") as f:
    for line in f:
        key, val = line.split()
        training_data_labels[key] = float(val)
        
test_data_labels = {}
with open("test_ff.txt") as f:
    for line in f:
        key, val = line.split()
        test_data_labels[key] = float(val)

In [4]:
# intersect = 0
# for i in training_data_labels:
#     if i in test_data_labels:
#         intersect += 1
        
# print(len(training_data_labels), len(test_data_labels))

In [6]:
# Change directory to data
os.chdir("D:\\UIUC\\Fall 2021\\Research\\rf_without_tgc")

# Iterate thru data to create lists of tensors
training_data = []
training_labels = []
for file in tqdm(os.listdir()):
    if file not in training_data_labels:
        continue
        
    file_data = pd.read_csv(file, header=None).T
    
    x_tensor = file_data.to_numpy().astype(np.float32)
    y_tensor = float(training_data_labels[file])
    
    for x in x_tensor:
        training_data.append(x)
        training_labels.append(y_tensor)
    
test_data = []
test_labels = []
for file in tqdm(os.listdir()):
    if file not in test_data_labels:
        continue
        
    file_data = pd.read_csv(file, header=None).T
    
    x_tensor = file_data.to_numpy().astype(np.float32)
    y_tensor = float(test_data_labels[file])

    for x in x_tensor:
        test_data.append(x)
        test_labels.append(y_tensor)

100%|██████████████████████████████████████████████████████████████████████████████| 2040/2040 [01:38<00:00, 20.69it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2040/2040 [01:51<00:00, 18.36it/s]


In [7]:
# # Create class for DataLoader compatability
# class Data():
#     def __init__(self, x, y):
#         self.x = x
#         self.y = y
    
#     def __len__(self):
#         return len(self.x)

#     def __getitem__(self, idx):
#         X = self.x[idx]
#         y =  self.y[idx]

#         return X, y

training_data = np.array(training_data)
training_labels = np.array(training_labels)

test_data = np.array(test_data)
test_labels = np.array(test_labels)

print(training_data.shape)
print(test_data.shape)
    
# training_data = np.swapaxes(training_data, 1, 2)
# test_data = np.swapaxes(test_data, 1, 2)

# Create data tensors
training_data = torch.Tensor(training_data)
training_labels = torch.Tensor(training_labels)

test_data = torch.Tensor(test_data)
test_labels = torch.Tensor(test_labels)

# training_data = (training_data - torch.mean(training_data)) / torch.std(training_data)
# test_data = (test_data - torch.mean(test_data)) / torch.std(test_data)

train_dataset = TensorDataset(training_data, training_labels)
test_dataset = TensorDataset(test_data, test_labels)

# # Load tensors into class for torch DataLoaders
# train_data = Data(training_data, training_labels)
# test_data = Data(test_data, test_labels)

(261120, 1024)
(261120, 1024)


In [8]:
# DataLoader Parameters
loader_params = {
    "batch_size":  32, 
    "shuffle":     True,
    "num_workers": 0
}

# Create DataLoader for training data
loader = DataLoader(train_dataset, **loader_params)

In [9]:
import torch.nn as nn

pool = 4

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 8, 8, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv1d(8, 8, 4, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv1d(8, 16, 4, 1, padding='same'),
            nn.Tanh(),
            nn.MaxPool1d(pool, stride=pool, padding=0)
        )
        
        self.fc1 = nn.Sequential(
            nn.Linear(256, 32),
            nn.Tanh()
        )
        
        self.fc2 = nn.Linear(32, 1)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# import torch.nn as nn

# class CNN(nn.Module):
#     def __init__(self):
#         super(CNN, self).__init__()
        
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(1, 8, 16, 1, 'same'),
#             nn.Tanh(),
#             nn.MaxPool2d(2, stride=2, padding=0)
#         )
        
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(8, 8, 8, 1, padding='same'),
#             nn.Tanh(),
#             nn.MaxPool2d(2, stride=2, padding=0)
#         )
        
#         self.conv3 = nn.Sequential(
#             nn.Conv2d(8, 16, 8, 1, padding='same'),
#             nn.Tanh(),
#             nn.MaxPool2d(2, stride=2, padding=0)
#         )
        
#         self.fc1 = nn.Sequential(
#             nn.Flatten(),
#             nn.Linear(65024, 32),
#             nn.Tanh()
#         )
        
#         self.fc2 = nn.Linear(32, 2)
    
#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.conv2(x)
#         x = self.conv3(x)
#         x = self.fc1(x)
#         x = self.fc2(x)
#         return x

In [10]:
# Initialise net and push to computing device
cnn = CNN()
cnn.to(device)

CNN(
  (conv1): Sequential(
    (0): Conv1d(1, 8, kernel_size=(8,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv1d(8, 8, kernel_size=(4,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv1d(8, 16, kernel_size=(4,), stride=(1,), padding=same)
    (1): Tanh()
    (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=256, out_features=32, bias=True)
    (1): Tanh()
  )
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [11]:
# Define loss function
loss_func = nn.MSELoss()

In [12]:
from torch import optim

# Define optimiser and learning rate
optimiser = optim.Adagrad(cnn.parameters(), lr=.005)

In [148]:
# # DUMMY DATA
# train_data = []
# train_labels = []

# test_data = []
# test_labels = []


# for i in range(500):
#     f = np.random.uniform(1,10)
#     train_data.append((np.array([np.sin(f * i) for i in np.linspace(0, 20, 1000)]).astype(np.float32)))
#     train_labels.append(float(f))
    
#     f_test = np.random.uniform(1,10)
#     test_data.append((np.array([np.sin(f_test * i) for i in np.linspace(0, 20, 1000)]).astype(np.float32)))
#     test_labels.append(float(f_test))

# train_data = np.array(train_data)
# train_labels = np.array(train_labels)

# test_data = np.array(test_data)
# test_labels = np.array(test_labels)

In [149]:
# # Create data tensors for dummy data
# # train_data_ = torch.stack(train_data)
# # train_labels_ = torch.Tensor(train_labels)

# # test_data_ = torch.stack(test_data)
# # test_labels_ = torch.Tensor(test_labels)

# tensor_train_data = torch.Tensor(train_data)
# tensor_train_labels = torch.Tensor(train_labels)

# tensor_test_data = torch.Tensor(test_data)
# tensor_test_labels = torch.Tensor(test_labels)

# train_dataset = TensorDataset(tensor_train_data, tensor_train_labels)
# test_dataset = TensorDataset(tensor_test_data, tensor_test_labels)


# # train_data = (train_data_ - torch.mean(train_data_)) / torch.std(torch.max(train_data_))
# # test_data = (test_data_ - torch.mean(test_data_)) / torch.std(torch.max(test_data_))


# # # Load tensors into class for torch DataLoaders
# # train_data = Data(train_dataset)
# # test_data = Data(test_dataset)

In [150]:
# # DataLoader Parameters
# loader_params = {
#     "batch_size":  50, 
#     "shuffle":     True,
#     "num_workers": 0
# }

# # Create DataLoader for training data
# loader = DataLoader(train_dataset, **loader_params)

In [13]:
from torch.autograd import Variable
import time

num_epochs = 50

def train(num_epochs, cnn, loader):
    cnn.train()
    
    accum = 8
    
    total_step = len(loader)
    
    for epoch in range(num_epochs):        
        for i, (signal, label) in enumerate(loader):
            # Send data to device
            signal, label = signal.to(device), label.to(device)
            signal = torch.unsqueeze(signal, 1)
            
            with torch.set_grad_enabled(True):
                # Pass inputs through net and obtain outputs
                output = cnn(signal)

                # Cast type to float and flatten results
                output = output.to(torch.float)
                output = torch.squeeze(output)
                label = label.to(torch.float)
                
#                 print(output.shape)
#                 print(label.shape)
            
                # Call loss function on net outputs
                loss = loss_func(output, label)

    #             a = list(cnn.parameters())[0].clone()

                # Computes gradients for weights
                loss.backward()            

                if ((i + 1) % accum == 0) or (i + 1 == len(loader)):
                    # Apply gradients using optimiser policy
                    optimiser.step()

                    # Zero network gradients
                    optimiser.zero_grad()

#                 print(list(cnn.parameters())[0].grad)
    #             b = list(cnn.parameters())[0].clone()

    #             print(torch.equal(a.data, b.data))

            
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

start = time.time()

train(num_epochs, cnn, loader)

end = time.time()
print(end - start)

  return F.conv1d(input, weight, bias, self.stride,


Epoch [1/50], Loss: 55.6049
Epoch [2/50], Loss: 38.6558
Epoch [3/50], Loss: 20.2157
Epoch [4/50], Loss: 30.7112
Epoch [5/50], Loss: 25.0720
Epoch [6/50], Loss: 13.9100
Epoch [7/50], Loss: 31.9539
Epoch [8/50], Loss: 49.8780
Epoch [9/50], Loss: 36.8339
Epoch [10/50], Loss: 21.5013
Epoch [11/50], Loss: 10.1994
Epoch [12/50], Loss: 18.2315
Epoch [13/50], Loss: 23.0485
Epoch [14/50], Loss: 36.9669
Epoch [15/50], Loss: 12.3179
Epoch [16/50], Loss: 32.2275
Epoch [17/50], Loss: 21.8762
Epoch [18/50], Loss: 34.5440
Epoch [19/50], Loss: 15.9651
Epoch [20/50], Loss: 15.5056
Epoch [21/50], Loss: 23.2938
Epoch [22/50], Loss: 21.0300
Epoch [23/50], Loss: 17.4086
Epoch [24/50], Loss: 20.2770
Epoch [25/50], Loss: 39.4670
Epoch [26/50], Loss: 11.3965
Epoch [27/50], Loss: 14.7045
Epoch [28/50], Loss: 32.8229
Epoch [29/50], Loss: 21.9086
Epoch [30/50], Loss: 17.1291
Epoch [31/50], Loss: 26.0479
Epoch [32/50], Loss: 40.9243
Epoch [33/50], Loss: 35.4108
Epoch [34/50], Loss: 27.9002
Epoch [35/50], Loss: 29

In [254]:
import scipy

# Create test DataLoader
test_loader = DataLoader(test_dataset, **loader_params)

def test():
    cnn.eval()
    
    predictions = np.array([])
    labels = np.array([])
    
    with torch.no_grad():
        for i, (signal, label) in enumerate(test_loader):
            # Send input to device
            signal = torch.Tensor(signal).to(device)
            signal = torch.unsqueeze(signal,1)
            
            # Get output of net, append to lists
            output = cnn(signal).cpu().detach().numpy()
            predictions = np.append(predictions, output)            
            labels = np.append(labels, label)

    p_max = max(predictions)
    p_min = min(predictions)
    scaled_predictions = []
    
    for i in predictions:
        scaled_predictions.append(((i - p_min) / (p_max - p_min)) * (10 - 1) + 1)
            
    print(scaled_predictions, labels)
    
    r = scipy.stats.pearsonr(scaled_predictions, labels)
    
    print('Pearson r of the model is %.2f' % r[0])
    
test()

RuntimeError: Expected 3-dimensional input for 3-dimensional weight[8, 256, 8], but got 4-dimensional input of size [1, 1, 256, 1023] instead

In [14]:
import scipy

# DataLoader Parameters
loader_params = {
    "batch_size":  1, 
    "shuffle":     False,
    "num_workers": 0
}

# Create test DataLoader
test_loader = DataLoader(test_dataset, **loader_params)

def test():
    cnn.eval()
    
    # Initialise arrays and dict
    predictions = np.array([])
    labels = np.array([])
    averaged_dict = {}
    
    with torch.no_grad():
        for i, (signal, label) in tqdm(enumerate(test_loader)):
            # Send input to device
            signal = torch.Tensor(signal).to(device)
            signal = torch.unsqueeze(signal, 1)
             
            # Get output of net, append to lists
            output = torch.squeeze(cnn(signal)).cpu().detach().numpy()
            predictions = np.append(predictions, output)            
            labels = np.append(labels, label)
        
#             print(output, label)
        
        for i in range(len(labels)):
            if labels[i] not in averaged_dict:
                averaged_dict[labels[i]] = [predictions[i]]
            else:
                averaged_dict[labels[i]].append(predictions[i])
            
    for i in averaged_dict:
        averaged_dict[i] = np.mean(averaged_dict[i])
    
    averaged_predictions = []
    ordered_labels = []
        
    for i in averaged_dict:
        ordered_labels.append(i)
        averaged_predictions.append(averaged_dict[i])
    
    print(predictions)
    print(labels)
    print(labels.shape)
    
#     r = scipy.stats.pearsonr(predictions, labels)
    
    r = scipy.stats.pearsonr(averaged_predictions, ordered_labels)
    
    print('Pearson r of the model is %.2f' % r[0])
    
test()

261120it [08:03, 539.88it/s]


[ 8.82579708  8.5583477   7.71505833 ... 16.63234711 13.95572281
 16.84137726]
[ 0.546314    0.546314    0.546314   ... 34.99910736 34.99910736
 34.99910736]
(261120,)
Pearson r of the model is 0.82
