In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, random_split, DataLoader
import torch.optim as optim

In [32]:
cwt_imgs = np.abs(np.load(r"cwt_data/cwt_dataset.npy")) #becasue output coefficients are complex
cwt_labels = np.abs(np.load(r"cwt_data/cwt_dataset_labels.npy"))

FileNotFoundError: [Errno 2] No such file or directory: 'cwt_data/cwt_dataset.npy'

In [21]:
#custom dataset class to manage all of the cwt images
class CWTDataset(Dataset):
    def __init__(self, imgs, labels): #initialize. pass in the images and label
        self.imgs = imgs
        self.labels = labels
    
    def __len__(self): 
        return len(self.labels)
    
    def __getitem__(self, idx): #return img, label pair with index
        sel_image = self.imgs[idx]
        sel_label = self.labels[idx]
        return sel_image, sel_label
    
cwt_data = CWTDataset(cwt_imgs, cwt_labels)

In [33]:
#split cwt_data (Dataset) into 80 20 train/test split
train_dataset, test_dataset = random_split(cwt_data, [0.8, 0.2])

In [34]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle = False)

In [35]:
#note the shape of the data
#for example CWT, we're looking at 99 scales x 7508 time points per image

class CWTNN(nn.Module):
    def __init__(self): #define model layers
        #first convolve with 6 kernels
        #then apply pooling/dimensionality reduction
        #then concolve with 16 kernels
        #transition to fully connected "linear" layers
        super().__init__()
        self.conv1 = nn.Conv2d(1, 1, kernel_size=3, padding = 1) #one in_channel bc its a grayscale image, one image at a time
        self.pool = nn.MaxPool2d(2, 2) #only need to define pooling once, but will get applied after each convolution
        self.conv2 = nn.Conv2d(1, 1, kernel_size=3, padding = 1)
        self.linear1 = nn.Linear(24 * 748, 120)
        self.linear2 = nn.Linear(120, 60)
        self.linear3 = nn.Linear(60, 2)  #binary classification   
    
    def forward(self, x): #define forward pass i.e. how does the model handle inputs?
        x = x.float() #convert to float first
        #print("before", x.shape)
        x = self.pool(F.relu(self.conv1(x)))
        #print("after first conv, then pool", x.shape)
        x = self.pool(F.relu(self.conv2(x)))
        #print("after second conv, then pool", x.shape)
        x = torch.flatten(x, 1) #flatten to input into linear layer
        #print("after flatten", x.shape)
        x = F.relu(self.linear1(x))
        #print("after linear 1", x.shape)
        x = F.relu(self.linear2(x))
        #print("after linear 2", x.shape)
        x = self.linear3(x) 
        #print("after linear 3", x.shape)
        return x

        
model = CWTNN()


In [36]:
#define additional hyperparameters
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

EPOCHS = 10 #loop 10 times through the entire dataset


In [None]:
for epoch in range(EPOCHS):
    
    total_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        X, y = data #input image and 0/1 classification
        print(X, y.long())
        optimizer.zero_grad() #zero out gradients so weights aren't misadjusted
        
        out = model(X)
        loss_val = loss(out, y.long()) #xent loss calculated with output and y 
        loss_val.backward() #backpropataion
        optimizer.step() #adjust weights based on backpropagation
        
        total_loss += loss_val.item()
        print(f"loss: {loss_val.item()}")
        
print("finished training")

In [28]:
cwt_labels

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

In [30]:
corr = 0
n = 0

for image, labels in test_loader:
    out_label = model(X)
    _, pred = torch.max(out_label, 1)
    n += labels.size(0) #include all possible oucomes
    corr += (pred == labels).sum().item()
    
print(corr/n)

0.575
