In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import time
import random

2024-10-06 11:05:50.057141: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7ff7ec51ad70>

In [4]:
dataset = np.load('../data/dataset.npz',)
X, y = dataset['X'], dataset['y']
print(X.shape, y.shape)

(9834, 400) (9834,)


In [5]:
# fig, axis = plt.subplots(17, 5, figsize=(12, 48))

# for label in range(17):
#     for i in range(5):
#         axis[label, i].imshow(X[y == label][i+5].reshape(20, 20), vmin=0, vmax=255, cmap='gray')
#         axis[label, i].set_xticks([])
#         axis[label, i].set_yticks([])
#         axis[label, i].set_title(f'Label {label}')

# plt.show()

In [6]:
for i in range(17):
    print(f'Label {i}: {len(X[y == i])}')

max([len(X[y == i]) for i in range(17)]) * 17


Label 0: 627
Label 1: 230
Label 2: 525
Label 3: 950
Label 4: 500
Label 5: 695
Label 6: 912
Label 7: 605
Label 8: 427
Label 9: 205
Label 10: 825
Label 11: 525
Label 12: 950
Label 13: 909
Label 14: 74
Label 15: 450
Label 16: 425


16150

#### SMOTE

In [7]:
# sm = SMOTE(random_state=seed, k_neighbors=2)
# X_res, y_res = sm.fit_resample(X.reshape(X.shape[0], -1), y)
# print(X_res.shape, y_res.shape)

# #fig, axis = plt.subplots(17, 5, figsize=(12, 48))
# for label in range(17):
#     for i in range(5):
#         axis[label, i].imshow(X_res[y_res == label][949-i].reshape(20, 20), vmin=0, vmax=255, cmap='gray')
#         axis[label, i].set_xticks([])
#         axis[label, i].set_yticks([])
#         axis[label, i].set_title(f'Label {label}')

# plt.show()

In [8]:
def generate_label(X, y, label, n):
    """generates n augmented images for a given label"""
    X = X[y == label]
    y = y[y == label]
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest'
    )
    X_reshaped = X.reshape(X.shape[0], 20, 20, 1)

    augmented_data = datagen.flow(X_reshaped, y, batch_size=1, seed=seed)
    X_augs, y_augs = [], []
    for i in range(n):
        X_aug, y_aug = augmented_data.__next__()
        X_aug = X_aug.flatten()
        X_augs.append(X_aug)
        y_augs.append(y_aug)
    
    X_augs = np.array(X_augs)
    y_augs = np.array(y_augs).reshape(-1)

    return np.array(X_augs), np.array(y_augs)

print(X.shape, y.shape)
X_aug, y_aug = generate_label(X, y, 1, 5)
print(X_aug.shape, y_aug.shape)

(9834, 400) (9834,)
(5, 400) (5,)


In [9]:
from preprocessing import generate_balanced_data

X_aug, y_aug = generate_balanced_data(X, y, 42)

print(X_aug.shape, y_aug.shape)
# fig, axis = plt.subplots(17, 5, figsize=(12, 48))

# for label in range(0,17):
#     for i in range(5):
#         axis[label, i].imshow(X_aug[y_aug == label][i].reshape(20, 20), vmin=0, vmax=255, cmap='gray')
#         axis[label, i].set_xticks([])
#         axis[label, i].set_yticks([])
#         axis[label, i].set_title(f'Label {label}')

# plt.show()

(16150, 400) (16150,)


In [10]:
dataset = np.load('../data/corrupt_dataset.npz',)
CX = dataset['X']
print(CX.shape)
# fig, axis = plt.subplots(17, 5, figsize=(12, 48))

# for label in range(0,17):
#     for i in range(5):
#         axis[label, i].imshow(X[i+label+5].reshape(20, 20), vmin=0, vmax=255, cmap='gray')
#         axis[label, i].set_xticks([])
#         axis[label, i].set_yticks([])
#         axis[label, i].set_title(f'Label {label}')

# plt.show()


(935, 400)


In [11]:
class LeNet(nn.Module):
    def __init__(self, numChannels, classes):
        super(LeNet, self).__init__()

        #scales it down to 18x18 x 20
        self.conv1 = nn.Conv2d(
            in_channels=numChannels, 
            out_channels=20,
            kernel_size=(3,3), 
            )
        
        #first relu pass
        self.relu1 = nn.ReLU()
        
        #scales it down to 9x9 x 20
        self.maxpool1 = nn.MaxPool2d(
            kernel_size=(2,2),
            stride=(2,2)
            )

        #scales it down to 7x7 x 50 
        self.conv2 = nn.Conv2d(
            in_channels=20, 
            out_channels=50,
            kernel_size=(3,3), 
            )

        #second relu pass
        self.relu2 = nn.ReLU()

        #scales it down to 3x3 x 50
        self.maxpool2 = nn.MaxPool2d(
            kernel_size=(2,2),
            stride=(2,2),
            )

        #takes the 3x3x50 = 450
        self.fc1 = nn.Linear(
            in_features=450,
            out_features=500,
            )
        
        self.relu3 = nn.ReLU()
        
        self.fc2 = nn.Linear(
            in_features=500,
            out_features=classes,
        )
        self.logsoftmax = nn.LogSoftmax(dim=1)


    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)

        x = self.fc2(x)
        output = self.logsoftmax(x)
        return output
            

In [12]:
#https://proceedings.neurips.cc/paper_files/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf
class AlexNetEsque(nn.Module):
    def __init__(self, numChannels, classes):
        super(AlexNetEsque, self).__init__()

        # 20 x 20 x 64
        self.conv1 = nn.Conv2d(
            in_channels=numChannels, 
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu1 = nn.ReLU()

        # 10 x 10 x 64
        self.maxpool1 = nn.MaxPool2d(
            kernel_size=2,
            stride=2
        )

        # 10 x 10 x 128
        self.conv2 = nn.Conv2d(
            in_channels=64, 
            out_channels=128,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu2 = nn.ReLU()

        # 5 x 5 x 128
        self.maxpool2 = nn.MaxPool2d(
            kernel_size=2,
            stride=2
        )

        # 5 x 5 x 264
        self.conv3 = nn.Conv2d(
            in_channels=128, 
            out_channels=256,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu3 = nn.ReLU()

        # 5 x 5 x 256
        self.conv4 = nn.Conv2d(
            in_channels=256, 
            out_channels=256,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu4 = nn.ReLU()

        # 5 x 5 x 128
        self.conv5 = nn.Conv2d(
            in_channels=256, 
            out_channels=128,
            kernel_size=3,
            stride=1,
            padding=1
        )
        
        self.relu5 = nn.ReLU()

        # 2x2 x 128
        self.maxpool3 = nn.MaxPool2d(
            kernel_size=2,
            stride=2
        )

        # 2x2 x 128 = 512 
        self.fc1 = nn.Linear(
            in_features=512,
            out_features=512
        )
        self.relu6 = nn.ReLU()

        # helps with generalization
        # basically means if you run longer,
        # it gets better
        self.dropout1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(
            in_features=512,
            out_features=256
        )
        self.relu7 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.5)

        self.fc3 = nn.Linear(
            in_features=256,
            out_features=classes
        )

        self.logsoftmax = nn.LogSoftmax(dim=1)

    def _forward_features(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        x = self.conv3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.relu4(x)

        x = self.conv5(x)
        x = self.relu5(x)
        x = self.maxpool3(x)

        return x

    def forward(self, x):
        x = self._forward_features(x)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = self.relu6(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.relu7(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        output = self.logsoftmax(x)
        return output

            

In [13]:
INIT_LR = 0.01
MOMENTUM = 0.9
BATCH_SIZE = 32
EPOCHS = 10

TRAIN_SPLIT = 0.70
TEST_SPLIT = 1 - TRAIN_SPLIT

device = torch.device("cpu")
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.set_num_threads(1)

In [14]:
from preprocessing import generate_balanced_data, scale_data, scale_data10

X = scale_data10(X)

X_train, X_val_test, y_train, y_val_test = train_test_split(
    X, 
    y, 
    test_size=TEST_SPLIT, 
    random_state=seed
    )

X_val, X_test, y_val, y_test = train_test_split(
    X_val_test, 
    y_val_test, 
    test_size=0.5, 
    random_state=seed
    )

X_train, y_train = generate_balanced_data(X_train, y_train, 42)

X_train = X_train.reshape(-1, 1, 20, 20)
X_val = X_val.reshape(-1, 1, 20, 20)
X_test = X_test.reshape(-1, 1, 20, 20)

# this is doen so that the values are on the cpu already
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True,
    worker_init_fn = lambda _: np.random.seed(seed)
    )

val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=False,
    worker_init_fn = lambda _: np.random.seed(seed)
    )

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=False,
    worker_init_fn = lambda _: np.random.seed(seed)
    )

model = LeNet(numChannels=1, classes=17).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=INIT_LR, momentum=MOMENTUM)
criterion = nn.NLLLoss()

H = {
    'train_loss': [],
    'val_loss': [],
    'train_acc': [],
    'val_acc': []
}

print("training the network now")

for epoch in range(EPOCHS):
    start = time.time()
    model.train()

    total_train_loss = 0
    total_val_loss = 0

    correct_train = 0
    correct_val = 0

    for i, (X_batch, y_batch) in enumerate(train_loader):
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        correct_train += (y_pred.argmax(1) == y_batch).type(torch.float).sum().item()
        # print(y_pred)
        # print("//")

    with torch.no_grad():
        model.eval()

        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            loss = criterion(y_val_pred, y_val_batch)
            total_val_loss += loss.item()
            correct_val += (y_val_pred.argmax(1) == y_val_batch).type(torch.float).sum().item()
         
    avg_train_loss = total_train_loss / len(train_loader.dataset)
    avg_val_loss = total_val_loss / len(val_loader.dataset)
    train_acc = correct_train / len(train_loader.dataset)
    val_acc = correct_val / len(val_loader.dataset)

    H['train_loss'].append(avg_train_loss)
    H['val_loss'].append(avg_val_loss)
    H['train_acc'].append(train_acc)
    H['val_acc'].append(val_acc)

    end = time.time()

    print(f"--- Epoch {epoch+1}/{EPOCHS} - time: {end-start:.2f}s ---")
    print(f"Train loss: {avg_train_loss:.4f}, Train accuracy: {train_acc:.4f}")
    print(f"Val loss: {avg_val_loss:.4f}, Val accuracy: {val_acc:.4f}")
    print()

print("training complete")



training the network now
--- Epoch 1/10 - time: 13.20s ---
Train loss: 0.0519, Train accuracy: 0.4860
Val loss: 0.0177, Val accuracy: 0.8264

--- Epoch 2/10 - time: 6.42s ---
Train loss: 0.0168, Train accuracy: 0.8320
Val loss: 0.0086, Val accuracy: 0.9173

--- Epoch 3/10 - time: 6.94s ---
Train loss: 0.0107, Train accuracy: 0.8921
Val loss: 0.0072, Val accuracy: 0.9268

--- Epoch 4/10 - time: 8.08s ---
Train loss: 0.0079, Train accuracy: 0.9182
Val loss: 0.0100, Val accuracy: 0.8936

--- Epoch 5/10 - time: 12.25s ---
Train loss: 0.0063, Train accuracy: 0.9371
Val loss: 0.0056, Val accuracy: 0.9390

--- Epoch 6/10 - time: 8.49s ---
Train loss: 0.0051, Train accuracy: 0.9460
Val loss: 0.0049, Val accuracy: 0.9451

--- Epoch 7/10 - time: 7.95s ---
Train loss: 0.0043, Train accuracy: 0.9551
Val loss: 0.0049, Val accuracy: 0.9519

--- Epoch 8/10 - time: 8.54s ---
Train loss: 0.0031, Train accuracy: 0.9669
Val loss: 0.0048, Val accuracy: 0.9525

--- Epoch 9/10 - time: 5.64s ---
Train loss: 

In [16]:
with torch.no_grad():
    model.eval()
    preds = []
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        preds.append(y_pred.argmax(1))

    preds = torch.cat(preds).cpu().numpy()

print(classification_report(y_test.cpu().numpy(), preds))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        82
           1       0.97      0.97      0.97        38
           2       0.99      1.00      0.99        88
           3       0.98      0.97      0.98       123
           4       0.97      0.97      0.97        80
           5       0.98      0.99      0.99       112
           6       1.00      0.96      0.98       141
           7       0.99      1.00      0.99        95
           8       0.97      0.95      0.96        62
           9       1.00      0.94      0.97        33
          10       0.98      0.96      0.97       137
          11       0.83      0.93      0.88        72
          12       0.99      0.98      0.99       133
          13       0.96      0.94      0.95       129
          14       1.00      0.78      0.88         9
          15       0.87      0.97      0.92        69
          16       1.00      1.00      1.00        73

    accuracy              