In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils import data
from tqdm import tqdm

In [None]:
# Fetch MNIST and CIFAR10 (downloaded into ./data when missing), with every
# image converted through transforms.ToTensor().
mnist_options = dict(root='./data', download=True, transform=transforms.ToTensor())
training_set = torchvision.datasets.MNIST(train=True, **mnist_options)
testing_set = torchvision.datasets.MNIST(train=False, **mnist_options)

# Transform pipeline shared by both CIFAR10 splits.
tfm = transforms.Compose([transforms.ToTensor()])

cifar10_options = dict(root="./data", download=True, transform=tfm)
trainset_full_CIFAR10 = torchvision.datasets.CIFAR10(train=True, **cifar10_options)
testset_full_CIFAR10 = torchvision.datasets.CIFAR10(train=False, **cifar10_options)

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.

print(torch.__version__)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Only query the GPU name when a GPU was actually selected; calling
# torch.cuda.get_device_name() on a CPU-only machine raises instead of
# letting the notebook continue on the CPU fallback chosen above.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
else:
    print('CUDA is not available; running on CPU')

In [12]:
class CIFAR10_Classifier(nn.Module):
    """Convolutional classifier with two conv "funnels", an MLP body and a 10-way head.

    Layout:
      * ``forward_funnel_1`` / ``forward_funnel_2``: two Conv2d -> BatchNorm2d ->
        ReLU -> Dropout2d groups each, followed by a 2x2 max-pool.
      * ``classifer``: Flatten followed by four Linear -> LayerNorm -> ReLU ->
        Dropout groups, ending in ``self.output_nodes`` (100) features.
        (The attribute name keeps its original spelling because other code
        looks it up by that name.)
      * ``output_layer``: final Linear producing 10 logits.

    Args:
        C_dropout: dropout probability passed to every ``nn.Dropout2d``.
        F_dropout: dropout probability passed to every ``nn.Dropout``.
    """

    def __init__(self, C_dropout, F_dropout):
        super().__init__()

        conv2d_dropout = C_dropout

        conv_layer_1 = 30
        conv_layer_2 = 64

        conv_layer_3 = 128
        conv_layer_4 = 256

        self.forward_funnel_1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=conv_layer_1, kernel_size=5),   # Extract features from the raw image
            nn.BatchNorm2d(num_features=conv_layer_1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(conv2d_dropout),

            nn.Conv2d(in_channels=conv_layer_1, out_channels=conv_layer_2, kernel_size=3),  # Extract features from the learned features
            nn.BatchNorm2d(num_features=conv_layer_2),
            nn.ReLU(inplace=True),
            nn.Dropout2d(conv2d_dropout),
            nn.MaxPool2d(kernel_size=2, stride=2),                       # Reduce dimensionality
        )

        self.forward_funnel_2 = nn.Sequential(
            nn.Conv2d(in_channels=conv_layer_2, out_channels=conv_layer_3, kernel_size=3),   # Extract features from the first funnel's output
            nn.BatchNorm2d(num_features=conv_layer_3),
            nn.ReLU(inplace=True),
            nn.Dropout2d(conv2d_dropout),

            nn.Conv2d(in_channels=conv_layer_3, out_channels=conv_layer_4, kernel_size=3),  # Extract features from the learned features
            nn.BatchNorm2d(num_features=conv_layer_4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(conv2d_dropout),
            nn.MaxPool2d(kernel_size=2, stride=2),                       # Reduce dimensionality
        )

        # Compute the number of features after the input has passed the funnels
        total_count = self._count_funnel_features()

        full_node_dropout = F_dropout

        lin_layer_1_size = 1000
        lin_layer_2_size = 500
        lin_layer_3_size = 250

        self.output_nodes = 100

        self.classifer = nn.Sequential(
            nn.Flatten(),                                           # Flatten the feature maps from the funnels
            nn.Linear(in_features=total_count, out_features=lin_layer_1_size),
            nn.LayerNorm(lin_layer_1_size),
            nn.ReLU(inplace=True),
            nn.Dropout(full_node_dropout),

            nn.Linear(in_features=lin_layer_1_size, out_features=lin_layer_2_size),
            nn.LayerNorm(lin_layer_2_size),
            nn.ReLU(inplace=True),
            nn.Dropout(full_node_dropout),

            nn.Linear(in_features=lin_layer_2_size, out_features=lin_layer_3_size),
            nn.LayerNorm(lin_layer_3_size),
            nn.ReLU(inplace=True),
            nn.Dropout(full_node_dropout),

            nn.Linear(in_features=lin_layer_3_size, out_features=self.output_nodes),
            nn.LayerNorm(self.output_nodes),
            nn.ReLU(inplace=True),
            nn.Dropout(full_node_dropout),
        )

        self.output_layer = nn.Linear(in_features=self.output_nodes, out_features=10)

    def _count_funnel_features(self, input_shape=(3, 32, 32)):
        """Return the flattened feature count the funnels produce for one image.

        A single all-zero image of ``input_shape`` is pushed through both
        funnels on the CPU (the freshly built layers have not been moved to
        any other device yet). The probe runs in eval mode so that it does not
        update the BatchNorm running statistics or apply dropout; the previous
        train/eval mode is restored afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, *input_shape)
                features = self.forward_funnel_2(self.forward_funnel_1(probe))
        finally:
            self.train(was_training)

        return features.view(1, -1).size(1)

    def partial_forward(self, x):
        """Run both funnels and the MLP body; returns the features fed to ``output_layer``."""
        x = self.forward_funnel_1(x)
        x = self.forward_funnel_2(x)
        x = self.classifer(x)

        return x

    def forward(self, x):
        """Return the logits produced by ``output_layer`` on top of ``partial_forward``."""
        x = self.partial_forward(x)
        logits = self.output_layer(x)

        return logits

In [14]:
# Per-epoch loss history for the CIFAR10 classifier run.
#
# The training cell appends one dict per epoch to each list:
#   epoch_over_training_loss_CIFAR10: {"epoch": int, "training_loss": float}
#   epoch_over_testing_loss_CIFAR10:  {"epoch": int, "testing_loss": float}
#
# This is written as comments rather than a bare string literal so the cell
# does not echo the text back as its output.
epoch_over_training_loss_CIFAR10 = []
epoch_over_testing_loss_CIFAR10 = []

'\nForm of the data\n\ndata = \n{\n    epoch: int\n    training/testing loss: float\n}\n'

In [15]:
# Train the 10-class CIFAR10 classifier and record the mean loss per epoch.

# Hyperparameter setup
epochs = 15
batch_size = 64
learning_rate = 5e-4
decay_rate = 4e-4

c_dropout = 0.25
f_dropout = 0.25

print('######## Begining training for CIFAR10 classifier ##########')

# Setup data loaders
trainset_loader_CIFAR10 = data.DataLoader(trainset_full_CIFAR10,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          # num_workers=5,
                                          pin_memory=True)

testset_loader_CIFAR10 = data.DataLoader(testset_full_CIFAR10,
                                         batch_size=batch_size,
                                         # num_workers=5,
                                         shuffle=False,
                                         pin_memory=True)

model = CIFAR10_Classifier(c_dropout, f_dropout)
model.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(),
                        lr=learning_rate,
                        weight_decay=decay_rate)

# Have references to variables outside of the epoch loop
avg_training_loss = 0
avg_testing_loss = 0

# Epoch Loop
for epoch in range(epochs):
    print(f'----- Epoch: {epoch + 1}/{epochs} -----')

    # Accumulate sample-weighted sums. The last batch of a split can be
    # smaller than batch_size, so averaging the per-batch means directly
    # would over-weight it; weighting by batch size gives the true mean
    # loss per sample over the whole split.
    training_loss_sum = 0.0
    training_samples = 0
    testing_loss_sum = 0.0
    testing_samples = 0

    model.train()

    for x, Y in tqdm(trainset_loader_CIFAR10, desc='Training', unit=' batch'):
        # Transfer images to the selected device
        x = x.to(device)
        Y = Y.to(device)

        # Zero out gradients
        optimizer.zero_grad()

        # Send images to model
        x_pred = model(x)

        # Calc loss (mean over the batch)
        loss = loss_function(x_pred, Y)

        # Calc gradient and update weights
        loss.backward()
        optimizer.step()

        # .item() already returns a plain Python float, so no no_grad() is needed
        samples_in_batch = Y.size(0)
        training_loss_sum += loss.item() * samples_in_batch
        training_samples += samples_in_batch

    # Switch to eval mode
    model.eval()

    with torch.no_grad():
        for x, Y in tqdm(testset_loader_CIFAR10, desc='Testing', unit=' batches'):
            # Move the images to the selected device
            x = x.to(device)
            Y = Y.to(device)

            # Get logits and sum up total loss
            x_pred = model(x)
            samples_in_batch = Y.size(0)
            testing_loss_sum += loss_function(x_pred, Y).item() * samples_in_batch
            testing_samples += samples_in_batch

    # Mean loss per sample; max(..., 1) guards against an empty loader
    avg_training_loss = training_loss_sum / max(training_samples, 1)
    avg_testing_loss = testing_loss_sum / max(testing_samples, 1)

    # Switch model back to training mode
    model.train()

    # Note: the stored epoch index is zero-based, the printed one is one-based
    epoch_over_training_loss_CIFAR10.append({
        "epoch": epoch,
        "training_loss": avg_training_loss
        })

    epoch_over_testing_loss_CIFAR10.append({
        "epoch": epoch,
        "testing_loss": avg_testing_loss
        })


    print("")

    print(f'   -> Training Loss: {avg_training_loss: .4f}\n')
    print(f'   -> Testing Loss: {avg_testing_loss: .4f}\n')


######## Begining training for CIFAR10 classifier ##########
----- Epoch: 1/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 106.90 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 151.55 batches/s]



   -> Training Loss:  1.9341

   -> Testing Loss:  1.5643

----- Epoch: 2/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.34 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 152.62 batches/s]



   -> Training Loss:  1.6016

   -> Testing Loss:  1.4643

----- Epoch: 3/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 106.98 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 151.72 batches/s]



   -> Training Loss:  1.4427

   -> Testing Loss:  1.2137

----- Epoch: 4/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 107.35 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 158.85 batches/s]



   -> Training Loss:  1.3388

   -> Testing Loss:  1.1131

----- Epoch: 5/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.73 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 153.85 batches/s]



   -> Training Loss:  1.2517

   -> Testing Loss:  1.0442

----- Epoch: 6/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 109.14 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 160.72 batches/s]



   -> Training Loss:  1.1807

   -> Testing Loss:  0.9841

----- Epoch: 7/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.53 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 156.33 batches/s]



   -> Training Loss:  1.1149

   -> Testing Loss:  0.9577

----- Epoch: 8/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 109.14 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 157.78 batches/s]



   -> Training Loss:  1.0459

   -> Testing Loss:  0.8625

----- Epoch: 9/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 107.74 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 159.76 batches/s]



   -> Training Loss:  1.0025

   -> Testing Loss:  0.8238

----- Epoch: 10/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.34 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 156.21 batches/s]



   -> Training Loss:  0.9518

   -> Testing Loss:  0.8015

----- Epoch: 11/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.79 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 153.66 batches/s]



   -> Training Loss:  0.9113

   -> Testing Loss:  0.7908

----- Epoch: 12/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.60 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 159.46 batches/s]



   -> Training Loss:  0.8819

   -> Testing Loss:  0.7434

----- Epoch: 13/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.99 batch/s]
Testing: 100%|██████████| 157/157 [00:01<00:00, 155.45 batches/s]



   -> Training Loss:  0.8407

   -> Testing Loss:  0.7338

----- Epoch: 14/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.47 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 160.60 batches/s]



   -> Training Loss:  0.8101

   -> Testing Loss:  0.7141

----- Epoch: 15/15 -----


Training: 100%|██████████| 782/782 [00:07<00:00, 108.65 batch/s]
Testing: 100%|██████████| 157/157 [00:00<00:00, 159.55 batches/s]


   -> Training Loss:  0.7777

   -> Testing Loss:  0.7039






In [19]:
class CIFAR10_Transformed_Classifier(CIFAR10_Classifier):
    """CIFAR10_Classifier backbone with a replacement head for the transform-prediction task.

    Everything except ``output_layer`` is built by the parent class. ``forward``
    and ``partial_forward`` are inherited unchanged: the parent's ``forward``
    already applies ``self.output_layer`` on top of ``partial_forward``, so the
    previous verbatim override was redundant.

    Args:
        C_dropout: dropout probability for the convolutional funnels.
        F_dropout: dropout probability for the fully connected layers.
        num_classes: number of output logits. Defaults to 2, the value that
            was previously hard-coded.
    """

    def __init__(self, C_dropout, F_dropout, num_classes=2):
        super().__init__(C_dropout, F_dropout)

        # Swap the parent's 10-way head for one sized to this task
        self.output_layer = nn.Linear(in_features=self.output_nodes, out_features=num_classes)

In [20]:
# Per-epoch loss history for the rotated-image run; the training cell appends
# one {"epoch": ..., "training_loss"/"testing_loss": ...} dict per epoch.
epoch_over_training_loss_CIFAR10_R = list()
epoch_over_testing_loss_CIFAR10_R = list()

In [21]:
# Train the 2-class model that tells original CIFAR10 images from rotated
# copies, and record the mean loss per epoch.

# Hyperparameter setup
epochs = 50
batch_size = 50
learning_rate = 5e-4
decay_rate = 1e-3

c_dropout = 0.30
f_dropout = 0.30


def _make_rotation_batch(images):
    """Build one labelled batch for the rotation task.

    Returns ``(all_images, all_labels)`` where ``all_images`` is ``images``
    followed by a copy of every image rotated one quarter turn with
    ``torch.rot90`` over dims 2 and 3, and ``all_labels`` is 0 for the
    originals and 1 for the rotated copies (dtype ``torch.long``).
    """
    count = images.size(0)

    labels_upright = torch.zeros(count, dtype=torch.long)

    images_rotated = torch.rot90(images, 1, [2, 3])
    labels_rotated = torch.ones(count, dtype=torch.long)

    all_images = torch.cat([images, images_rotated])
    all_labels = torch.cat([labels_upright, labels_rotated])

    return all_images, all_labels


print('######## Begining training for CIFAR10 classifier on rotated images ##########')

# Setup data loaders
trainset_loader_CIFAR10_R = data.DataLoader(trainset_full_CIFAR10,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            # num_workers=5,
                                            pin_memory=True)

testset_loader_CIFAR10_R = data.DataLoader(testset_full_CIFAR10,
                                           batch_size=batch_size,
                                           # num_workers=5,
                                           shuffle=False,
                                           pin_memory=True)

model = CIFAR10_Transformed_Classifier(c_dropout, f_dropout)
model.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(),
                        lr=learning_rate,
                        weight_decay=decay_rate)

# Have references to variables outside of the epoch loop
avg_training_loss = 0
avg_testing_loss = 0

# Epoch Loop
for epoch in range(epochs):
    print(f'----- Epoch: {epoch + 1}/{epochs} -----')

    # Sample-weighted sums, so the epoch average stays the true mean loss
    # per sample even when the last batch is smaller than the others.
    training_loss_sum = 0.0
    training_samples = 0
    testing_loss_sum = 0.0
    testing_samples = 0

    model.train()

    # The dataset's class labels are ignored; labels come from the rotation
    for x, _ in tqdm(trainset_loader_CIFAR10_R, desc='Training', unit=' batch'):
        all_images, all_labels = _make_rotation_batch(x)

        # Transfer images to the selected device
        all_images = all_images.to(device)
        all_labels = all_labels.to(device)

        # Zero out gradients
        optimizer.zero_grad()

        # Send images to model
        x_pred = model(all_images)

        # Calc loss (mean over the combined batch)
        loss = loss_function(x_pred, all_labels)

        # Calc gradient and update weights
        loss.backward()
        optimizer.step()

        # .item() already returns a plain Python float, so no no_grad() is needed
        samples_in_batch = all_labels.size(0)
        training_loss_sum += loss.item() * samples_in_batch
        training_samples += samples_in_batch

    # Switch to eval mode
    model.eval()

    with torch.no_grad():
        for x, _ in tqdm(testset_loader_CIFAR10_R, desc='Testing', unit=' batches'):
            all_images, all_labels = _make_rotation_batch(x)

            # Transfer images to the selected device (done once; the earlier
            # duplicated transfer was a no-op)
            all_images = all_images.to(device)
            all_labels = all_labels.to(device)

            # Get logits and sum up total loss
            x_pred = model(all_images)
            samples_in_batch = all_labels.size(0)
            testing_loss_sum += loss_function(x_pred, all_labels).item() * samples_in_batch
            testing_samples += samples_in_batch

    # Mean loss per sample; max(..., 1) guards against an empty loader
    avg_training_loss = training_loss_sum / max(training_samples, 1)
    avg_testing_loss = testing_loss_sum / max(testing_samples, 1)

    # Switch model back to training mode
    model.train()

    # Note: the stored epoch index is zero-based, the printed one is one-based
    epoch_over_training_loss_CIFAR10_R.append({
        "epoch": epoch,
        "training_loss": avg_training_loss
        })

    epoch_over_testing_loss_CIFAR10_R.append({
        "epoch": epoch,
        "testing_loss": avg_testing_loss
        })


    print("")

    print(f'   -> Training Loss: {avg_training_loss: .4f}\n')
    print(f'   -> Testing Loss: {avg_testing_loss: .4f}\n')

######## Begining training for CIFAR10 classifier on rotated images ##########
----- Epoch: 1/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.69 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.14 batches/s]



   -> Training Loss:  0.5700

   -> Testing Loss:  0.4855

----- Epoch: 2/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.65 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 179.68 batches/s]



   -> Training Loss:  0.4979

   -> Testing Loss:  0.4475

----- Epoch: 3/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.60 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.99 batches/s]



   -> Training Loss:  0.4669

   -> Testing Loss:  0.4248

----- Epoch: 4/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.14 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 181.22 batches/s]



   -> Training Loss:  0.4467

   -> Testing Loss:  0.4051

----- Epoch: 5/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.92 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.75 batches/s]



   -> Training Loss:  0.4296

   -> Testing Loss:  0.3868

----- Epoch: 6/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.54 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.62 batches/s]



   -> Training Loss:  0.4154

   -> Testing Loss:  0.3709

----- Epoch: 7/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.38 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 181.68 batches/s]



   -> Training Loss:  0.4052

   -> Testing Loss:  0.3579

----- Epoch: 8/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.73 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.03 batches/s]



   -> Training Loss:  0.3954

   -> Testing Loss:  0.3492

----- Epoch: 9/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.94 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 179.70 batches/s]



   -> Training Loss:  0.3865

   -> Testing Loss:  0.3425

----- Epoch: 10/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.47 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 178.19 batches/s]



   -> Training Loss:  0.3783

   -> Testing Loss:  0.3343

----- Epoch: 11/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.36 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 182.76 batches/s]



   -> Training Loss:  0.3704

   -> Testing Loss:  0.3316

----- Epoch: 12/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.02 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 181.74 batches/s]



   -> Training Loss:  0.3636

   -> Testing Loss:  0.3223

----- Epoch: 13/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.42 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 181.71 batches/s]



   -> Training Loss:  0.3545

   -> Testing Loss:  0.3137

----- Epoch: 14/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.22 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.71 batches/s]



   -> Training Loss:  0.3496

   -> Testing Loss:  0.3092

----- Epoch: 15/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.07 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 176.27 batches/s]



   -> Training Loss:  0.3422

   -> Testing Loss:  0.3015

----- Epoch: 16/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.37 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.08 batches/s]



   -> Training Loss:  0.3367

   -> Testing Loss:  0.3002

----- Epoch: 17/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.87 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.62 batches/s]



   -> Training Loss:  0.3321

   -> Testing Loss:  0.2996

----- Epoch: 18/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.10 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.50 batches/s]



   -> Training Loss:  0.3263

   -> Testing Loss:  0.2980

----- Epoch: 19/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.64 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.14 batches/s]



   -> Training Loss:  0.3217

   -> Testing Loss:  0.2945

----- Epoch: 20/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.73 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.94 batches/s]



   -> Training Loss:  0.3159

   -> Testing Loss:  0.2911

----- Epoch: 21/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.75 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.50 batches/s]



   -> Training Loss:  0.3103

   -> Testing Loss:  0.2841

----- Epoch: 22/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.45 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.15 batches/s]



   -> Training Loss:  0.3036

   -> Testing Loss:  0.2817

----- Epoch: 23/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.92 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.48 batches/s]



   -> Training Loss:  0.3003

   -> Testing Loss:  0.2785

----- Epoch: 24/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.28 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.96 batches/s]



   -> Training Loss:  0.2952

   -> Testing Loss:  0.2756

----- Epoch: 25/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.81 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.68 batches/s]



   -> Training Loss:  0.2920

   -> Testing Loss:  0.2713

----- Epoch: 26/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.41 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 182.79 batches/s]



   -> Training Loss:  0.2856

   -> Testing Loss:  0.2715

----- Epoch: 27/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.78 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 182.94 batches/s]



   -> Training Loss:  0.2808

   -> Testing Loss:  0.2746

----- Epoch: 28/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 116.78 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 186.28 batches/s]



   -> Training Loss:  0.2768

   -> Testing Loss:  0.2686

----- Epoch: 29/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.98 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 186.38 batches/s]



   -> Training Loss:  0.2724

   -> Testing Loss:  0.2669

----- Epoch: 30/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.88 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.27 batches/s]



   -> Training Loss:  0.2672

   -> Testing Loss:  0.2680

----- Epoch: 31/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.60 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.84 batches/s]



   -> Training Loss:  0.2647

   -> Testing Loss:  0.2714

----- Epoch: 32/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.47 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.02 batches/s]



   -> Training Loss:  0.2591

   -> Testing Loss:  0.2638

----- Epoch: 33/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.37 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.57 batches/s]



   -> Training Loss:  0.2549

   -> Testing Loss:  0.2656

----- Epoch: 34/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.50 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 185.56 batches/s]



   -> Training Loss:  0.2530

   -> Testing Loss:  0.2668

----- Epoch: 35/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.09 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 178.38 batches/s]



   -> Training Loss:  0.2491

   -> Testing Loss:  0.2620

----- Epoch: 36/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.19 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.59 batches/s]



   -> Training Loss:  0.2441

   -> Testing Loss:  0.2602

----- Epoch: 37/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.75 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.87 batches/s]



   -> Training Loss:  0.2404

   -> Testing Loss:  0.2618

----- Epoch: 38/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.64 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 184.62 batches/s]



   -> Training Loss:  0.2370

   -> Testing Loss:  0.2617

----- Epoch: 39/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.79 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 188.04 batches/s]



   -> Training Loss:  0.2319

   -> Testing Loss:  0.2619

----- Epoch: 40/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.88 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 186.43 batches/s]



   -> Training Loss:  0.2294

   -> Testing Loss:  0.2684

----- Epoch: 41/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.69 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 183.41 batches/s]



   -> Training Loss:  0.2240

   -> Testing Loss:  0.2695

----- Epoch: 42/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 118.40 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.93 batches/s]



   -> Training Loss:  0.2215

   -> Testing Loss:  0.2660

----- Epoch: 43/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.63 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 187.33 batches/s]



   -> Training Loss:  0.2182

   -> Testing Loss:  0.2616

----- Epoch: 44/50 -----


Training: 100%|██████████| 1000/1000 [00:08<00:00, 117.20 batch/s]
Testing: 100%|██████████| 200/200 [00:01<00:00, 180.27 batches/s]



   -> Training Loss:  0.2124

   -> Testing Loss:  0.2649

----- Epoch: 45/50 -----


Training:  37%|███▋      | 373/1000 [00:03<00:05, 116.54 batch/s]


KeyboardInterrupt: 

In [None]:
# Write the parameters of the current `model` (problem 5) to disk as a state dict
rotation_state_dict = model.state_dict()
torch.save(rotation_state_dict, 'HW4_Rotation_Model_Weights.pth')

In [None]:
def _copy_layer_state(final_layer, src_layer, is_frozen):
    """Copy one layer's learned state from ``src_layer`` into ``final_layer``.

    Copies ``weight``, ``bias`` when both layers have one, and every buffer
    the two layers share by name (for example BatchNorm running statistics).
    When ``is_frozen`` is true the copied weight and bias stop requiring
    gradients.

    Returns True when the pair has weights and they were copied, False for
    layers without a ``weight`` (activations, dropout, pooling, flatten).
    """
    src_weight = getattr(src_layer, 'weight', None)
    final_weight = getattr(final_layer, 'weight', None)
    if src_weight is None or final_weight is None:
        return False

    # A layer built with bias=False still has a `bias` attribute set to None,
    # so test the value rather than the attribute's existence.
    src_bias = getattr(src_layer, 'bias', None)
    final_bias = getattr(final_layer, 'bias', None)
    has_bias = src_bias is not None and final_bias is not None

    # copy_ writes into the existing parameters, so the destination keeps its
    # own device and dtype and never shares storage with the source.
    with torch.no_grad():
        final_weight.copy_(src_weight)
        if has_bias:
            final_bias.copy_(src_bias)

        final_buffers = dict(final_layer.named_buffers(recurse=False))
        for buffer_name, src_buffer in src_layer.named_buffers(recurse=False):
            if buffer_name in final_buffers:
                final_buffers[buffer_name].copy_(src_buffer)

    if is_frozen:
        final_weight.requires_grad = False
        if has_bias:
            final_bias.requires_grad = False

    return True


def transfer_weights(model_final, model_src, k, is_frozen):
    """Copy the first ``k`` weight-bearing layers of ``model_src`` into ``model_final``.

    Layers are visited in forward order: ``forward_funnel_1``, then
    ``forward_funnel_2``, then ``classifer``. Every layer that has a
    ``weight`` counts towards ``k`` (normalisation layers included); layers
    without one are skipped and do not count. ``output_layer`` is never
    touched.

    Args:
        model_final: model that receives the parameters; modified in place.
        model_src: model the parameters are read from; left unchanged.
        k: number of weight-bearing layers to transfer. ``0`` transfers
            nothing; a negative value never reaches zero, so every layer in
            the three stages is transferred.
        is_frozen: when true, transferred weights and biases get
            ``requires_grad = False``.

    Returns:
        None.

    Raises:
        AttributeError: if either model lacks one of the three stages.
        RuntimeError: if a pair of parameters has incompatible shapes.
    """
    for stage_name in ('forward_funnel_1', 'forward_funnel_2', 'classifer'):
        final_stage = getattr(model_final, stage_name)
        src_stage = getattr(model_src, stage_name)

        for final_layer, src_layer in zip(final_stage, src_stage):
            if k == 0:
                return

            if _copy_layer_state(final_layer, src_layer, is_frozen):
                k -= 1