In [1]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset

In [2]:
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [3]:
directory = '/Users/shenwang/Documents/CIFAR/cifar-10-python/cifar-10-batches-py'
data_prefix = 'data'
test_prefix = 'test'
num_channels = 3

training_files = glob.glob(directory+os.sep+data_prefix+'*')
test_files = glob.glob(directory+os.sep+test_prefix+'*')

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("mps")

In [4]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def changeDimension(x):
    
    assert isinstance(x,list),'x must be list type'
    np_x = np.array(x)
    # print(np_x.shape)
    sp = np_x.shape
    size_per_channel = sp[-1]/num_channels
    len_per_side = int(np.sqrt(size_per_channel))
    if len(sp) == 2:
        new_array = np.reshape(np_x,(sp[0]*sp[1]))
    if len(sp) == 3:
        new_array = np.reshape(np_x,(sp[0]*sp[1],num_channels,len_per_side,len_per_side))
        # sp_new = output.shape
        # new_array = np.zeros((sp_new[0],sp_new[2],sp_new[3],sp_new[1]))
        # for i in range(sp_new[0]):
        #     for j in range(sp_new[1]):
        #         new_array[i,:,:,j] = output[i,j,:,:]

    return new_array



In [5]:
def preprocessing(x,c_x=28,c_y=28,normalize=True,center_crop=True,whitening=True):
    sp = x.shape
    assert len(sp) == 4, 'The input shape must be number_of_frames * number_of_channels * len_of_image * len_of_image'
    len_x = sp[2]
    len_y = sp[3]
    start_x = (len_x - c_x)//2
    stop_x = start_x + c_x
    start_y = (len_y-c_y)//2
    stop_y = start_y + c_y
    new_x = np.zeros((sp[0],sp[1],c_x,c_y))


    for i in range(sp[0]):
        for j in range(sp[1]):
            if normalize:
                image = x[i,:,:,j]/255
            else:
                image = x[i,:,:,j]
            if center_crop:
                new_x[i,:,:,j] = image[start_x:stop_x,start_y:stop_y]
            else:
                new_x[i,:,:,j] = image

            if whitening:
                temp = image[start_x:stop_x,start_y:stop_y]
                mean = np.mean(temp)
                std = np.std(temp)
                std_mod = max(std,1/np.sqrt(np.size(temp)))
                new_x[i,:,:,j] = (temp - mean)/std_mod
            
    return new_x


In [6]:
training_raw_images = []
training_labels = []

test_raw_images = []
test_labels = []

for file in training_files:
    batch_dict = unpickle(file)
    training_raw_images.append(batch_dict[b'data'])
    training_labels.append(batch_dict[b'labels'])
for file in test_files:
    batch_dict = unpickle(file)
    test_raw_images.append(batch_dict[b'data'])
    test_labels.append(batch_dict[b'labels'])

training_raw_images = changeDimension(training_raw_images)
training_labels = changeDimension(training_labels)
test_raw_images = changeDimension(test_raw_images)
test_labels = changeDimension(test_labels)

training_images = preprocessing(training_raw_images)
test_images = preprocessing(test_raw_images)

In [7]:
class CIFAR(Dataset):
    def __init__(self,data,label):
        super(CIFAR,self).__init__()
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        return self.data[idx,:,:,:], self.label[idx]

In [8]:
class Conv(nn.Module):
    def __init__(self,input_channel,output_channel,kernel_size,stride,padding):
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(input_channel, output_channel, kernel_size=kernel_size, stride=stride, padding=padding)
        self.batchnorm = nn.BatchNorm2d(output_channel) 
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.batchnorm(x)
        x = self.relu(x)
        return x
    
class Inception(nn.Module):
    def __init__(self, input_channel, output1, output3):
        super(Inception,self).__init__()
        self.branch1 = Conv(input_channel, output1, 1, 1,padding=0)
        self.branch3 = Conv(input_channel, output3, 3, 1,padding=1)

    def forward(self, x):
        b1 = self.branch1(x)  
        b3 = self.branch3(x) 
        # print(b1.shape,b3.shape)
        # Concatenate along the channel dimension
        return torch.cat([b1, b3], dim=1)

class Downsample(nn.Module):
    def __init__(self, input_channel, output_channel):
        super(Downsample,self).__init__()
        self.branch_conv = Conv(input_channel, output_channel, 3, 2,padding=1)
        self.branch_pool = nn.MaxPool2d(3, stride=2,padding=1)

    def forward(self, x):
        b_conv = self.branch_conv(x)  
        b_pool = self.branch_pool(x) 
        # print(b_conv.shape,b_pool.shape)
        # Concatenate along the channel dimension
        return torch.cat([b_conv, b_pool], dim=1)

class InceptionSmall(nn.Module):
    def __init__(self,input_channel):
        super(InceptionSmall, self).__init__()
        self.initial_conv = Conv(input_channel, 96, 3, 1,1)  

        # First Inception Block
        self.inception1 = Inception(96, 32, 32)
        self.inception2 = Inception(64, 32, 48)
        self.downsample1 = Downsample(80, 80)

        # Second Inception Block
        self.inception3 = Inception(160, 112, 48)
        self.inception4 = Inception(160, 96, 64)
        self.inception5 = Inception(160, 80, 80)
        self.inception6 = Inception(160, 48, 96)
        self.downsample2 = Downsample(144, 96)

        # Final Inception Block
        self.inception7 = Inception(240, 176, 160)
        self.inception8 = Inception(336, 176, 160)

        # Classification Head
        self.global_pool = nn.AvgPool2d(7)  # 7x7 kernel global pooling
        self.fc = nn.Linear(336, 10)  # 10-way classification

    def forward(self, x):
        x = self.initial_conv(x)
        # print('initial_conv done')
        x = self.inception1(x)
        # print('inception1 done')
        x = self.inception2(x)
        # print('inception2 done')

        x = self.downsample1(x)
        # print('downsample1 done')
        x = self.inception3(x)
        # print('inception3 done')

        x = self.inception4(x)
        # print('inception4 done')

        x = self.inception5(x)
        # print('inception5 done')

        x = self.inception6(x)
        # print('inception6 done')

        x = self.downsample2(x)
        # print('downsample2 done',x.shape)
        x = self.inception7(x)
        # print('inception7 done')

        x = self.inception8(x)
        # print('inception8 done')

        x = self.global_pool(x)
        # print('global_pool done')
        # print(x.shape)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x



In [9]:
def train_loop(dataloader, model, loss_fn, optimizer,device):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        # print(batch,X.shape,y.shape)
        X,y = X.to(device),y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn,device):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            X,y = X.to(device),y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [10]:
learning_rate = 0.1
batch_size = 64
epochs = 25000
momentum = 0.9
weight_decay = 0.95

data_train = CIFAR(torch.tensor(training_images,dtype=torch.float32),torch.tensor(training_labels,dtype=torch.long))
data_test = CIFAR(torch.tensor(test_images,dtype=torch.float32),torch.tensor(test_labels,dtype=torch.long))

train_dataloader = DataLoader(data_train, batch_size= batch_size)
test_dataloader = DataLoader(data_test, batch_size=batch_size)

model = InceptionSmall(3).to(device)

optimizer = optim.SGD(model.parameters(), lr=learning_rate,momentum=momentum)
loss_fn = nn.CrossEntropyLoss()

scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=weight_decay)

In [11]:
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    train_loop(train_dataloader, model, loss_fn, optimizer,device)
    
    test_loop(test_dataloader, model, loss_fn,device)
    scheduler.step()
print("Done!")

Epoch 1
-------------------------------
loss: 2.356857  [   64/50000]
loss: 2.114672  [ 6464/50000]
loss: 1.999472  [12864/50000]
loss: 2.142463  [19264/50000]
loss: 2.172346  [25664/50000]
loss: 2.002348  [32064/50000]
loss: 2.035090  [38464/50000]
loss: 1.958720  [44864/50000]
Test Error: 
 Accuracy: 25.6%, Avg loss: 2.069321 

Epoch 2
-------------------------------
loss: 2.052804  [   64/50000]
loss: 1.924896  [ 6464/50000]
loss: 1.859381  [12864/50000]
loss: 2.060254  [19264/50000]
loss: 1.961628  [25664/50000]
loss: 1.881349  [32064/50000]
loss: 1.968502  [38464/50000]
loss: 1.906920  [44864/50000]


KeyboardInterrupt: 

In [None]:


# # Define the Inception block
# class InceptionBlock(nn.Module):
#     def __init__(self, in_channels, ch1x1, ch3x3reduce, ch3x3, ch5x5reduce, ch5x5, pool_proj):
#         super(InceptionBlock, self).__init__()

#         # 1x1 Convolution
#         self.branch1x1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)

#         # 1x1 followed by 3x3 Convolution
#         self.branch3x3 = nn.Sequential(
#             nn.Conv2d(in_channels, ch3x3reduce, kernel_size=1),
#             nn.Conv2d(ch3x3reduce, ch3x3, kernel_size=3, padding=1)
#         )

#         # 1x1 followed by 5x5 Convolution
#         self.branch5x5 = nn.Sequential(
#             nn.Conv2d(in_channels, ch5x5reduce, kernel_size=1),
#             nn.Conv2d(ch5x5reduce, ch5x5, kernel_size=5, padding=2)
#         )

#         # MaxPooling followed by 1x1 Convolution
#         self.branch_pool = nn.Sequential(
#             nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
#             nn.Conv2d(in_channels, pool_proj, kernel_size=1)
#         )

#     def forward(self, x):
#         branch1x1 = self.branch1x1(x)
#         branch3x3 = self.branch3x3(x)
#         branch5x5 = self.branch5x5(x)
#         branch_pool = self.branch_pool(x)

#         # Concatenate along the channel dimension
#         outputs = torch.cat([branch1x1, branch3x3, branch5x5, branch_pool], dim=1)
#         return outputs

# # Define the complete Inception Network (simplified version)
# class InceptionNet(nn.Module):
#     def __init__(self, num_classes=10):
#         super(InceptionNet, self).__init__()

#         # Initial convolution and pooling layers
#         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
#         self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

#         # Inception blocks
#         self.inception1 = InceptionBlock(64, 64, 96, 128, 16, 32, 32)
#         self.inception2 = InceptionBlock(256, 128, 128, 192, 32, 96, 64)

#         # Global average pooling and fully connected layer
#         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
#         self.fc = nn.Linear(480, num_classes)

#     def forward(self, x):
#         x = F.relu(self.conv1(x))
#         x = self.maxpool1(x)

#         x = self.inception1(x)
#         x = self.inception2(x)

#         x = self.avgpool(x)
#         x = torch.flatten(x, 1)
#         x = self.fc(x)
#         return x

# # Instantiate the network
# model = InceptionNet(num_classes=10)

# # Print the model summary
# print(model)
