In [4]:
import torchvision

In [5]:
import torch

In [50]:
import torch.nn.functional as F

In [51]:
import functools

In [15]:
class Network(torch.nn.Module):
    """Skeleton CNN: three 5x5 convolution layers followed by two linear layers.

    Only the layers are declared here so that their parameters can be
    inspected; the forward pass is still a placeholder.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers (1 -> 10 -> 20 -> 30 channels, 5x5 kernels).
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv3 = torch.nn.Conv2d(in_channels=20, out_channels=30, kernel_size=5)

        # Fully connected layers (480 -> 128 -> 10).
        self.fc1 = torch.nn.Linear(in_features=30*4*4, out_features=128)
        self.fc2 = torch.nn.Linear(in_features=128, out_features=10)

    def forward(self, t):
        """Placeholder forward pass; returns None until it is implemented.

        `self` is required: nn.Module.__call__ invokes forward as a bound
        method, so a signature of just `forward(t)` raises TypeError as soon
        as the network is called with an input.
        """
        return None

In [17]:
network = Network()
print(network)

Network(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(20, 30, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=480, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [20]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.0696,  0.1397, -0.1070,  0.0685,  0.0728],
          [-0.0172,  0.0304, -0.1043, -0.0294,  0.0216],
          [ 0.0922,  0.1093, -0.0095, -0.0862,  0.0972],
          [-0.1005,  0.0687, -0.0308, -0.0536,  0.0168],
          [ 0.1091, -0.1127, -0.1150, -0.0829, -0.0890]]],


        [[[-0.0413, -0.1690,  0.0675,  0.0741, -0.0410],
          [ 0.1363,  0.1618,  0.0063, -0.0943,  0.0557],
          [-0.0358,  0.0774, -0.0388, -0.1385, -0.0352],
          [ 0.0103,  0.0996,  0.0216,  0.0062, -0.0587],
          [-0.1205,  0.0988,  0.0273, -0.1992, -0.0904]]],


        [[[ 0.1164, -0.0738,  0.0761, -0.0042,  0.1734],
          [-0.0770,  0.1194,  0.0738, -0.1738,  0.0536],
          [ 0.1792, -0.1288, -0.0382, -0.0767, -0.0873],
          [ 0.0069, -0.1926,  0.1858,  0.0532,  0.0231],
          [ 0.1954,  0.0603, -0.1010,  0.1382,  0.0113]]],


        [[[-0.1041, -0.1749,  0.0325, -0.1112, -0.0872],
          [ 0.0129, -0.0899,  0.1517,  0.0722,  0.1090

In [21]:
dir(network)

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_name',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_modules',
 '_named_members',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_tracing_name',
 '_version',
 'add_module',
 'apply',
 'buffers',
 'children',
 'conv1',
 'conv2',
 'conv3',
 'cpu',
 'cuda',
 'double',
 'dump_patches',
 'eval',
 'extra_repr',
 'fc1',
 'fc2',
 'float',
 'forward',
 'half',
 'load_state_dict',

In [29]:
# Map every registered parameter name to the shape of its tensor.
{name: param.shape for name, param in network.named_parameters()}

{'conv1.weight': torch.Size([10, 1, 5, 5]),
 'conv1.bias': torch.Size([10]),
 'conv2.weight': torch.Size([20, 10, 5, 5]),
 'conv2.bias': torch.Size([20]),
 'conv3.weight': torch.Size([30, 20, 5, 5]),
 'conv3.bias': torch.Size([30]),
 'fc1.weight': torch.Size([128, 480]),
 'fc1.bias': torch.Size([128]),
 'fc2.weight': torch.Size([10, 128]),
 'fc2.bias': torch.Size([10])}

In [None]:
# A Python object is callable when its class defines a __call__ method

# Tensors are not used directly as weights in the layers.
# They are instantiated as "Parameters" and then used as weights.
# This is because the nn.Module class has a "register_parameter" function
# that requires all the weights to be "registered". More details to follow.

In [40]:
# 3x4 float32 weight matrix holding the values 1..12 in row-major order:
#   [[ 1,  2,  3,  4],
#    [ 5,  6,  7,  8],
#    [ 9, 10, 11, 12]]
weight_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4)

In [41]:
data_tensor = torch.tensor([1,2,3,4], dtype=torch.float32)

In [42]:
weight_tensor.matmul(data_tensor)

tensor([ 30.,  70., 110.])

In [46]:
layer = torch.nn.Linear(in_features=4, out_features=3, bias=False)

In [None]:
# the weight matrix of each Linear layer has shape (out_features, in_features)

In [48]:
layer.weight = torch.nn.Parameter(weight_tensor)

In [49]:
layer(data_tensor)

tensor([ 30.,  70., 110.], grad_fn=<SqueezeBackward3>)

In [None]:
# Each layer has a forward method, which never gets called explicitly.
# Every time you call the layer, the layer's __call__ method is invoked,
# and that method in turn calls the forward method of the layer object.
# So all you need to do is call the layer object.
# The same applies to network objects too.

In [None]:
# Layers have weights; operations don't.
# conv is a layer; max pooling and activations are operations.
# Operations are implemented using the torch.nn.functional module (a.k.a. the F module).

In [None]:
# Rewriting Network class again

In [6]:
class Network(torch.nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    For the single-channel 28x28 images used in this notebook the feature map
    is 12 x 4 x 4 after the second pooling step, which is the size fc1 expects.
    The output is a (batch, 10) tensor of raw scores.
    """

    def __init__(self):
        super().__init__()

        # Layers (these own the learnable weights).
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = torch.nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=60)
        self.fc3 = torch.nn.Linear(in_features=60, out_features=10)

        # Operations (weight-free, taken from torch.nn.functional).
        self.relu = F.relu
        self.softmax = F.softmax
        self.maxpool2d2k2s = functools.partial(F.max_pool2d, kernel_size=2, stride=2)

    def forward(self, t):
        """Run the input through both conv stages and the linear head."""
        # Convolutional stages: conv -> relu -> 2x2 max pool with stride 2.
        t = self.maxpool2d2k2s(self.relu(self.conv1(t)))
        t = self.maxpool2d2k2s(self.relu(self.conv2(t)))

        # Flatten each sample to a 12*4*4 vector for the linear layers.
        t = t.reshape(-1, 12*4*4)

        # Hidden linear layers, each followed by ReLU.
        t = self.relu(self.fc1(t))
        t = self.relu(self.fc2(t))

        # self.softmax is deliberately not applied to the output: the training
        # loop uses a cross-entropy loss function, so raw scores are returned.
        return self.fc3(t)

network = Network()

In [5]:
# standard pytorch import statements

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms


import functools
# torch.set_grad_enabled(True)  #optional
torch.set_printoptions(precision=4, linewidth=120)

In [7]:
print(torch.__version__)
print(torchvision.__version__)

1.3.1+cpu
0.4.2+cpu


In [8]:
# Training split of Fashion-MNIST stored under ./data/FashionMNIST
# (download=True asks torchvision to fetch it); each sample is passed
# through the ToTensor transform.
training_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [167]:
sample = next(iter(training_set))

In [168]:
image, label = sample

In [169]:
image.shape

torch.Size([1, 28, 28])

In [66]:
label

9

In [9]:
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)

In [170]:
start = torch.randn_like(image.unsqueeze(0))

In [171]:
start.shape

torch.Size([1, 1, 28, 28])

In [172]:
network(image.unsqueeze(0))

tensor([[ 0.07, -0.05,  0.17,  0.05, -0.07,  0.08, -0.13, -0.07, -0.08,  0.11]], grad_fn=<AddmmBackward>)

In [111]:
data_loader = torch.utils.data.DataLoader(
  training_set,
  batch_size=10
)

In [112]:
batch = next(iter(data_loader))
images, labels = batch

In [113]:
images.shape

torch.Size([10, 1, 28, 28])

In [114]:
labels.shape

torch.Size([10])

In [115]:
labels

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])

In [116]:
pred = network(images)
print(pred.shape)
print(pred)

torch.Size([10, 10])
tensor([[ 0.00, -0.02,  0.13,  0.13, -0.11, -0.11, -0.07, -0.06, -0.07,  0.08],
        [-0.02, -0.03,  0.12,  0.15, -0.09, -0.10, -0.08, -0.06, -0.07,  0.04],
        [-0.03, -0.05,  0.11,  0.16, -0.10, -0.12, -0.10, -0.04, -0.06,  0.06],
        [-0.02, -0.05,  0.11,  0.15, -0.11, -0.12, -0.09, -0.06, -0.07,  0.06],
        [-0.03, -0.04,  0.13,  0.15, -0.08, -0.11, -0.09, -0.06, -0.07,  0.07],
        [ 0.00, -0.04,  0.11,  0.15, -0.08, -0.10, -0.07, -0.06, -0.07,  0.06],
        [-0.02, -0.04,  0.12,  0.14, -0.11, -0.12, -0.09, -0.08, -0.07,  0.07],
        [-0.00, -0.04,  0.12,  0.14, -0.09, -0.09, -0.07, -0.06, -0.06,  0.07],
        [-0.03, -0.03,  0.11,  0.16, -0.13, -0.11, -0.10, -0.04, -0.07,  0.05],
        [-0.00, -0.02,  0.11,  0.13, -0.13, -0.09, -0.07, -0.05, -0.06,  0.06]], grad_fn=<AddmmBackward>)


In [118]:
torch.argmax(pred, dim=1)

tensor([2, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [121]:
torch.argmax(pred, dim=1).eq(labels).sum()

tensor(1)

In [122]:
torch.argmax(pred, dim=1).eq(labels).sum().item()

1

In [14]:
def get_num_correct(pred, labels):
    """Return how many rows of `pred` have their highest score at the labelled index.

    Args:
        pred: 2-D tensor of scores, one row per sample and one column per class.
        labels: tensor of class indices, one per row of `pred`.

    Returns:
        The number of matching predictions as a plain Python int.
    """
    predicted_classes = pred.argmax(dim=1)
    hits = torch.eq(predicted_classes, labels)
    return hits.sum().item()

In [124]:
get_num_correct(pred, labels)

1

The size of the output along any axis after a CNN layer/operation:

O = ((n - f + 2p) / s) + 1

where n is the input size, f the filter (kernel) size, p the padding and s the stride.

In [125]:
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fce390cd588>

In [126]:
preds = network(images)
loss = F.cross_entropy(preds, labels)
loss.item()

2.281947612762451

In [129]:
print(network.conv1.weight.grad)

None


In [130]:
loss.backward()

In [131]:
print(network.conv1.weight.grad.shape)

torch.Size([10, 1, 5, 5])


In [135]:
optimizer = optim.Adam(network.parameters(), lr = 0.1)

In [136]:
loss.item()

2.281947612762451

In [137]:
get_num_correct(pred, labels)

1

In [138]:
optimizer.step()

In [10]:
# One optimisation step on a single batch of 100 images, printing the loss
# before and after the weight update.
data_loader = torch.utils.data.DataLoader(training_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(data_loader))
images, labels = batch

# Loss with the current weights.
pred = network(images)
loss = F.cross_entropy(pred, labels)
print(loss.item())

# Gradients accumulate across backward() calls, so clear whatever an earlier
# backward() on `network` left behind before computing this step's gradients.
optimizer.zero_grad()
loss.backward()
optimizer.step()

# Loss on the same batch after the update.
pred = network(images)
loss = F.cross_entropy(pred, labels)
print(loss.item())

2.3102736473083496
2.293565273284912


In [16]:
# Ten optimisation steps on one fixed batch, printing the loss before each
# step and once more after the last one.
data_loader = torch.utils.data.DataLoader(training_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

# The loader does not shuffle, so next(iter(data_loader)) always yields the
# same first batch; fetch it once instead of rebuilding the iterator on
# every pass through the loop.
batch = next(iter(data_loader))
images, labels = batch

for i in range(10):
    pred = network(images)
    loss = F.cross_entropy(pred, labels)
    print(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Loss on the same batch after the final update.
pred = network(images)
loss = F.cross_entropy(pred, labels)
print(loss.item())

1.3269104957580566
1.6831839084625244
1.1321316957473755
1.059425711631775
1.125962734222412
1.127094030380249
1.0762988328933716
0.9790597558021545
0.8695643544197083
0.7778597474098206
0.7108258605003357


In [19]:
# Train a freshly initialised network for NUM_EPOCHS passes over the training
# set, reporting the summed batch loss and the number of correct predictions
# after every epoch.
NUM_EPOCHS = 10

new_network = Network()
data_loader = torch.utils.data.DataLoader(training_set, batch_size=100)
optimizer = optim.Adam(new_network.parameters(), lr=0.01)

for i in range(NUM_EPOCHS):
    # Per-epoch accumulators; loss_list keeps the individual batch losses.
    total_loss, total_correct, loss_list = 0, 0, []

    for batch in data_loader:
        images, labels = batch

        # Forward pass and loss for this batch.
        predictions = new_network(images)
        loss = F.cross_entropy(predictions, labels)

        # Backward pass and weight update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Book-keeping (predictions were computed before the update).
        batch_loss = loss.item()
        loss_list.append(batch_loss)
        total_loss += batch_loss
        total_correct += get_num_correct(predictions, labels)

    print("epoch: ", i,' loss: ', total_loss, ' correct: ', total_correct)

epoch:  0  loss:  1382.1912021636963  correct:  5954
epoch:  1  loss:  1382.1912021636963  correct:  5954
epoch:  2  loss:  1382.1912021636963  correct:  5954
epoch:  3  loss:  1382.1912021636963  correct:  5954
epoch:  4  loss:  1382.1912021636963  correct:  5954
epoch:  5  loss:  1382.1912021636963  correct:  5954


KeyboardInterrupt: 

In [17]:
loss_list

[2.3059635162353516,
 2.3099279403686523,
 2.311288356781006,
 2.308073043823242,
 2.3052587509155273,
 2.299856185913086,
 2.30435848236084,
 2.3113608360290527,
 2.300299882888794,
 2.3156239986419678,
 2.3029723167419434,
 2.310032606124878,
 2.3170361518859863,
 2.3073227405548096,
 2.321134567260742,
 2.305598258972168,
 2.309094190597534,
 2.3059206008911133,
 2.309600830078125,
 2.316791296005249,
 2.2997803688049316,
 2.313253402709961,
 2.3110742568969727,
 2.314945936203003,
 2.3100972175598145,
 2.3127431869506836,
 2.2995803356170654,
 2.304556131362915,
 2.284764289855957,
 2.2998709678649902,
 2.311671018600464,
 2.309366226196289,
 2.305908203125,
 2.3097825050354004,
 2.2980103492736816,
 2.3105008602142334,
 2.289398193359375,
 2.313318967819214,
 2.3142566680908203,
 2.321885585784912,
 2.3086190223693848,
 2.323125123977661,
 2.3101603984832764,
 2.3035483360290527,
 2.3054797649383545,
 2.303162097930908,
 2.3187601566314697,
 2.3051633834838867,
 2.3067989349365234

In [198]:
weight_update

tensor([[[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]]],


        [[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]]],


        [[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]]],


        [[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True]]],


        [[[True, True, True, True, True],
          [True, True, True, True, True],
          [True, True, True, True, True],
          [True, T

In [158]:
total_correct/60000.

0.1

In [155]:
del new_network