In [22]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms

from collections import OrderedDict

import numpy as np
import timeit
import glob
import matplotlib.pyplot as plt
from PIL import Image

## Switching from ND Array > Pytorch Tensor -> ND Array

In [6]:
randomArray = np.random.randn(5, 3)
randomArray

array([[-1.15830809,  0.49762122,  0.09137921],
       [-2.3819232 ,  0.47606143, -1.99124683],
       [-1.69857664,  0.14952153, -0.05160555],
       [ 0.0166688 ,  0.89088917, -0.47966096],
       [ 0.16908941,  1.43305621, -0.83057521]])

In [7]:
randomTensor = torch.from_numpy(randomArray)
randomTensor

tensor([[-1.1583,  0.4976,  0.0914],
        [-2.3819,  0.4761, -1.9912],
        [-1.6986,  0.1495, -0.0516],
        [ 0.0167,  0.8909, -0.4797],
        [ 0.1691,  1.4331, -0.8306]], dtype=torch.float64)

In [8]:
randomTensor.numpy()

array([[-1.15830809,  0.49762122,  0.09137921],
       [-2.3819232 ,  0.47606143, -1.99124683],
       [-1.69857664,  0.14952153, -0.05160555],
       [ 0.0166688 ,  0.89088917, -0.47966096],
       [ 0.16908941,  1.43305621, -0.83057521]])

# Switching from using CPU - GPU


In [9]:
cpu = torch.device('cpu')
cuda = torch.device('cuda')

In [None]:
cuda_0 = torch.device('cuda:0')
cuda_2 = torch.device('cuda:2')

In [14]:
w1 = torch.rand(10000, 10000, device=cpu)
x1 = torch.rand(10000, 10000, device=cpu)

In [21]:
start_cpu = timeit.default_timer()

x1.matmul(w1)

end_cpu = timeit.default_timer()
print('Timelapse: ', end_cpu- start_cpu)

Timelapse:  9.464088493827148


In [22]:
start_gpu = timeit.default_timer()

x1 = x1.cuda()
w1 = w1.cuda()

x1.matmul(w1)

end_gpu = timeit.default_timer()
print('Timelapse: ', end_gpu - start_gpu)

Timelapse:  0.5371105185184888


In [30]:
print('Difference: ', np.round((end_cpu- start_cpu)/(end_gpu - start_gpu), 0),'x faster')

Difference:  18.0 x faster


# Calculate the gradient

In [3]:
# By default, requires_grad is set to False
x = torch.randn(5, 5)
y = torch.randn(5, 5)
z = torch.randn((5, 5), requires_grad=True)

result = x + y
print(result.requires_grad)

second_result = result + z
print(second_result.requires_grad)

False
True


In [None]:
input = Variable(torch.ones(2, 2), requires_grad=True)
print("Before: ", input.grad)

output = input.mean()
output.backward()

print("After: ", input.grad)

#  Object Oriented Programming Style

In [7]:
input_size = 784
hidden_size = [128, 64]
output_size = 10

In [14]:
class Network(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Network, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_size[0])
        self.fc2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.fc3 = nn.Linear(hidden_size[1], output_size)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim=1)
        
        return x

In [16]:
oop_model = Network(input_size=input_size, 
                    hidden_size=hidden_size, 
                    output_size=output_size)
oop_model

Network(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

By default, weights and bias are automatically filled up

In [17]:
print(oop_model.fc1.weight)
print(oop_model.fc1.bias)

Parameter containing:
tensor([[ 0.0334,  0.0349,  0.0099,  ...,  0.0143, -0.0273, -0.0090],
        [-0.0218, -0.0161, -0.0073,  ..., -0.0026,  0.0082,  0.0172],
        [ 0.0113, -0.0095,  0.0001,  ..., -0.0025, -0.0066,  0.0297],
        ...,
        [ 0.0303,  0.0197,  0.0251,  ...,  0.0280, -0.0012,  0.0355],
        [ 0.0238, -0.0035,  0.0249,  ..., -0.0151,  0.0099, -0.0339],
        [-0.0237, -0.0225,  0.0079,  ...,  0.0066,  0.0294,  0.0161]],
       requires_grad=True)
Parameter containing:
tensor([ 0.0317,  0.0100, -0.0192, -0.0028, -0.0024,  0.0003,  0.0218,  0.0340,
         0.0087, -0.0174,  0.0250, -0.0033, -0.0197,  0.0130, -0.0136,  0.0229,
        -0.0150, -0.0104, -0.0188,  0.0048, -0.0349, -0.0313,  0.0147,  0.0038,
         0.0222,  0.0204,  0.0070, -0.0352,  0.0320, -0.0268,  0.0100, -0.0078,
         0.0132,  0.0146, -0.0053,  0.0245,  0.0085,  0.0227, -0.0203,  0.0186,
        -0.0038, -0.0208, -0.0135,  0.0206,  0.0192, -0.0245,  0.0148, -0.0251,
        -0.0025

We can also modify the value of these tensors

In [18]:
# Set bias to be zero
oop_model.fc1.bias.data.fill_(0)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])

In [19]:
# Set weight into normal distribution with standard deviation of .01
oop_model.fc1.weight.data.normal_(std=.01)

tensor([[ 0.0069, -0.0008, -0.0109,  ..., -0.0039,  0.0123, -0.0008],
        [-0.0027,  0.0198, -0.0050,  ...,  0.0021,  0.0011, -0.0044],
        [ 0.0034,  0.0233,  0.0141,  ...,  0.0094, -0.0049,  0.0153],
        ...,
        [-0.0195,  0.0073,  0.0061,  ...,  0.0207, -0.0077, -0.0110],
        [-0.0055,  0.0001, -0.0076,  ...,  0.0099,  0.0087,  0.0060],
        [-0.0041, -0.0229,  0.0102,  ..., -0.0013,  0.0103,  0.0014]])

# Sequential Style

In [21]:
sequential_model = nn.Sequential(nn.Linear(in_features=input_size, out_features=hidden_size[0]),
                                 nn.ReLU(),
                                 nn.Linear(in_features=hidden_size[0], out_features=hidden_size[1]),
                                 nn.ReLU(),
                                 nn.Linear(in_features=hidden_size[1], out_features=output_size),
                                 nn.Softmax(dim=1))

sequential_model

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): Softmax()
)

# Sequential Style - Alternative

In [28]:
alt_sequential_model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(input_size, hidden_size[0])),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(hidden_size[0], hidden_size[1])),
    ('relu2', nn.ReLU()),
    ('output', nn.Linear(hidden_size[1], output_size)),
    ('softmax', nn.Softmax(dim=1))
]))

alt_sequential_model

Sequential(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (output): Linear(in_features=64, out_features=10, bias=True)
  (softmax): Softmax()
)

We can pick up any layer easily like you used to code in Python

In [29]:
print(sequential_model[0])
print(sequential_model[1])

print(oop_model.fc1)
print(oop_model.fc2)

print(alt_sequential_model.fc2)
print(alt_sequential_model.relu1)

Linear(in_features=784, out_features=128, bias=True)
ReLU()
Linear(in_features=784, out_features=128, bias=True)
Linear(in_features=128, out_features=64, bias=True)
Linear(in_features=128, out_features=64, bias=True)
ReLU()


# Autograd

# Finetuning by Using Pre-trained Model

# Convert from Pytorch to another framework by using ONNX