# PyTorch Introduction Tutorial
### By: Lela Bones

In this Notebook, I will be learning the ropes of PyTorch and will include all of my notes and code from the tutorials here. I hope to be able to use this notebook as a reference guide when I start creating my own code with PyTorch

### Tensors

Tensors are similar to NumPy arrays, with the addition that they can also be used on a GPU to accelerate computing

In [7]:
import torch

In [8]:
# Construct a 5x3 matrix, uninitialized: the printed values are arbitrary
x = torch.empty(5, 3)
print(x)

tensor([[        nan,  4.5556e-41,         nan],
        [ 4.5556e-41,         nan,  0.0000e+00],
        [ 7.6194e+31,  1.5564e+28,  1.8484e+31],
        [ 1.8370e+25,  1.4603e-19,  2.7517e+12],
        [ 7.5338e+28,  3.0313e+32,  6.3828e+28]])


In [9]:
# Construct a randomly initialized 5x3 matrix
x = torch.rand(5, 3)
print(x)

tensor([[ 0.3911,  0.3460,  0.0103],
        [ 0.6538,  0.6150,  0.7664],
        [ 0.9886,  0.1854,  0.6489],
        [ 0.5517,  0.4877,  0.1015],
        [ 0.9330,  0.3709,  0.6915]])


In [10]:
# Construct a 5x3 matrix filled with zeros, with dtype long (integer)
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])


In [11]:
# Construct a tensor directly from data (here a Python list)
x = torch.tensor([5.5, 3])
print(x)

tensor([ 5.5000,  3.0000])


In [12]:
# Create tensors based on an existing tensor
x = x.new_ones(5, 3, dtype=torch.double)      # new_* methods take in sizes
print(x)

x = torch.randn_like(x, dtype=torch.float)    # override dtype!
print(x)                                      # result has the same size

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)
tensor([[-0.0816, -1.7337, -0.6427],
        [-1.5981,  0.4825,  1.5763],
        [ 0.3542,  0.3225, -1.0253],
        [ 0.8435,  0.8574,  0.9914],
        [ 0.5484,  1.1965, -0.2321]])


In [13]:
# size() returns a torch.Size, which supports all tuple operations
print(x.size())

torch.Size([5, 3])


### Operations
There are multiple syntaxes for operations; here are a few for addition

In [14]:
# Addition, syntax 1: the + operator
y = torch.rand(5, 3)
print(x + y)

tensor([[ 0.1131, -0.9918, -0.6185],
        [-0.9703,  0.7504,  1.8301],
        [ 0.8899,  0.8838, -0.5619],
        [ 1.5629,  1.8555,  1.2082],
        [ 1.1401,  1.3445,  0.6089]])


In [15]:
# Addition, syntax 2: the torch.add function
print(torch.add(x, y))


tensor([[ 0.1131, -0.9918, -0.6185],
        [-0.9703,  0.7504,  1.8301],
        [ 0.8899,  0.8838, -0.5619],
        [ 1.5629,  1.8555,  1.2082],
        [ 1.1401,  1.3445,  0.6089]])


In [16]:
# Addition, syntax 3: write the sum into a preallocated output tensor
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)


tensor([[ 0.1131, -0.9918, -0.6185],
        [-0.9703,  0.7504,  1.8301],
        [ 0.8899,  0.8838, -0.5619],
        [ 1.5629,  1.8555,  1.2082],
        [ 1.1401,  1.3445,  0.6089]])


In [17]:
# Addition, syntax 4: in place -- add_ adds x to y and stores the result in y
y.add_(x)
print(y)

tensor([[ 0.1131, -0.9918, -0.6185],
        [-0.9703,  0.7504,  1.8301],
        [ 0.8899,  0.8838, -0.5619],
        [ 1.5629,  1.8555,  1.2082],
        [ 1.1401,  1.3445,  0.6089]])


Indexing works in a standard NumPy-like way

In [18]:
# All rows, column 1
print(x[:, 1])

tensor([-1.7337,  0.4825,  0.3225,  0.8574,  1.1965])


In [19]:
# Resizing: to resize/reshape a tensor, use Tensor.view
x = torch.randn(4, 4)
y = x.view(16)  # all 16 elements in a single dimension
z = x.view(-1, 8)  # -1 means "infer this size from the other dimensions"
print(*(t.shape for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [20]:
# For a one-element tensor, .item() returns the value as a plain Python number
x = torch.randn(1)
print(x)
print(x.item())

tensor([-1.1113])
-1.1113348007202148


Code to concatenate two tensors

In [74]:
# Concatenating along dim 0 (the default) stacks the rows of the inputs
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat((x_1, y_1), dim=0)
print(z_1)

# Concatenating along dim 1 places the columns of the inputs side by side
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat((x_2, y_2), dim=1)
print(z_2)

# If your tensors are not compatible, torch will complain.  Uncomment to see the error
# torch.cat([x_1, x_2])

tensor([[-2.1841,  0.4688,  0.2894, -0.0896,  0.7296],
        [ 0.1081,  1.1202, -1.8196,  0.0438, -1.0204],
        [ 0.2311, -1.9862,  0.4892,  1.4754,  0.7444],
        [ 0.9261, -0.1043, -0.4938, -0.3879, -2.6101],
        [-0.0823, -0.9036, -0.5419,  0.0493, -0.0833]])
tensor([[-1.7384, -0.9035,  0.3781, -0.1223,  0.5735,  1.0504, -0.2626,
          0.2761],
        [-0.1651, -0.3159, -0.2802, -0.1472, -0.9112, -0.3547,  1.5807,
          2.5320]])


```.view()``` reshapes the tensors

In [76]:
x = torch.randn(2, 3, 4)
print(x)
# Reshape to 2 rows and 12 columns twice: once with both sizes spelled out,
# once with the second size given as -1 so that it is inferred
print(x.view(2, 12), x.view(2, -1), sep="\n")


tensor([[[ 0.9884,  0.4878, -0.5547, -1.8035],
         [-0.9788, -0.5993,  1.0200, -1.1102],
         [ 0.8180,  0.6798,  0.4932, -0.6763]],

        [[-0.3489, -0.7651, -1.3794,  0.6154],
         [-1.6373, -1.3311,  1.2952,  2.5071],
         [ 0.3142, -0.5030,  0.3076, -0.0094]]])
tensor([[ 0.9884,  0.4878, -0.5547, -1.8035, -0.9788, -0.5993,  1.0200,
         -1.1102,  0.8180,  0.6798,  0.4932, -0.6763],
        [-0.3489, -0.7651, -1.3794,  0.6154, -1.6373, -1.3311,  1.2952,
          2.5071,  0.3142, -0.5030,  0.3076, -0.0094]])
tensor([[ 0.9884,  0.4878, -0.5547, -1.8035, -0.9788, -0.5993,  1.0200,
         -1.1102,  0.8180,  0.6798,  0.4932, -0.6763],
        [-0.3489, -0.7651, -1.3794,  0.6154, -1.6373, -1.3311,  1.2952,
          2.5071,  0.3142, -0.5030,  0.3076, -0.0094]])


Documentation on torch operations: https://pytorch.org/docs/stable/torch.html

### Converting Torch Tensor and NumPy Arrays to one another

In [21]:
# Torch tensor -> NumPy array
a = torch.ones(5)
b = a.numpy()
a += 1  # in-place update of the tensor; the array b shows the new values too
print(a, b, sep="\n")

tensor([ 2.,  2.,  2.,  2.,  2.])
[2. 2. 2. 2. 2.]


In [22]:
# NumPy array -> torch tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
a += 1  # in-place update of the array; the tensor b shows the new values too
print(a, b, sep="\n")

[2. 2. 2. 2. 2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


### Autograd
```autograd``` is a package that is central to all PyTorch neural networks. It provides automatic differentiation for all operations on Tensors. 
 * ```torch.Tensor``` is the central class of this package
    * Setting ```.requires_grad``` to True makes it track all operations on the tensor
    * ```.backward()``` automatically computes all the gradients
    * ```.grad``` accumulates the gradient for this tensor
    * ```.detach()``` stops a tensor from tracking history and detaches it from the computation history
    * ```torch.no_grad()``` allows you to evaluate models without tracking history
 * Tensor and Function together build an acyclic graph of the complete computation history
     * ```.grad_fn``` references the Function that created a Tensor

In [43]:
# Create a tensor with requires_grad=True so that operations on it are tracked
x = torch.ones(2, 2, requires_grad=True)
print(x)

tensor([[ 1.,  1.],
        [ 1.,  1.]])


In [44]:
# Apply an operation to the tracked tensor
y = x + 2
print(y)

tensor([[ 3.,  3.],
        [ 3.,  3.]])


In [45]:
# y is the result of an addition, so its grad_fn is an Add backward node
print(y.grad_fn)

<AddBackward0 object at 0x7efea11d3ef0>


In [46]:
# z = 3 * y^2 element-wise; out is the mean of z, a single-value tensor
z = y * y * 3
out = z.mean()
print(z, out)

tensor([[ 27.,  27.],
        [ 27.,  27.]]) tensor(27.)


In [47]:
# A freshly created tensor does not require grad; requires_grad_() switches
# the flag on in place, after which results computed from it carry a grad_fn
a = torch.randn(2, 2)
a = a * 3 / (a - 1)
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = torch.sum(a * a)
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7efe9da9a710>


In [48]:
# Backpropagate from out (a single-value tensor, so no gradient argument is passed)
out.backward()

In [49]:
# Gradient d(out)/dx
print(x.grad)

tensor([[ 4.5000,  4.5000],
        [ 4.5000,  4.5000]])


In [57]:
x = torch.randn(3, requires_grad=True)

y = x * 2
# Keep doubling y until its norm reaches 1000.  The loop test uses detach()
# rather than the legacy .data attribute: both give a tensor excluded from
# gradient tracking, but detach() is the recommended, autograd-safe spelling.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([-903.7942, -522.8957, -233.8321])


In [58]:
# y is a 3-element vector rather than a scalar, so backward() is given a tensor
# of the same shape; x.grad then holds dy/dx weighted element-wise by it
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print(x.grad)

tensor([  51.2000,  512.0000,    0.0512])


In [59]:
# x requires grad, and so does a result computed from it ...
print(x.requires_grad)
print((x ** 2).requires_grad)

# ... unless the computation happens inside a torch.no_grad() block
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


Documentation of autograd and Function is at https://pytorch.org/docs/stable/autograd.html

### Neural Networks
```torch.nn``` is the package that creates NNs
   * ```nn``` defines models and differentiates them using autograd
   * ```nn.Module``` contains layers and forward(input) which returns output
   * ```net.parameters()``` returns the learnable parameters

In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Convolutional network: two conv/ReLU/max-pool stages, then three linear layers.

    The first linear layer expects 16 * 5 * 5 features per sample, which is
    what the two stages produce for a single-channel 32x32 input
    (32 -> 28 -> 14 -> 10 -> 5 per side).  The network emits 10 values per
    sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 input image channel -> 6 channels -> 16 channels,
        # each with a 5x5 square kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Affine operations (y = Wx + b): 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch ``x`` through the network and return the 10 outputs per sample."""
        # First stage: max pooling over a (2, 2) window
        pooled = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        # Second stage: a square window can be given as a single number
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), kernel_size=2)
        # Collapse every non-batch dimension before the linear layers
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample: the product of all sizes but the first."""
        per_sample_shape = x.size()[1:]  # drop the batch dimension
        return per_sample_shape.numel()


# Instantiate the network; printing it lists its layers
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [63]:
# The learnable parameters: a weight and a bias for each of the five layers
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

10
torch.Size([6, 1, 5, 5])


In [64]:
# A random batch of one single-channel 32x32 image; 32x32 is the size for which
# the conv/pool stages yield the 16 * 5 * 5 features that fc1 expects.
# NOTE(review): `input` shadows the Python builtin; later cells reuse the name.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.1566, -0.0689,  0.1590,  0.0072,  0.0044, -0.0321,  0.0526,
          0.0167,  0.0097,  0.0680]])


In [65]:
# Clear the gradient buffers, then backpropagate random gradients shaped like out
net.zero_grad()
out.backward(torch.randn(1, 10))

**NOTE:** ```torch.nn``` only supports mini-batches, not single samples. If you have a single sample, add a fake batch dimension using ```input.unsqueeze(0)```

#### Loss Function
In general, a loss function computes a value that estimates how far away the output is from the target. ```nn.MSELoss``` evaluates the mean-squared error. (See https://pytorch.org/docs/stable/nn.html for more types besides this small example) 

In [71]:
output = net(input)
# A dummy target, for example.  The dtype is given explicitly because arange
# with integer arguments produces an integer tensor, while MSELoss needs a
# floating-point target to compare against the floating-point output.
target = torch.arange(1, 11, dtype=torch.float)
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(38.2444)


In [72]:
# Walk backwards through the graph from the loss by following next_functions
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # the last Linear layer (fc3), recorded as an Addmm node
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # first input of that Addmm node; the recorded output shows ExpandBackward, not ReLU

<MseLossBackward object at 0x7efe9daa6630>
<AddmmBackward object at 0x7efe9daa6208>
<ExpandBackward object at 0x7efe9daa6630>


#### Backpropagation
```loss.backward()``` differentiates the loss with respect to the network's parameters, and that is all you have to do to backpropagate the error. First, though, clear the existing gradients, or else the new gradients will accumulate into them

In [73]:
net.zero_grad()     # zeroes the gradient buffers of all parameters

# Gradient of conv1's bias right after clearing
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()  # backpropagate the loss through the network

# The same gradient, now populated by backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([ 0.,  0.,  0.,  0.,  0.,  0.])
conv1.bias.grad after backward
tensor(1.00000e-02 *
       [ 1.2347, -4.9902,  2.3266,  4.4054,  4.7160,  7.9233])
