# PyTorch introduction
* An introduction to PyTorch, a machine-learning library for Python


In [None]:
import torch
import torch.nn as nn

In [None]:
# Numbers
t1 = torch.tensor(4.)
t1.shape
t1.dtype

torch.float32

In [None]:
# Vector
t2 = torch.tensor([1.0 , 2 , 3, 4])
t2.shape

torch.Size([4])

In [None]:
# Matrix
t3 = torch.tensor([[5. , 6] , [7 , 8] , [9 , 10]])
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [None]:
# 3-dimensional array
t4 = torch.tensor([
    [[11 , 12 , 13],
     [13 , 14 , 15]],
    [[15 , 16 , 17] ,
     [17 , 18 , 19.]]
])

t4

tensor([[[11., 12., 13.],
         [13., 14., 15.]],

        [[15., 16., 17.],
         [17., 18., 19.]]])

### Tensor operations and gradients

In [None]:
# x is a plain input value; w and b are created with requires_grad=True
# so that gradients are computed for them during backpropagation.
x  = torch.tensor(3.)
w = torch.tensor(4. , requires_grad = True)
b = torch.tensor(5. , requires_grad = True) # requires_grad=True makes backward() populate b.grad

In [None]:
y = w * x  + b
y

tensor(17., grad_fn=<AddBackward0>)

In [None]:
# Compute the derivatives
y.backward()

In [None]:
# Display gradients
print('dy/dx:' , x.grad)
print('dy/dw' , w.grad)
print('dy/db' , b.grad)

dy/dx: None
dy/dw tensor(3.)
dy/db tensor(1.)


In [None]:
import numpy as np

x = np.array([[1 , 2] , [3. , 4]] )
x.shape

(2, 2)

In [None]:
# Convert the NumPy array to a torch tensor
y = torch.from_numpy(x)
y = torch.tensor(x) # alternative conversion; this assignment replaces the y created on the previous line
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [None]:
x.dtype , y.dtype

(dtype('float64'), torch.float64)

In [None]:
# Convert a torch tensor to a NumPy array
z = y.numpy()
z

array([[1., 2.],
       [3., 4.]])

In [None]:
# !pip install jovian --upgrade
# import jovian

In [None]:
# jovian.commit()

In [None]:
# Training the data using inputs and the outputs
# Input (temp , rainfall , humidity)
inputs = np.array([[73 , 67 , 43],
                   [91 , 88  , 64],
                   [87 , 134 , 58],
                   [102 , 43 , 37],
                   [69 , 96 , 70]
                   ] , dtype = 'float32')





In [None]:
targets = np.array([[56 , 70],
                   [81 , 101],
                   [119 , 133],
                   [22 , 37],
                   [103 , 119]
                   ] , dtype = 'float32')

In [None]:
# Convert the inputs and the targets to tensors.
# NOTE: the input tensor is stored under the name `inpute`; `inputs` itself
# is not reassigned here, so `inputs.shape` below is still the NumPy shape.
inpute = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
inputs.shape , targets.shape

((5, 3), torch.Size([5, 2]))

In [None]:
# Weights and biases
w = torch.randn(2 , 3 , requires_grad = True)
b = torch.randn(2 , requires_grad = True)
w , b

(tensor([[-0.4313,  0.7108, -1.2652],
         [-0.4533,  2.3586,  0.1752]], requires_grad=True),
 tensor([ 0.2871, -0.8974], requires_grad=True))

In [None]:
def model(x):
  """Linear model: multiply x by the transposed weights w and add the bias b."""
  weighted = x @ w.t()
  return weighted + b

In [None]:
def model(x):
  """Linear model written with torch.matmul: x times w transposed, plus b."""
  projected = torch.matmul(x , w.t())
  return projected + b

In [None]:
pred = model(inpute)
pred

tensor([[-37.9739, 131.5706],
        [-57.3784, 176.6205],
        [-15.3642, 285.8786],
        [-59.9500,  60.7672],
        [-49.7945, 206.5130]], grad_fn=<AddBackward0>)

In [None]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [None]:
# implement the mean  squared error(MSE)
def mse(targets , pred):
  """Return the mean squared error between targets and pred.

  The squared element-wise differences are summed and divided by the
  number of elements in the difference tensor.
  """
  error = targets - pred
  squared = error * error
  count = error.numel() # total number of elements, like len() over every entry
  return torch.sum(squared) / count


In [None]:
loss = mse(targets , pred)
loss

tensor(11720.0059, grad_fn=<DivBackward0>)

In [None]:
# Compute the gradient
# loss.backward()


In [None]:
# Gradients for the weights
print(w)
print(w.grad) # with gradient is the derivative of the loss with respect the weights w

tensor([[-0.4313,  0.7108, -1.2652],
        [-0.4533,  2.3586,  0.1752]], requires_grad=True)
None


In [None]:
print(b)
print(b.grad)


tensor([ 0.2871, -0.8974], requires_grad=True)
None


In [None]:
# w.grad.zero_()
# b.grad.zero_()
# print(w.grad)
# print(b.grad) # Reset the gradients to zero first: PyTorch accumulates gradients across backward() calls, so each fresh computation must start from zero


In [None]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-10008.7891, -10934.0967,  -6883.5977],
        [  6627.8413,   8137.7612,   4671.9004]])
tensor([-120.2922,   80.2700])


In [None]:
# Adjust weights & reset gradients

# The update runs inside torch.no_grad() so the in-place changes to w and b
# are not themselves tracked for gradient computation.
with torch.no_grad():

  w -= w.grad * 1e-5 # step against the gradient, scaled by a 1e-5 learning rate
  b -= b.grad * 1e-5
  w.grad.zero_() # clear in place so the next backward() starts from zero
  b.grad.zero_()



In [None]:
# Train for 10000 epochs (the range below sets the count)

for i in range(10000):
  preds = model(inpute) # forward pass on the input tensor
  # Arguments are swapped relative to the mse(targets, pred) signature;
  # the squared difference is the same either way.
  loss  = mse(preds , targets)
  loss.backward() # populate w.grad and b.grad
  with torch.no_grad():
    w -= w.grad * 1e-5 # learning rate
    b -= b.grad * 1e-5
    w.grad.zero_() # reset so gradients do not accumulate into the next epoch
    b.grad.zero_()

In [None]:
# Calculate the loss
preds = model(inpute)
loss = mse(preds , targets)
print(loss)

tensor(0.5057, grad_fn=<DivBackward0>)


In [None]:
print(preds)

tensor([[ 57.2151,  70.1597],
        [ 82.1748, 100.7634],
        [118.6866, 132.9838],
        [ 21.0757,  37.0344],
        [101.9267, 119.1128]], grad_fn=<AddBackward0>)


In [None]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [None]:
inpute

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [None]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [None]:
from torch.utils.data import TensorDataset
# Define the dataset
train_ds = TensorDataset(inpute , targets)
train_ds[1 :  , 1: ]

(tensor([[ 88.,  64.],
         [134.,  58.],
         [ 43.,  37.],
         [ 96.,  70.]]),
 tensor([[101.],
         [133.],
         [ 37.],
         [119.]]))

In [None]:
# Walk the dataset one (input, target) pair at a time; enumerate supplies
# the 1-based counter instead of a hand-maintained one.
for q , (i , n) in enumerate(train_ds , start = 1):
  print(f"batch:{q}")
  print(i)
  print(n)

batch:1
tensor([73., 67., 43.])
tensor([56., 70.])
batch:2
tensor([91., 88., 64.])
tensor([ 81., 101.])
batch:3
tensor([ 87., 134.,  58.])
tensor([119., 133.])
batch:4
tensor([102.,  43.,  37.])
tensor([22., 37.])
batch:5
tensor([69., 96., 70.])
tensor([103., 119.])


### nn.Linear

In [None]:
# Define the model  with the pytorch

model  = nn.Linear(3 , 2) # 3 inputs into the data & 2 output from the data
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.4017, -0.2155,  0.0763],
        [ 0.1457, -0.0436, -0.2801]], requires_grad=True)
Parameter containing:
tensor([0.3057, 0.5211], requires_grad=True)


In [None]:
# Parameters
list(model.parameters())

[Parameter containing:
 tensor([[-0.4017, -0.2155,  0.0763],
         [ 0.1457, -0.0436, -0.2801]], requires_grad=True),
 Parameter containing:
 tensor([0.3057, 0.5211], requires_grad=True)]

In [None]:
inputs = torch.from_numpy(inputs)

### The model value predictions compatible with TensorFlow

In [None]:
# Generate predictions
preds = model(inputs) # The model coming from the nn.linear
preds

tensor([[-40.1765,  -3.8087],
        [-50.3307,  -7.9843],
        [-59.0943,  -8.8918],
        [-47.1115,   3.1442],
        [-42.7597, -13.2196]], grad_fn=<AddmmBackward0>)

#### Loss Function

In [None]:
# Import nn.functional
import torch.nn.functional as F

In [None]:
# Define the loss function
loss_fn = F.mse_loss

In [None]:
# Let's compute the loss for the current predictions of our model
loss = loss_fn(model(inputs) , targets)
print(loss)

tensor(14032.4346, grad_fn=<MseLossBackward0>)


### To improve the model, use an optimizer
* Use SGD (stochastic gradient descent). It is called "stochastic" because samples are selected in batches (often with random shuffling) instead of as a single group.

In [None]:
# Define the opt
opt =  torch.optim.SGD(model.parameters() , lr = 1e-5)

### Train the model
* Generate predictions
* Calculate the loss
* Compute the gradients with respect to the weights and biases
* Adjust the weights by subtracting a small quantity proportional to gradient
* Reset the gradients to zero

The only change is that we will work with batches of data, instead of processing the entire training data in every iteration.

In [None]:
# Utility function to train the model
def fit(num_epochs , model , loss_fn , opt , train_data = None):
  """Train model with one gradient-descent update per item of train_data.

  Args:
    num_epochs: number of full passes over the training data.
    model: callable mapping an input to predictions.
    loss_fn: callable taking (predictions, targets) and returning a scalar
      loss tensor.
    opt: optimizer exposing step() and zero_grad() for the model parameters.
    train_data: iterable yielding (inputs, targets) pairs. Defaults to the
      notebook-level train_ds so existing four-argument calls keep working.

  Returns:
    None. Every tenth epoch the loss of the last processed item is printed.
  """
  if train_data is None:
    train_data = train_ds

  # Drop gradients left over from earlier backward() calls so they are not
  # added into the first update.
  opt.zero_grad()

  # Repeat for the given epochs
  for epoch in range(num_epochs):

    last_loss = None # stays None when train_data yields nothing

    # Train with batches of data
    for xb , yb in train_data:

      # 1. Generate predictions
      pred = model(xb)

      # 2. Calculate the loss
      loss = loss_fn(pred , yb)

      # 3. Compute gradients
      loss.backward()

      # 4. Update parameters using gradients
      opt.step()

      # 5. Reset the gradients to zero
      opt.zero_grad()

      last_loss = loss

    # Print the progress (skipped when no item was processed this epoch)
    if (epoch + 1) % 10 == 0 and last_loss is not None:
      print("Epoch [{} / {}] , Loss: {:.4f}".format(epoch + 1 , num_epochs , last_loss.item()))

* If the loss becomes NaN, the gradients have blown up, typically because the update step (learning rate) is too large.

In [None]:
fit(50 ,  model , loss_fn , opt)

Epoch [10 / 50] , Loss: 0.6672
Epoch [20 / 50] , Loss: 0.6656
Epoch [30 / 50] , Loss: 0.6642
Epoch [40 / 50] , Loss: 0.6629
Epoch [50 / 50] , Loss: 0.6618
