In [115]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pandas as pd

import torch.utils.data as data
from torch.utils.data import DataLoader as DL
from torch.utils.data import TensorDataset as TD


$$ DiscoveringActivationFunction

In [116]:
# f(x) = max(0, x)

In [117]:
relu = nn.ReLU()
leaky_ReLU = nn.LeakyReLU(negative_slope=0.01) 
# default negative_slope=0.01 is mean slope of x<0

In [137]:
n_features = 2
n_classes = 8

model_leaky = nn.Sequential(
    nn.Linear(n_features, 8),
    nn.Linear(8, 4),
    nn.Linear(4, n_classes)
)

total  = 0
for p in model_leaky.parameters():
    total += p.numel() # numel() is number of elements in tensor

print(total)

100


$$ Learning Rate And Momentum

### Updating the weights of the model with SGD


In [119]:
sgd = optim.SGD(model_leaky.parameters(), lr=0.01, momentum=0.95)
#momentum is a hyperparameter that multiplies the gradient 
# of the previous step before adding the gradient of the current step

#bad values can lead to divergence or slow convergence


| **Learning rate**                          | **Momentum**                                              |
|--------------------------------------------|-----------------------------------------------------------|
| Controls the step size                     | Controls the inertia                                       |
| Too small leads to long training times     | Null momentum can lead to the optimizer being stuck in a local minimum |
| Too high leads to poor performances        | Non-null momentum can help find the function minimum      |
| Typical values between 10^(-2) and 10^(-4) | Typical values between 0.85 and 0.99                        |


### Layer initilization (1)


In [120]:
layer = nn.Linear(64, 128)
print(layer.weight.min(), layer.weight.max())

tensor(-0.1250, grad_fn=<MinBackward1>) tensor(0.1250, grad_fn=<MaxBackward1>)


### layer initilization (2)

In [121]:
layer = nn.Linear(64, 128)
nn.init.uniform_(layer.weight)

Parameter containing:
tensor([[0.8476, 0.1572, 0.0792,  ..., 0.2461, 0.1292, 0.9178],
        [0.4511, 0.8923, 0.4014,  ..., 0.0015, 0.9416, 0.0273],
        [0.5610, 0.9758, 0.2160,  ..., 0.5174, 0.1567, 0.8896],
        ...,
        [0.5182, 0.3257, 0.5692,  ..., 0.5583, 0.7704, 0.4541],
        [0.0565, 0.9794, 0.9163,  ..., 0.2847, 0.2188, 0.1318],
        [0.2694, 0.6144, 0.1585,  ..., 0.1855, 0.6918, 0.1816]],
       requires_grad=True)

In [122]:
print(layer.weight.min(), layer.weight.max())

tensor(2.6226e-05, grad_fn=<MinBackward1>) tensor(1.0000, grad_fn=<MaxBackward1>)


In [123]:
#transfer learning and fine tuning (1)
# 
torch.save(layer, 'model_initilization.pth')

In [124]:
new_layer = torch.load('model_initilization.pth')

  new_layer = torch.load('model_initilization.pth')


In [125]:
print(new_layer)

Linear(in_features=64, out_features=128, bias=True)


### Transfer learning and fine-tuning

In [126]:


model_fine_tuning = nn.Sequential(
    nn.Linear(64, 128),
    nn.Linear(128, 256))

for name, param in model_fine_tuning.named_parameters():
    if name == "0.weight":
        param.requires_grad = False

#transfer learning and fine tuning
# Fine-turning is a technique 
# that consists of unfreezing the entire (or part of) pre-trained model 
# and re-training it with a different dataset.



In [127]:
animals_target = pd.array(
    ['animal_name', 'hair', 'feathers', 'eggs', 
     'milk', 'predator', 'fins', 'legs', 'tail','type'])

animals_data = pd.DataFrame(
    [["skimmer",     0,1,1,0,1,0,2,1,2],
    ["gull",        0,1,1,0,1,0,2,1,2],
    ["seahourse",   0,0,1,0,0,1,0,1,4],
    ["tuatara",     0,0,1,0,1,0,4,1,3],
    ["squirrel",    1,0,0,1,0,0,2,1,1]]
)

#type key : 
# 1 = mammal, 2 = bird, 3 = reptile, 4 = fish, 
# 5 = amphibian, 6 = bug, 7 = invertebrate



In [128]:
features = animals_data.iloc[:, 1:-1]
#this code will select all rows and all columns except the last one

X = features.to_numpy()
print(X)

[[0 1 1 0 1 0 2 1]
 [0 1 1 0 1 0 2 1]
 [0 0 1 0 0 1 0 1]
 [0 0 1 0 1 0 4 1]
 [1 0 0 1 0 0 2 1]]


In [None]:
target = animals_data.iloc[:, -1]
y = target.to_numpy()
print(y)

[2 2 4 3 1]


### Recalling TensorDataset


In [150]:
#recalling tensor dataset is a class that wraps a tensor 
# and allows us to access rows and columns
# that we can use to train our model
#


dataset = TD(torch.tensor(X.astype(np.float32)), torch.tensor(y).float().unsqueeze(1))

In [151]:
sample = dataset[0]
input_sample,  label_sample = sample

print(input_sample)
print(label_sample)


tensor([0., 1., 1., 0., 1., 0., 2., 1.])
tensor([2.])


### Recalling DataLoader

In [152]:
#recall that DataLoader is a class that wraps a dataset and provides
# an iterable over the dataset

#batch size is the number of samples that will be passed through the model

batch_size = 2
shuffle = True

#create a DataLoader 
dl = DL(dataset, batch_size=batch_size, shuffle=shuffle)
print(dl)


<torch.utils.data.dataloader.DataLoader object at 0x31b49be00>


In [133]:
for batch_inpus, batch_labels in dl:
    print(batch_inpus)
    print(batch_labels)
    print("\n")

tensor([[1., 0., 0., 1., 0., 0., 2., 1.],
        [0., 0., 1., 0., 1., 0., 4., 1.]])
tensor([1., 3.])


tensor([[0., 1., 1., 0., 1., 0., 2., 1.],
        [0., 1., 1., 0., 1., 0., 2., 1.]])
tensor([2., 2.])


tensor([[0., 0., 1., 0., 0., 1., 0., 1.]])
tensor([4.])




$$ Eluating Model Performance

In [148]:
print(dl)

<torch.utils.data.dataloader.DataLoader object at 0x31b1e3ec0>


### Conculating training loss

In [159]:
#raw dateset is usually in three subsets: training, validation, and test
#training dataset (80-90% of dataset) is  used to adjust the model's paramenters
#validation dataset (10-20% of dataset) is used to fine-tune the model's hyperparameters
#testing dataset (5-10% of dataset) is only used once to evaluate the model's performance, final metrics

# Define the model, criterion, and optimizer

criterion = nn.MSELoss()
model = nn.Sequential(
    nn.Linear(8, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
    nn.ReLU(),
    nn.Linear(4, 1)
)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.95)


epochs = 5
for epoch in range(epochs):
    training_loss = 0.0
    for i, data in enumerate(dl, 0):
        inputs, labels = data

        # Zero the gradient
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        
        # Calculate the loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Accumulate loss
        training_loss += loss.item()
    
    epoch_loss = training_loss / len(dl)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/5, Loss: 5.7033
Epoch 2/5, Loss: 2.1391
Epoch 3/5, Loss: 4.2162
Epoch 4/5, Loss: 1.4497
Epoch 5/5, Loss: 0.9146


### Conculating validation loss

In [167]:
validation_loss = .0

model.eval() # Put model is evaluation mode

validationloader = DL(dataset, batch_size=2, shuffle=False)

for i in range(epochs):
    # training_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(validationloader, 0):
            #run the forward pass
            inputs, labels = data

            outputs = model(inputs)

            #calculate the loss
            loss = criterion(outputs, labels)

            # loss.backward()
            # optimizer.step()

            validation_loss += loss.item()

    epoch_loss = validation_loss / len(validationloader)
    model.train()

    print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {epoch_loss:.4f}")


Epoch 5/5, Validation Loss: 0.7314
Epoch 5/5, Validation Loss: 1.4628
Epoch 5/5, Validation Loss: 2.1942
Epoch 5/5, Validation Loss: 2.9256
Epoch 5/5, Validation Loss: 3.6570


 ta có thể so sánh hai phương thức tìm hàm loss trên là so sánh giữa training và validation
 
loss để xem xem model có bị overfitting hay không.

  Nếu training loss nhỏ hơn validation loss

 thì model đang bị overfitting, ngược lại thì model đang bị underfitting.

ở đây ta thấy rằng __validation loss__ lớn hơn __training loss__ nên model đang bị 
 _underfitting_

![Alt text](/Users/hongviet/Documents/GitHub/Data-Analysis/thuchanh/fristweek/hinh1.png)
