<a href="https://colab.research.google.com/github/wallisonferreira/machine-learning-pavic/blob/main/pavic_11_pytorch_CV_script_mode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabalhando com datasets customizados

## 0 Importar bibliotecas

In [2]:
import os
# Create the directory that the %%writefile cells below write their modules into;
# exist_ok=True makes re-running this cell harmless.
os.makedirs("going_modular", exist_ok=True)

In [None]:
import torch
from torch import nn

torch.__version__

'2.1.0+cu118'

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

## 1 Aquisição de dados

In [3]:
import requests
import zipfile
from pathlib import Path

# Where the dataset archive is downloaded from and where it is unpacked.
DATA_URL = "https://github.com/mafaldasalomao/pavic_treinamento_ml/raw/main/data/pizza_steak_sushi.zip"
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"
zip_path = data_path / "pizza_steak_sushi.zip"

# Skip the download only when the directory exists AND already holds something;
# an empty directory left behind by a failed run must not count as "downloaded".
if image_path.is_dir() and any(image_path.iterdir()):
    print(f"{image_path} directory exists.")
else:
    data_path.mkdir(parents=True, exist_ok=True)

    # Download first and fail loudly on HTTP errors, so that an error page is
    # never written to disk and mistaken for a zip archive.
    request = requests.get(DATA_URL, timeout=60)
    request.raise_for_status()
    zip_path.write_bytes(request.content)

    # Create the target directory only once the archive is safely on disk.
    image_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(image_path)


In [4]:
# Paths of the training and test splits inside the dataset directory.
train_dir = image_path / "train" # data/pizza_steak_sushi/train
test_dir = image_path / "test" # data/pizza_steak_sushi/test
train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

## 2 Carregar os dados em tensores

In [21]:
%%writefile going_modular/data_setup.py
import os
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to 0 (load data in the main process) so DataLoader always gets an int.
NUM_WORKERS = os.cpu_count() or 0

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int = NUM_WORKERS
):
    """Build training and test DataLoaders from two image-folder directories.

    Args:
        train_dir: Root directory of the training images, passed to ImageFolder.
        test_dir: Root directory of the test images, passed to ImageFolder.
        transform: Transform applied to the images of BOTH splits.
        batch_size: Number of samples per batch in each DataLoader.
        num_workers: Number of worker processes per DataLoader.

    Returns:
        A tuple ``(train_dataloader, test_dataloader, class_names)`` where
        ``class_names`` is the ``classes`` list of the training dataset.
    """
    # ImageFolder datasets for each split.
    train_data = datasets.ImageFolder(root=train_dir, transform=transform)
    test_data = datasets.ImageFolder(root=test_dir, transform=transform)

    # Class names are taken from the training split.
    class_names = train_data.classes

    # Only the training loader shuffles; the test loader keeps dataset order.
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  shuffle=True,
                                  pin_memory=True)

    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=batch_size,
                                 num_workers=num_workers,
                                 shuffle=False,
                                 pin_memory=True)

    return train_dataloader, test_dataloader, class_names

Overwriting going_modular/data_setup.py


## 4 Criar modelo - TinyVGG

In [15]:
%%writefile going_modular/model_builder.py

import torch
from torch import nn

class TinyVGG(nn.Module):
  """Small VGG-style CNN: two conv blocks followed by a linear classifier.

  Each conv block is Conv-ReLU-Conv-ReLU-MaxPool. The 3x3 convolutions use
  padding 1 and stride 1, so only the two 2x2 max-pools change the spatial
  size (each halves it). The classifier expects ``hidden_units * 16 * 16``
  flattened features, i.e. 64x64 input images.

  Args:
    input_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output logits (classes).
  """

  def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
    super().__init__()

    # Block 1: (input_shape, 64, 64) -> (hidden_units, 32, 32)
    first_block_layers = [
        nn.Conv2d(input_shape, hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(hidden_units, hidden_units, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    self.conv_block_1 = nn.Sequential(*first_block_layers)

    # Block 2: (hidden_units, 32, 32) -> (hidden_units, 16, 16)
    second_block_layers = [
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units,
                  kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units,
                  kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    ]
    self.conv_block_2 = nn.Sequential(*second_block_layers)

    # Classifier head: flatten the 16x16 feature maps and project to the logits.
    # (The attribute name is kept as-is because it appears in state_dict keys.)
    flattened_features = hidden_units * 16 * 16
    self.classiffier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(flattened_features, output_shape),
    )

  def forward(self, x):
    """Return the class logits for a batch of images."""
    features = self.conv_block_2(self.conv_block_1(x))
    return self.classiffier(features)

Overwriting going_modular/model_builder.py


In [None]:
try:
  import torchinfo
except:
  !pip install torchinfo
  import torchinfo

from torchinfo import summary

summary(model_0, input_size=[1, 3, 64, 64])


## 5 Treinar o modelo TinyVGG

In [33]:
%%writefile going_modular/engine.py
from tqdm.auto import tqdm
import torch
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device):
  """Run one training pass over ``dataloader`` and update the model.

  Args:
    model: Model to train; it is switched to training mode.
    dataloader: Iterable of ``(X, y)`` batches that also supports ``len()``.
    loss_fn: Callable computing the loss from ``(predictions, targets)``.
    optimizer: Optimizer stepped once per batch.
    device: Device the batches are moved to before the forward pass.

  Returns:
    A tuple ``(train_loss, train_acc)``: the per-batch loss and the per-batch
    accuracy, each averaged over the number of batches.
  """
  model.train()

  train_loss, train_acc = 0, 0

  for X, y in dataloader:
    X, y = X.to(device), y.to(device)
    # 1. forward pass
    y_pred = model(X)
    # 2. compute the loss and accumulate it
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()
    # 3. reset gradients left over from the previous batch
    optimizer.zero_grad()
    # 4. backpropagate
    loss.backward()
    # 5. update the parameters
    optimizer.step()
    # Take the argmax directly on the logits (softmax does not change which
    # entry is largest); this matches how test_step computes its labels.
    y_pred_class = y_pred.argmax(dim=1)
    train_acc += (y_pred_class == y).sum().item() / len(y_pred)

  # Average the accumulated loss and accuracy over the batches.
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device):
  model.eval()

  test_loss, test_acc = 0, 0
  with torch.inference_mode():
    for batch, (X, y) in enumerate(dataloader):
      X, y = X.to(device), y.to(device)
      #1forward pass
      test_pred_logits = model(X)
      #2 calcular o erro cumulativo
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()

      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y)).sum().item()/len(test_pred_labels)

  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          device: torch.device,
          loss_fn: torch.nn.Module = torch.nn.CrossEntropyLoss(),
          epochs: int = 5):
  """Train and evaluate ``model`` for ``epochs`` epochs, logging each epoch.

  Every epoch runs ``train_step`` on ``train_dataloader`` and then
  ``test_step`` on ``test_dataloader``, prints the four metrics, and records
  them.

  Returns:
    A dict with keys ``"train_loss"``, ``"train_acc"``, ``"test_loss"`` and
    ``"test_acc"``, each mapping to a list holding one value per epoch.
  """
  metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
  # One empty history list per metric, filled in as training progresses.
  results = {name: [] for name in metric_names}

  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer, device)
    test_loss, test_acc = test_step(model, test_dataloader, loss_fn, device)

    # Report how the network is doing after this epoch.
    print(
        f"Epoch: {epoch + 1} | "
        f"Train_loss: {train_loss:.4f} | "
        f"Train_acc: {train_acc:.4f} | "
        f"Test_loss: {test_loss:.4f} | "
        f"Test_acc: {test_acc:.4f}"
    )

    # Append this epoch's metrics to their histories.
    epoch_metrics = (train_loss, train_acc, test_loss, test_acc)
    for name, value in zip(metric_names, epoch_metrics):
      results[name].append(value)

  return results

Overwriting going_modular/engine.py


In [19]:
%%writefile going_modular/utils.py

from pathlib import Path
import torch

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
    """Save ``model``'s state_dict to ``target_dir / model_name``.

    Args:
        model: Model whose ``state_dict()`` is saved.
        target_dir: Directory to save into; created (with parents) if missing.
        model_name: File name of the checkpoint; must end in ``.pt`` or ``.pth``.

    Raises:
        AssertionError: If ``model_name`` does not end in ``.pt`` or ``.pth``.
    """
    # Validate the file name before touching the filesystem, so that an invalid
    # name does not leave a freshly created directory behind.
    assert model_name.endswith((".pth", ".pt")), "A extensão do modelo deve ser .pt ou .pth"

    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True,
                          exist_ok=True)
    model_save_path = target_dir_path / model_name

    # Only the state_dict (the parameters) is saved, not the whole module object.
    print(f"[INFO] Salvando o modelo em: {model_save_path}")
    torch.save(obj=model.state_dict(),
               f=model_save_path)

Writing going_modular/utils.py


In [None]:
# NOTE(review): TinyVGG, train, train_data, train_dataloader_custom and
# test_dataloader_custom are not defined in any cell visible in this notebook
# (the model and the training loop are only written to going_modular/*.py), so
# this cell relies on kernel state from elsewhere - confirm before running.
NUM_EPOCHS = 50
model_0 = TinyVGG(input_shape=3,
                hidden_units=10,
                output_shape=len(train_data.classes)).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(), lr=0.001)

from timeit import default_timer as timer

# Time the whole training run.
start_time = timer()
# `device` is a required argument of train(); without it the call raises TypeError.
model_0_results = train(model=model_0,
                        train_dataloader=train_dataloader_custom,
                        test_dataloader=test_dataloader_custom,
                        optimizer=optimizer,
                        device=device,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)
end_time = timer()
print(f"Tempo de treinamento: {end_time-start_time:.3f} segundos.")

In [35]:
%%writefile going_modular/train.py
import os
import torch
from torchvision import transforms
import data_setup, engine, model_builder, utils

# Hyperparameters
NUM_EPOCHS    = 25
BATCH_SIZE    = 32
HIDDEN_UNITS  = 10
LEARNING_RATE = 0.001

# Training and test directories (relative to the current working directory)
train_dir = "data/pizza_steak_sushi/train"
test_dir  = "data/pizza_steak_sushi/test"

# Use the GPU when CUDA is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"


def main():
    """Build the dataloaders and the TinyVGG model, train it, and save its weights."""
    # Preprocessing: resize every image to 64x64 and convert it to a tensor
    data_transform = transforms.Compose([
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor()
    ])

    # Create the dataloaders
    train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
        train_dir=train_dir,
        test_dir=test_dir,
        transform=data_transform,
        batch_size=BATCH_SIZE
    )

    # One output logit per class found in the training directory
    model = model_builder.TinyVGG(
        input_shape=3,
        hidden_units=HIDDEN_UNITS,
        output_shape=len(class_names)
    ).to(device)

    # Loss function and optimizer
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE)

    # Train
    engine.train(model=model,
                 train_dataloader=train_dataloader,
                 test_dataloader=test_dataloader,
                 loss_fn=loss_fn,
                 optimizer=optimizer,
                 epochs=NUM_EPOCHS,
                 device=device)

    # Save the trained model
    utils.save_model(model=model,
                     target_dir="models",
                     model_name="pavic_model_script_tinyVGG.pth")


# The guard keeps DataLoader worker processes (which re-import this module on
# spawn-based platforms) from re-running the training script themselves.
if __name__ == "__main__":
    main()

Overwriting going_modular/train.py


In [36]:
# Run the training script written above as a standalone Python process.
!python going_modular/train.py

  0% 0/25 [00:00<?, ?it/s]Epoch: 1 | Train_loss: 1.1049 | Train_acc: 0.2930 | Test_loss: 1.1128 | Test_acc: 0.2604
  4% 1/25 [00:09<03:36,  9.01s/it]Epoch: 2 | Train_loss: 1.1077 | Train_acc: 0.3008 | Test_loss: 1.1158 | Test_acc: 0.1979
  8% 2/25 [00:10<01:41,  4.42s/it]Epoch: 3 | Train_loss: 1.0977 | Train_acc: 0.2891 | Test_loss: 1.0958 | Test_acc: 0.2983
 12% 3/25 [00:11<01:02,  2.84s/it]Epoch: 4 | Train_loss: 1.1002 | Train_acc: 0.3633 | Test_loss: 1.0988 | Test_acc: 0.3722
 16% 4/25 [00:12<00:43,  2.09s/it]Epoch: 5 | Train_loss: 1.1019 | Train_acc: 0.3438 | Test_loss: 1.1336 | Test_acc: 0.2604
 20% 5/25 [00:13<00:33,  1.68s/it]Epoch: 6 | Train_loss: 1.0905 | Train_acc: 0.3438 | Test_loss: 1.1248 | Test_acc: 0.3021
 24% 6/25 [00:14<00:27,  1.43s/it]Epoch: 7 | Train_loss: 1.0587 | Train_acc: 0.5469 | Test_loss: 1.1541 | Test_acc: 0.2917
 28% 7/25 [00:14<00:22,  1.27s/it]Epoch: 8 | Train_loss: 1.1060 | Train_acc: 0.3438 | Test_loss: 1.1271 | Test_acc: 0.2917
 32% 8/25 [00:15<00:19, 

In [37]:
!zip -r pavic.zip /content/ -x */content/sample_data/*

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config/default_configs.db (deflated 98%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/.last_update_check.json (deflated 22%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/.last_opt_in_prompt.yaml (stored 0%)
  adding: content/.config/.last_survey_prompt.yaml (stored 0%)
  adding: content/.config/gce (stored 0%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2023.12.04/ (stored 0%)
  adding: content/.config/logs/2023.12.04/14.27.00.107426.log (deflated 56%)
  adding: content/.config/logs/2023.12.04/14.23.59.638040.log (deflated 86%)
  adding: content/.config/logs/2023.12.04/14.26.48.840108.log (deflated 58%)
  adding: content/.config/logs/2023.12.04/14.20.49.627769.log (deflated 91%)

In [None]:
!zip -r pavic.zip content/ -x *samples*