<a href="https://colab.research.google.com/github/vhrique/anne_ptbr/blob/main/03a_Exemplo_Visao_Computacional.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install timm

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import timm

# MNIST com LeNet5

In [2]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN for single-channel 28x28 images with 10 output classes.

    Spatial size through the network for a 28x28 input:
    28 -> conv1 (5x5) -> 24 -> pool (2x2) -> 12 -> conv2 (5x5) -> 8 -> pool (2x2) -> 4,
    so each sample is flattened to 16 * 4 * 4 = 256 features before the
    fully connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)  # MNIST has a single input channel
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # The size after conv2 + pooling depends on the input size (4x4 for 28x28 inputs)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # 10 output classes for MNIST

    def forward(self, x):
        """Compute class logits.

        Args:
            x: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) with unnormalized class scores.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield 16 * 4 * 4
                features per sample (raised by ``fc1``).
        """
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten per sample and keep the batch dimension. view(-1, 256) could
        # silently regroup elements across samples for inputs of the wrong size;
        # this way fc1 raises a clear shape error instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# Data transforms: convert to tensor, then normalize with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by the train and test splits of MNIST
mnist_kwargs = dict(root='./data', download=True, transform=transform)

# Training split, shuffled in mini-batches of 64
trainset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Test split, fixed order in batches of 1000
testset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)
testloader = DataLoader(testset, batch_size=1000, shuffle=False)

In [4]:
# Instantiate the network, the loss function and the optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
epochs = 5
for epoch in range(epochs):
    # Select training mode explicitly, like the other training loop in this
    # notebook, so the loop stays correct if mode-dependent layers are added.
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward + backward + optimization step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader):.4f}')

Epoch 1, Loss: 0.2913
Epoch 2, Loss: 0.0726
Epoch 3, Loss: 0.0513
Epoch 4, Loss: 0.0407
Epoch 5, Loss: 0.0335


In [5]:
# Evaluation on the test set
model.eval()  # evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit. The legacy `.data`
        # access is unnecessary inside no_grad.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on test set: {100 * correct / total:.2f}%')

Accuracy on test set: 98.74%


# MNIST com EfficientNet-Lite0

In [9]:
# Build the new classifier on top of a frozen feature extractor
class CustomClassifier(nn.Module):
    """Trainable MLP head on top of a frozen feature extractor.

    The feature extractor runs under ``torch.no_grad()`` and is always kept in
    evaluation mode, including after ``model.train()``. Otherwise
    normalization layers inside it would use batch statistics and update
    their running statistics during training, even though no gradients flow
    through it.

    Args:
        feature_extractor: module mapping an input batch to a
            ``(batch, in_features)`` tensor.
        in_features: size of the feature vector produced by
            ``feature_extractor``. Defaults to 1280, the value this notebook
            uses for EfficientNet-Lite0.
        num_classes: number of output classes. Defaults to 10 (MNIST).
    """

    def __init__(self, feature_extractor, in_features=1280, num_classes=10):
        super().__init__()
        self.feature_extractor = feature_extractor
        # Frozen backbone: keep it in eval mode from the start
        self.feature_extractor.eval()
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def train(self, mode=True):
        """Set the mode of the classifier head; the feature extractor stays in eval mode.

        Returns:
            self, as ``nn.Module.train`` does.
        """
        super().train(mode)
        # super().train() propagates `mode` to every child; undo it for the frozen backbone
        self.feature_extractor.eval()
        return self

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for the input batch ``x``."""
        # No gradients through the frozen feature extractor
        with torch.no_grad():
            features = self.feature_extractor(x)
        x = self.classifier(features)
        return x

In [13]:
def _gray_to_rgb(img):
    """Repeat the tensor 3 times along its first dimension (grayscale channel -> 3 channels)."""
    return img.repeat(3, 1, 1)


# MNIST transform pipeline for the EfficientNet input
transform = transforms.Compose([
    # Resize to 224x224, the input size expected by EfficientNet
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Replicate the gray channel into 3 channels
    transforms.Lambda(_gray_to_rgb),
    # NOTE(review): the single-value mean/std is presumably broadcast over the 3 channels - confirm
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by the train and test splits of MNIST
mnist_kwargs = dict(root='./data', download=True, transform=transform)

# Training split, shuffled in mini-batches of 64
trainset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Test split, fixed order in batches of 1000
testset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)
testloader = DataLoader(testset, batch_size=1000, shuffle=False)

In [11]:
# Pick the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the pretrained EfficientNet-Lite0
effnet = timm.create_model('efficientnet_lite0', pretrained=True)

# Drop the final classification layer by replacing it with a pass-through module
effnet.classifier = nn.Identity()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/18.8M [00:00<?, ?B/s]

In [None]:
# Instantiate the custom classifier
model = CustomClassifier(effnet).to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)  # only the classifier head is trained

# Training
epochs = 5
for epoch in range(epochs):
    running_loss = 0.0
    model.train()
    # model.train() also switches the frozen backbone to training mode. Put it
    # back in eval mode so its normalization layers keep their pretrained statistics.
    model.feature_extractor.eval()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward + backward + optimization step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader):.4f}')

In [None]:
# Evaluation on the test set
model.eval()  # evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit. The legacy `.data`
        # access is unnecessary inside no_grad.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on test set: {100 * correct / total:.2f}%')