In [1]:
# !python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
# !python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
# !pip uninstall torch torchvision torchaudio -y

# !python -c "import torch; print(torch.__version__); print('CUDA disponível:', torch.cuda.is_available())"

import torch
print(torch.__version__)
print('CUDA disponível:', torch.cuda.is_available())




2.9.0+cpu
CUDA disponível: False


In [23]:
import zipfile
from PIL import Image
# import torch
from torchvision import transforms
from io import BytesIO
from torch.utils.data import DataLoader, TensorDataset
import numpy as np


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("GPU is", "available" if device else "NOT AVAILABLE")

# Caminho para o arquivo ZIP
zip_path = 'Cops_DB.zip'
# zip_path2 = 'not-bird.zip'

# Transforms para redimensionar e converter para tensor
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),  # Converte para [C x H x W]
])

# Lista para armazenar os tensores
image_in_tensors = []
image_out_tensors = []


# Abre o ZIP e processa as imagens diretamente
# def loadImages(zip_path,label,max):
#     count = 0
#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#         for file_name in zip_ref.namelist():
#             if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
#                 with zip_ref.open(file_name) as file:
#                     image = Image.open(BytesIO(file.read())).convert('RGB')
#                     tensor = transform(image)  # Shape: [3, 256, 256]
#                     #print(tensor.shape)
#                     image_in_tensors.append(tensor)
#                     image_out_tensors.append([label])
#                 count+=1
#                 if count%1000 == 0:
#                     print(count)
                    
#                 if count >= max:
#                     return
                
def loadImages(zip_path):
    labels_map = {}  # nome_da_pasta -> índice
    current_label = 0
    count_total = 0

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                parts = file_name.split('/')
                if len(parts) < 3:
                    continue
                class_name = parts[1]
                if class_name not in labels_map:
                    labels_map[class_name] = current_label
                    current_label += 1

                label = labels_map[class_name]
                with zip_ref.open(file_name) as file:
                    image = Image.open(BytesIO(file.read())).convert('RGB')
                    tensor = transform(image)
                    image_in_tensors.append(tensor)
                    image_out_tensors.append(label)
                count_total += 1

    print(f"Total de imagens: {count_total}")
    print(f"Total de classes: {len(labels_map)}")
    return labels_map

labels_map = loadImages(zip_path)
num_classes = len(labels_map)                

# Empilha os tensores em um batch
loadImages(zip_path)
# loadImages(zip_path2,0,5000)

print(f'Total de imagens carregadas: {len(image_in_tensors)}')

t_x = torch.stack(image_in_tensors)
t_y = torch.tensor(image_out_tensors,dtype=torch.long)

shuffler = np.random.permutation(len(t_x))

x_shuffled = t_x[shuffler]
y_shuffled = t_y[shuffler]

tlen = len(x_shuffled)
l = int(tlen*0.4) #20%
xa = x_shuffled[0:l]
ya = y_shuffled[0:l]

xb = x_shuffled[l:tlen]
yb = y_shuffled[l:tlen]

t_xt = xa.to(device)
t_yt = ya.to(device)



dataset = TensorDataset(t_xt, t_yt)
#batch_tensor = torch.stack(image_in_tensors)  # Shape: [N, 3, 256, 256]


print(f'Shape do batch: {t_xt.shape}')
print(f'Shape dos rótulos: {t_yt.shape}')



GPU is available
Total de imagens: 829
Total de classes: 30
Total de imagens: 829
Total de classes: 30
Total de imagens carregadas: 1658
Shape do batch: torch.Size([663, 3, 32, 32])
Shape dos rótulos: torch.Size([663])


In [24]:
BATCH_SIZE = 16

train_idx, val_idx = [], []
for label in range(num_classes):
    label_indices = (t_y == label).nonzero(as_tuple=True)[0]
    train_samples = label_indices[:20]
    val_samples = label_indices[20:]
    train_idx.extend(train_samples.tolist())
    val_idx.extend(val_samples.tolist())

x_train, y_train = t_x[train_idx], t_y[train_idx]
x_val, y_val = t_x[val_idx], t_y[val_idx]

train_ds = TensorDataset(x_train, y_train)
val_ds = TensorDataset(x_val, y_val)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

print(f"Tamanho do treino: {len(train_ds)}, validação: {len(val_ds)}")

Tamanho do treino: 600, validação: 1058


In [None]:
import torch;
import torch.nn as nn

class CNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super(CNNClassifier, self).__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )
    def forward(self, x):
        x = self.conv_layer(x)
        x = self.fc_layer(x)
        return x
    
model = CNNClassifier(num_classes).to(device)

In [None]:
import torch;
import torch.nn as nn

class RedeCnnBirdNotBird(nn.Module):
    def __init__(self):
        super(RedeCnnBirdNotBird, self).__init__()

        self.conv1 = nn.Conv2d(3, 6, 3, stride=1)
        self.conv2 = nn.Conv2d(6, 12, 3, stride=1)
        self.conv3 = nn.Conv2d(12, 24, 5, stride=1)
        self.poll1 = nn.MaxPool2d(2,2)
        self.poll2 = nn.MaxPool2d(2,2)
        
        self.linear1 = nn.Linear(864,256)
        self.linear2 = nn.Linear(256,1)
        #self.linear3 = nn.Linear(164,164)
        #self.linear4 = nn.Linear(512,1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.poll1(x)
        x = self.poll2(x)
        
        #print("x poll1: ",x.size())
        
        x = torch.flatten(x, start_dim=1)
        
        #print("x flatten: ",x.size())
        
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        
        #x = torch.relu(self.linear1(x))
        #x = torch.relu(self.linear2(x))
        #x = torch.tanh(self.linear3(x))
        #x = torch.relu(self.linear4(x))
        return x

Tamanho do treino: 600, validação: 229


In [None]:
import time

def trainOld(cnn,dataset, epochs=10):
    opt = torch.optim.Adam(cnn.parameters(),lr=0.0001)#0.00001  #0.0000001

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=64, shuffle=True
    )

    for epoch in range(epochs):
        totalloss = 0
        batch = 0
        start_time = time.time()
        opt.zero_grad()

        for inputs, targets in train_loader:
            
            #inputs = x[:, :insize]    
            #targets = x[:, insize:] 
            #x = x.to(device) # GPU
            x_hat = cnn(inputs)
            #print(inputs.size()," ",targets.size()," ",x_hat.size())
            loss = ((targets - x_hat)**2).sum()

            loss.backward(retain_graph=True)
            totalloss+=loss
            batch+=1
 
            opt.step()
            opt.zero_grad()
            #print("step: ")

        end_time = time.time()
        print(epoch," Total Loss: ",(totalloss/len(dataset))," time ",(end_time-start_time))
        #if epoch%250==0:
        #    torch.save(cnn, "backup_training_gam_v02.pth")
    return cnn


Época 1/10 - Loss: 3.4101
Época 2/10 - Loss: 3.3948
Época 3/10 - Loss: 3.3051
Época 4/10 - Loss: 3.1383
Época 5/10 - Loss: 2.9496
Época 6/10 - Loss: 2.7653
Época 7/10 - Loss: 2.6100
Época 8/10 - Loss: 2.4372
Época 9/10 - Loss: 2.3303
Época 10/10 - Loss: 2.2066

Acurácia na validação: 21.83%


In [39]:
import torch.optim as optim

def train(cnn,dataset, epochs=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cnn.parameters(), lr=0.001)

    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in dataset:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = cnn(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataset)}')

    return cnn


In [None]:
# cnn = RedeCnnBirdNotBird().to(device)
# cnn = trainOld(cnn, dataset,epochs=100)

cnn = CNNClassifier(num_classes).to(device)
cnn = train(cnn, train_dl, epochs=100)





Epoch 1, Loss: 3.40944472739571
Epoch 2, Loss: 3.3945072889328003
Epoch 3, Loss: 3.3145860182611564
Epoch 4, Loss: 3.086020093215139
Epoch 5, Loss: 2.8526153062519275
Epoch 6, Loss: 2.66401375594892
Epoch 7, Loss: 2.4789590521862634
Epoch 8, Loss: 2.3019910141041406
Epoch 9, Loss: 2.2042783185055383
Epoch 10, Loss: 2.0337852239608765
Epoch 11, Loss: 1.8593720103565015
Epoch 12, Loss: 1.6907846896271956
Epoch 13, Loss: 1.5767765358874672
Epoch 14, Loss: 1.4157531920232271
Epoch 15, Loss: 1.2284743644689258
Epoch 16, Loss: 1.037464373990109
Epoch 17, Loss: 0.8855181838336744
Epoch 18, Loss: 0.794564953760097
Epoch 19, Loss: 0.6232949303169
Epoch 20, Loss: 0.5369615319528078
Epoch 21, Loss: 0.4397844766315661
Epoch 22, Loss: 0.3686230406165123
Epoch 23, Loss: 0.2669883901743512
Epoch 24, Loss: 0.25114631143055466
Epoch 25, Loss: 0.17766035955987478
Epoch 26, Loss: 0.17167082968118944
Epoch 27, Loss: 0.13480623223279653
Epoch 28, Loss: 0.14273068828410224
Epoch 29, Loss: 0.0929309414013436

In [None]:
def calculaMetricasOld(dataset,cnn):
    tabela_verdade = [
        [0, 0],  # classe 0
        [0, 0]   # classe 1
    ]

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True
    )


    for inputs, targets in train_loader:
        x_hat = cnn(inputs)
        classe = 0
        if x_hat[0] > 0.5:
            classe = 1
            
        #print(targets[0][0])
        tabela_verdade[classe][int(targets[0][0])]+=1
        
    print(tabela_verdade)

    print('   B','   NB')
    print('PB ',tabela_verdade[1][1],' ',tabela_verdade[1][0])
    print('PNB ',tabela_verdade[0][1],' ',tabela_verdade[0][0])

    VP = tabela_verdade[1][1]
    FP = tabela_verdade[1][0]
    FN = tabela_verdade[0][1]
    VN = tabela_verdade[0][0]
    Total = VP+FP+FN+VN
    ACC = (VP+VN)/Total
    PRE = VP/(VP+FP)
    REV = VP/(VP+FN)
    F1 = (PRE*REV/(PRE+REV))*2

    print("ACC ",ACC)
    print("PRE ",PRE)
    print("REV ",REV)
    print("F1 ",F1)

In [51]:
import torch
import numpy as np

def calculaMetricas(dataset, model, device='cpu', nomes_classes=None):
    model.eval()
    num_classes = len(torch.unique(dataset.tensors[1]))  # detecta nº de classes
    confusion = torch.zeros(num_classes, num_classes, dtype=torch.int32)

    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            confusion[predicted.item(), targets.item()] += 1

    # -------------------------------------------------------------
    total = confusion.sum().item()
    correct = confusion.diag().sum().item()
    acc = correct / total if total > 0 else 0

    # Ajuste de largura para o nome da classe
    largura_nome = max(len(nome) for nome in nomes_classes) + 4 if nomes_classes else 20

    print("\n===================== RESULTADOS POR CLASSE =====================")
    print(f"{'Classe':<{largura_nome}}{'FP':<8}{'FN':<8}{'TP':<8}{'Precisão':<12}{'Revocação':<12}{'F1-score':<10}")
    print("-" * (largura_nome + 55))

    precision, recall, f1 = [], [], []

    for c in range(num_classes):
        TP = confusion[c, c].item()
        FP = confusion[c, :].sum().item() - TP
        FN = confusion[:, c].sum().item() - TP

        pre = TP / (TP + FP) if (TP + FP) > 0 else 0
        rec = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1c = (2 * pre * rec) / (pre + rec) if (pre + rec) > 0 else 0

        precision.append(pre)
        recall.append(rec)
        f1.append(f1c)

        nome_classe = nomes_classes[c] if nomes_classes and c < len(nomes_classes) else f"Planta{c+1}"
        print(f"{nome_classe:<{largura_nome}}{FP:<8}{FN:<8}{TP:<8}{pre:<12.4f}{rec:<12.4f}{f1c:<10.4f}")

    print("-" * (largura_nome + 55))
    print(f"Acurácia geral: {acc:.4f}")
    print(f"Precisão média: {np.mean(precision):.4f}")
    print(f"Revocação média: {np.mean(recall):.4f}")
    print(f"F1-score médio: {np.mean(f1):.4f}")
    print("=" * (largura_nome + 55))

    return confusion, acc, precision, recall, f1


In [47]:
import zipfile

def obter_nomes_classes_zip(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        nomes_classes = set()
        for file_name in zip_ref.namelist():
            partes = file_name.split('/')
            if len(partes) > 1 and partes[1] != '':
                nomes_classes.add(partes[1])
        
        nomes_classes = sorted(list(nomes_classes))
        return nomes_classes


In [57]:
nome_plantas = obter_nomes_classes_zip('Cops_DB.zip')

print("------------------- Treinamento ---------------------------------")
calculaMetricas(dataset,cnn, device, nome_plantas)
print("------------------- TESTE ---------------------------------")
t_xtb = xb.to(device)
t_ytb = yb.to(device)
datasetb = TensorDataset(t_xtb, t_ytb)
calculaMetricas(datasetb,cnn, device, nome_plantas)

------------------- Treinamento ---------------------------------

Classe                 FP      FN      TP      Precisão    Revocação   F1-score  
------------------------------------------------------------------------------
Cherry                 2       6       21      0.9130      0.7778      0.8400    
Coffee-plant           6       9       14      0.7000      0.6087      0.6512    
Cucumber               3       4       16      0.8421      0.8000      0.8205    
Fox_nut(Makhana)       5       0       14      0.7368      1.0000      0.8485    
Lemon                  4       6       16      0.8000      0.7273      0.7619    
Olive-tree             6       7       16      0.7273      0.6957      0.7111    
Pearl_millet(bajra)    4       10      24      0.8571      0.7059      0.7742    
Tobacco-plant          2       7       20      0.9091      0.7407      0.8163    
almond                 4       0       15      0.7895      1.0000      0.8824    
banana                 6       7  

(tensor([[21,  1,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  0],
         [ 2, 26,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  5,  0,  1,  1,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0],
         [ 0,  0, 26,  0,  0,  0,  0,  3,  0,  2,  0,  0,  2,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
         [ 2,  0,  0, 30,  0,  1,  2,  1,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0],
         [ 3,  0,  1,  0, 26,  1,  0,  2,  0,  2,  0,  0,  0,  1,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0, 26,  0,  0,  0,  0,  0,  0,  0,  1,  3,  0,  2,  0,
           0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0],
         [ 0,  0,  0,  0,  0,  0, 32,  0,  0,  0,  0,  0,  0,  0,  0,  2,  4,  0,
           0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0],
         [ 0,

In [14]:
import zipfile
from io import BytesIO
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os

# ============================================================
# CONFIGURAÇÕES
# ============================================================
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Usando dispositivo:", device)

zip_path = 'Cops_DB.zip'  # Caminho do dataset compactado
IMG_SIZE = (64, 64)       # Tamanho padronizado
BATCH_SIZE = 16
EPOCHS = 10

# ============================================================
# TRANSFORMAÇÃO DAS IMAGENS
# ============================================================
transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

# ============================================================
# LISTAS PARA ARMAZENAR TENSORES
# ============================================================
image_in_tensors = []
image_out_tensors = []

# ============================================================
# FUNÇÃO PARA LER AS IMAGENS DO ZIP
# ============================================================
def loadImages(zip_path):
    labels_map = {}  # nome_da_pasta -> índice
    current_label = 0
    count_total = 0

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                parts = file_name.split('/')
                if len(parts) < 3:
                    continue
                class_name = parts[1]
                if class_name not in labels_map:
                    labels_map[class_name] = current_label
                    current_label += 1

                label = labels_map[class_name]
                with zip_ref.open(file_name) as file:
                    image = Image.open(BytesIO(file.read())).convert('RGB')
                    tensor = transform(image)
                    image_in_tensors.append(tensor)
                    image_out_tensors.append(label)
                count_total += 1

    print(f"Total de imagens: {count_total}")
    print(f"Total de classes: {len(labels_map)}")
    return labels_map

labels_map = loadImages(zip_path)
num_classes = len(labels_map)

# ============================================================
# CONVERTE LISTAS PARA TENSORES E EMBARALHA
# ============================================================
t_x = torch.stack(image_in_tensors)
t_y = torch.tensor(image_out_tensors, dtype=torch.long)

indices = np.random.permutation(len(t_x))
t_x = t_x[indices]
t_y = t_y[indices]

# ============================================================
# SEPARA TREINO (20 imagens por classe) e VALIDAÇÃO
# ============================================================
train_idx, val_idx = [], []
for label in range(num_classes):
    label_indices = (t_y == label).nonzero(as_tuple=True)[0]
    train_samples = label_indices[:20]
    val_samples = label_indices[20:]
    train_idx.extend(train_samples.tolist())
    val_idx.extend(val_samples.tolist())

x_train, y_train = t_x[train_idx], t_y[train_idx]
x_val, y_val = t_x[val_idx], t_y[val_idx]

train_ds = TensorDataset(x_train, y_train)
val_ds = TensorDataset(x_val, y_val)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

print(f"Tamanho do treino: {len(train_ds)}, validação: {len(val_ds)}")

# ============================================================
# REDE NEURAL CONVOLUCIONAL SIMPLES
# ============================================================
class CNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super(CNNClassifier, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * (IMG_SIZE[0]//4) * (IMG_SIZE[1]//4), 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

model = CNNClassifier(num_classes).to(device)

# ============================================================
# TREINAMENTO
# ============================================================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Época {epoch+1}/{EPOCHS} - Loss: {total_loss/len(train_dl):.4f}")

# ============================================================
# AVALIAÇÃO (VALIDAÇÃO)
# ============================================================
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for xb, yb in val_dl:
        xb, yb = xb.to(device), yb.to(device)
        preds = model(xb)
        _, predicted = torch.max(preds, 1)
        total += yb.size(0)
        correct += (predicted == yb).sum().item()

accuracy = 100 * correct / total
print(f"\nAcurácia na validação: {accuracy:.2f}%")


Usando dispositivo: cpu
Total de imagens: 829
Total de classes: 30
Tamanho do treino: 600, validação: 229
Época 1/10 - Loss: 3.4152
Época 2/10 - Loss: 3.3018
Época 3/10 - Loss: 2.9650
Época 4/10 - Loss: 2.5928
Época 5/10 - Loss: 2.2759
Época 6/10 - Loss: 1.9404
Época 7/10 - Loss: 1.6452
Época 8/10 - Loss: 1.3162
Época 9/10 - Loss: 0.9873
Época 10/10 - Loss: 0.7131

Acurácia na validação: 33.19%
