In [34]:
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [35]:
img_height = 300
img_width = 200
batch_size = 32

In [36]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Przykładowe wartości
])

data_dir = './raw-img'  # Path to your dataset
dataset = ImageFolder(root=data_dir, transform=transform)

classes = dataset.classes

print(classes)

dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

train_data, val_data= random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

cpu
['cane', 'cavallo', 'elefante', 'farfalla', 'gallina', 'gatto', 'mucca', 'pecora', 'ragno', 'scoiattolo']


In [37]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, batch_size, kernel_size=5, padding=1),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # tutaj nie dawać dropout
            nn.Flatten(),
        )

        n_channels = self.feature_extractor(torch.empty(1, 3, img_height, img_width)).size(-1)
        
        self.classifier = nn.Sequential(
            nn.Linear(in_features=n_channels, out_features=512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=512, out_features=512),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=512, out_features=len(classes))
        )


    def forward(self, x):
        features = self.feature_extractor(x)
        out = self.classifier(features)
        return out


net = SimpleCNN()

In [31]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# uczenie


net.train()
for epoch in range(10):  # liczba epok
    loss_epoch = 0
    total_correct = 0
    total_samples = 0
    
    for i, batch in enumerate(train_loader, 0):
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        loss_epoch += loss.item()
        loss.backward()
        optimizer.step()
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    accuracy = 100 * total_correct / total_samples
        # print(f"epoch {epoch}, batch = {i}, loss = {loss.item()}, inputs.shape = {inputs.shape}")
    print(f"epoch = {epoch}, accuracy = {accuracy}, loss = {loss_epoch}")

In [None]:
# zapisz model

torch.save(net.state_dict(), './neural-network')

In [38]:
# załaduj wytrenowany model

net = SimpleCNN()
net.load_state_dict(torch.load('./cnn_20.pth', weights_only=True, map_location=torch.device('cpu')))
net.eval()

SimpleCNN(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): Sequential(
    (0): Linear(in_features=52992, out_features=5

In [39]:
# testowanie
net.eval()

outputs_all = []
labels_all = []
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        outputs = net(images)
        outputs_all.append(torch.softmax(outputs, dim=1).detach().cpu().numpy())
        labels_all.append(labels.numpy())

outputs_all = np.concatenate(outputs_all)
labels_all = np.concatenate(labels_all)

print(outputs_all)
print(labels_all)

print(outputs_all.shape)
print(labels_all.shape)

[[9.9897110e-01 7.4415773e-05 1.2439184e-05 ... 1.3125040e-06
  3.4076001e-07 7.3896757e-07]
 [9.8935741e-01 2.0782067e-05 2.0576856e-06 ... 7.3342747e-03
  3.5656881e-04 5.8953117e-07]
 [9.9979860e-01 1.2054069e-07 4.6015660e-11 ... 5.9563745e-09
  4.9447449e-06 3.0852116e-11]
 ...
 [4.7624469e-01 1.3884458e-02 1.1914163e-01 ... 2.5841083e-02
  3.1987999e-02 2.9390654e-01]
 [9.8871326e-01 8.9211063e-03 1.6895961e-03 ... 1.4293512e-05
  1.4541167e-06 8.1865515e-08]
 [9.6717715e-01 4.7309627e-04 1.6334279e-03 ... 5.4824154e-04
  7.9612837e-05 1.5120078e-03]]
[0 0 0 ... 0 0 0]
(5236, 10)
(5236,)


In [43]:
from PIL import Image
image_path = './val_img/cat.jpg'
image = Image.open(image_path).convert('RGB')
image_tensor = transform(image).unsqueeze(0)

with torch.no_grad():  # No gradient computation for inference
    outputs = net(image_tensor)
    _, predicted_class = torch.max(outputs, 1)  # Get the class index with the highest score


predicted_label = classes[predicted_class.item()]

print(f"Predicted Class: {predicted_label}")


Predicted Class: gatto


In [41]:

AUC = roc_auc_score(np.eye(10)[labels_all], outputs_all, multi_class='ovo')
print(f"AUC = {AUC:.4f}")

#fpr, tpr, thresholds = roc_curve(y_true=labels_all, y_score=outputs_all, pos_label=1)
#plt.plot(fpr, tpr)


predictions = np.argmax(outputs_all, axis=1)
#tn, fp, fn, tp = confusion_matrix(y_true=labels_all, y_pred=predictions).ravel()

#print(f"tn {tn}, fp {fp}, fn {fn}, tp {tp}")

# _ = ConfusionMatrixDisplay.from_estimator(classifier_05, X_test, y_test)

#def get_label(x):
#    return [classes[z] for z in x]

#cm = confusion_matrix(y_true=get_label(labels_all), y_pred=get_label(y_d), labels=classes)
#disp = ConfusionMatrixDisplay(confusion_matrix=cm,display_labels=classes)
#disp.plot()
#plt.show()

accuracy = accuracy_score(labels_all, predictions)
print(f"Accuracy = {accuracy:.4f}")

precision = precision_score(labels_all, predictions, average='macro') # macro oznacza średnią dla wszystkich kals
print(f"Precision = {precision:.4f}")
recall = recall_score(labels_all, predictions, average='macro')
print(f"Recall = {recall:.4f}")
f1 = f1_score(labels_all, predictions, average='macro')
print(f"F1 = {f1:.4f}")


conf_matrix = confusion_matrix(labels_all, predictions).ravel()
print(conf_matrix)

class_report = classification_report(labels_all, predictions)
print(class_report)

roc_auc = roc_auc_score(
    np.eye(10)[labels_all],  # One-hot encoding prawdziwych etykiet
    outputs_all,             # Prawdopodobieństwa (logity po softmax)
    multi_class='ovr'        # Podejście "One-vs-Rest"
)

print(f"AUC = {roc_auc:.4f}")

AUC = 0.9904
Accuracy = 0.9211
Precision = 0.9252
Recall = 0.9100
F1 = 0.9171
[894  11   5   6   9  13   5  10  14   4  20 455   3   0   7   1   8   3
   1   1   8   4 251   1   2   0   5   1   3   2   5   2   2 424   6   1
   0   0  21   2   7   3   1   3 577   0   2   0  11   3  17   1   1   2
   2 308   1   1  17   5  10   8   4   0   4   2 325  14   2   2  13   5
   3   1   2   2   4 325   7   3   2   1   1   5   6   1   1   2 939   2
  15   1   0   4   3   1   0   1  18 325]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       971
           1       0.93      0.91      0.92       499
           2       0.93      0.91      0.92       277
           3       0.95      0.92      0.93       463
           4       0.93      0.95      0.94       607
           5       0.94      0.87      0.90       355
           6       0.93      0.88      0.90       371
           7       0.91      0.89      0.90       365
           8       0.91     

In [42]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: cane  is 92.1 %
Accuracy for class: cavallo is 91.2 %
Accuracy for class: elefante is 90.6 %
Accuracy for class: farfalla is 91.6 %
Accuracy for class: gallina is 95.1 %
Accuracy for class: gatto is 86.8 %
Accuracy for class: mucca is 87.6 %
Accuracy for class: pecora is 89.0 %
Accuracy for class: ragno is 97.8 %
Accuracy for class: scoiattolo is 88.3 %
