In [1]:
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [2]:
img_height = 300
img_width = 200
batch_size = 32

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Przykładowe wartości
])

data_dir = './raw-img-2'  # Path to your dataset
dataset = ImageFolder(root=data_dir, transform=transform)

classes = dataset.classes

print(classes)

dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

train_data, val_data= random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

cpu
['butterfly', 'cat', 'chicken', 'cow', 'dog', 'elephant', 'horse', 'sheep', 'spider', 'squirrel']


In [4]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, batch_size, kernel_size=5, padding=1),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten(),
        )

        n_channels = self.feature_extractor(torch.empty(1, 3, img_height, img_width)).size(-1)
        
        self.classifier = nn.Sequential(
            nn.Linear(in_features=n_channels, out_features=512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=512, out_features=512),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=512, out_features=len(classes))
        )


    def forward(self, x):
        features = self.feature_extractor(x)
        out = self.classifier(features)
        return out


net = SimpleCNN()

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [None]:
# uczenie
epochs = 20

net.train()
for epoch in range(epochs):
    loss_epoch = 0
    total_correct = 0
    total_samples = 0
    
    for i, batch in enumerate(train_loader, 0):
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        loss_epoch += loss.item()
        loss.backward()
        optimizer.step()
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    accuracy = 100 * total_correct / total_samples
        # print(f"epoch {epoch}, batch = {i}, loss = {loss.item()}, inputs.shape = {inputs.shape}")
    print(f"epoch = {epoch}, accuracy = {accuracy}, loss = {loss_epoch}")

In [None]:
# zapisz model

torch.save(net.state_dict(), './neural-network')

In [6]:
# załaduj wytrenowany model

net = SimpleCNN()
net.load_state_dict(torch.load('./cnn_20_en_v2.pth', weights_only=True, map_location=torch.device('cpu')))
net.eval()

SimpleCNN(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): Sequential(
    (0): Linear(in_features=52992, out_features=5

In [7]:
# testowanie
net.eval()

outputs_all = []
labels_all = []
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        outputs = net(images)
        outputs_all.append(torch.softmax(outputs, dim=1).detach().cpu().numpy())
        labels_all.append(labels.numpy())

outputs_all = np.concatenate(outputs_all)
labels_all = np.concatenate(labels_all)

print(outputs_all)
print(labels_all)

print(outputs_all.shape)
print(labels_all.shape)

[[7.2041084e-04 4.0947314e-02 1.0577036e-02 ... 8.4640384e-02
  1.0081687e-03 1.5953753e-02]
 [4.8734903e-04 3.0005045e-02 1.9337343e-03 ... 2.0631100e-03
  3.8184195e-03 1.6428629e-03]
 [9.9999952e-01 3.6359010e-11 4.1177906e-08 ... 2.1001870e-12
  2.3953839e-07 5.0393441e-09]
 ...
 [1.9626400e-08 1.1850730e-08 9.2079899e-10 ... 2.7979638e-07
  5.9896883e-09 7.3614865e-09]
 [5.6026998e-04 9.9043894e-01 9.5272332e-04 ... 3.7836097e-07
  4.1101966e-04 6.2243671e-05]
 [2.9135012e-04 9.9608201e-01 2.3082037e-05 ... 5.0516970e-05
  2.7990730e-03 7.2960298e-05]]
[4 4 0 ... 6 1 1]
(5236, 10)
(5236,)


In [11]:
from PIL import Image
image_path = './val_img/cat.jpg'
image = Image.open(image_path).convert('RGB')
image_tensor = transform(image).unsqueeze(0)

with torch.no_grad():  # No gradient computation for inference
    outputs = net(image_tensor)
    _, predicted_class = torch.max(outputs, 1)  # Get the class index with the highest score


predicted_label = classes[predicted_class.item()]

print(f"Predicted Class: {predicted_label}")


Predicted Class: cat


In [10]:

AUC = roc_auc_score(np.eye(10)[labels_all], outputs_all, multi_class='ovo')
print(f"AUC = {AUC:.4f}")

#fpr, tpr, thresholds = roc_curve(y_true=labels_all, y_score=outputs_all, pos_label=1)
#plt.plot(fpr, tpr)


predictions = np.argmax(outputs_all, axis=1)
#tn, fp, fn, tp = confusion_matrix(y_true=labels_all, y_pred=predictions).ravel()

#print(f"tn {tn}, fp {fp}, fn {fn}, tp {tp}")

# _ = ConfusionMatrixDisplay.from_estimator(classifier_05, X_test, y_test)

#def get_label(x):
#    return [classes[z] for z in x]

#cm = confusion_matrix(y_true=get_label(labels_all), y_pred=get_label(y_d), labels=classes)
#disp = ConfusionMatrixDisplay(confusion_matrix=cm,display_labels=classes)
#disp.plot()
#plt.show()

accuracy = accuracy_score(labels_all, predictions)
print(f"Accuracy = {accuracy:.4f}")

precision = precision_score(labels_all, predictions, average='macro') # macro oznacza średnią dla wszystkich kals
print(f"Precision = {precision:.4f}")
recall = recall_score(labels_all, predictions, average='macro')
print(f"Recall = {recall:.4f}")
f1 = f1_score(labels_all, predictions, average='macro')
print(f"F1 = {f1:.4f}")


conf_matrix = confusion_matrix(labels_all, predictions).ravel()
print(conf_matrix)

class_report = classification_report(labels_all, predictions)
print(class_report)

roc_auc = roc_auc_score(
    np.eye(len(classes))[labels_all],  # One-hot encoding prawdziwych etykiet
    outputs_all,             # Prawdopodobieństwa (logity po softmax)
    multi_class='ovr'        # Podejście "One-vs-Rest"
)

print(f"AUC = {roc_auc:.4f}")

AUC = 0.9886
Accuracy = 0.9110
Precision = 0.9150
Recall = 0.8990
F1 = 0.9066
[392   1   3   0   3   0   1   1  25   2   2 269   1   1  25   0   0   0
  15   4   5   1 557   0  10   0   2   2  10   6   3   2   0 319  16   4
  14   6   1   0   2  21   4   8 859   4  10   9  21   5   1   0   1   3
  16 267   6   2  11   1   2   3   5   7  13   6 512   3   2   1   1   1
   1   9  16   2   5 319   6   2   7   5   3   0   7   3   1   4 932   8
   3   5   7   2  15   3   1   1  15 344]
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       428
           1       0.87      0.85      0.86       317
           2       0.96      0.94      0.95       593
           3       0.91      0.87      0.89       365
           4       0.88      0.91      0.89       943
           5       0.92      0.87      0.89       308
           6       0.93      0.92      0.93       554
           7       0.92      0.88      0.90       362
           8       0.90     

In [9]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: butterfly is 91.6 %
Accuracy for class: cat   is 84.9 %
Accuracy for class: chicken is 93.9 %
Accuracy for class: cow   is 87.4 %
Accuracy for class: dog   is 91.1 %
Accuracy for class: elephant is 86.7 %
Accuracy for class: horse is 92.4 %
Accuracy for class: sheep is 88.1 %
Accuracy for class: spider is 96.1 %
Accuracy for class: squirrel is 86.9 %
