In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import os
import re
import shutil
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import ReduceLROnPlateau
import random

writer = SummaryWriter("runs/fruits")

# Wykrycie dostępności GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)

Using device: cpu


In [25]:
train_dir = "100x100_dataset/train"
valid_dir = "100x100_dataset/val"
test_dir  = "100x100_dataset/test"

In [26]:
train_transforms = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),  # mean (R,G,B)
                         (0.5, 0.5, 0.5))  # std  (R,G,B)
])

# Zbiór walidacyjny / testowy zwykle bez augmentacji, tylko normalizacja
test_transforms = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))
])

In [27]:
full_train = datasets.ImageFolder(root=train_dir, transform=train_transforms)

val_frac = 0.2
train_len = int((1 - val_frac) * len(full_train))
val_len = len(full_train) - train_len

train_dataset, valid_dataset = random_split(
    full_train, [train_len, val_len],
    generator=torch.Generator().manual_seed(SEED)
)

valid_dataset.dataset.transform = test_transforms
test_dataset  = datasets.ImageFolder(root=test_dir, transform=test_transforms)

In [30]:
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, num_workers=2)

# Liczba klas
num_classes   = len(full_train.classes)
class_to_idx  = full_train.class_to_idx
print("Liczba klas:", num_classes)
print("Klasy (index -> nazwa):", class_to_idx)

Liczba klas: 66
Klasy (index -> nazwa): {'Apple': 0, 'Apricot': 1, 'Avocado': 2, 'Banana': 3, 'Blackberry': 4, 'Blueberry': 5, 'Cabbage': 6, 'Cactus': 7, 'Cantaloupe': 8, 'Carambula': 9, 'Carrot': 10, 'Cauliflower': 11, 'Cherry': 12, 'Chestnut': 13, 'Clementine': 14, 'Cocos': 15, 'Corn': 16, 'Corn Husk': 17, 'Cucumber': 18, 'Dates': 19, 'Eggplant': 20, 'Ginger Root': 21, 'Gooseberry': 22, 'Granadilla': 23, 'Grape Blue': 24, 'Grape White': 25, 'Grapefruit Pink': 26, 'Grapefruit White': 27, 'Guava': 28, 'Hazelnut': 29, 'Kaki': 30, 'Kiwi': 31, 'Kohlrabi': 32, 'Kumquats': 33, 'Lemon Meyer': 34, 'Mandarine': 35, 'Mango Red': 36, 'Mangostan': 37, 'Nectarine Flat': 38, 'Nut Forest': 39, 'Nut Pecan': 40, 'Onion Red': 41, 'Orange': 42, 'Papaya': 43, 'Passion Fruit': 44, 'Peach': 45, 'Peach Flat': 46, 'Pear': 47, 'Pepino': 48, 'Pepper Green': 49, 'Pepper Red': 50, 'Pepper Yellow': 51, 'Physalis': 52, 'Pistachio': 53, 'Plum': 54, 'Pomelo Sweetie': 55, 'Potato Red': 56, 'Potato White': 57, 'Quince

In [31]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 100 -> 50

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 50 -> 25

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)   # 25 -> ~12
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 12 * 12, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = SimpleCNN(num_classes).to(device)
print(model)

SimpleCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=18432, out_features=256, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=256, out_features=66, bias=True)
  )
)


In [32]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', 
                              factor=0.3, patience=2, verbose=True)
early_patience = 5
best_val_loss = float("inf")
epochs_no_improve = 0



In [33]:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Statystyki
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
    
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [34]:
epochs = 20

for epoch in range(1, epochs + 1):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc     = evaluate(model, valid_loader, criterion, device)

    # TensorBoard + log na konsolę
    writer.add_scalars("Loss", {"train": train_loss, "val": val_loss}, epoch)
    writer.add_scalars("Acc",  {"train": train_acc,  "val": val_acc},  epoch)
    writer.flush()
    print(f"Ep {epoch:02d} | "
          f"train {train_loss:.3f}/{train_acc:.3f} | "
          f"val {val_loss:.3f}/{val_acc:.3f} | "
          f"LR {optimizer.param_groups[0]['lr']:.2e}")

    # scheduler LR
    scheduler.step(val_loss)

    # early‑stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), "best.pth")   # zapisz najlepszy
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= early_patience:
            print("⏹️  Early stopping – koniec uczenia.")
            break

Ep 01 | train 0.738/0.792 | val 0.033/0.992 | LR 1.00e-03
Ep 02 | train 0.121/0.963 | val 0.011/0.997 | LR 1.00e-03
Ep 03 | train 0.078/0.976 | val 0.009/0.997 | LR 1.00e-03
Ep 04 | train 0.070/0.978 | val 0.006/0.999 | LR 1.00e-03
Ep 05 | train 0.046/0.986 | val 0.004/0.998 | LR 1.00e-03
Ep 06 | train 0.038/0.987 | val 0.003/0.999 | LR 1.00e-03
Ep 07 | train 0.047/0.987 | val 0.003/1.000 | LR 1.00e-03
Ep 08 | train 0.040/0.988 | val 0.002/1.000 | LR 1.00e-03
Ep 09 | train 0.031/0.991 | val 0.002/1.000 | LR 1.00e-03
Ep 10 | train 0.039/0.989 | val 0.003/1.000 | LR 1.00e-03
Ep 11 | train 0.022/0.994 | val 0.003/1.000 | LR 1.00e-03
Ep 12 | train 0.033/0.991 | val 0.001/1.000 | LR 1.00e-03
Ep 13 | train 0.024/0.993 | val 0.003/0.999 | LR 1.00e-03
Ep 14 | train 0.030/0.992 | val 0.004/0.999 | LR 1.00e-03
Ep 15 | train 0.017/0.995 | val 0.003/1.000 | LR 1.00e-03
Ep 16 | train 0.009/0.997 | val 0.003/1.000 | LR 3.00e-04
Ep 17 | train 0.004/0.999 | val 0.003/1.000 | LR 3.00e-04
⏹️  Early stop

In [56]:
print("train classes:", len(full_train.classes))
print("test  classes:", len(test_dataset.classes))

print("Różnica test – train:",
      set(test_dataset.classes) - set(full_train.classes))

test_loss, test_acc = evaluate(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

train classes: 66
test  classes: 69
Różnica test – train: {'Zucchini dark 1', 'Limes 1', 'Zucchini 1', 'Beetroot 1', 'Fig', 'Pomegranate 1', 'Grape Pink 1', 'Mango 1', 'Pitahaya Red 1', 'Strawberry 1', 'Pepper Orange 1', 'Mulberry 1', 'Raspberry 1', 'Potato Sweet 1', 'Beans', 'Walnut 1', 'Maracuja 1', 'Huckleberry 1', 'Tamarillo 1', 'Pineapple 1', 'Lemon 1'}


UnidentifiedImageError: Caught UnidentifiedImageError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torch\utils\data\_utils\worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torchvision\datasets\folder.py", line 245, in __getitem__
    sample = self.loader(path)
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torchvision\datasets\folder.py", line 284, in default_loader
    return pil_loader(path)
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\torchvision\datasets\folder.py", line 263, in pil_loader
    img = Image.open(f)
  File "D:\agh\Podstawy-sztucznej-inteligencji\.venv\lib\site-packages\PIL\Image.py", line 3532, in open
    raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file <_io.BufferedReader name='100x100_dataset/test\\Apple\\100_100.jpg'>


In [53]:
extra = sorted(set(os.listdir(test_dir)) - set(os.listdir(train_dir)))
print("Foldery obecne TYLKO w teście:", extra, len(extra))

extra = sorted(set(os.listdir(train_dir)) - set(os.listdir(test_dir)))
print("Foldery obecne TYLKO w trainie:", extra, len(extra))

Foldery obecne TYLKO w teście: ['Beans', 'Beetroot 1', 'Fig', 'Grape Pink 1', 'Huckleberry 1', 'Lemon 1', 'Limes 1', 'Mango 1', 'Maracuja 1', 'Mulberry 1', 'Pepper Orange 1', 'Pineapple 1', 'Pitahaya Red 1', 'Pomegranate 1', 'Potato Sweet 1', 'Raspberry 1', 'Strawberry 1', 'Tamarillo 1', 'Walnut 1', 'Zucchini 1', 'Zucchini dark 1'] 21
Foldery obecne TYLKO w trainie: ['Cantaloupe', 'Carrot', 'Clementine', 'Cocos', 'Corn', 'Granadilla', 'Grape Blue', 'Guava', 'Mandarine', 'Mango Red', 'Mangostan', 'Nectarine Flat', 'Nut Forest', 'Nut Pecan', 'Orange', 'Papaya', 'Pepper Red', 'Pepper Yellow'] 18


In [None]:
torch.save(model.state_dict(), "fruits_cnn.pth")

# wczytanie (w nowej sesji lub innym skrypcie)
model = SimpleCNN(num_classes)
model.load_state_dict(torch.load("fruits_cnn.pth", map_location=device))
model.to(device)
model.eval()

In [69]:
merge_map = {
    r'^Apple': 'Apple',
    r'^Apricot': 'Apricot',
    r'^Avocado': 'Avocado',
    r'^Banana': 'Banana',
    r'^Blackberrie': 'Blackberry',
    r'^Blueberry': 'Blueberry',
    r'^Cabbage': 'Cabbage',
    r'^Cactus': 'Cactus',
    r'^Cantaloupe': 'Cantaloupe',
    r'^Carambula': 'Carambula',
    r'^Carrot': 'Carrot',
    r'^Cauliflower': 'Cauliflower',
    r'^Cherry': 'Cherry',
    r'^Chestnut': 'Chestnut',
    r'^Clementine': 'Clementine',
    r'^Cocos': 'Cocos',
    r'^Corn Husk': 'Corn Husk',
    r'^Corn': 'Corn',
    r'^Cucumber': 'Cucumber',
    r'^Dates': 'Dates',
    r'^Eggplant': 'Eggplant',
    r'^Ginger Root': 'Ginger Root',
    r'^Gooseberry': 'Gooseberry',
    r'^Granadilla': 'Granadilla',
    r'^Grape Blue': 'Grape Blue',
    r'^Grape White': 'Grape White',
    r'^Grape Pink': 'Grape Pink',
    r'^Grapefruit Pink': 'Grapefruit Pink',
    r'^Grapefruit White': 'Grapefruit White',
    r'^Guava': 'Guava',
    r'^Hazelnut': 'Hazelnut',
    r'^Kaki': 'Kaki',
    r'^Kiwi': 'Kiwi',
    r'^Kohlrabi': 'Kohlrabi',
    r'^Kumquats': 'Kumquats',
    r'^Pineapple': 'Pineapple',
    r'^Lemon': 'Lemon',
    r'^Mandarine': 'Mandarine',
    r'^Mango': 'Mango',
    r'^Mangostan': 'Mangostan',
    r'^Nectarine Flat': 'Nectarine Flat',
    r'^Nut Forest': 'Nut Forest',
    r'^Nut Pecan': 'Nut Pecan',
    r'^Onion Red': 'Onion Red',
    r'^Orange': 'Orange',
    r'^Papaya': 'Papaya',
    r'^Passion Fruit': 'Passion Fruit',
    r'^Peach Flat': 'Peach Flat',
    r'^Peach': 'Peach',
    r'^Pear': 'Pear',
    r'^Pepino': 'Pepino',
    r'^Pepper Green': 'Pepper Green',
    r'^Pepper Red': 'Pepper Red',
    r'^Pepper Yellow': 'Pepper Yellow',
    r'^Pepper Orange': 'Pepper Orange',
    r'^Physalis with Husk': 'Physalis',
    r'^Physalis': 'Physalis',
    r'^Pistachio': 'Pistachio',
    r'^Plum': 'Plum',
    r'^Mulberry': 'Mulberry',
    r'^Limes': 'Lime',
    r'^Pitahaya': 'Pitahaya',
    r'^Maracuja': 'Maracuja',
    r'^Pomegranate': 'Pomegranate',
    r'^Pomelo Sweetie': 'Pomelo Sweetie',
    r'^Potato Red Washed': 'Potato Red',
    r'^Potato Red': 'Potato Red',
    r'^Potato Sweet': 'Potato Sweet',
    r'^Potato White': 'Potato White',
    r'^Quince': 'Quince',
    r'^Rambutan': 'Rambutan',
    r'^Tamarillo': 'Tamarillo',
    r'^Redcurrant': 'Redcurrant',
    r'^Salak': 'Salak',
    r'^Strawberry': 'Strawberry',
    r'^Raspberry': 'Raspberry',
    r'^Tangelo': 'Tangelo',
    r'^Tomato Heart': 'Tomato',
    r'^Tomato not Ripened': 'Tomato',
    r'^Tomato': 'Tomato',
    r'^Watermelon': 'Watermelon',
    r'^Beans': 'Beans',
    r'^Walnut': 'Walnut',
    r'^Fig': 'Fig',
    r'^Beetroot': 'Beetroot',
    r'^Huckleberry': 'Huckleberry',
    r'^Zucchini': 'Zucchini',
    r'^Durian': 'Durian'
}

In [70]:
def map_class(folder_name):
    for pattern, general_name in merge_map.items():
        if re.match(pattern, folder_name):
            return general_name
    return folder_name

source_dirs = {
    'train': 'fruits-360_100x100/fruits-360/Training',
    # 'val': 'drive/MyDrive/content/fruits-360_original_100_30_20_test/test_val',
    # 'test': 'fruits-360_100x100/fruits-360/Test'
}

target_root = '100x100_dataset'

for split, source_dir in source_dirs.items():
    for class_folder in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        new_class = map_class(class_folder)
        target_class_dir = os.path.join(target_root, split, new_class)
        os.makedirs(target_class_dir, exist_ok=True)

        for img_file in os.listdir(class_path):
            src = os.path.join(class_path, img_file)
            dst = os.path.join(target_class_dir, img_file)
            shutil.copy2(src, dst)

print("✅ Gotowe! Nowy zbiór utworzony w:", target_root)

✅ Gotowe! Nowy zbiór utworzony w: 100x100_dataset


In [71]:
def count_images_in_subfolders(root_dir):
    """
    Dla każdego bezpośredniego podfolderu w root_dir
    wypisuje nazwę folderu i liczbę plików w nim.
    """
    for name in os.listdir(root_dir):
        subpath = os.path.join(root_dir, name)
        if os.path.isdir(subpath):
            # liczymy wszystkie pliki (nie foldery) w subpath
            count = sum(
                1 for fname in os.listdir(subpath)
                if os.path.isfile(os.path.join(subpath, fname))
            )
            print(f"{name}: {count}")

if __name__ == "__main__":
    root = "100x100_dataset/train"
    count_images_in_subfolders(root)

Apple: 1610
Apricot: 656
Avocado: 657
Banana: 1134
Beans: 77
Beetroot: 150
Blackberry: 675
Blueberry: 616
Cabbage: 193
Cactus: 893
Cantaloupe: 250
Carambula: 234
Carrot: 151
Cauliflower: 936
Cherry: 1072
Chestnut: 603
Clementine: 490
Cocos: 490
Corn: 450
Corn Husk: 616
Cucumber: 1514
Dates: 656
Eggplant: 864
Fig: 234
Ginger Root: 396
Gooseberry: 620
Granadilla: 490
Grape Blue: 984
Grape Pink: 164
Grape White: 656
Grapefruit Pink: 656
Grapefruit White: 656
Guava: 490
Hazelnut: 621
Huckleberry: 166
Kaki: 656
Kiwi: 622
Kohlrabi: 628
Kumquats: 370
Lemon: 656
Lime: 166
Mandarine: 490
Mango: 541
Maracuja: 166
Mulberry: 164
Nectarine Flat: 480
Nut Forest: 654
Nut Pecan: 534
Onion Red: 600
Orange: 479
Papaya: 492
Passion Fruit: 416
Peach: 994
Peach Flat: 656
Pear: 1259
Pepino: 656
Pepper Green: 592
Pepper Orange: 234
Pepper Red: 250
Pepper Yellow: 666
Physalis: 652
Pineapple: 166
Pistachio: 930
Pitahaya: 166
Plum: 606
Pomegranate: 164
Pomelo Sweetie: 603
Potato Red: 604
Potato Sweet: 150
Potat

In [72]:
import os
import shutil
import random
import math
from pathlib import Path

# >>> 1. ustawienia <<< --------------------------------------------------------
DATA_ROOT = Path("100x100_dataset")          # katalog z danymi
TRAIN_DIR = DATA_ROOT / "train"              # obecny zbiór (same obrazki)
VAL_DIR   = DATA_ROOT / "val"
TEST_DIR  = DATA_ROOT / "test"
VAL_FRACTION  = 0.15                         # 15 % walidacja
TEST_FRACTION = 0.15                         # 15 % test
RANDOM_SEED   = 42                           # powtarzalność

# >>> 2. przygotuj katalogi <<< ------------------------------------------------
for target_dir in (VAL_DIR, TEST_DIR):
    target_dir.mkdir(parents=True, exist_ok=True)

# >>> 3. losowy, stratyfikowany podział <<< -----------------------------------
random.seed(RANDOM_SEED)

for class_dir in TRAIN_DIR.iterdir():
    if not class_dir.is_dir():
        continue

    images = sorted(class_dir.glob("*"))            # lista ścieżek
    random.shuffle(images)

    n_total = len(images)
    n_val   = math.ceil(n_total * VAL_FRACTION)
    n_test  = math.ceil(n_total * TEST_FRACTION)

    # ── 3a. utwórz katalogi klas w val/test ───────────────────────────────────
    (VAL_DIR  / class_dir.name).mkdir(parents=True, exist_ok=True)
    (TEST_DIR / class_dir.name).mkdir(parents=True, exist_ok=True)

    # ── 3b. przenieś pliki ────────────────────────────────────────────────────
    for i, img_path in enumerate(images):
        if i < n_val:
            dst = VAL_DIR  / class_dir.name / img_path.name
        elif i < n_val + n_test:
            dst = TEST_DIR / class_dir.name / img_path.name
        else:
            continue  # zostaje w TRAIN_DIR
        shutil.move(img_path, dst)

print("✅  Zbiory utworzone — możesz trenować model.")


✅  Zbiory utworzone — możesz trenować model.
