In [1]:
import pandas as pd
from PIL import Image, PngImagePlugin
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import models, transforms
import pytorch_lightning as pl
import torch
from torch import nn
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import TensorBoardLogger
from torchmetrics.classification import Accuracy
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

PngImagePlugin.MAX_TEXT_CHUNK = int(1e15)

  warn(


In [2]:
df = pd.read_parquet('../new_images_path_all.parquet.gzip')
len(df)

26974

In [3]:
class ImageDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        label = row['Id_typology']
        img = Image.open(f"/home/jupyter/datasphere/project/{row['Img_folder']}/{row['Img_path']}")
        
        if self.transform:
            img = self.transform(img)
        return img, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [4]:
BATCH_SIZE = 64

dataset = ImageDataset(df, transform=transform)
train_set, val_set = torch.utils.data.random_split(dataset, [0.75, 0.25])
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)

logger = TensorBoardLogger("transfer_learn_logs", name="resnet")

In [5]:
class ImageClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        backbone = models.resnet50(weights="DEFAULT")
        num_filters = backbone.fc.in_features
        layers = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)
        self.feature_extractor.eval()
        self.classifier = nn.Linear(num_filters, 15)
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task="multiclass", num_classes=15)

    def forward(self, x):
        with torch.no_grad():
            representations = self.feature_extractor(x).flatten(1)
        x = self.classifier(representations)
        return x

    def training_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        loss = self.criterion(outputs, labels)
        # correct = outputs.argmax(dim=1).eq(labels).sum().item()
        accuracy = self.accuracy(outputs, labels)
        logs = {'loss': loss, 'accuracy': accuracy}
        self.log_dict(logs, logger=True)
        return logs
    
    def validation_step(self, batch, batch_idx):
        images, labels = batch 
        outputs = self(images) 
        loss = self.criterion(outputs, labels.view(-1))
        # correct = outputs.argmax(dim=1).eq(labels).sum().item()
        accuracy = self.accuracy(outputs, labels)
        logs = {"val_loss": loss, "val_accuracy": accuracy}
        self.log_dict(logs, logger=True)
        return logs

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [6]:
model = ImageClassifier()

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /tmp/xdg_cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 66.3MB/s]


In [7]:
trainer = pl.Trainer(
    max_epochs=80,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    logger=logger,
    log_every_n_steps=317,
    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=5)],
    enable_checkpointing=True
)

trainer.fit(model, train_loader, val_loader)
# trainer.save_checkpoint("final.ckpt")
# trainer.fit(model, train_loader, val_loader, ckpt_path="./transfer_learn_logs/resnet/version_0/checkpoints/epoch=7-step=1272.ckpt")

Epoch 1: 100%|██████████| 317/317 [11:06<00:00,  0.48it/s, v_num=1]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/106 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/106 [00:00<?, ?it/s][A
Validation DataLoader 0:   1%|          | 1/106 [00:00<00:02, 43.01it/s][A
Validation DataLoader 0:   2%|▏         | 2/106 [00:02<02:15,  0.77it/s][A
Validation DataLoader 0:   3%|▎         | 3/106 [00:02<01:30,  1.14it/s][A
Validation DataLoader 0:   4%|▍         | 4/106 [00:02<01:07,  1.50it/s][A
Validation DataLoader 0:   5%|▍         | 5/106 [00:07<02:33,  0.66it/s][A
Validation DataLoader 0:   6%|▌         | 6/106 [00:12<03:34,  0.47it/s][A
Validation DataLoader 0:   7%|▋         | 7/106 [00:12<03:02,  0.54it/s][A
Validation DataLoader 0:   8%|▊         | 8/106 [00:12<02:38,  0.62it/s][A
Validation DataLoader 0:   8%|▊         | 9/106 [00:17<03:05,  0.52it/s][A
Validation DataLoader 0:   9%|▉         | 10/106 [00:21<03:21,  0.48it/s][A

PermissionError: Caught PermissionError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/home/jupyter/.local/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/jupyter/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 50, in fetch
    data = self.dataset.__getitems__(possibly_batched_index)
  File "/home/jupyter/.local/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 420, in __getitems__
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/home/jupyter/.local/lib/python3.10/site-packages/torch/utils/data/dataset.py", line 420, in <listcomp>
    return [self.dataset[self.indices[idx]] for idx in indices]
  File "/tmp/ipykernel_8828/3777491006.py", line 12, in __getitem__
    img = Image.open(f"/home/jupyter/datasphere/project/{row['Img_folder']}/{row['Img_path']}")
  File "/usr/local/lib/python3.10/dist-packages/PIL/Image.py", line 3227, in open
    fp = builtins.open(filename, "rb")
PermissionError: [Errno 13] Permission denied: '/home/jupyter/datasphere/project/images_path30k/29487.png'


In [None]:
type_id = pd.read_parquet('new_type_id.parquet.gzip')

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC
import seaborn as sns

def draw_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=type_id['typology'], yticklabels=['typology'])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()