In [1]:
import pandas as pd
import numpy as np
import os,shutil,scipy,cv2

from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from torchvision import models, transforms as T
from torch.utils.data import Dataset, DataLoader
import torch

from PIL import Image

from torchvision import datasets
from torchvision import transforms as T

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

In [2]:
img_dir = "../raw/images/Images"


TESTSPLIT = 0.2

classes = len(os.listdir(img_dir)) # 120
np.random.seed(42)

In [3]:
def parse_directory(img_dir):
    data = []
    for breed in os.listdir(img_dir):
        breed_path = os.path.join(img_dir, breed)
        for image in os.listdir(breed_path):
            image_path = os.path.join(breed_path, image)
            race = "_".join(breed.split('-')[1:])
            
            data.append([image_path, race])
    
    df = pd.DataFrame(data, columns=['image_path', 'breed'])
    return df

In [4]:
folder_to_breed = {}
for breed in os.listdir(img_dir):
    breed_path = os.path.join(img_dir, breed)
    race = "_".join(breed.split('-')[1:])
    race = " ".join(race.split('_'))
    race = race.lower()
    folder_to_breed[breed] = race

In [5]:
df = parse_directory(img_dir)
df = shuffle(df, random_state=42)
train_df, val_df = train_test_split(df, test_size=TESTSPLIT, stratify=df['breed'], random_state=42)

In [6]:
def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001) 

In [7]:
SIZE = 224 # of images
BATCH_SIZE = 16
EPOCHS = 3
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
data_transforms = {
    'train': T.Compose([
        T.Resize(256),
        T.CenterCrop(SIZE),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': T.Compose([
        T.Resize(256),
        T.CenterCrop(SIZE),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

train_dataset = datasets.ImageFolder(img_dir, data_transforms['train'], is_valid_file=lambda x: x in train_df['image_path'].values)
train_dataset.class_to_idx = {folder_to_breed[k]: v for k, v in train_dataset.class_to_idx.items()}

val_dataset = datasets.ImageFolder(img_dir, data_transforms['val'], is_valid_file=lambda x: x in val_df['image_path'].values)
val_dataset.class_to_idx = {folder_to_breed[k]: v for k, v in val_dataset.class_to_idx.items()}

dataloaders = {
    'train': DataLoader(train_dataset, 
                        batch_size=BATCH_SIZE, 
                        shuffle=True, 
                        ),
    'val': DataLoader(  val_dataset, 
                        batch_size=BATCH_SIZE, 
                        shuffle=False, 
                        ),
}

dataset_sizes = {'train': len(train_dataset), 
                 'val': len(val_dataset)}

class_names = [folder_to_breed[folder_name] for folder_name in sorted(os.listdir(img_dir))]



In [9]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=EPOCHS, verbose=True):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    if verbose:
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [10]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}, actual: {class_names[labels[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [11]:
def init_traing(torch_model=torchvision.models.resnet50, 
                freeze_layers=True, num_classes=len(class_names), 
                num_epochs=EPOCHS, 
                lr=0.001, 
                momentum=0.9, 
                step_size=7, 
                gamma=0.1,
                verbose=True):
    
    model_ft = torch_model(weights='DEFAULT')
    
    if freeze_layers:
        for param in model_ft.parameters():
            param.requires_grad = False

    num_ftrs = model_ft.fc.in_features # number of fully connected layer input features (last layer)

    # keep the same number of inputs, but change the number of outputs to the number of classes in our dataset
    model_ft.fc = nn.Linear(num_ftrs, num_classes) # fc = fully connected layer, in efficientnet it's called classifier

    model_ft = model_ft.to(DEVICE)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=lr, momentum=momentum)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=step_size, gamma=gamma)

    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=num_epochs, verbose=verbose)

    return model_ft

In [12]:
compare_freezing_layers = False
if compare_freezing_layers:
    since = time.time()
    model_frozen = init_traing(freeze_layers=True, num_epochs=1, verbose=False)
    time_elapsed1 = time.time() - since
    print(f"Time when frozen: {round(time_elapsed1, 2)} seconds\n")
    
    since = time.time()
    model_not_frozen = init_traing(freeze_layers=False, num_epochs=1, verbose=False)
    time_elapsed2 = time.time() - since
    print(f"Time when not frozen: {round(time_elapsed2, 2)} seconds\n")

    
    print("Speedup when freezing layers:", time_elapsed2/time_elapsed1)

In [13]:
start_train = True
if start_train:
    model_finetuned = init_traing(freeze_layers=True, num_epochs=EPOCHS, verbose=True)
    
    
    visualize_model(model_finetuned)

    plt.ioff()
    plt.show()

Epoch 0/2
----------
train Loss: 3.6398 Acc: 0.5024
val Loss: 2.5101 Acc: 0.7835

Epoch 1/2
----------
train Loss: 2.0214 Acc: 0.7547
val Loss: 1.4112 Acc: 0.8333

Epoch 2/2
----------
train Loss: 1.3326 Acc: 0.8089
val Loss: 0.9257 Acc: 0.8632

Training complete in 11m 3s
Best val Acc: 0.863217


NameError: name 'plt' is not defined

In [15]:
torch.save(model_finetuned.state_dict(), 'model.pth')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import random

def evaluate_model_grid(model, dataloader, class_names, device, num_images=64):
    """
    Hiển thị grid 8x8 ảnh với nhãn thật và nhãn dự đoán
    
    Args:
        model: Model đã train
        dataloader: DataLoader (val hoặc test)
        class_names: List tên các class
        device: Device (cuda/cpu)
        num_images: Số lượng ảnh hiển thị (mặc định 64 = 8x8)
    """
    model.eval()
    
    # Lấy ngẫu nhiên các ảnh từ dataloader
    all_images = []
    all_labels = []
    all_preds = []
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            # Lưu lại
            all_images.extend(inputs.cpu())
            all_labels.extend(labels.cpu())
            all_preds.extend(preds.cpu())
            
            # Dừng khi đủ ảnh
            if len(all_images) >= num_images:
                break
    
    # Chọn ngẫu nhiên num_images ảnh
    indices = random.sample(range(len(all_images)), min(num_images, len(all_images)))
    
    # Tạo figure 8x8
    rows, cols = 8, 8
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20))
    fig.suptitle('Model Evaluation: Predicted vs Actual Labels', fontsize=20, y=0.995)
    
    for idx, ax in enumerate(axes.flat):
        if idx < len(indices):
            img_idx = indices[idx]
            img = all_images[img_idx]
            true_label = all_labels[img_idx].item()
            pred_label = all_preds[img_idx].item()
            
            # Denormalize ảnh
            img = img.numpy().transpose((1, 2, 0))
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            img = std * img + mean
            img = np.clip(img, 0, 1)
            
            # Hiển thị ảnh
            ax.imshow(img)
            ax.axis('off')
            
            # Tạo title với màu sắc
            is_correct = (true_label == pred_label)
            color = 'green' if is_correct else 'red'
            
            # Title: Pred / True
            title = f"Pred: {class_names[pred_label][:15]}\nTrue: {class_names[true_label][:15]}"
            ax.set_title(title, fontsize=8, color=color, weight='bold')
        else:
            ax.axis('off')
    
    plt.tight_layout()
    plt.show()
    
    # Tính accuracy
    correct = sum([1 for i in indices if all_preds[i] == all_labels[i]])
    accuracy = correct / len(indices) * 100
    print(f"\n{'='*60}")
    print(f"Accuracy trên {len(indices)} ảnh: {accuracy:.2f}%")
    print(f"Số ảnh dự đoán đúng: {correct}/{len(indices)}")
    print(f"{'='*60}")

# Sử dụng hàm
# Đảm bảo đã import matplotlib
import matplotlib.pyplot as plt

# Gọi hàm với model đã train
evaluate_model_grid(
    model=model_finetuned,  # Model của bạn
    dataloader=dataloaders['val'],  # Validation dataloader
    class_names=class_names,  # List tên class
    device=DEVICE,  # Device
    num_images=64  # 8x8 = 64 ảnh
)