In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import zipfile

In [2]:
#dataset for training
!gdown https://drive.google.com/uc?id=1F5aAY3sl3X8otpcknJKCEGq-g0SLmZWg

Downloading...
From: https://drive.google.com/uc?id=1F5aAY3sl3X8otpcknJKCEGq-g0SLmZWg
To: /content/foto.zip
100% 715M/715M [00:06<00:00, 112MB/s]


In [3]:
os.makedirs('Dataset', exist_ok=True)
local_zip = '/content/foto.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/Dataset/')
zip_ref.close()

In [4]:
# Set device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
class CustomDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform
        self.classes = list(set(labels))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path = self.data[index]
        label = self.labels[index]

        image = Image.open(image_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label

dataset_path = '/content/Dataset'

# List untuk menyimpan path ke setiap gambar dan label
data = []
labels = []

# Membaca setiap folder dalam dataset
for person_folder in os.listdir(dataset_path):
    person_path = os.path.join(dataset_path, person_folder)

    # Membaca setiap gambar dalam folder person
    for image_file in os.listdir(person_path):
        image_path = os.path.join(person_path, image_file)

        # Menambahkan path gambar dan label ke list
        data.append(image_path)
        labels.append(person_folder)

# Membagi data menjadi train (70%), validation (15%), dan test (15%)
train_data, temp_data, train_labels, temp_labels = train_test_split(
    data, labels, test_size=0.3, random_state=42, stratify=labels)

val_data, test_data, val_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels)

# Membuat dataset dan dataloader untuk train
train_dataset = CustomDataset(train_data, train_labels, transform=transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)

# Membuat dataset dan dataloader untuk validation
val_dataset = CustomDataset(val_data, val_labels, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]))
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# Membuat dataset dan dataloader untuk test
test_dataset = CustomDataset(test_data, test_labels, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)




In [6]:
# Load pre-trained VGG16 model
vgg = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 119MB/s]


In [7]:
# Freeze convolutional layers
for param in vgg.features.parameters():
    param.requires_grad = False

# Store unique classes in the dataset
num_classes = len(train_dataset.classes)

# Modify the classifier
vgg.classifier[-1] = nn.Sequential(
    nn.Linear(vgg.classifier[-1].in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.5),  # Add dropout with 50% probability
    nn.Linear(512, num_classes)
)

# Move the model to the device
vgg = vgg.to(device)

In [12]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(vgg.parameters(), lr=0.01)

In [13]:
# Lists to store losses
train_losses = []
val_losses = []
test_losses = []

# Lists to store accuracies
train_accuracies = []
val_accuracies = []
test_accuracies = []

num_epochs = 10

for epoch in range(num_epochs):
    vgg.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    #Training loop
    for inputs, labels_tuple in train_loader:
        inputs = inputs.to(device)

        # Assuming labels are strings, convert them to numerical labels
        labels = [train_dataset.classes.index(label) for label in labels_tuple]
        labels = torch.tensor(labels).to(device)

        optimizer.zero_grad()

        outputs = vgg(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    # Calculate average train loss and accuracy
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct_predictions / total_samples

    # Save values for plotting
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    # Print average training loss and accuracy
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

    # Validation loop
    vgg.eval()
    val_running_loss = 0.0
    val_correct_predictions = 0
    val_total_samples = 0
    val_predicted_labels = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels_tuple in val_loader:
            inputs = inputs.to(device)

            # Assuming labels are strings, convert them to numerical labels
            labels = [val_dataset.classes.index(label) for label in labels_tuple]
            labels = torch.tensor(labels).to(device)

            outputs = vgg(inputs)
            val_loss = criterion(outputs, labels)
            val_running_loss += val_loss.item()

            # Calculate accuracy
            _, val_predicted = torch.max(outputs, 1)
            val_total_samples += labels.size(0)
            val_correct_predictions += (val_predicted == labels).sum().item()

            # Append true labels and predicted labels
            true_labels.extend(labels.cpu().numpy())
            val_predicted_labels.extend(val_predicted.cpu().numpy())

    # Calculate average validation loss and accuracy
    val_epoch_loss = val_running_loss / len(val_loader)
    val_epoch_accuracy = val_correct_predictions / val_total_samples

    # Save values for plotting
    val_losses.append(val_epoch_loss)
    val_accuracies.append(val_epoch_accuracy)

    # Print average validation loss and accuracy
    print(f"Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_accuracy:.4f}")

    # Test loop
    vgg.eval()
    test_running_loss = 0.0
    test_correct_predictions = 0
    test_total_samples = 0
    true_labels_test = []
    test_predicted_labels = []

    with torch.no_grad():
        for inputs, labels_tuple in test_loader:
            inputs = inputs.to(device)

            # Assuming labels are strings, convert them to numerical labels
            labels = [test_dataset.classes.index(label) for label in labels_tuple]
            labels = torch.tensor(labels).to(device)

            outputs = vgg(inputs)
            test_loss = criterion(outputs, labels)
            test_running_loss += test_loss.item()

            # Calculate accuracy
            _, test_predicted = torch.max(outputs, 1)
            test_total_samples += labels.size(0)
            test_correct_predictions += (test_predicted == labels).sum().item()

            # Save true labels and predicted labels for each batch
            true_labels_test.extend(labels.cpu().numpy())
            test_predicted_labels.extend(test_predicted.cpu().numpy())

    # Calculate average test loss and accuracy
    test_epoch_loss = test_running_loss / len(test_loader)
    test_epoch_accuracy = test_correct_predictions / test_total_samples

    # Save values for plotting
    test_losses.append(test_epoch_loss)
    test_accuracies.append(test_epoch_accuracy)

    # Print average test loss and accuracy
    print(f"Test Loss: {test_epoch_loss:.4f}, Test Accuracy: {test_epoch_accuracy:.4f}")




Epoch 1/10, Loss: 367.2083, Accuracy: 0.2755
Validation Loss: 3.2399, Validation Accuracy: 0.1587
Test Loss: 3.0754, Test Accuracy: 0.1746
Epoch 2/10, Loss: 3.4801, Accuracy: 0.2959
Validation Loss: 1.5290, Validation Accuracy: 0.3492
Test Loss: 1.5131, Test Accuracy: 0.3810
Epoch 3/10, Loss: 1.6923, Accuracy: 0.4082
Validation Loss: 0.9384, Validation Accuracy: 0.6667
Test Loss: 0.8676, Test Accuracy: 0.6508
Epoch 4/10, Loss: 1.3447, Accuracy: 0.4932
Validation Loss: 0.7425, Validation Accuracy: 0.7143
Test Loss: 0.6245, Test Accuracy: 0.7143
Epoch 5/10, Loss: 1.2224, Accuracy: 0.5884
Validation Loss: 1.4636, Validation Accuracy: 0.5238
Test Loss: 1.2414, Test Accuracy: 0.4921
Epoch 6/10, Loss: 1.1688, Accuracy: 0.6327
Validation Loss: 0.3735, Validation Accuracy: 0.9048
Test Loss: 0.3497, Test Accuracy: 0.9365
Epoch 7/10, Loss: 1.5134, Accuracy: 0.6633
Validation Loss: 0.8109, Validation Accuracy: 0.7460
Test Loss: 0.6152, Test Accuracy: 0.7619
Epoch 8/10, Loss: 1.0500, Accuracy: 0.6

In [14]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Convert the lists to numpy arrays for the test set
true_labels_test = np.array(true_labels_test)
test_predicted_labels = np.array(test_predicted_labels)

# Calculate confusion matrix for the test set
conf_matrix_test = confusion_matrix(true_labels_test, test_predicted_labels)

# Print confusion matrix for the test set
print("Confusion Matrix - Test Set:")
print(conf_matrix_test)

# Calculate and print classification report for the test set
class_report_test = classification_report(true_labels_test, test_predicted_labels, target_names=test_dataset.classes)
print("Classification Report - Test Set:")
print(class_report_test)

Confusion Matrix - Test Set:
[[11  0  0  0  0  0]
 [ 0 11  0  0  0  0]
 [ 0  0 11  0  0  0]
 [ 0  0  0 10  0  0]
 [ 0  0  0  0  9  1]
 [ 0  0  0  0  0 10]]
Classification Report - Test Set:
                      precision    recall  f1-score   support

Mitra Novitri Waruwu       1.00      1.00      1.00        11
        Heri Gunawan       1.00      1.00      1.00        11
 Muhammad Ghulamzaki       1.00      1.00      1.00        11
        Valentin Gea       1.00      1.00      1.00        10
      Edy fitriyanto       1.00      0.90      0.95        10
       Fanesa Dwiana       0.91      1.00      0.95        10

            accuracy                           0.98        63
           macro avg       0.98      0.98      0.98        63
        weighted avg       0.99      0.98      0.98        63



In [None]:
import matplotlib.pyplot as plt

# Plot kurva loss
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss')
plt.plot(range(1, num_epochs + 1), test_losses, label='Test Loss')  # Tambahkan kurva loss untuk set uji
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot kurva akurasi
plt.plot(range(1, num_epochs + 1), train_accuracies, label='Training Accuracy')
plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy')
plt.plot(range(1, num_epochs + 1), test_accuracies, label='Test Accuracy')  # Tambahkan kurva akurasi untuk set uji
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Mendefinisikan path untuk menyimpan model
os.makedirs('train_model', exist_ok=True)
model_path = os.path.join('train_model', 'model.pth')

# Menyimpan model
torch.save(vgg.state_dict(), model_path)