# Training ResNet-50 untuk Deteksi Penyakit Daun Jagung

Notebook ini berisi script lengkap untuk training model ResNet-50 dengan 3 kelas:
- **Hawar**: Daun jagung terkena penyakit hawar
- **Sehat**: Daun jagung dalam kondisi sehat
- **Karat**: Daun jagung terkena penyakit karat

**Target**: Training selama 50 epoch dengan evaluasi lengkap



## A. Import Libraries dan Setup


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import shutil
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when a GPU is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Menggunakan device: {device}')

# Fix the NumPy and PyTorch random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)


## B. Persiapan Dataset

Struktur dataset yang diharapkan:
```
dataset/
    train/
        hawar/
        sehat/
        karat/
    val/
        hawar/
        sehat/
        karat/
```

Jika dataset masih dalam format folder tunggal, kita akan membuat script untuk memisahkannya.


In [None]:
# Class labels used throughout the notebook
classes = ['hawar', 'sehat', 'karat']

# Dataset locations
dataset_root = 'dataset'  # Main dataset folder (now located at notebook/dataset)
train_dir = 'dataset/train'
val_dir = 'dataset/val'

# Make sure both split folders exist (exist_ok keeps re-runs harmless)
for split_dir in (train_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

# Create one subfolder per class inside each split
for split in ('train', 'val'):
    for cls in classes:
        os.makedirs(f'dataset/{split}/{cls}', exist_ok=True)

print("Struktur folder dataset sudah dibuat!")


In [None]:
# Split a single-folder dataset into train and validation folders
def split_dataset(source_dir, train_dir, val_dir, split_ratio=0.8, seed=None):
    """
    Copy images from per-class source folders into train and validation folders.

    Only subfolders of ``source_dir`` named ``Hawar``, ``Sehat`` or ``Karat`` are
    processed; they are written to the lowercase folders ``hawar``, ``sehat`` and
    ``karat`` under ``train_dir`` and ``val_dir``. Files are copied, never moved
    or deleted, so files left by an earlier run stay in place: clear the target
    folders before re-splitting to avoid one image ending up in both splits.

    Args:
        source_dir: Folder containing the ``Hawar``/``Sehat``/``Karat`` subfolders.
        train_dir: Destination root for the training split.
        val_dir: Destination root for the validation split.
        split_ratio: Fraction of each class that goes to the training split.
        seed: Optional seed for a private random generator. When ``None`` the
            global NumPy random state is used.
    """
    # Source folder name -> target class folder name
    class_mapping = {
        'Hawar': 'hawar',
        'Sehat': 'sehat',
        'Karat': 'karat'
    }
    image_extensions = ('.jpg', '.jpeg', '.png')

    # A private generator keeps a seeded split independent of other random draws
    rng = np.random if seed is None else np.random.RandomState(seed)

    # os.listdir() returns entries in arbitrary order; sorting makes the shuffle
    # reproducible for a given random state
    for class_folder in sorted(os.listdir(source_dir)):
        target_class = class_mapping.get(class_folder)
        source_path = os.path.join(source_dir, class_folder)
        if target_class is None or not os.path.isdir(source_path):
            continue

        images = sorted(
            f for f in os.listdir(source_path)
            if f.lower().endswith(image_extensions)
        )
        rng.shuffle(images)

        split_idx = int(len(images) * split_ratio)
        train_images = images[:split_idx]
        val_images = images[split_idx:]

        for split_root, names in ((train_dir, train_images), (val_dir, val_images)):
            target_dir = os.path.join(split_root, target_class)
            # Create the class folder on demand so the function also works
            # when the target tree has not been prepared beforehand
            os.makedirs(target_dir, exist_ok=True)
            for img in names:
                shutil.copy2(os.path.join(source_path, img),
                             os.path.join(target_dir, img))

        print(f'{class_folder}: {len(train_images)} train, {len(val_images)} val')

# Run the split: copy the images under dataset_root into the train and
# validation folders, 80% of each class going to training
split_dataset(
    source_dir=dataset_root,
    train_dir=train_dir,
    val_dir=val_dir,
    split_ratio=0.8,
)
print("Dataset sudah siap digunakan!")


## C. Preprocessing dan Data Augmentation

Menggunakan transformasi standar ImageNet untuk preprocessing:
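- Ukuran input akhir 224x224 (training: resize ke 256x256 lalu RandomResizedCrop 224; validation: resize langsung ke 224x224)
- Normalisasi dengan mean dan std ImageNet
- Augmentasi untuk training: RandomResizedCrop, RandomHorizontalFlip, RandomRotation, ColorJitter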


In [None]:
# ImageNet normalization parameters (one value per colour channel)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random crop to 224, random flip/rotation/colour
# jitter, then tensor conversion and normalization
train_steps = [
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: no augmentation, only resize, tensor conversion and normalization
val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
val_transform = transforms.Compose(val_steps)

print("Transformasi sudah didefinisikan!")


In [None]:
# Number of samples per batch for both loaders
batch_size = 32

# Read both splits with ImageFolder, each with its own transform pipeline
train_dataset = ImageFolder(root=train_dir, transform=train_transform)
val_dataset = ImageFolder(root=val_dir, transform=val_transform)

# Only the training loader shuffles; both use two worker processes
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Summary of what was loaded
summary_lines = (
    f'Jumlah kelas: {len(train_dataset.classes)}',
    f'Kelas: {train_dataset.classes}',
    f'Jumlah training samples: {len(train_dataset)}',
    f'Jumlah validation samples: {len(val_dataset)}',
    f'Batch size: {batch_size}',
)
for summary_line in summary_lines:
    print(summary_line)


## D. Load Model ResNet-50

Menggunakan ResNet-50 pretrained, membekukan (freeze) seluruh layer backbone, dan mengganti fully connected layer untuk 3 kelas output sehingga hanya layer tersebut yang dilatih.


In [None]:
# Start from a pretrained ResNet-50
model = models.resnet50(pretrained=True)

# Freeze every existing parameter
for backbone_param in model.parameters():
    backbone_param.requires_grad = False

# Build a new 3-output head with the same input width as the original classifier
num_features = model.fc.in_features
classifier_head = nn.Linear(num_features, 3)

# The new head is the only part left trainable
for head_param in classifier_head.parameters():
    head_param.requires_grad = True
model.fc = classifier_head

# Move the whole model to the selected device
model = model.to(device)

print("Model ResNet-50 sudah dimuat!")
# The count below covers trainable parameters only
print(f"Jumlah parameter: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")


## E. Setup Training

- Optimizer: Adam dengan learning rate 0.001 dan weight decay 1e-4
- Loss Function: CrossEntropyLoss
- Learning Rate Scheduler: StepLR (learning rate dikalikan 0.1 setiap 15 epoch)


In [None]:
# Classification loss
criterion = nn.CrossEntropyLoss()

# Adam over the model parameters with a small weight decay
optimizer = optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

# Multiply the learning rate by 0.1 every 15 scheduler steps
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=15,
    gamma=0.1,
)

print("Optimizer dan loss function sudah disetup!")


## F. Training Loop (50 Epochs)

Training model selama 50 epoch dengan tracking loss dan accuracy.


In [None]:
# Run one training epoch
def train_epoch(model, train_loader, criterion, optimizer, device):
    """
    Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The per-sample
            averaging below assumes it returns the mean loss of the batch
            (the default for ``nn.CrossEntropyLoss``) - confirm before using
            a different reduction.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean loss per sample and the
        accuracy as a percentage.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc='Training'):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics: weight each batch loss by its size so that a shorter
        # final batch does not count as much as a full one
        batch_count = labels.size(0)
        loss_sum += loss.item() * batch_count
        _, predicted = torch.max(outputs.detach(), 1)
        total += batch_count
        correct += (predicted == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError
        raise ValueError('train_loader yielded no samples')

    epoch_loss = loss_sum / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

# Run one validation pass
def validate_epoch(model, val_loader, criterion, device):
    """
    Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The per-sample
            averaging below assumes it returns the mean loss of the batch
            (the default for ``nn.CrossEntropyLoss``) - confirm before using
            a different reduction.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean loss per sample and the
        accuracy as a percentage.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc='Validation'):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight each batch loss by its size so that a shorter final
            # batch does not count as much as a full one
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            _, predicted = torch.max(outputs, 1)
            total += batch_count
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError
        raise ValueError('val_loader yielded no samples')

    epoch_loss = loss_sum / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

print("Fungsi training dan validation sudah didefinisikan!")


In [None]:
# Number of training epochs
num_epochs = 50

# Per-epoch history of loss and accuracy
train_losses = []
train_accs = []
val_losses = []
val_accs = []

# Best validation accuracy so far and a snapshot of the weights that reached it
best_val_acc = 0.0
best_model_state = None

print("=" * 50)
print(f"MEMULAI TRAINING - {num_epochs} EPOCH")
print("=" * 50)

for epoch in range(num_epochs):
    print(f'\nEpoch [{epoch+1}/{num_epochs}]')
    print('-' * 50)

    # Training
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Validation
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # Update learning rate
    scheduler.step()

    # Keep the best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() holds references to the live tensors and dict.copy() is
        # shallow, so a plain copy would keep changing as training continues.
        # Cloning every tensor freezes the weights of this epoch.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
    # Read after scheduler.step(), so this is the rate the next epoch will use
    print(f'Learning Rate: {scheduler.get_last_lr()[0]:.6f}')

print("\n" + "=" * 50)
print("TRAINING SELESAI!")
print("=" * 50)
print(f'Best Validation Accuracy: {best_val_acc:.2f}%')


## G. Simpan Model

Menyimpan model terbaik ke file `model_resnet50.pth`


In [None]:
# Where the weights are written; the folder is created when missing
model_save_path = '../model/model_resnet50.pth'
os.makedirs('../model', exist_ok=True)

# Restore the best recorded weights, if any, before saving
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Save only the state dict (weights), not the whole model object
torch.save(obj=model.state_dict(), f=model_save_path)
print(f'Model berhasil disimpan ke: {model_save_path}')


## H. Visualisasi Training History

Menampilkan grafik loss dan accuracy untuk training dan validation.


In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One entry per panel:
# (train series, train label, validation series, validation label, y label, title)
panels = [
    (train_losses, 'Train Loss', val_losses, 'Validation Loss',
     'Loss', 'Training dan Validation Loss'),
    (train_accs, 'Train Accuracy', val_accs, 'Validation Accuracy',
     'Accuracy (%)', 'Training dan Validation Accuracy'),
]

for ax, panel in zip(axes, panels):
    train_series, train_label, val_series, val_label, y_label, title = panel
    ax.plot(train_series, label=train_label, color='blue', linewidth=2)
    ax.plot(val_series, label=val_label, color='red', linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Metrics of the last epoch (not necessarily the best one)
print(f'\nFinal Training Accuracy: {train_accs[-1]:.2f}%')
print(f'Final Validation Accuracy: {val_accs[-1]:.2f}%')


## I. Evaluasi dengan Confusion Matrix

Menampilkan confusion matrix dan classification report untuk evaluasi detail.


In [None]:
# Collect predictions and true labels for the whole validation set
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in tqdm(val_loader, desc='Evaluating'):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score per sample is the predicted class
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Class names in the order ImageFolder assigned the label indices
class_names = train_dataset.classes

# Confusion matrix: true labels first, predictions second
cm = confusion_matrix(all_labels, all_preds)

# Draw the matrix as an annotated heatmap
plt.figure(figsize=(10, 8))
heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Jumlah'},
)
sns.heatmap(cm, **heatmap_options)
plt.title('Confusion Matrix - Validation Set', fontsize=16, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.show()

# Per-class precision, recall and F1
banner = "=" * 50
print("\n" + banner)
print("CLASSIFICATION REPORT")
print(banner)
print(classification_report(all_labels, all_preds, target_names=class_names))


## J. Contoh Prediksi Satu Gambar Test

Menguji model dengan satu gambar dari validation set dan menampilkan prediksi dengan confidence.


In [None]:
# Take the first batch of the validation loader
model.eval()
data_iter = iter(val_loader)
images, labels = next(data_iter)

# Use the first image of that batch, keeping the batch dimension for the model
true_label = labels[0].item()
img = images[0:1].to(device)

# Predict: softmax over the class scores, then pick the most likely class
with torch.no_grad():
    outputs = model(img)
    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
    confidence, predicted = torch.max(probabilities, 0)

# Turn indices into class names and the probability into a percentage
true_class = class_names[true_label]
predicted_class = class_names[predicted.item()]
confidence_score = confidence.item() * 100

# Undo the normalization so the image can be displayed
def denormalize(tensor, mean, std):
    """
    Reverse a per-channel normalization in place.

    Each channel of ``tensor`` is multiplied by its ``std`` entry and then
    shifted by its ``mean`` entry. The input tensor itself is modified and
    returned, so pass a clone when the original must be kept.
    """
    for channel, shift, scale in zip(tensor, mean, std):
        channel.mul_(scale)
        channel.add_(shift)
    return tensor

# Denormalize a copy of the first image and clip it to [0, 1] for display
img_vis = torch.clamp(denormalize(images[0].clone(), mean, std), 0, 1)

# Bar data: class probabilities, with the predicted class highlighted in red
probs = probabilities.cpu().numpy()
predicted_index = predicted.item()
colors = ['gray'] * len(class_names)
colors[predicted_index] = 'red'

# Left panel shows the image, right panel the per-class probabilities
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
# imshow expects channels last, the tensor stores them first
plt.imshow(img_vis.permute(1, 2, 0))
plt.title(f'True Label: {true_class}', fontsize=14, fontweight='bold')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.barh(class_names, probs, color=colors)
plt.xlabel('Probability', fontsize=12)
plt.title(f'Predicted: {predicted_class}\nConfidence: {confidence_score:.2f}%',
          fontsize=14, fontweight='bold')
plt.xlim(0, 1)

plt.tight_layout()
plt.show()

# Text summary of the same prediction
print(f"\nTrue Label: {true_class}")
print(f"Predicted Label: {predicted_class}")
print(f"Confidence: {confidence_score:.2f}%")
print(f"Correct: {'✓' if predicted_class == true_class else '✗'}")


## K. Unit Test - Load Model dan Prediksi

Unit test untuk memastikan model dapat dimuat dan melakukan prediksi dengan benar.


In [None]:
# Smoke test: reload the saved weights and classify one image read from disk
print("=" * 50)
print("UNIT TEST - Load Model dan Prediksi")
print("=" * 50)

# Rebuild the architecture without pretrained weights, attach a 3-output head,
# then load the weights saved earlier
test_model = models.resnet50(pretrained=False)
num_features = test_model.fc.in_features
test_model.fc = nn.Linear(num_features, 3)
saved_weights = torch.load('../model/model_resnet50.pth', map_location=device)
test_model.load_state_dict(saved_weights)
test_model = test_model.to(device)
test_model.eval()

print("✓ Model berhasil dimuat")

# Same preprocessing as the validation pipeline
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Pick the first image file found while walking the validation folder
test_img_path = next(
    (
        os.path.join(root, file)
        for root, dirs, files in os.walk(val_dir)
        for file in files
        if file.lower().endswith(('.jpg', '.jpeg', '.png'))
    ),
    None,
)

if not test_img_path:
    print("✗ Tidak ada gambar test yang ditemukan")
else:
    # Load the image as RGB and add the batch dimension the model expects
    img = Image.open(test_img_path).convert('RGB')
    img_tensor = test_transform(img).unsqueeze(0).to(device)

    # Predict: softmax over the class scores, then pick the most likely class
    with torch.no_grad():
        outputs = test_model(img_tensor)
        probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
        confidence, predicted = torch.max(probabilities, 0)

    confidence_score = confidence.item() * 100
    predicted_class = class_names[predicted.item()]

    print(f"✓ Gambar test: {os.path.basename(test_img_path)}")
    print(f"✓ Prediksi: {predicted_class}")
    print(f"✓ Confidence: {confidence_score:.2f}%")
    print(f"✓ Probabilitas per kelas:")
    for i, cls in enumerate(class_names):
        print(f"    {cls}: {probabilities[i].item()*100:.2f}%")

print("\n" + "=" * 50)
print("UNIT TEST SELESAI")
print("=" * 50)


## L. Kesimpulan

Training model ResNet-50 untuk deteksi penyakit daun jagung telah selesai dengan hasil:
- **Total Epoch**: 50
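- **Best Validation Accuracy**: lihat nilai `best_val_acc` yang dicetak di akhir training (placeholder f-string tidak dievaluasi di dalam sel Markdown)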
- **Model tersimpan di**: `../model/model_resnet50.pth`

Model siap digunakan untuk inference di aplikasi Flask!
