# 🌱 Soil Image Classification Training - Annam.ai, IIT Ropar 🌱

This notebook fine-tunes a pre-trained ResNet18 model in PyTorch for the Soil Image Classification Challenge. The goal is to classify each image as containing soil (label 1) or not containing soil (label 0). Let's get started! 🚀

## 📚 Step 1: Import Libraries
We import the necessary libraries for data handling, model building, and visualization.

In [None]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split
from torchvision import transforms
from torchvision import models
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

## 🛠️ Step 2: Set Up Paths
Define the paths to the dataset directories and CSV files, and verify their existence.

In [None]:
# Dataset locations: images live in <base_dir>/train, labels in
# <base_dir>/train_labels.csv.
base_dir = "/content/soil_competition-2025-2"
train_dir = os.path.join(base_dir, "train")
train_csv = os.path.join(base_dir, "train_labels.csv")

# Verify dataset: report on both the label file and the image directory so a
# wrong base_dir is visible here rather than at the first image load.
print("Checking dataset files... 📂")
print(f"Train CSV exists: {os.path.exists(train_csv)}")
print(f"Train image directory exists: {os.path.isdir(train_dir)}")

# Load train labels and show the first rows as a sanity check.
train_labels = pd.read_csv(train_csv)
print("\nTrain labels sample: 📋")
print(train_labels.head())

## 🖼️ Step 3: Define Data Preprocessing
We define image transformations for training (with augmentation) to prepare images for ResNet18.

In [None]:
# Preprocessing pipeline for training images, applied in order:
#   1. resize to 224x224,
#   2. random horizontal flip (the only augmentation step),
#   3. conversion to a tensor,
#   4. per-channel normalization with the mean/std constants below.
_train_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
train_transforms = transforms.Compose(_train_steps)

## 📊 Step 4: Create Dataset Class
Custom dataset class for loading training images and labels.

In [None]:
class SoilDataset(Dataset):
    """Image dataset backed by a labels CSV.

    The CSV is read with pandas and must provide an ``image_id`` column
    (file name joined onto ``img_dir``) and a ``soil_label`` column.

    Args:
        csv_file: Path to the labels CSV.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.labels = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the labels table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is opened, converted to RGB and passed through
        ``transform`` when one was given. The label is returned as a plain
        ``int``.
        """
        # Fetch the row once instead of once per column.
        row = self.labels.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image_id'])
        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        # Cast explicitly so batches collate to integer class indices even if
        # pandas parsed the column as float or object.
        label = int(row['soil_label'])
        if self.transform is not None:
            image = self.transform(image)
        return image, label

## 🚚 Step 5: Load Data
Split the training data into training and validation sets, and create data loaders.

In [None]:
# Deterministic preprocessing for validation: same resize/normalization as the
# training pipeline but without the random flip, so validation loss is not
# perturbed by augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Two views over the same CSV/images that differ only in their transforms.
train_dataset = SoilDataset(train_csv, train_dir, train_transforms)
val_dataset = SoilDataset(train_csv, train_dir, val_transforms)

# 80/20 split; the remainder goes to validation so the sizes always sum up.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_split = random_split(train_dataset, [train_size, val_size])
# Keep the validation indices chosen by random_split, but read those samples
# through the non-augmented dataset.
val_subset = torch.utils.data.Subset(val_dataset, val_split.indices)

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

print(f"Training samples: {len(train_subset)} 📈")
print(f"Validation samples: {len(val_subset)} 📉")

## 👀 Step 6: Visualize Sample Images
Visualize a few training images to ensure the data is loaded correctly.

In [None]:
def imshow(img, title):
    """Display one normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width); it is
            transposed to (height, width, channels) for plotting.
        title: Text shown above the image.
    """
    # detach()/cpu() make this work for tensors that track gradients or live
    # on a GPU; Tensor.numpy() only accepts plain CPU tensors.
    pixels = img.detach().cpu().numpy().transpose((1, 2, 0))
    # Undo the per-channel normalization (same constants as the transforms).
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    pixels = std * pixels + mean
    # Clamp to the [0, 1] range expected by plt.imshow for float data.
    pixels = np.clip(pixels, 0, 1)
    plt.imshow(pixels)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Display up to three sample images from the first training batch.
images, labels = next(iter(train_loader))
# Guard against a batch that holds fewer than three images.
for i in range(min(3, len(images))):
    label_value = labels[i].item()
    # Label 1 means soil, label 0 means not soil; name the class accordingly
    # instead of always printing "Soil".
    label_name = "Soil" if label_value == 1 else "Not soil"
    imshow(images[i], f"Label: {label_value} ({label_name}) 🌍")

## 🧠 Step 7: Define the Model
Use a pre-trained ResNet18 model, modifying the final layer to output two classes.

In [None]:
# Load an ImageNet-pretrained ResNet18. torchvision >= 0.13 deprecates
# `pretrained=True` in favour of the `weights=` argument; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` selected. Fall back to the legacy keyword
# on older torchvision releases that lack the weights enum.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the final fully connected layer with a fresh 2-way classifier head.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes: soil (1), not soil (0)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded on: {device} ⚙️")

## 🔧 Step 8: Define Loss and Optimizer
Set up the loss function (CrossEntropyLoss) and optimizer (Adam) for training.

In [None]:
# Loss: cross-entropy over the model's two output logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## 🏋️ Step 9: Training Loop
Train the model for a few epochs, tracking training and validation losses.

In [None]:
num_epochs = 5  # Reduced for demo
train_losses = []  # mean training loss per epoch
val_losses = []    # mean validation loss per epoch

for epoch in range(num_epochs):
    # ---- Training pass: gradients enabled, weights updated per batch ----
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not count as much as a full one.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count
    # NaN (rather than ZeroDivisionError) when the loader yielded no samples.
    train_loss = running_loss / samples_seen if samples_seen else float("nan")
    train_losses.append(train_loss)

    # ---- Validation pass: no gradient tracking, no weight updates ----
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            batch_count = labels.size(0)
            running_loss += criterion(outputs, labels).item() * batch_count
            samples_seen += batch_count
    val_loss = running_loss / samples_seen if samples_seen else float("nan")
    val_losses.append(val_loss)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f} 📊")

## 📈 Step 10: Plot Loss Curves
Visualize the training and validation loss curves to assess model performance.

In [None]:
# Plot per-epoch training and validation loss on a shared axis.
plt.figure(figsize=(8, 4))
# Derive the x-axis from the recorded losses rather than num_epochs so the
# plot still works if training was interrupted or num_epochs was changed.
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', color='blue')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Val Loss', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss 📉')
plt.legend()
# Save before showing: show() may release the figure, leaving nothing to save.
plt.savefig('loss_curves.png')
plt.show()

## 💾 Step 11: Save the Trained Model
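Save the trained model's weights (its `state_dict`) for use in inference. To load them later, rebuild the same ResNet18 architecture with a 2-class final layer and call `load_state_dict`.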

In [None]:
# Persist only the learned parameters (the state dict), not the whole module.
trained_weights = model.state_dict()
torch.save(trained_weights, 'soil_classifier.pth')
print("Trained model saved as 'soil_classifier.pth' ✅")

## 📝 Notes
- The model uses a pre-trained ResNet18 with transfer learning for efficiency. 🧠
- Training is limited to 5 epochs for demonstration; consider increasing for better performance. ⏳
- Ensure the dataset paths are correct for your environment (e.g., `/content/soil_competition-2025-2`). 📂
- The trained model is saved as `soil_classifier.pth` for inference. 🚀