## Importing necessary libraries and loading the dataset

In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# Mount Google Drive at /content/drive so files stored there can be read
# through the normal filesystem.
# NOTE(review): this relies on the google.colab package, so the cell is
# presumably only runnable inside a Colab runtime — confirm before reuse.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the soil dataset on the mounted Drive; 'train_labels.csv'
# and the 'train' image folder are resolved relative to it.
# NOTE(review): absolute, user-specific path — adjust when running elsewhere.
dataset_path = '/content/drive/MyDrive/MATHONGO VASU GOEL/soil_classification-2025'

In [None]:
# Read the label sheet, then attach the on-disk location of every training image.
labels_csv = os.path.join(dataset_path, 'train_labels.csv')
train_df = pd.read_csv(labels_csv)
train_df['image_path'] = [
    os.path.join(dataset_path, 'train', image_id)
    for image_id in train_df['image_id']
]

# Turn the soil-type names into integer class ids; `le` is kept so the ids
# can be mapped back to names (le.classes_) later on.
le = LabelEncoder()
encoded_labels = le.fit_transform(train_df['label'])
train_df['label_encoded'] = encoded_labels
num_classes = len(le.classes_)

## Train-Validation Split

In [None]:
# Hold out 20% of the labelled rows for validation. The split is stratified
# on the encoded label and uses a fixed seed so it is repeatable.
split_options = dict(
    test_size=0.2,
    stratify=train_df['label_encoded'],
    random_state=42,
)
train_data, val_data = train_test_split(train_df, **split_options)

## Define Transforms

In [26]:
# Per-channel statistics used for normalisation.
# NOTE(review): these look like the usual ImageNet mean/std — confirm.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: fixed 224x224 resize, tensor conversion, normalisation.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform = transforms.Compose(preprocessing_steps)


## Minimal Dataset Class

In [27]:
class SimpleSoilDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs from a DataFrame.

    The frame must provide two columns:
    ``image_path`` (path of the image file to open) and
    ``label_encoded`` (the class id returned alongside the image).
    """

    def __init__(self, df, transform):
        """Store the sample table and the per-image transform.

        Args:
            df: DataFrame with ``image_path`` and ``label_encoded`` columns.
            transform: Callable applied to the RGB PIL image of each sample.
        """
        # Re-index to 0..n-1 so positional indices coming from a DataLoader
        # line up with .loc lookups even when df is a shuffled/split subset.
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the sample at position ``idx``.

        Returns:
            A ``(image, label)`` tuple: the transformed image and the value of
            the ``label_encoded`` column for that row.
        """
        img_path = self.df.loc[idx, 'image_path']
        # Open inside a context manager so the underlying file handle is
        # closed deterministically; convert() produces an independent
        # in-memory copy that stays valid after the file is closed.
        with Image.open(img_path) as source_image:
            image = source_image.convert("RGB")
        image = self.transform(image)
        label = self.df.loc[idx, 'label_encoded']
        return image, label


## Dataloaders

In [28]:
# Both loaders use the same batch size; only the training loader shuffles.
batch_size = 32

train_dataset = SimpleSoilDataset(train_data, transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = SimpleSoilDataset(val_data, transform)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


## Simple CNN (ResNet18)

In [29]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained ResNet18 backbone. The `weights=` enum replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag loaded.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Swap the final fully-connected layer for one with `num_classes` outputs.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)




## Training and Evaluating F1-Scores

In [30]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Pin the report to every encoded class id. Without an explicit `labels`
# argument, classification_report raises ValueError when a class is missing
# from both the labels and the predictions, because the number of classes it
# finds no longer matches len(target_names).
report_labels = list(range(num_classes))

for epoch in range(5):
    # ---- training pass over the whole training set ----
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # ---- evaluate on the validation set ----
    model.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            # Predicted class = index of the largest output per sample.
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_labels.extend(labels.numpy())

    print(f"\nEpoch {epoch+1} Classification Report:")
    print(classification_report(
        val_labels,
        val_preds,
        labels=report_labels,
        target_names=le.classes_,
    ))



Epoch 1 Classification Report:
               precision    recall  f1-score   support

Alluvial soil       0.99      0.93      0.96       106
   Black Soil       0.92      0.98      0.95        46
    Clay soil       0.93      1.00      0.96        40
     Red soil       0.98      0.98      0.98        53

     accuracy                           0.96       245
    macro avg       0.95      0.97      0.96       245
 weighted avg       0.96      0.96      0.96       245


Epoch 2 Classification Report:
               precision    recall  f1-score   support

Alluvial soil       0.98      0.96      0.97       106
   Black Soil       0.94      1.00      0.97        46
    Clay soil       0.95      0.95      0.95        40
     Red soil       1.00      0.98      0.99        53

     accuracy                           0.97       245
    macro avg       0.97      0.97      0.97       245
 weighted avg       0.97      0.97      0.97       245


Epoch 3 Classification Report:
               pre

## Result
After 5 epochs, the highest per-class F1 score on the validation set is for Red soil, at 1.0.