#### Team Leader: Yashodip More, Electrical Engineering, RC Patel Institute of Technology, Shirpur, Maharashtra – yashodipmore2004@gmail.com
#### Team Member: S.M. Sakthivel, AI & Data Science, Achariya College of Engineering Technology, Puducherry – s.m.sakthivelofficial@gmail.com
#### Team Member: Komal Kumavat, Electrical Engineering, RC Patel Institute of Technology, Shirpur, Maharashtra – komalkumavat025@gmail.com

# Load Labels and Prepare Dataset

In [1]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from torchvision import transforms
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader

# Reproducibility: a single seed drives both the train/val split and PyTorch's
# random number generator (used for initialising newly created layers and for
# DataLoader shuffling). Some GPU kernels may still introduce small
# run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Paths
train_dir = '/kaggle/input/soil-2-data/Soil Classification 2/train'
test_dir = '/kaggle/input/soil-2-data/Soil Classification 2/test'
label_file = '/kaggle/input/soil-2-data/Soil Classification 2/train_labels.csv'

# Load labels
df = pd.read_csv(label_file)

# Fail early, with a readable message, if the columns the dataset class reads
# ('image_id' and 'label') are not present in the label file.
missing_columns = {'image_id', 'label'} - set(df.columns)
assert not missing_columns, f"Label file is missing columns: {sorted(missing_columns)}"

# Split into train/val (80/20), keeping the label proportions equal in both parts
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=SEED)

# Transforms: resize to 224x224, convert to a tensor, then normalise each channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet stats
                         std=[0.229, 0.224, 0.225])
])

# Dataset class
class SoilDataset(Dataset):
    """Labeled image dataset backed by a DataFrame.

    Each row of ``dataframe`` supplies an ``image_id`` (file name relative to
    ``img_dir``) and a ``label`` (converted to ``int``). Indexing returns an
    ``(image, label)`` pair, where the image has been opened as RGB and passed
    through ``transform`` when one is provided.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe, self.img_dir, self.transform = dataframe, img_dir, transform

    def __len__(self):
        # One sample per DataFrame row.
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Positional lookup, so the DataFrame index left over from the
        # train/val split does not matter.
        row = self.dataframe.iloc[idx]
        target = int(row['label'])
        file_path = os.path.join(self.img_dir, row['image_id'])
        picture = Image.open(file_path).convert("RGB")
        sample = self.transform(picture) if self.transform else picture
        return sample, target

# Datasets: both splits read images from the training folder and share the
# same preprocessing pipeline.
train_dataset = SoilDataset(dataframe=train_df, img_dir=train_dir, transform=transform)
val_dataset = SoilDataset(dataframe=val_df, img_dir=train_dir, transform=transform)

# Loaders: training batches are shuffled, validation order is left untouched.
loader_batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=loader_batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=loader_batch_size, shuffle=False)


# Model Architecture and Training

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from sklearn.metrics import f1_score

# Use the GPU when CUDA reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# ResNet18 backbone initialised from pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm the installed version before switching.
model = models.resnet18(pretrained=True)

# Replace the classifier with a one-unit linear layer followed by a sigmoid,
# so the network emits a single value in (0, 1) per image
backbone_width = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(backbone_width, 1), nn.Sigmoid())

model = model.to(device)

# Binary cross-entropy applied to the sigmoid output; Adam updates every
# parameter of the network (backbone included) with a learning rate of 1e-4
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 144MB/s] 


#  Training & Evaluation Loop

In [3]:
def train_model(model, train_loader, val_loader, epochs=10):
    """Train ``model`` and report validation F1 after every epoch.

    Uses the notebook-level ``device``, ``criterion`` and ``optimizer``.

    Args:
        model: network whose output is compared against labels reshaped to
            one column, and thresholded at 0.5 for validation.
        train_loader: iterable of ``(images, labels)`` batches used for
            optimisation.
        val_loader: iterable of ``(images, labels)`` batches used for scoring.
        epochs: number of passes over ``train_loader``.

    Returns:
        None. One summary line per epoch is printed; the reported loss is the
        sum of the per-batch losses, not their mean.
    """
    for epoch in range(epochs):
        # ---- optimisation pass ----
        model.train()
        train_loss = 0
        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(device)
            # The loss receives float targets with one column, matching the
            # single-output head.
            targets = batch_labels.float().unsqueeze(1).to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_images), targets)
            batch_loss.backward()
            optimizer.step()

            train_loss += batch_loss.item()

        # ---- validation pass (no gradients tracked) ----
        model.eval()
        predicted, expected = [], []
        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                batch_images = batch_images.to(device)
                batch_labels = batch_labels.to(device)
                scores = model(batch_images)
                # Outputs above 0.5 count as class 1.
                hard_labels = (scores > 0.5).int().cpu().numpy()
                predicted.extend(hard_labels.flatten())
                expected.extend(batch_labels.cpu().numpy())

        f1 = f1_score(expected, predicted)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Val F1: {f1:.4f}")

# Fine-tune for 10 epochs; each epoch prints the summed training loss and the validation F1
train_model(model, train_loader, val_loader, epochs=10)


Epoch 1/10, Loss: 7.4676, Val F1: 0.9820
Epoch 2/10, Loss: 5.2175, Val F1: 0.9799
Epoch 3/10, Loss: 4.1162, Val F1: 0.9758
Epoch 4/10, Loss: 4.3309, Val F1: 0.9737
Epoch 5/10, Loss: 4.1723, Val F1: 0.9799
Epoch 6/10, Loss: 2.6939, Val F1: 0.9758
Epoch 7/10, Loss: 2.3075, Val F1: 0.9654
Epoch 8/10, Loss: 2.3578, Val F1: 0.9696
Epoch 9/10, Loss: 2.1439, Val F1: 0.9696
Epoch 10/10, Loss: 2.1301, Val F1: 0.9696


# Test Prediction + Submission File

In [6]:
# Load test image IDs (the test dataset class reads the 'image_id' column of this frame)
test_ids = pd.read_csv('/kaggle/input/soil-2-data/Soil Classification 2/test_ids.csv')

# Define test dataset class
class TestSoilDataset(Dataset):
    """Unlabeled image dataset backed by a DataFrame.

    Each row of ``dataframe`` supplies an ``image_id`` (file name relative to
    ``img_dir``). Indexing returns an ``(image, image_id)`` pair so that
    predictions can be matched back to their file names.

    Args:
        dataframe: frame with an ``image_id`` column.
        img_dir: directory that contains the image files.
        transform: optional callable applied to the RGB image.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        # One sample per DataFrame row.
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['image_id']
        # Resolve against the directory passed to the constructor rather than
        # a notebook-level global, so the dataset honours its own argument.
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, img_name

# Wrap the test ids in a dataset and an order-preserving loader
test_dataset = TestSoilDataset(test_ids, test_dir, transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Inference: switch to eval mode, disable gradient tracking, and turn each
# model output into a 0/1 label by thresholding at 0.5
model.eval()
predictions = []

with torch.no_grad():
    for batch, batch_names in test_loader:
        scores = model(batch.to(device))
        hard_labels = (scores > 0.5).int().cpu().numpy().flatten()
        predictions.extend(
            {'image_id': image_id, 'label': label}
            for image_id, label in zip(batch_names, hard_labels)
        )

# Write one row per test image to the submission file
submission_df = pd.DataFrame(predictions)
submission_df.to_csv('submission.csv', index=False)
print("Submission file saved as 'submission.csv'")


Submission file saved as 'submission.csv'
