In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/soil-classification/soil_classification-2025/sample_submission.csv
/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv
/kaggle/input/soil-classification/soil_classification-2025/test_ids.csv
/kaggle/input/soil-classification/soil_classification-2025/test/img_0f035b97.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_f13af256.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_15b41dbc.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_cfb4fc7a.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_683111fb.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_c4bd7b3e.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_4ccce0f8.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_86faa98d.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_c448342c.jpg
/kaggle/input/soil-classification/soil_cla

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, classification_report
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm


In [3]:
df = pd.read_csv("/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv")
class_names = df["soil_type"].unique().tolist()
class_to_idx = {cls: idx for idx, cls in enumerate(class_names)}
df["label"] = df["soil_type"].map(class_to_idx)



In [4]:
df["soil_type"]

0       Alluvial soil
1       Alluvial soil
2       Alluvial soil
3       Alluvial soil
4       Alluvial soil
            ...      
1217       Black Soil
1218       Black Soil
1219       Black Soil
1220       Black Soil
1221       Black Soil
Name: soil_type, Length: 1222, dtype: object

In [5]:
df["soil_type"][700]

'Clay soil'

In [6]:
df["label"]

0       0
1       0
2       0
3       0
4       0
       ..
1217    3
1218    3
1219    3
1220    3
1221    3
Name: label, Length: 1222, dtype: int64

In [7]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  # for RGB images
])

In [8]:
class SoilDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.df.iloc[idx]['image_id'])
        image = Image.open(img_path).convert("RGB")
        label = self.df.iloc[idx]['label']
        if self.transform:
            image = self.transform(image)
        return image, label

In [9]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'])

train_dataset = SoilDataset(train_df, '/kaggle/input/soil-classification/soil_classification-2025/train', transform)
val_dataset   = SoilDataset(val_df, '/kaggle/input/soil-classification/soil_classification-2025/train', transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 4)  # 4 soil types
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 152MB/s]


In [11]:
def train(model, loader):
    model.train()
    total_loss = 0
    for imgs, labels in tqdm(loader):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def evaluate(model, loader):
    model.eval()
    preds, true = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu().numpy())
            true.extend(labels.numpy())
    return f1_score(true, preds, average=None), classification_report(true, preds, target_names=class_names)

# Epochs
for epoch in range(10):
    train_loss = train(model, train_loader)
    f1, report = evaluate(model, val_loader)
    print(f"Epoch {epoch+1} - Loss: {train_loss:.4f}")
    print("F1 per class:", f1)
    print("Min F1:", f1.min())
    print(report)


100%|██████████| 31/31 [02:31<00:00,  4.89s/it]


Epoch 1 - Loss: 0.3955
F1 per class: [0.96296296 0.8974359  0.98148148 0.95454545]
Min F1: 0.8974358974358975
               precision    recall  f1-score   support

Alluvial soil       0.95      0.98      0.96       106
    Clay soil       0.92      0.88      0.90        40
     Red soil       0.96      1.00      0.98        53
   Black Soil       1.00      0.91      0.95        46

     accuracy                           0.96       245
    macro avg       0.96      0.94      0.95       245
 weighted avg       0.96      0.96      0.95       245



100%|██████████| 31/31 [02:31<00:00,  4.89s/it]


Epoch 2 - Loss: 0.1132
F1 per class: [0.96682464 0.925      1.         0.98924731]
Min F1: 0.925
               precision    recall  f1-score   support

Alluvial soil       0.97      0.96      0.97       106
    Clay soil       0.93      0.93      0.93        40
     Red soil       1.00      1.00      1.00        53
   Black Soil       0.98      1.00      0.99        46

     accuracy                           0.97       245
    macro avg       0.97      0.97      0.97       245
 weighted avg       0.97      0.97      0.97       245



100%|██████████| 31/31 [02:30<00:00,  4.86s/it]


Epoch 3 - Loss: 0.0788
F1 per class: [0.97652582 0.93670886 1.         1.        ]
Min F1: 0.9367088607594937
               precision    recall  f1-score   support

Alluvial soil       0.97      0.98      0.98       106
    Clay soil       0.95      0.93      0.94        40
     Red soil       1.00      1.00      1.00        53
   Black Soil       1.00      1.00      1.00        46

     accuracy                           0.98       245
    macro avg       0.98      0.98      0.98       245
 weighted avg       0.98      0.98      0.98       245



100%|██████████| 31/31 [02:31<00:00,  4.89s/it]


Epoch 4 - Loss: 0.0545
F1 per class: [0.96261682 0.925      0.99065421 0.96629213]
Min F1: 0.925
               precision    recall  f1-score   support

Alluvial soil       0.95      0.97      0.96       106
    Clay soil       0.93      0.93      0.93        40
     Red soil       0.98      1.00      0.99        53
   Black Soil       1.00      0.93      0.97        46

     accuracy                           0.96       245
    macro avg       0.97      0.96      0.96       245
 weighted avg       0.96      0.96      0.96       245



100%|██████████| 31/31 [02:30<00:00,  4.85s/it]


Epoch 5 - Loss: 0.0328
F1 per class: [0.97196262 0.92307692 0.97247706 0.96629213]
Min F1: 0.9230769230769231
               precision    recall  f1-score   support

Alluvial soil       0.96      0.98      0.97       106
    Clay soil       0.95      0.90      0.92        40
     Red soil       0.95      1.00      0.97        53
   Black Soil       1.00      0.93      0.97        46

     accuracy                           0.96       245
    macro avg       0.96      0.95      0.96       245
 weighted avg       0.96      0.96      0.96       245



100%|██████████| 31/31 [02:31<00:00,  4.90s/it]


Epoch 6 - Loss: 0.0221
F1 per class: [0.97196262 0.92307692 0.98148148 0.97777778]
Min F1: 0.9230769230769231
               precision    recall  f1-score   support

Alluvial soil       0.96      0.98      0.97       106
    Clay soil       0.95      0.90      0.92        40
     Red soil       0.96      1.00      0.98        53
   Black Soil       1.00      0.96      0.98        46

     accuracy                           0.97       245
    macro avg       0.97      0.96      0.96       245
 weighted avg       0.97      0.97      0.97       245



100%|██████████| 31/31 [02:32<00:00,  4.92s/it]


Epoch 7 - Loss: 0.0328
F1 per class: [0.96744186 0.93670886 0.99065421 0.96629213]
Min F1: 0.9367088607594937
               precision    recall  f1-score   support

Alluvial soil       0.95      0.98      0.97       106
    Clay soil       0.95      0.93      0.94        40
     Red soil       0.98      1.00      0.99        53
   Black Soil       1.00      0.93      0.97        46

     accuracy                           0.97       245
    macro avg       0.97      0.96      0.97       245
 weighted avg       0.97      0.97      0.97       245



100%|██████████| 31/31 [02:33<00:00,  4.96s/it]


Epoch 8 - Loss: 0.0322
F1 per class: [0.96296296 0.92307692 1.         0.97777778]
Min F1: 0.9230769230769231
               precision    recall  f1-score   support

Alluvial soil       0.95      0.98      0.96       106
    Clay soil       0.95      0.90      0.92        40
     Red soil       1.00      1.00      1.00        53
   Black Soil       1.00      0.96      0.98        46

     accuracy                           0.97       245
    macro avg       0.97      0.96      0.97       245
 weighted avg       0.97      0.97      0.97       245



100%|██████████| 31/31 [02:28<00:00,  4.79s/it]


Epoch 9 - Loss: 0.0173
F1 per class: [0.97169811 0.96296296 1.         0.96703297]
Min F1: 0.9629629629629629
               precision    recall  f1-score   support

Alluvial soil       0.97      0.97      0.97       106
    Clay soil       0.95      0.97      0.96        40
     Red soil       1.00      1.00      1.00        53
   Black Soil       0.98      0.96      0.97        46

     accuracy                           0.98       245
    macro avg       0.98      0.98      0.98       245
 weighted avg       0.98      0.98      0.98       245



100%|██████████| 31/31 [02:28<00:00,  4.79s/it]


Epoch 10 - Loss: 0.0377
F1 per class: [0.98113208 0.97560976 0.99065421 0.96629213]
Min F1: 0.9662921348314606
               precision    recall  f1-score   support

Alluvial soil       0.98      0.98      0.98       106
    Clay soil       0.95      1.00      0.98        40
     Red soil       0.98      1.00      0.99        53
   Black Soil       1.00      0.93      0.97        46

     accuracy                           0.98       245
    macro avg       0.98      0.98      0.98       245
 weighted avg       0.98      0.98      0.98       245



In [12]:
test_df = pd.read_csv("/kaggle/input/soil-classification/soil_classification-2025/test_ids.csv")

class_names = ["Alluvial soil", "Clay soil", "Red soil", "Black Soil"]

# Dummy label column (not used but required by the dataset class)
test_df["label"] = 0  

# Create dataset and dataloader
test_dataset = SoilDataset(test_df, '/kaggle/input/soil-classification/soil_classification-2025/test', transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Make predictions
model.eval()
test_preds = []
with torch.no_grad():
    for imgs, _ in test_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        _, predicted = torch.max(outputs, 1)
        test_preds.extend(predicted.cpu().numpy())

# Assign predicted class names
test_df["soil_type"] = [class_names[i] for i in test_preds]

# Only include required columns and save to submission file
submission = test_df[["image_id", "soil_type"]]
submission.to_csv("submission.csv", index=False)