In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/soil-classification/soil_classification-2025/sample_submission.csv
/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv
/kaggle/input/soil-classification/soil_classification-2025/test_ids.csv
/kaggle/input/soil-classification/soil_classification-2025/test/img_0f035b97.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_f13af256.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_15b41dbc.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_cfb4fc7a.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_683111fb.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_c4bd7b3e.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_4ccce0f8.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_86faa98d.jpg
/kaggle/input/soil-classification/soil_classification-2025/test/img_c448342c.jpg
/kaggle/input/soil-classification/soil_cla

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset, WeightedRandomSampler
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from tqdm import tqdm
from torchvision.models import ResNet50_Weights

torch.manual_seed(42)
np.random.seed(42)

In [3]:
INPUT_PATH = '/kaggle/input/soil-classification/soil_classification-2025/'
OUTPUT_PATH = '/kaggle/working'

TRAIN_CSV = os.path.join(INPUT_PATH, 'train_labels.csv')
TRAIN_IMG_DIR = os.path.join(INPUT_PATH, 'train')
TEST_CSV = os.path.join(INPUT_PATH, 'test_ids.csv')
TEST_IMG_DIR = os.path.join(INPUT_PATH, 'test')

In [None]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [None]:
class SoilDataset(Dataset):
    def __init__(self, img_dir, csv_file, transform=None):
        self.img_dir = img_dir
        self.df = pd.read_csv(csv_file)
        self.transform = transform
        self.classes = sorted(self.df['soil_type'].unique())
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.df.iloc[idx]['image_id'])
        image = Image.open(img_path).convert('RGB')
        label = self.class_to_idx[self.df.iloc[idx]['soil_type']] 
        if self.transform:
            image = self.transform(image) 
        return image, label

class TestDataset(Dataset):
    def __init__(self, img_dir, csv_file, transform=None):
        self.img_dir = img_dir
        self.df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.df.iloc[idx]['image_id'])
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
            
        return image, self.df.iloc[idx]['image_id']

In [6]:
full_dataset = SoilDataset(TRAIN_IMG_DIR, TRAIN_CSV, train_transform)

indices = np.arange(len(full_dataset))
stratify_labels = full_dataset.df['soil_type'].values

train_idx, val_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=stratify_labels,
    random_state=42
)

train_dataset = Subset(full_dataset, train_idx)
val_dataset = Subset(full_dataset, val_idx)
val_dataset.dataset.transform = val_transform

In [7]:
class_counts = full_dataset.df['soil_type'].value_counts().sort_index()
class_weights = 1. / torch.tensor(class_counts.values, dtype=torch.float)

train_labels = stratify_labels[train_idx]
sample_weights = class_weights[torch.tensor([
    full_dataset.class_to_idx[cls] for cls in train_labels
])]

sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

In [8]:
BATCH_SIZE = 64

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    sampler=sampler,
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

In [None]:
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, len(full_dataset.classes))

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 174MB/s]


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = class_weights.to(device)
model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, len(full_dataset.classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 167MB/s]


In [11]:
best_f1 = 0.0
NUM_EPOCHS = 25

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}'):
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    train_loss /= len(train_loader)
    val_loss /= len(val_loader)
    val_f1 = f1_score(all_labels, all_preds, average=None)
    min_f1 = np.min(val_f1)
    
    scheduler.step(min_f1)
    if min_f1 > best_f1:
        best_f1 = min_f1
        torch.save(model.state_dict(), os.path.join(OUTPUT_PATH, 'best_model.pth'))
    
    print(f'\nEpoch {epoch+1}')
    print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}')
    print('Class-wise F1 Scores:')
    for cls, score in zip(full_dataset.classes, val_f1):
        print(f'  {cls}: {score:.4f}')
    print(f'Minimum F1: {min_f1:.4f}')

Epoch 1/25: 100%|██████████| 16/16 [00:11<00:00,  1.44it/s]



Epoch 1
Train Loss: 0.9381 | Val Loss: 0.6969
Class-wise F1 Scores:
  Alluvial soil: 0.7746
  Black Soil: 0.8824
  Clay soil: 0.8125
  Red soil: 0.8403
Minimum F1: 0.7746


Epoch 2/25: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]



Epoch 2
Train Loss: 0.3031 | Val Loss: 0.2481
Class-wise F1 Scores:
  Alluvial soil: 0.8222
  Black Soil: 0.9677
  Clay soil: 0.8511
  Red soil: 0.8618
Minimum F1: 0.8222


Epoch 3/25: 100%|██████████| 16/16 [00:10<00:00,  1.60it/s]



Epoch 3
Train Loss: 0.1237 | Val Loss: 0.1366
Class-wise F1 Scores:
  Alluvial soil: 0.9400
  Black Soil: 0.9574
  Clay soil: 0.8989
  Red soil: 0.9720
Minimum F1: 0.8989


Epoch 4/25: 100%|██████████| 16/16 [00:10<00:00,  1.59it/s]



Epoch 4
Train Loss: 0.0566 | Val Loss: 0.1032
Class-wise F1 Scores:
  Alluvial soil: 0.9557
  Black Soil: 0.9787
  Clay soil: 0.9302
  Red soil: 0.9907
Minimum F1: 0.9302


Epoch 5/25: 100%|██████████| 16/16 [00:10<00:00,  1.57it/s]



Epoch 5
Train Loss: 0.0562 | Val Loss: 0.0958
Class-wise F1 Scores:
  Alluvial soil: 0.9505
  Black Soil: 0.9787
  Clay soil: 0.9195
  Red soil: 0.9907
Minimum F1: 0.9195


Epoch 6/25: 100%|██████████| 16/16 [00:09<00:00,  1.61it/s]



Epoch 6
Train Loss: 0.0590 | Val Loss: 0.0882
Class-wise F1 Scores:
  Alluvial soil: 0.9659
  Black Soil: 0.9787
  Clay soil: 0.9412
  Red soil: 1.0000
Minimum F1: 0.9412


Epoch 7/25: 100%|██████████| 16/16 [00:10<00:00,  1.51it/s]



Epoch 7
Train Loss: 0.0466 | Val Loss: 0.0873
Class-wise F1 Scores:
  Alluvial soil: 0.9659
  Black Soil: 0.9684
  Clay soil: 0.9524
  Red soil: 0.9811
Minimum F1: 0.9524


Epoch 8/25: 100%|██████████| 16/16 [00:10<00:00,  1.46it/s]



Epoch 8
Train Loss: 0.0259 | Val Loss: 0.0878
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9583
  Clay soil: 0.9756
  Red soil: 0.9905
Minimum F1: 0.9583


Epoch 9/25: 100%|██████████| 16/16 [00:11<00:00,  1.45it/s]



Epoch 9
Train Loss: 0.0515 | Val Loss: 0.1011
Class-wise F1 Scores:
  Alluvial soil: 0.9808
  Black Soil: 0.9583
  Clay soil: 0.9877
  Red soil: 0.9905
Minimum F1: 0.9583


Epoch 10/25: 100%|██████████| 16/16 [00:11<00:00,  1.43it/s]



Epoch 10
Train Loss: 0.0161 | Val Loss: 0.0688
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9787
  Clay soil: 0.9639
  Red soil: 1.0000
Minimum F1: 0.9639


Epoch 11/25: 100%|██████████| 16/16 [00:11<00:00,  1.34it/s]



Epoch 11
Train Loss: 0.0381 | Val Loss: 0.0588
Class-wise F1 Scores:
  Alluvial soil: 0.9856
  Black Soil: 0.9787
  Clay soil: 0.9877
  Red soil: 1.0000
Minimum F1: 0.9787


Epoch 12/25: 100%|██████████| 16/16 [00:12<00:00,  1.31it/s]



Epoch 12
Train Loss: 0.0404 | Val Loss: 0.0727
Class-wise F1 Scores:
  Alluvial soil: 0.9856
  Black Soil: 0.9787
  Clay soil: 0.9877
  Red soil: 1.0000
Minimum F1: 0.9787


Epoch 13/25: 100%|██████████| 16/16 [00:12<00:00,  1.27it/s]



Epoch 13
Train Loss: 0.0200 | Val Loss: 0.0593
Class-wise F1 Scores:
  Alluvial soil: 0.9856
  Black Soil: 0.9787
  Clay soil: 0.9877
  Red soil: 1.0000
Minimum F1: 0.9787


Epoch 14/25: 100%|██████████| 16/16 [00:12<00:00,  1.31it/s]



Epoch 14
Train Loss: 0.0233 | Val Loss: 0.0568
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9787
  Clay soil: 0.9639
  Red soil: 1.0000
Minimum F1: 0.9639


Epoch 15/25: 100%|██████████| 16/16 [00:11<00:00,  1.38it/s]



Epoch 15
Train Loss: 0.0228 | Val Loss: 0.0574
Class-wise F1 Scores:
  Alluvial soil: 0.9905
  Black Soil: 0.9684
  Clay soil: 1.0000
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 16/25: 100%|██████████| 16/16 [00:11<00:00,  1.38it/s]



Epoch 16
Train Loss: 0.0104 | Val Loss: 0.0684
Class-wise F1 Scores:
  Alluvial soil: 0.9905
  Black Soil: 0.9684
  Clay soil: 1.0000
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 17/25: 100%|██████████| 16/16 [00:11<00:00,  1.35it/s]



Epoch 17
Train Loss: 0.0135 | Val Loss: 0.0612
Class-wise F1 Scores:
  Alluvial soil: 0.9808
  Black Soil: 0.9684
  Clay soil: 0.9756
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 18/25: 100%|██████████| 16/16 [00:12<00:00,  1.31it/s]



Epoch 18
Train Loss: 0.0058 | Val Loss: 0.0755
Class-wise F1 Scores:
  Alluvial soil: 0.9808
  Black Soil: 0.9684
  Clay soil: 0.9756
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 19/25: 100%|██████████| 16/16 [00:12<00:00,  1.30it/s]



Epoch 19
Train Loss: 0.0060 | Val Loss: 0.0606
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9684
  Clay soil: 0.9639
  Red soil: 0.9905
Minimum F1: 0.9639


Epoch 20/25: 100%|██████████| 16/16 [00:12<00:00,  1.33it/s]



Epoch 20
Train Loss: 0.0053 | Val Loss: 0.0612
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9684
  Clay soil: 0.9639
  Red soil: 0.9905
Minimum F1: 0.9639


Epoch 21/25: 100%|██████████| 16/16 [00:11<00:00,  1.37it/s]



Epoch 21
Train Loss: 0.0117 | Val Loss: 0.0687
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9684
  Clay soil: 0.9639
  Red soil: 0.9905
Minimum F1: 0.9639


Epoch 22/25: 100%|██████████| 16/16 [00:11<00:00,  1.34it/s]



Epoch 22
Train Loss: 0.0094 | Val Loss: 0.0673
Class-wise F1 Scores:
  Alluvial soil: 0.9758
  Black Soil: 0.9684
  Clay soil: 0.9639
  Red soil: 0.9905
Minimum F1: 0.9639


Epoch 23/25: 100%|██████████| 16/16 [00:11<00:00,  1.34it/s]



Epoch 23
Train Loss: 0.0128 | Val Loss: 0.0645
Class-wise F1 Scores:
  Alluvial soil: 0.9808
  Black Soil: 0.9684
  Clay soil: 0.9756
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 24/25: 100%|██████████| 16/16 [00:12<00:00,  1.32it/s]



Epoch 24
Train Loss: 0.0065 | Val Loss: 0.0583
Class-wise F1 Scores:
  Alluvial soil: 0.9905
  Black Soil: 0.9684
  Clay soil: 1.0000
  Red soil: 0.9905
Minimum F1: 0.9684


Epoch 25/25: 100%|██████████| 16/16 [00:12<00:00,  1.32it/s]



Epoch 25
Train Loss: 0.0053 | Val Loss: 0.0689
Class-wise F1 Scores:
  Alluvial soil: 0.9808
  Black Soil: 0.9684
  Clay soil: 0.9756
  Red soil: 0.9905
Minimum F1: 0.9684


In [12]:
model.load_state_dict(torch.load(os.path.join(OUTPUT_PATH, 'best_model.pth')))
model.eval()

test_dataset = TestDataset(TEST_IMG_DIR, TEST_CSV, val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

predictions = []
image_ids = []
confidences = []

with torch.no_grad():
    for images, ids in tqdm(test_loader):
        images = images.to(device)
        outputs = model(images)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        conf, preds = torch.max(probs, 1)
        
        predictions.extend(preds.cpu().numpy())
        image_ids.extend(ids)
        confidences.extend(conf.cpu().numpy())

results = pd.DataFrame({
    'image_id': image_ids,
    'soil_type': [full_dataset.classes[p] for p in predictions]
})

results.to_csv(os.path.join(OUTPUT_PATH, 'submission.csv'), index=False)

100%|██████████| 6/6 [00:04<00:00,  1.21it/s]
