In [18]:
# configuring the path of Kaggle.json file
!pip install kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [19]:
# 1. Imports and Setup
import os
import numpy as np
import pandas as pd
import torch
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler

In [20]:
# 2. Paths
# Every competition input lives under one dataset root; deriving the four paths
# from it means a dataset move or rename needs a single edit.
data_root = '/kaggle/input/soil-classification-part-2/soil_competition-2025'
train_csv = f'{data_root}/train_labels.csv'
test_csv = f'{data_root}/test_ids.csv'
train_dir = f'{data_root}/train'
test_dir = f'{data_root}/test'


In [21]:
# 3. Read CSVs
# Load the two index tables (train first, then test) from their CSV paths.
train_df, test_df = map(pd.read_csv, (train_csv, test_csv))

In [22]:
# 4. Preprocessing: resize and normalize for the ImageNet-pretrained ResNet
# The torchvision pretrained weights were trained on inputs standardized with
# the ImageNet per-channel mean/std, so the same statistics are applied here;
# a generic 0.5/0.5 scaling feeds the network a distribution it was not trained on.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # fixed square size; aspect ratio is not preserved
    transforms.ToTensor(),          # PIL image -> float tensor scaled to [0, 1]
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

In [23]:

# 5. Custom Dataset

class SoilDataset(Dataset):
    """Image dataset backed by a DataFrame of image ids.

    Each item is ``(transformed_image, image_id)``; the id is returned next to
    the image so callers can keep model outputs aligned with file names.

    Args:
        dataframe: Table with an ``image_id`` column holding file names
            relative to ``img_dir``.
        img_dir: Directory that contains the image files.
        transform: Callable applied to the RGB image before it is returned.
    """

    def __init__(self, dataframe, img_dir, transform):
        self.df = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx``, convert it to RGB and transform it."""
        # Index the column first: ``df.iloc[idx]`` would materialize a whole row
        # Series just to read a single field.
        image_id = self.df['image_id'].iloc[idx]
        image_path = os.path.join(self.img_dir, image_id)
        # ``Image.open`` is lazy and keeps the file handle open; the context
        # manager closes it deterministically once the pixels have been
        # converted into a new in-memory image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        return self.transform(image), image_id
# Both loaders share the same settings: batches of 32, iteration order fixed
# (no shuffling).
loader_options = dict(batch_size=32, shuffle=False)

train_dataset = SoilDataset(train_df, train_dir, transform)
train_loader = DataLoader(train_dataset, **loader_options)

test_dataset = SoilDataset(test_df, test_dir, transform)
test_loader = DataLoader(test_dataset, **loader_options)

In [24]:
# 6. Load pretrained ResNet18 and remove the classifier head
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `pretrained=True` is deprecated in torchvision >= 0.13; the weights enum names
# the same ImageNet checkpoint explicitly.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Replace the final fully connected layer with a pass-through so the model
# returns what used to be that layer's input.
resnet.fc = torch.nn.Identity()
resnet = resnet.to(device)
# Inference mode for layers such as BatchNorm (running statistics are used).
resnet.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [25]:
# 7. Feature extraction function
def extract_features(dataloader, model=None, target_device=None):
    """Run a model over a dataloader and collect its outputs as one array.

    Args:
        dataloader: Iterable yielding ``(images, image_ids)`` batches, where
            ``images`` is a tensor batch and ``image_ids`` a sequence of ids.
        model: Callable mapping an image batch to a feature tensor whose first
            dimension is the batch. Defaults to the notebook-level ``resnet``.
        target_device: Device the batches are moved to before the forward
            pass. Defaults to the notebook-level ``device``.

    Returns:
        Tuple ``(features, ids)``: a NumPy array with the per-batch outputs
        stacked in iteration order, and the list of matching image ids.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    # Defaults are resolved at call time so the function keeps working exactly
    # as before when called with only a dataloader.
    net = resnet if model is None else model
    dev = device if target_device is None else target_device

    features = []
    ids = []
    with torch.no_grad():  # inference only; no autograd graph is needed
        for images, image_ids in tqdm(dataloader):
            batch_feats = net(images.to(dev)).cpu().numpy()
            features.append(batch_feats)
            ids.extend(image_ids)

    # np.vstack([]) fails with an opaque "need at least one array" message;
    # report the actual cause instead (same exception type).
    if not features:
        raise ValueError("dataloader yielded no batches; cannot extract features")
    return np.vstack(features), ids

In [26]:
# 8. Extract features for training and testing
# Keep the training ids under a real name: assigning to `_` would shadow
# IPython's "last output" variable for the rest of the session.
train_features, train_ids = extract_features(train_loader)
test_features, test_ids = extract_features(test_loader)

100%|██████████| 39/39 [00:11<00:00,  3.46it/s]
100%|██████████| 31/31 [00:03<00:00,  7.85it/s]


In [27]:
# 9. Standardize features before feeding them to the SVM
# The scaling statistics are learned from the training features only and then
# applied unchanged to both feature sets.
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)

In [28]:
# 10. Fit One-Class SVM on soil-only training data
# nu is an upper bound on the fraction of training points treated as outliers.
svm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.1).fit(train_features)

# Predict on the test set; the SVM outputs 1 = inlier (soil), -1 = outlier (non-soil).
svm_preds = svm.predict(test_features)
# Map to 1/0 labels: 1 stays 1, anything else becomes 0.
binary_preds = [int(pred == 1) for pred in svm_preds]

In [29]:
# 11. Save Submission
# One row per test image: its id and the predicted 1/0 label.
submission = pd.DataFrame(dict(image_id=test_ids, label=binary_preds))
submission.to_csv('submission.csv', index=False)
print(" Submission file saved as 'submission.csv'")

 Submission file saved as 'submission.csv'
