In [None]:
# Shell out to nvidia-smi to show the GPU/driver status of this runtime.
!nvidia-smi

In [None]:
# Upgrade albumentations to the newest available release (-U); -q keeps pip's output quiet.
# NOTE(review): the version is not pinned, so the augmentation API in use depends on install time.
%pip install -U albumentations -q

In [None]:
import os
import timm
import torch
import warnings
import torch.nn as nn
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
from sklearn.model_selection import StratifiedKFold
import numpy as np
from tqdm import tqdm

In [None]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / invalid-argument warnings emitted by
# libraries — confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Read in the training dataset (one row per annotation; aggregated per image further below)
train = pd.read_csv("/kaggle/input/lacuna-solar-survey-challenge/Train.csv")

In [None]:
# Preview the first rows of the raw training table.
train.head()

In [None]:
# Per-image lookup tables: {"placement": {ID: value}} and {"img_origin": {ID: value}}.
# drop_duplicates() collapses the repeated (ID, value) rows before building the dicts.
placement_mapper = train[["ID", "placement"]].drop_duplicates().set_index("ID").to_dict()
img_origin_mapper = train[["ID", "img_origin"]].drop_duplicates().set_index("ID").to_dict()

# Collapse to one row per image by summing the two count targets.
# The target columns are selected *before* aggregating so pandas never tries to
# "sum" the remaining columns (recent pandas versions aggregate string columns too,
# which is wasteful and can fail on non-summable data).
train_df = train.groupby("ID", as_index=False)[["boil_nbr", "pan_nbr"]].sum()

# Attach the categorical metadata back onto the aggregated frame
train_df["img_origin"] = train_df["ID"].map(img_origin_mapper["img_origin"])
train_df["placement"] = train_df["ID"].map(placement_mapper["placement"])

# Absolute path of the image file that belongs to each ID
train_df["path"] = "/kaggle/input/lacuna-solar-survey-challenge/images/" + train_df["ID"] + ".jpg"

In [None]:
# Preview the aggregated, one-row-per-image training frame.
train_df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Prepare metadata encoders
def prepare_metadata_encoders(train_df):
    """Fit one LabelEncoder per categorical metadata column.

    Args:
        train_df: DataFrame providing the ``img_origin`` and ``placement`` columns.

    Returns:
        Tuple ``(img_origin_encoder, placement_encoder)`` of fitted LabelEncoders,
        in that order.
    """
    # Column order defines the order of the returned encoders.
    metadata_columns = ("img_origin", "placement")
    # LabelEncoder.fit returns the encoder itself, so each entry is a fitted encoder.
    return tuple(LabelEncoder().fit(train_df[column]) for column in metadata_columns)

In [None]:
# Fit the img_origin / placement label encoders on the aggregated training frame.
# These module-level encoders are the ones passed to the datasets built in later cells.
img_origin_encoder, placement_encoder = prepare_metadata_encoders(train_df)

In [None]:
# Stratified KFold: images are stratified on their total count (boil_nbr + pan_nbr)
train_df["stratify_label"] = train_df[["boil_nbr", "pan_nbr"]].sum(axis=1)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Start every row at -1 and overwrite it with the index of the fold in which
# the row is part of the validation split.
fold_assignment = np.full(len(train_df), -1, dtype=np.int64)
for fold, (_, valid_idx) in enumerate(skf.split(train_df, train_df["stratify_label"])):
    fold_assignment[valid_idx] = fold
train_df["fold"] = fold_assignment

In [None]:
# Enhanced Augmentation Pipeline
# Training pipeline: resize -> geometric jitter -> colour jitter -> cutout -> normalise -> tensor.
train_transforms = A.Compose([
    A.Resize(384, 384),
    # Dynamic Spatial Transforms (Geometric)
    A.Rotate(limit=30, p=0.5),              # Rotate ±30 degrees
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    # Color Transforms
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    # Targeted Dropout (Cutout): 8 square holes of 32x32 pixels.
    # The range-based arguments are used because this notebook upgrades albumentations
    # to the latest release, where the legacy max_holes / max_height / max_width
    # keywords are no longer part of the transform's signature (and the resulting
    # warning would be hidden by the global warnings filter).
    A.CoarseDropout(
        num_holes_range=(8, 8),
        hole_height_range=(32, 32),
        hole_width_range=(32, 32),
        p=0.3,
    ),
    # ImageNet channel statistics
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Evaluation pipeline: deterministic resize + the same normalisation, no augmentation.
test_transforms = A.Compose([
    A.Resize(384, 384),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

In [None]:
# Modified Dataset to include metadata
class SolarPanelDataset(Dataset):
    """Dataset yielding an image, its encoded categorical metadata and (optionally) targets.

    Args:
        dataframe: DataFrame with a ``path`` column plus ``img_origin`` and
            ``placement``; ``boil_nbr`` and ``pan_nbr`` are also read when
            ``to_train`` is True.
        transform: Optional callable invoked as ``transform(image=image)["image"]``.
        to_train: When True each item also carries the regression target.
        img_origin_encoder: Fitted encoder exposing ``transform`` for ``img_origin``.
        placement_encoder: Fitted encoder exposing ``transform`` for ``placement``.
    """

    def __init__(self, dataframe, transform=None, to_train=True,
                 img_origin_encoder=None, placement_encoder=None):
        self.dataframe = dataframe
        self.transform = transform
        self.to_train = to_train
        self.img_origin_encoder = img_origin_encoder
        self.placement_encoder = placement_encoder

    def __len__(self):
        """Number of rows (images) in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the ``idx``-th sample.

        Returns:
            ``(image, metadata, target)`` when ``to_train`` is True, otherwise
            ``(image, metadata)``. ``metadata`` is a long tensor
            ``[img_origin_code, placement_code]``; ``target`` is a float32 tensor
            ``[boil_nbr, pan_nbr]``.

        Raises:
            FileNotFoundError: if the image at ``row["path"]`` cannot be read.
            ValueError: if either metadata encoder was not provided.
        """
        row = self.dataframe.iloc[idx]
        path = row["path"]

        # cv2.imread signals a missing/unreadable file by returning None instead of
        # raising; fail here with the offending path rather than inside cvtColor.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Could not read image at {path!r}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)["image"]

        # Metadata
        if self.img_origin_encoder is None or self.placement_encoder is None:
            raise ValueError(
                "SolarPanelDataset needs both img_origin_encoder and placement_encoder "
                "to encode the metadata columns"
            )
        img_origin = self.img_origin_encoder.transform([row["img_origin"]])[0]
        placement = self.placement_encoder.transform([row["placement"]])[0]
        metadata = torch.tensor([img_origin, placement], dtype=torch.long)

        if self.to_train:
            target = torch.tensor([row["boil_nbr"], row["pan_nbr"]], dtype=torch.float32)
            return image, metadata, target
        return image, metadata

In [None]:
# Prepare Dataloaders
fold = 0  # Change fold index as needed
train_data = train_df[train_df["fold"] != fold].reset_index(drop=True)
valid_data = train_df[train_df["fold"] == fold].reset_index(drop=True)

# Update dataloaders with metadata
dataset_train = SolarPanelDataset(
    train_data, 
    transform=train_transforms,
    img_origin_encoder=img_origin_encoder,
    placement_encoder=placement_encoder
)
dataset_valid = SolarPanelDataset(
    valid_data, 
    transform=test_transforms,
    img_origin_encoder=img_origin_encoder,
    placement_encoder=placement_encoder
)

# Use half of the CPU cores as loader workers (the same setting the test loader uses).
# os.cpu_count() may return None, hence the fallback before dividing.
num_workers = (os.cpu_count() or 2) // 2
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(dataset_valid, batch_size=32, shuffle=False, num_workers=num_workers)

In [None]:
# Model with Metadata Integration and Fusion
class EfficientNetV2Regressor(nn.Module):
    """EfficientNetV2-S image features fused with embedded categorical metadata.

    The un-pooled backbone feature map is flattened, concatenated with the
    ``img_origin`` and ``placement`` embeddings and passed through a 2-layer head
    that regresses two values (boil_nbr, pan_nbr).

    Args:
        num_img_origins: Number of distinct ``img_origin`` codes (embedding rows).
        num_placements: Number of distinct ``placement`` codes (embedding rows).
        img_size: Side length of the square input images. The head is sized for
            this resolution, so ``forward`` must receive images of the same size.
        pretrained: Forwarded to ``timm.create_model``; set to False when the
            weights are going to be restored from a checkpoint anyway.
    """

    def __init__(self, num_img_origins=2, num_placements=4, img_size=384, pretrained=True):
        super().__init__()

        # 2. Image Feature Extraction with EfficientNetV2
        self.backbone = timm.create_model('tf_efficientnetv2_s', pretrained=pretrained)

        # Probe the backbone once to learn the size of the flattened feature map.
        # The probe runs in eval mode: in training mode the batch-norm layers would
        # fold the statistics of this all-zeros image into their running buffers.
        was_training = self.backbone.training
        self.backbone.eval()
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, img_size, img_size)
            features = self.backbone.forward_features(dummy_input)
        self.backbone.train(was_training)
        self.feature_dim = features.shape[1] * features.shape[2] * features.shape[3]

        # Make the backbone return the raw feature map (no pooling, no classifier)
        self.backbone.global_pool = nn.Identity()
        self.backbone.classifier = nn.Identity()

        # 1. Metadata Integration
        # Embedding layers for categorical variables
        self.img_origin_embed = nn.Embedding(num_img_origins, 8)  # 8-dimensional embedding
        self.placement_embed = nn.Embedding(num_placements, 16)   # 16-dimensional embedding

        # Total metadata embedding size
        metadata_dim = 8 + 16  # 24

        # 3. Fusion - 2-layer regression head
        self.fusion_layer = nn.Sequential(
            nn.Linear(self.feature_dim + metadata_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2)  # Output 2 values: boil_nbr, pan_nbr
        )

    def forward(self, x, metadata):
        """Predict the two targets for a batch.

        Args:
            x: Image batch ``[batch_size, 3, img_size, img_size]``.
            metadata: Long tensor ``[batch_size, 2]`` whose columns are the
                img_origin code and the placement code.

        Returns:
            Tensor ``[batch_size, 2]``.

        Raises:
            ValueError: if the flattened backbone features do not match the size
                the head was built for (i.e. the input resolution differs).
        """
        # Image feature extraction
        features = self.backbone(x)  # Shape: [batch_size, channels, height, width]
        # flatten() also copes with non-contiguous feature maps, unlike view()
        features = features.flatten(1)  # [batch_size, feature_dim]
        if features.size(1) != self.feature_dim:
            raise ValueError(
                f"Expected {self.feature_dim} flattened image features, got "
                f"{features.size(1)}; the input size must match the img_size used at construction"
            )

        # Metadata embeddings
        img_origin_emb = self.img_origin_embed(metadata[:, 0])  # [batch_size, 8]
        placement_emb = self.placement_embed(metadata[:, 1])    # [batch_size, 16]

        # Concatenate metadata embeddings
        metadata_features = torch.cat([img_origin_emb, placement_emb], dim=1)  # [batch_size, 24]

        # Fusion
        combined = torch.cat([features, metadata_features], dim=1)  # [batch_size, feature_dim + 24]
        return self.fusion_layer(combined)  # [batch_size, 2]

In [None]:
# Initialize model
# Embedding table sizes follow the number of classes each fitted encoder holds.
model = EfficientNetV2Regressor(
    num_placements=len(placement_encoder.classes_),
    num_img_origins=len(img_origin_encoder.classes_),
)
model = model.cuda()

# Checkpoint file name, Adam optimiser over all parameters, and L1 (MAE) objective
best_model_path = "best_model_v2.pth"
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.L1Loss()

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# 1. Data Strategy: Full Cross-Validation Loop
def train_cross_validation(train_df, n_splits=5, num_epochs=50):
    """Train one EfficientNetV2Regressor per stratified fold and report validation MAE.

    Each fold trains a fresh model on CUDA with Adam (lr=1e-4) and L1 loss. Whenever
    the mean validation batch loss improves, the weights are saved to
    ``best_model_fold_{fold}.pth`` (0-based fold index) in the working directory.

    Args:
        train_df: DataFrame with ``path``, ``img_origin``, ``placement``,
            ``boil_nbr`` and ``pan_nbr`` columns. A ``stratify_label`` column
            (row-wise sum of the two targets) is written to it.
        n_splits: Number of stratified folds.
        num_epochs: Epochs to train per fold; must be at least 1.

    Returns:
        ``(fold_mae_scores, avg_mae)``: the validation MAE of each fold's *saved*
        checkpoint, and their mean.

    Raises:
        ValueError: if ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be >= 1, got {num_epochs}")

    # Prepare metadata encoders
    img_origin_encoder = LabelEncoder()
    placement_encoder = LabelEncoder()
    img_origin_encoder.fit(train_df['img_origin'])
    placement_encoder.fit(train_df['placement'])

    # Stratified K-Fold
    train_df["stratify_label"] = train_df[["boil_nbr", "pan_nbr"]].sum(axis=1)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # os.cpu_count() may return None; fall back to loading in the main process
    num_workers = os.cpu_count() or 0

    fold_mae_scores = []

    for fold, (train_idx, valid_idx) in enumerate(skf.split(train_df, train_df["stratify_label"])):
        print(f"\nTraining Fold {fold + 1}/{n_splits}")

        # Split data
        train_data = train_df.iloc[train_idx].reset_index(drop=True)
        valid_data = train_df.iloc[valid_idx].reset_index(drop=True)

        # Create datasets
        dataset_train = SolarPanelDataset(
            train_data, transform=train_transforms,
            img_origin_encoder=img_origin_encoder,
            placement_encoder=placement_encoder
        )
        dataset_valid = SolarPanelDataset(
            valid_data, transform=test_transforms,
            img_origin_encoder=img_origin_encoder,
            placement_encoder=placement_encoder
        )

        # DataLoaders
        train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=num_workers)
        valid_loader = DataLoader(dataset_valid, batch_size=32, shuffle=False)

        # Initialize model
        model = EfficientNetV2Regressor(
            num_img_origins=len(img_origin_encoder.classes_),
            num_placements=len(placement_encoder.classes_)
        ).cuda()

        criterion = nn.L1Loss()
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        best_model_path = f"best_model_fold_{fold}.pth"
        best_loss = float("inf")
        # MAE measured at the epoch whose weights were checkpointed
        best_mae = float("inf")

        # Training Loop
        for epoch in range(num_epochs):
            model.train()
            epoch_loss = 0.0
            for images, metadata, targets in tqdm(train_loader, desc=f"Fold {fold+1} Epoch {epoch+1}/{num_epochs} - Training"):
                images, metadata, targets = images.cuda(), metadata.cuda(), targets.cuda()
                optimizer.zero_grad()
                outputs = model(images, metadata)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            # Validation Loop
            model.eval()
            val_loss = 0.0
            preds, true_vals = [], []
            with torch.no_grad():
                for images, metadata, targets in tqdm(valid_loader, desc=f"Fold {fold+1} Epoch {epoch+1}/{num_epochs} - Validation"):
                    images, metadata, targets = images.cuda(), metadata.cuda(), targets.cuda()
                    outputs = model(images, metadata)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item()
                    preds.append(outputs.cpu().numpy())
                    true_vals.append(targets.cpu().numpy())

            val_loss /= len(valid_loader)
            preds = np.concatenate(preds, axis=0)
            true_vals = np.concatenate(true_vals, axis=0)
            mae = mean_absolute_error(true_vals, preds)

            print(f"Fold {fold+1} Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}, MAE: {mae:.4f}")

            if val_loss < best_loss:
                best_loss = val_loss
                best_mae = mae
                torch.save(model.state_dict(), best_model_path)

        # Score the fold with the checkpoint that was actually saved (and is later
        # ensembled), not with whatever the final epoch happened to produce.
        fold_mae_scores.append(best_mae)

        # Drop this fold's model/optimizer before the next fold allocates its own,
        # so two sets of weights and optimizer state never sit on the GPU together.
        del model, optimizer
        torch.cuda.empty_cache()

    avg_mae = np.mean(fold_mae_scores)
    print(f"\nCross-Validation Complete! Average MAE across {n_splits} folds: {avg_mae:.4f}")
    return fold_mae_scores, avg_mae

# Execute Cross-Validation: 5 folds, 30 epochs each (overrides the function's default of 50).
# Writes one best_model_fold_{fold}.pth checkpoint per fold, which the test-prediction cell loads.
fold_mae_scores, avg_mae = train_cross_validation(train_df, n_splits=5, num_epochs=30)

In [None]:
# Test Set Prediction
test_df = pd.read_csv("/kaggle/input/lacuna-solar-survey-challenge/Test.csv")
test_df["path"] = "/kaggle/input/lacuna-solar-survey-challenge/images/" + test_df["ID"] + ".jpg"

# Unlabelled dataset: items are (image, metadata) pairs
dataset_test = SolarPanelDataset(
    test_df,
    transform=test_transforms,
    to_train=False,
    img_origin_encoder=img_origin_encoder,
    placement_encoder=placement_encoder,
)
test_loader = DataLoader(
    dataset_test,
    batch_size=32,
    shuffle=False,
    num_workers=os.cpu_count() // 2,
)

# Step 1: restore one network per fold checkpoint
num_folds = 5  # Adjust based on your k-fold setup (e.g., 5-fold CV)
models = []
for fold in range(num_folds):
    best_model_path = f"best_model_fold_{fold}.pth"
    model = EfficientNetV2Regressor(
        num_img_origins=len(img_origin_encoder.classes_),
        num_placements=len(placement_encoder.classes_),
    ).cuda()
    model.load_state_dict(torch.load(best_model_path))
    model.eval()  # inference mode for dropout / batch-norm layers
    models.append(model)

# Step 2: for every batch, average the predictions of all fold models
batch_means = []
with torch.no_grad():
    for images, metadata in tqdm(test_loader, desc="Predicting on Test Set"):
        images, metadata = images.cuda(), metadata.cuda()
        # Stacked shape: (num_models, batch_size, output_dim)
        per_model = np.stack(
            [member(images, metadata).cpu().numpy() for member in models],
            axis=0,
        )
        # Mean over the model axis -> (batch_size, output_dim)
        batch_means.append(per_model.mean(axis=0))

# Step 3: stitch the per-batch averages back together in loader order
test_preds = np.concatenate(batch_means, axis=0)
print(f"Test predictions shape: {test_preds.shape}")

In [None]:
# Create Submission
# Two rows per test image, "<ID>_boil" then "<ID>_pan", which matches the
# row-major order of the flattened (n_images, 2) prediction array.
repeated_ids = pd.Series(np.repeat(test_df["ID"].values, 2))
target_suffixes = np.tile(["_boil", "_pan"], len(test_df))
submission = pd.DataFrame({
    "ID": repeated_ids + target_suffixes,
    # Predictions are limited to the [0, 1000] interval
    "Target": np.clip(test_preds.flatten(), 0, 1000),
})

# Save Submission
submission.to_csv("Submission.csv", index=False)
print("Submission file saved!")