In [None]:
# Google Drive is already mounted in Colab, and the images for the test data set are already stored there.
# For a new test data set, generate its images first.
# GENERATING TEST PREDICTIONS


import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np
import pickle
import os
from tqdm import tqdm

torch.serialization.add_safe_globals([np.core.multiarray.scalar])

print("="*60)
print("GENERATING TEST PREDICTIONS")
print("="*60)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
PROJECT_DIR = '/content/drive/MyDrive/Real_Estate_Project/'


# STEP 1: LOAD THE PREPROCESSING DATA FROM DRIVE


print("\nLoading preprocessing data...")
preprocessed_path = os.path.join(PROJECT_DIR, 'preprocessed_data.pkl')
# NOTE(security): unpickling can execute arbitrary code, so this file must be
# one you produced yourself.
with open(preprocessed_path, 'rb') as pkl_file:
    data_dict = pickle.load(pkl_file)

# Pull out, in one pass, the entries the rest of the script relies on.
(X_test, test_property_ids, IMAGE_PATH_MAP, val_test_transform, IMG_SIZE) = (
    data_dict[key]
    for key in (
        'X_test',
        'test_property_ids',
        'IMAGE_PATH_MAP',
        'val_test_transform',
        'IMG_SIZE',
    )
)

print(f"Data loaded: {len(X_test)} test samples")


# STEP 2: DEFINE THE MODEL ARCHITECTURE
# The layer layout must be the same as during training, because the saved
# state dict is loaded into this class in step 3.


print("\nDefining model architecture...")

class MultimodalRealEstateModel(nn.Module):
    """Regressor that fuses an image embedding with a tabular embedding.

    A ResNet-18 backbone with its final fully connected layer removed embeds
    the image, a small MLP embeds the tabular row, and the concatenation of
    the two embeddings goes through a fusion MLP that emits one value per
    sample.

    The attribute names (``cnn``, ``image_fc``, ``tabular_fc``, ``fusion``)
    and the order of layers inside each ``nn.Sequential`` determine the
    ``state_dict`` keys, so they must not change if previously saved
    checkpoints are to keep loading.

    Args:
        num_tabular_features: Width of the tabular input vector.
        dropout_rate: Dropout probability used by every dropout layer.
    """

    def __init__(self, num_tabular_features: int, dropout_rate: float = 0.3):
        super().__init__()

        # Randomly initialised backbone; the trained weights come from the
        # checkpoint. `weights=None` is the current spelling of the
        # deprecated `pretrained=False`.
        backbone = models.resnet18(weights=None)
        num_cnn_features = backbone.fc.in_features
        # Keep every child module except the last one (the fc classifier).
        self.cnn = nn.Sequential(*list(backbone.children())[:-1])

        # Image head: backbone features -> 256 -> 128.
        self.image_fc = nn.Sequential(
            nn.Linear(num_cnn_features, 256),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )

        # Tabular head: features -> 128 -> 64 -> 32, with batch norm after
        # the first two activations.
        self.tabular_fc = nn.Sequential(
            nn.Linear(num_tabular_features, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 32),
            nn.ReLU(),
        )

        # Fusion head: 128 image dims + 32 tabular dims -> 64 -> 32 -> 1.
        self.fusion = nn.Sequential(
            nn.Linear(128 + 32, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(32, 1),
        )

    def forward(self, image: torch.Tensor, tabular: torch.Tensor) -> torch.Tensor:
        """Return one prediction per sample, shaped ``(batch, 1)``.

        Args:
            image: Batch of images (presumably ``(batch, 3, H, W)`` as
                produced by the dataset transform; confirm against the
                preprocessing step).
            tabular: Batch of tabular rows, ``(batch, num_tabular_features)``.
        """
        img_features = self.cnn(image)
        # Collapse everything after the batch dimension into one vector.
        img_features = torch.flatten(img_features, 1)
        img_features = self.image_fc(img_features)
        tab_features = self.tabular_fc(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        return self.fusion(combined)


# STEP 3: LOAD THE BEST TRAINED MODEL FROM DRIVE


print("\nLoading trained model...")

# The tabular branch is sized from the number of test feature columns.
num_features = X_test.shape[1]
model = MultimodalRealEstateModel(num_features)

# Build the checkpoint path once and stop early when the file is missing.
model_path = os.path.join(PROJECT_DIR, 'best_model.pth')
if not os.path.exists(model_path):
    raise FileNotFoundError("No trained model found! Train the model first.")

# NOTE(security): weights_only=False uses the full pickle machinery, which can
# execute arbitrary code. Only load checkpoints you produced yourself.
checkpoint = torch.load(model_path, map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()  # put dropout and batch-norm layers into evaluation mode

print(f"Model loaded from: {model_path}")
print(f"  Validation RMSE: ${checkpoint['val_rmse']:,.2f}")
# "\u00b2" is the superscript two; the escape keeps the source ASCII-only so
# the character cannot be garbled by a wrong file encoding.
print(f"  Validation R\u00b2: {checkpoint['val_r2']:.4f}")


# STEP 4: CREATE THE TEST DATASET
# This step does not use the loaded model, so it can also run before step 3.

print("\nCreating test dataset...")

class TestDataset(Dataset):
    """Pairs each tabular test row with the image of the same property.

    When a property has no entry in ``image_path_map``, its file is missing
    on disk, or the file cannot be decoded, a solid grey placeholder image is
    used instead so that every row still yields a sample.

    Args:
        tabular_data: Object exposing ``.values`` (presumably a numeric
            pandas DataFrame; confirm against the preprocessing step).
        property_ids: Indexable sequence of property ids, in row order.
        image_path_map: Mapping from property id to image file path.
        transform: Callable applied to the PIL image; a falsy value skips it.
        img_size: Side length in pixels of the square placeholder image.
            Defaults to the module-level ``IMG_SIZE`` when omitted.
    """

    def __init__(self, tabular_data, property_ids, image_path_map, transform,
                 img_size=None):
        if img_size is None:
            # Backward-compatible default: the size loaded in step 1.
            img_size = IMG_SIZE

        self.tabular_data = torch.FloatTensor(tabular_data.values)
        self.property_ids = property_ids
        self.image_path_map = image_path_map
        self.transform = transform
        self.blank_image = Image.new('RGB', (img_size, img_size), color=(128, 128, 128))

    def __len__(self):
        """Return the number of properties in the dataset."""
        return len(self.property_ids)

    def __getitem__(self, idx):
        """Return ``(image, tabular_row, property_id)`` for position ``idx``."""
        tabular = self.tabular_data[idx]
        property_id = self.property_ids[idx]

        image = self._load_image(self.image_path_map.get(property_id))

        if self.transform:
            image = self.transform(image)

        return image, tabular, property_id

    def _load_image(self, img_path):
        """Return the RGB image at ``img_path``, or the grey placeholder."""
        if not img_path or not os.path.exists(img_path):
            return self.blank_image

        try:
            # The context manager closes the file handle deterministically;
            # convert() returns an independent, fully loaded image.
            with Image.open(img_path) as img:
                return img.convert('RGB')
        except Exception:
            # Best effort: any decoding failure falls back to the
            # placeholder, while KeyboardInterrupt/SystemExit still propagate.
            return self.blank_image

# Wrap the test split in the dataset and serve it in batches of 32, in
# dataset order (no shuffling) and without worker processes.
test_dataset = TestDataset(
    tabular_data=X_test,
    property_ids=test_property_ids,
    image_path_map=IMAGE_PATH_MAP,
    transform=val_test_transform,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
)

print(f"Test dataset ready: {len(test_dataset)} samples")


# STEP 5: GENERATE TEST PREDICTIONS


print("\nGenerating predictions...")

test_predictions = []
test_ids = []

# Gradients are not needed for inference.
with torch.no_grad():
    for images, tabular, property_ids in tqdm(test_loader, desc="Predicting"):
        images = images.to(device)
        tabular = tabular.to(device)

        outputs = model(images, tabular)

        # The model emits one value per sample; flatten to a 1-D run of scalars.
        test_predictions.extend(outputs.cpu().numpy().flatten())

        # Default collation turns numeric ids into a tensor but leaves string
        # ids as a plain sequence, so both shapes are accepted here.
        if torch.is_tensor(property_ids):
            test_ids.extend(property_ids.cpu().numpy())
        else:
            test_ids.extend(property_ids)

print(f"Generated {len(test_predictions)} predictions")


# STEP 6: BUILD THE SUBMISSION TABLE AND SAVE IT TO DRIVE AS CSV


print("\nCreating submission file...")

# One row per prediction, ordered by id and re-indexed from zero.
submission_df = (
    pd.DataFrame({'id': test_ids, 'predicted_price': test_predictions})
    .sort_values('id')
    .reset_index(drop=True)
)

# Written without the index column.
submission_path = os.path.join(PROJECT_DIR, 'test_predictions.csv')
submission_df.to_csv(submission_path, index=False)


# STEP 7: PRINT A SUMMARY OF THE RESULTS


rule = "=" * 60
predicted = submission_df['predicted_price']

print("\n" + rule)
print("SUCCESSFULY! PREDICTIONS COMPLETE")
print(rule)

print(f"\nFile saved: {submission_path}")
print(f"Total predictions: {len(submission_df)}")

# Descriptive statistics of the predicted prices, right-aligned to 12 columns.
print("\nPrice Statistics:")
print(f"  Mean:   ${predicted.mean():>12,.2f}")
print(f"  Median: ${predicted.median():>12,.2f}")
print(f"  Min:    ${predicted.min():>12,.2f}")
print(f"  Max:    ${predicted.max():>12,.2f}")
print(f"  Std:    ${predicted.std():>12,.2f}")

print("\n" + rule)
print("Submission file is ready in drive!")
print(rule)

