In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
from tqdm.auto import tqdm
from time import time

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42
EXTRA_LAYER = True
BATCH_SIZE = 256
EPOCHS = 10
LEARNING_RATE = 1e-4
IMG_SIZE = 224
NUM_FROZEN_LAYERS = 5

print(f"Using device: {device}")

Using device: cuda


In [4]:
dataset_path = 'dataset'
train_csv = pd.read_csv(os.path.join(dataset_path, 'train_data.csv'))
val_csv = pd.read_csv(os.path.join(dataset_path, 'val_data.csv'))
test_csv = pd.read_csv(os.path.join(dataset_path, 'test_data.csv'))

In [5]:
class CustomDataset(Dataset):
    def __init__(self, csv_file, satellite_img_dir, street_img_dir, label, satellite_transform=None, street_transform=None):
        self.data = csv_file
        self.street_img_dir = street_img_dir
        self.satellite_img_dir = satellite_img_dir
        self.label = label
        self.satellite_transform = satellite_transform
        self.street_transform = street_transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        street_image_path = os.path.join(self.street_img_dir, self.data['Filename'].iloc[idx])
        satellite_image_path = os.path.join(self.satellite_img_dir, self.data['Normalized_Filename'].iloc[idx]+'.jpg' )
        
        street_image = self.load_image(street_image_path, self.street_transform)
        satellite_image = self.load_image(satellite_image_path, self.satellite_transform)
        label = torch.tensor(self.data[self.label].iloc[idx], dtype=torch.float32)
        
        return street_image, satellite_image, label
        
    
    def load_image(self, file_path, transform=None):
        img = Image.open(file_path)
        if transform:
            img = transform(img)
        return img

In [6]:
image_net_means = [0.485, 0.456, 0.406]
image_net_stds = [0.229, 0.224, 0.225]

data_transforms = {
    'dev': transforms.Compose([
        transforms.ToTensor(),  # Converts (H, W, C) to (C, H, W)
        transforms.Normalize(tuple(image_net_means), tuple(image_net_stds))
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),  # Converts (H, W, C) to (C, H, W)
        transforms.Normalize(tuple(image_net_means), tuple(image_net_stds))
    ])
}   
    
    
satellite_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),  
    transforms.RandomVerticalFlip(),
    transforms.Normalize(tuple(image_net_means), tuple(image_net_stds))
])

street_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(), 
    transforms.Normalize(tuple(image_net_means), tuple(image_net_stds)),
])
    

In [7]:
train_dataset = CustomDataset(train_csv, os.path.join(dataset_path, 'satellite_images'), os.path.join(dataset_path, 'All_img'), 'AQI', satellite_transform, street_transform)

val_dataset = CustomDataset(val_csv, os.path.join(dataset_path, 'satellite_images'), os.path.join(dataset_path, 'All_img'), 'AQI', data_transforms['dev'], data_transforms['dev'])

test_dataset = CustomDataset(test_csv, os.path.join(dataset_path, 'satellite_images'), os.path.join(dataset_path, 'All_img'), 'AQI', data_transforms['test'], data_transforms['test'])


In [8]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
# data = next(iter(train_loader))

In [9]:
len(train_loader), len(val_loader), len(test_loader)


(31, 8, 10)

In [10]:
# temp = next(iter(train_loader))
# print(temp[0].shape, temp[1].shape, temp[2].shape)

In [11]:
class BaseResnet18(nn.Module):
    """
    Base encoder model
    """
    
    def __init__(self, no_channels=3, dropout=0.5, add_block=False, num_frozen=0):
        
        super(BaseResnet18, self).__init__()

        self.add_block = add_block
        self.num_frozen = num_frozen

        self.model= models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        if no_channels != 3:
            self.model.conv1 = nn.Conv2d(no_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

        if self.add_block:
            self.addition_block = nn.Sequential(
                nn.Linear(in_features=1000, out_features=1000),
                nn.BatchNorm1d(1000),
                # nn.LayerNorm(1000),
                nn.Dropout(dropout),
                nn.Linear(in_features=1000, out_features=1000)
            )
    
        self.final_layers = nn.Sequential(
            nn.Linear(in_features=1000, out_features=512),
            nn.BatchNorm1d(512),
            nn.Dropout(dropout),
            nn.Linear(in_features=512, out_features=512)
        )
        
        self.freeze_layers()
        
        
    def freeze_layers(self):
        """
        Freeze the first `num_frozen` layers of the model
        """
        assert 0 <= self.num_frozen <= len(list(self.model.children())), \
            f"Number of frozen layers should be between 0 and {len(list(self.model.children()))}"
        
        for i, child in enumerate(self.model.children()):
            if i < self.num_frozen:
                for param in child.parameters():
                    param.requires_grad = False
            else:
                break
        print(f"Number of frozen layers: {self.num_frozen}")
        
    def forward(self, x):
        
        x = self.model(x)
        if self.add_block:
            x = self.addition_block(x)
        x = self.final_layers(x)
        
        return x



In [12]:

class ResnetRegression(nn.Module):
    """
    Regression model
    """
    
    def __init__(self, no_channels=3, dropout=0.5, add_block=False, num_frozen=0):
        
        super(ResnetRegression, self).__init__()
        
        self.encoder = BaseResnet18(no_channels=no_channels, dropout=dropout, add_block=add_block, num_frozen=num_frozen)
        self.encoder.final_layers[3] = nn.Linear(in_features=512, out_features=1)
        
    def forward(self, x):
    
        x = self.encoder(x)
        return x
    

class ResnetClassification(nn.Module):
    """
    Classification model
    """
    
    def __init__(self, no_channels=3, num_classes=3, dropout=0.5, add_block=False, num_frozen=0):
        
        super(ResnetClassification, self).__init__()
        
        self.encoder = BaseResnet18(no_channels=no_channels, dropout=dropout, add_block=add_block, num_frozen=num_frozen)
        self.encoder.final_layers[3] = nn.Linear(in_features=512, out_features=num_classes)
        
    def forward(self, x):
    
        x = self.encoder(x)
        return x
    


In [13]:
class AQIPrediction(nn.Module):
    """
    Unified model
    """
    
    def __init__(self, satellite_model, street_model, dropout=0.5, num_classes=None):
        
        super(AQIPrediction, self).__init__()
        
        self.satellite_model = satellite_model
        self.street_model = street_model
        
        self.final_layers = nn.Sequential(
            nn.Linear(512 + 512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 1)
        )
        if num_classes:
            self.final_layers[-1] = nn.Linear(128, num_classes)
            
    def forward(self, street_img, satellite_img):
        
        street_features = self.street_model(street_img)
        satellite_features = self.satellite_model(satellite_img)
        
        features = torch.cat((street_features, satellite_features), dim=1)
        output = self.final_layers(features)
        
        return output
        

In [14]:
def loss_fn_regression(outputs, targets):
    """
    Loss function for regression
    """
    return nn.MSELoss()(outputs, targets)


def loss_fn_classification(outputs, targets):
    """
    Loss function for classification
    """

    return nn.CrossEntropyLoss()(outputs, targets)


def accuracy(outputs, targets):
    """
    Accuracy function
    """

    return (outputs.argmax(1) == targets).float().mean()


def rmse(outputs, targets):
    """
    RMSE function
    """

    return torch.sqrt(nn.MSELoss()(outputs, targets))


def mae(outputs, targets):
    """
    MAE function
    """

    return nn.L1Loss()(outputs, targets)


In [15]:
def train_fn(model, optimizer, scheduler, data_loader, device, is_classification=False):
    """
    Training function
    """

    model.train()
    final_loss = 0
    final_acc = 0
    final_rmse = 0
    final_mae = 0

    for i, data in tqdm(enumerate(data_loader), total=len(data_loader)):
                
        street_img, satellite_img, targets = data
        street_img = street_img.to(device)
        satellite_img = satellite_img.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        if is_classification:
            outputs = model(street_img, satellite_img)
            loss = loss_fn_classification(outputs, targets)
            acc = accuracy(outputs, targets)
            final_acc += acc.item()
        else:
            outputs = model(street_img, satellite_img)
            targets = targets.unsqueeze(1)
            loss = loss_fn_regression(outputs, targets)

        rmse_score = rmse(outputs, targets)
        mae_score = mae(outputs, targets)

        loss.backward()
        optimizer.step()
        scheduler.step()

        final_loss += loss.item()
        
        final_rmse += rmse_score.item()
        final_mae += mae_score.item()
        
        print(f"Batch: {i+1}/{len(data_loader)}, Loss: {final_loss / (i+1):.4f}, RMSE: {final_rmse / (i+1):.4f}, MAE: {final_mae / (i+1):.4f}", end='\r')

    return final_loss / len(data_loader), final_acc / len(data_loader), final_rmse / len(data_loader), final_mae / len(data_loader)

In [16]:

def eval_fn(model, data_loader, device, is_classification=False):
    """
    Evaluation function
    """
    
    model.eval()
    final_loss = 0
    final_acc = 0
    final_rmse = 0
    final_mae = 0

    for data in tqdm(data_loader, total=len(data_loader)):
        street_img, satellite_img, targets = data
        street_img = street_img.to(device)
        satellite_img = satellite_img.to(device)
        targets = targets.to(device)

        with torch.inference_mode():
            if is_classification:
                outputs = model(street_img, satellite_img)
                loss = loss_fn_classification(outputs, targets)
                acc = accuracy(outputs, targets)
            else:
                outputs = model(street_img, satellite_img)
                targets = targets.unsqueeze(1)
                loss = loss_fn_regression(outputs, targets)
                acc = torch.tensor(0, requires_grad=False)

            rmse_score = rmse(outputs, targets)
            mae_score = mae(outputs, targets)

            final_loss += loss.item()
            final_acc += acc.item()
            final_rmse += rmse_score.item()
            final_mae += mae_score.item()

    return final_loss / len(data_loader), final_acc / len(data_loader), final_rmse / len(data_loader), final_mae / len(data_loader)

In [17]:
def train():

    satellite_encoder = BaseResnet18(no_channels=3, dropout=0.5, add_block=EXTRA_LAYER, num_frozen=NUM_FROZEN_LAYERS)
    street_encoder = BaseResnet18(no_channels=3, dropout=0.5, add_block=EXTRA_LAYER, num_frozen=NUM_FROZEN_LAYERS)
    model = AQIPrediction(satellite_encoder, street_encoder, dropout=0.5, num_classes=None)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=EPOCHS)

    losses = []
    accuracies = []
    rmse_scores = []
    mae_scores = []
    
    best_loss = np.inf
    best_acc = 0
    best_rmse = np.inf
    best_mae = np.inf

    for epoch in tqdm(range(EPOCHS)):
        train_loss, train_acc, train_rmse, train_mae = train_fn(model, optimizer, scheduler, train_loader, device, is_classification=False)
        val_loss, val_acc, val_rmse, val_mae = eval_fn(model, val_loader, device, is_classification=False)

        print(f"Epoch: {epoch + 1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Train RMSE: {train_rmse:.4f}, Train MAE: {train_mae:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val RMSE: {val_rmse:.4f}, Val MAE: {val_mae:.4f}")

        losses.append((train_loss, val_loss))
        accuracies.append((train_acc, val_acc))
        rmse_scores.append((train_rmse, val_rmse))
        mae_scores.append((train_mae, val_mae))
        
        if val_loss < best_loss:
            best_loss = val_loss
            best_acc = val_acc
            best_rmse = val_rmse
            best_mae = val_mae
            torch.save(model.state_dict(), "best_model.pth")

    print(f"Best Val Loss: {best_loss:.4f}, Best Val Acc: {best_acc:.4f}, Best Val RMSE: {best_rmse:.4f}, Best Val MAE: {best_mae:.4f}")

    model.load_state_dict(torch.load("best_model.pth"))
    test_loss, test_acc, test_rmse, test_mae = eval_fn(model, test_loader, device, is_classification=False)
    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}, Test RMSE: {test_rmse:.4f}, Test MAE: {test_mae:.4f}")
    
    return losses, accuracies, rmse_scores, mae_scores

In [18]:
losses, accuracies, rmse_scores, mae_scores = train()

Number of frozen layers: 5
Number of frozen layers: 5


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Batch: 31/31, Loss: 38564.1100, RMSE: 196.2477, MAE: 167.6263

  0%|          | 0/8 [00:00<?, ?it/s]

Epoch: 1/10, Train Loss: 38564.1100, Train Acc: 0.0000, Train RMSE: 196.2477, Train MAE: 167.6263, Val Loss: 39089.4199, Val Acc: 0.0000, Val RMSE: 197.6011, Val MAE: 168.8591


  0%|          | 0/31 [00:00<?, ?it/s]

Batch: 31/31, Loss: 38378.4477, RMSE: 195.7969, MAE: 167.5866

  0%|          | 0/8 [00:00<?, ?it/s]

Epoch: 2/10, Train Loss: 38378.4477, Train Acc: 0.0000, Train RMSE: 195.7969, Train MAE: 167.5866, Val Loss: 38574.4761, Val Acc: 0.0000, Val RMSE: 196.2983, Val MAE: 168.8454


  0%|          | 0/31 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
def plot_metrics(losses, accuracies, rmse_scores, mae_scores):
    """
    Plot metrics
    """

    fig, ax = plt.subplots(2, 2, figsize=(20, 15))

    ax[0, 0].plot(losses)
    ax[0, 0].set_title("Loss")
    ax[0, 0].legend(["Train", "Val"])

    ax[0, 1].plot(accuracies)
    ax[0, 1].set_title("Accuracy")
    ax[0, 1].legend(["Train", "Val"])

    ax[1, 0].plot(rmse_scores)
    ax[1, 0].set_title("RMSE")
    ax[1, 0].legend(["Train", "Val"])

    ax[1, 1].plot(mae_scores)
    ax[1, 1].set_title("MAE")
    ax[1, 1].legend(["Train", "Val"])

    plt.show()
    
plot_metrics(losses, accuracies, rmse_scores, mae_scores)