### APPROACH 1 FOR TRANSFER LEARNING:
In this approach, we load a pre-trained model (trained on the France dataset), freeze the base layers, and only fine-tune the classifier layer using the Egypt dataset. This allows us to leverage learned features from the source domain and adapt the model efficiently to the new target domain with limited training.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class EmissionModel(nn.Module):
    """Small fully connected network: a two-layer feature extractor plus a linear head.

    ``base`` maps 10 input features to a 32-dimensional representation
    (10 -> 64 -> 32, with a ReLU after each linear layer); ``classifier``
    maps that representation to a single output value.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor, kept as a submodule separate from the output head.
        feature_layers = [
            nn.Linear(10, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.base = nn.Sequential(*feature_layers)
        # Output head: one value per sample.
        self.classifier = nn.Linear(32, 1)

    def forward(self, x):
        """Run ``x`` through the feature extractor and then the output head."""
        features = self.base(x)
        return self.classifier(features)


In [5]:
model = EmissionModel()
# Placeholder for restoring pre-trained weights; intentionally left disabled.
# model.load_state_dict(torch.load('france_weights.pth'))

# Turn off gradient tracking for every parameter of the base layers;
# the classifier's parameters are left untouched.
for base_param in model.base.parameters():
    base_param.requires_grad = False


**We can clearly see that we successfully froze all the base layers and kept the classifier trainable.**

In [6]:
from torchinfo import summary

# Per-layer report for an input of shape (1, 10), including a "trainable" column.
summary(
    model,
    input_size=(1, 10),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
EmissionModel (EmissionModel)            [1, 10]              [1, 1]               --                   Partial
├─Sequential (base)                      [1, 10]              [1, 32]              --                   False
│    └─Linear (0)                        [1, 10]              [1, 64]              (704)                False
│    └─ReLU (1)                          [1, 64]              [1, 64]              --                   --
│    └─Linear (2)                        [1, 64]              [1, 32]              (2,080)              False
│    └─ReLU (3)                          [1, 32]              [1, 32]              --                   --
├─Linear (classifier)                    [1, 32]              [1, 1]               33                   True
Total params: 2,817
Trainable params: 33
Non-trainable params: 2,784
Total mult-adds (Units.MEGABYTES): 0.00
Input size (

In [7]:
# Only the classifier's parameters are handed to Adam, so only they get updated.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-3)
# Mean-squared-error loss between predictions and targets.
loss_fn = nn.MSELoss()

In [8]:
# Randomly generated batch: 32 samples with 10 features each, and one target per sample.
X_egypt = torch.randn((32, 10))
y_egypt = torch.randn((32, 1))

In [9]:
# Move the model and both tensors to the selected device before training.
model = model.to(device)
X_egypt, y_egypt = X_egypt.to(device), y_egypt.to(device)

In [10]:
# Switch to training mode, then take five gradient steps on the same full batch.
model.train()
for epoch in range(5):
    # Clear gradients left over from the previous step before computing new ones.
    optimizer.zero_grad()
    outputs = model(X_egypt)
    loss = loss_fn(outputs, y_egypt)
    # Backpropagate, then let the optimizer update the parameters it was given.
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.9361
Epoch 2, Loss: 0.9350
Epoch 3, Loss: 0.9340
Epoch 4, Loss: 0.9330
Epoch 5, Loss: 0.9320


### APPROACH 2 FOR TRANSFER LEARNING:
In this approach, we freeze the layers when creating the model.

In [11]:
import torchvision

We create the model and freeze the layers inside the model class itself (this is the same model architecture that was trained on the France data).


In [16]:
class Resnet18(torch.nn.Module):
    """ResNet-18 encoder followed by an L2-normalized embedding head.

    The encoder is torchvision's ``resnet18`` loaded with the
    ``IMAGENET1K_V1`` weights, with its last child module removed. The
    flattened encoder output goes through dropout, a linear projection from
    512 features to ``embedding_dim``, and 1-D batch normalization; the
    result is then L2-normalized along dimension 1.

    Args:
        embedding_dim: Size of the output embedding (default 128).
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        pretrained = torchvision.models.resnet18(
            weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
        )
        # Keep every child module of the pre-trained network except the last one.
        backbone_children = list(pretrained.children())
        self.encoder = torch.nn.Sequential(*backbone_children[:-1])

        self.embedding = torch.nn.Sequential(
            torch.nn.Dropout(p=0.2, inplace=True),
            torch.nn.Linear(in_features=512, out_features=embedding_dim, bias=True),
            torch.nn.BatchNorm1d(embedding_dim),
        )

        # Freeze all encoder parameter tensors except the final four.
        # NOTE(review): the slice counts parameter tensors (weights/biases),
        # not layers - confirm the last four tensors are the ones that are
        # meant to stay trainable.
        encoder_params = list(self.encoder.parameters())
        for frozen_param in encoder_params[:-4]:
            frozen_param.requires_grad = False

    def forward(self, x):
        """Return L2-normalized embeddings for the input batch ``x``."""
        features = torch.flatten(self.encoder(x), 1)
        projected = self.embedding(features)
        return torch.nn.functional.normalize(projected, p=2, dim=1)



The summary of the model is shown below. The majority of the encoder is frozen; we left the last 2 layers of the encoder trainable to improve generalization.


In [17]:
# Build the model with 256-dimensional embeddings and report its layers
# for an input of shape (32, 3, 128, 128).
model = Resnet18(embedding_dim=256)
summary(
    model=model,
    input_size=(32, 3, 128, 128),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
Resnet18 (Resnet18)                           [32, 3, 128, 128]    [32, 256]            --                   Partial
├─Sequential (encoder)                        [32, 3, 128, 128]    [32, 512, 1, 1]      --                   Partial
│    └─Conv2d (0)                             [32, 3, 128, 128]    [32, 64, 64, 64]     (9,408)              False
│    └─BatchNorm2d (1)                        [32, 64, 64, 64]     [32, 64, 64, 64]     (128)                False
│    └─ReLU (2)                               [32, 64, 64, 64]     [32, 64, 64, 64]     --                   --
│    └─MaxPool2d (3)                          [32, 64, 64, 64]     [32, 64, 32, 32]     --                   --
│    └─Sequential (4)                         [32, 64, 32, 32]     [32, 64, 32, 32]     --                   False
│    │    └─BasicBlock (0)                    [32, 64, 32, 32]     [32, 64, 32

We can save the model's state dict with this function after the model has been trained on the French dataset. It also saves the epoch, the optimizer state dict and the scheduler state dict, so that training can be resumed later instead of starting from epoch 0 again.


In [None]:
def save_model(model: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               scheduler,
               target_dir: str,
               epoch: int,
               model_name: str) -> None:
    """Save a training checkpoint to ``target_dir / model_name``.

    The checkpoint is a dict with the keys ``'epoch'``,
    ``'model_state_dict'``, ``'optimizer_state_dict'`` and
    ``'scheduler_state_dict'``, written with ``torch.save``.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        scheduler: Object exposing ``state_dict()``, or ``None``; when
            ``None``, ``'scheduler_state_dict'`` is stored as ``None``.
        target_dir: Directory to save into; created (with parents) if missing.
        epoch: Epoch number recorded in the checkpoint.
        model_name: File name; must end with ``".pth"`` or ``".pt"``.

    Raises:
        AssertionError: If ``model_name`` has neither of the two suffixes.
    """
    from pathlib import Path

    # Validate before touching the filesystem so that a bad name does not
    # leave an empty directory behind. An explicit raise (rather than an
    # ``assert`` statement) keeps the check active under ``python -O`` while
    # preserving the exception type.
    if not model_name.endswith((".pth", ".pt")):
        raise AssertionError("Filename must end with .pth or .pt")

    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True, exist_ok=True)
    model_save_path = target_dir_path / model_name

    print(f"[INFO] Saving model to: {model_save_path}")
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        # Training without a scheduler is allowed; record that as None.
        'scheduler_state_dict': scheduler.state_dict() if scheduler is not None else None,
    }
    torch.save(checkpoint, f=model_save_path)


This function loads the checkpoint from the last training run so that training can resume from where it stopped.

In [None]:
def load_checkpoint(checkpoint_path, model, optimizer, scheduler):
    """
    Loads state_dicts into model, optimizer, scheduler.
    Returns the epoch to resume from.

    Args:
        checkpoint_path: Path of a checkpoint dict readable by ``torch.load``
            with the keys ``"epoch"`` and ``"model_state_dict"``, plus
            ``"optimizer_state_dict"`` / ``"scheduler_state_dict"`` when
            those are to be restored.
        model: Module that receives ``"model_state_dict"``.
        optimizer: Optimizer that receives ``"optimizer_state_dict"``, or
            ``None`` to skip restoring optimizer state.
        scheduler: Scheduler that receives ``"scheduler_state_dict"``, or
            ``None`` to skip it. Also skipped when the checkpoint has no
            scheduler state stored.

    Returns:
        The stored epoch plus one.
    """
    # Load tensors onto the device the model lives on; a model without any
    # parameters has no device to infer, so fall back to the CPU.
    first_param = next(model.parameters(), None)
    map_location = first_param.device if first_param is not None else torch.device("cpu")

    # NOTE: torch.load unpickles the file - only load checkpoints from
    # sources you trust.
    ckpt = torch.load(checkpoint_path, map_location=map_location)

    model.load_state_dict(ckpt["model_state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(ckpt["optimizer_state_dict"])

    scheduler_state = ckpt.get("scheduler_state_dict")
    if scheduler is not None and scheduler_state is not None:
        scheduler.load_state_dict(scheduler_state)

    return ckpt["epoch"] + 1



In [None]:
# AdamW over all model parameters, with decoupled weight decay.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# CosineAnnealingLR requires T_max; omitting it raises a TypeError.
# NOTE(review): 10 assumes scheduler.step() is called once per epoch over a
# 10-epoch run - confirm against the training loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)


In [None]:
from pathlib import Path

# Resume from a saved checkpoint under "models/" when one exists;
# otherwise start from epoch 0.
ckpt_name = "ResNet18_TRAINED_ON_FRANCE_DATASET.pth"
ckpt_path = Path("models", ckpt_name)
if not ckpt_path.exists():
    start_epoch = 0
else:
    start_epoch = load_checkpoint(ckpt_path, model, optimizer, scheduler)
    print(f"[INFO] Found checkpoint, resuming from epoch {start_epoch}")

In [None]:
#Resume training from the last checkpoint
# results = train(model=model,
#             train_dataloader=egy_train_loader,
#             test_dataloader=egy_test_loader,
#             optimizer=optimizer,
#             loss_fn=loss_fn,
#             epochs=10,
#             device=device,
#             start_epoch=start_epoch,
#             scheduler=scheduler)