In [None]:
import os

# Export the project root for anything that reads it from the environment.
# NOTE(review): "/path/to/..." looks like a placeholder — confirm and replace
# with the real checkout location before relying on it.
os.environ.update({"PROJECT_ROOT": "/path/to/JekTurnRight_dsde"})

In [3]:
# Shell out to `nvidia-smi` to show which GPU (if any) this runtime exposes,
# together with its driver/CUDA version and current memory usage.
!nvidia-smi

Mon Nov 24 12:28:33 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   64C    P8             14W /   72W |       0MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# Import Packages & Modules

In [4]:
import json
import math
import os
import pickle

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [17]:
import ipywidgets as widgets
import os

# Single ZIP upload. The following cell writes the uploaded bytes to
# "upload.zip" and extracts them, so the file picker is restricted to one
# .zip archive.
#
# The previous version of this cell also constructed a second
# `FileUpload(accept='.env', ...)` widget, but it was never assigned or
# displayed, so it had no effect and has been removed.
uploader = widgets.FileUpload(
    accept='.zip',   # only offer .zip files in the browser's file dialog
    multiple=False   # exactly one archive is expected
)
display(uploader)

FileUpload(value={}, description='Upload')

In [18]:
import zipfile

# Persist the uploaded archive to disk and unpack it next to the notebook.
if uploader.value:
    uploads = uploader.value
    if isinstance(uploads, dict):
        # ipywidgets 7.x: {filename: {"metadata": ..., "content": bytes}}
        first_upload = next(iter(uploads.values()))
    else:
        # ipywidgets 8.x: tuple of dict-like entries, one per uploaded file
        first_upload = uploads[0]
    content = first_upload['content']

    zip_path = "upload.zip"
    out_dir = "uploaded_folder"
    os.makedirs(out_dir, exist_ok=True)

    # `content` is bytes (7.x) or a memoryview (8.x); both can be written as-is.
    with open(zip_path, "wb") as f:
        f.write(content)

    # Raises zipfile.BadZipFile if the upload is not a valid ZIP archive.
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(out_dir)
    print(f"Extracted to: {out_dir}")
else:
    print("No file uploaded yet.")

Extracted to: uploaded_folder


# Load the Training Data into a DataFrame

In [42]:
DATA_PATH = "/content/uploaded_folder/data/processed/flood_training_data_csv/part-00000-dc12f89c-7065-4d09-a951-dec13b2938ce-c000.csv"

# Parse `date` into datetime64 while loading. `create_tensors` splits the data
# by comparing this column against a `pd.Timestamp`, which raises a TypeError
# when the column still holds plain strings.
df = pd.read_csv(DATA_PATH, parse_dates=["date"])

print("\n--- ✅ SUCCESS: Data Loaded ---")
# NOTE: len(df) is the number of rows in the file, not the number of distinct
# calendar days.
print(f"Total Days of Data: {len(df)}")
print(f"Columns Found: {list(df.columns)}")

# Keep a small preview and show it as the cell's rich output.
sample = df.head(5)
sample


--- ✅ SUCCESS: Data Loaded ---
Total Days of Data: 197280
Columns Found: ['date', 'subdistrict', 'district', 'latitude', 'longitude', 'rainfall', 'total_report', 'number_of_report_flood', 'target', 'year_timestamp', 'month_timestamp', 'month_sin', 'month_cos', 'API_30d', 'API_60d', 'API_90d']


Unnamed: 0,date,subdistrict,district,latitude,longitude,rainfall,total_report,number_of_report_flood,target,year_timestamp,month_timestamp,month_sin,month_cos,API_30d,API_60d,API_90d
0,2022-01-01,กระทุ่มราย,หนองจอก,13.825804,100.891577,0.0,0,0,0,2022,1,0.5,0.866025,0.0,0.0,0.0
1,2022-01-02,กระทุ่มราย,หนองจอก,13.825804,100.891577,0.0,0,0,0,2022,1,0.5,0.866025,0.0,0.0,0.0
2,2022-01-03,กระทุ่มราย,หนองจอก,13.825804,100.891577,0.0,0,0,0,2022,1,0.5,0.866025,0.0,0.0,0.0
3,2022-01-04,กระทุ่มราย,หนองจอก,13.825804,100.891577,0.0,0,0,0,2022,1,0.5,0.866025,0.0,0.0,0.0
4,2022-01-05,กระทุ่มราย,หนองจอก,13.825804,100.891577,2.5,0,0,0,2022,1,0.5,0.866025,0.5,0.5,0.5


In [43]:
# --- CONFIGURATION ---
# Hyper-parameters and runtime settings kept in one place.
# SEQ_LEN is the sliding-window length read by `create_tensors`; the other
# entries are not consumed by the cells shown here (presumably the training
# loop reads them — confirm against the rest of the notebook).
CONFIG = dict(
    SEQ_LEN=30,
    BATCH_SIZE=64,
    HIDDEN_DIM=64,
    LAYERS=2,
    DROPOUT=0.4,
    EPOCHS=50,
    PATIENCE=7,
    LR=1e-3,
    WD=1e-5,
    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    DEVICE=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

In [44]:
# --- UTILITIES ---
class EarlyStopping:
    """Stop training once the validation loss has not improved for a while.

    Call the instance once per epoch with the current validation loss and the
    model. Whenever the loss is at least as good as the best one seen so far,
    the model's ``state_dict`` is saved to ``path`` and the patience counter is
    reset. Otherwise the counter is incremented, and ``early_stop`` becomes
    True once it reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        path: File the best ``state_dict`` is written to via ``torch.save``.

    Attributes (documented here rather than as attribute docstrings):
        counter: Consecutive non-improving calls since the last improvement.
        best_loss: Best (lowest) validation loss seen so far, or None.
        early_stop: True once ``counter >= patience``.
    """

    def __init__(self, patience=7, path='best_model.pth'):
        self.patience = patience
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.path = path

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and checkpoint on improvement.

        Args:
            val_loss: Validation loss for the epoch (a number or 0-d tensor).
            model: Object exposing ``state_dict()``; saved on improvement.
        """
        # A NaN loss must never count as an improvement. Every comparison with
        # NaN is False, so the previous `val_loss > best_loss` test let NaN
        # fall through to the "improved" branch, overwriting the best
        # checkpoint and setting best_loss to NaN for good.
        is_nan = math.isnan(val_loss)
        improved = (not is_nan) and (
            self.best_loss is None or val_loss <= self.best_loss
        )

        if improved:
            self.best_loss = val_loss
            self.save_checkpoint(model)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, model):
        """Write ``model.state_dict()`` to ``self.path``."""
        torch.save(model.state_dict(), self.path)

In [45]:
# --- MODEL ---
class FloodLSTM(nn.Module, PyTorchModelHubMixin):
    """Stacked LSTM followed by a single-output linear head.

    NOTE: a later cell in this notebook defines another class with the same
    name; once that cell has run, it shadows this definition.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout):
        """
        Args:
            input_dim: Number of input features per time step.
            hidden_dim: Hidden size of the LSTM.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout probability between stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            # nn.LSTM only applies dropout between layers; passing a non-zero
            # value with a single layer has no effect and emits a warning.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one value per sequence in the batch.

        Args:
            x: Batch-first input, indexed as (batch, time, feature).

        Returns:
            Tensor of shape (batch,).
        """
        # nn.LSTM defaults the initial hidden and cell states to zeros on the
        # input's device and dtype, so they are not built by hand.
        out, _ = self.lstm(x)
        # Drop only the trailing size-1 feature axis. A bare .squeeze() would
        # also collapse a batch of one to a 0-d tensor, which no longer matches
        # a (1,)-shaped target.
        return self.fc(out[:, -1, :]).squeeze(-1)

In [46]:
def create_tensors(df):
    """Split by date, scale the features and build sliding-window arrays.

    Rows dated before 2024-01-01 form the training set and the rest the test
    set. Features are standardised with a ``StandardScaler`` fitted on the
    training rows only. For every subdistrict, each run of ``CONFIG["SEQ_LEN"]``
    consecutive rows becomes one input window, labelled with the ``target`` of
    the row that immediately follows it.

    Side effects:
        Writes the unscaled test rows to ``test_set.csv`` and the fitted
        scaler to ``scaler.pkl`` in the current working directory.

    Args:
        df: DataFrame with ``date``, ``subdistrict``, ``target`` and the
            feature columns. ``date`` may hold strings or datetimes. The input
            frame is not modified.

    Returns:
        ``((X_train, y_train), (X_test, y_test), pos_weight, n_features)``,
        where ``pos_weight`` is the negative/positive label ratio of the
        training windows.
    """
    cutoff = pd.Timestamp("2024-01-01")
    seq_len = CONFIG["SEQ_LEN"]

    # Work on a copy with a real datetime column: comparing a string column
    # against a Timestamp raises a TypeError.
    data = df.copy()
    data['date'] = pd.to_datetime(data['date'])

    train_df = data[data['date'] < cutoff].copy()
    test_df = data[data['date'] >= cutoff].copy()

    # Export Test Set for Optuna
    test_df.to_csv("test_set.csv", index=False)

    # The precipitation column was referenced as 'water', but the loaded CSV
    # exposes it as 'rainfall'. Keep 'water' when a frame has it, so existing
    # callers behave as before, and fall back to 'rainfall' otherwise.
    rain_col = 'water' if 'water' in data.columns else 'rainfall'
    features = [rain_col, 'total_report', 'API_30d', 'API_60d', 'API_90d',
                'month_sin', 'month_cos', 'latitude', 'longitude']

    # Fit on the training rows only so no test statistics leak into the scaler.
    scaler = StandardScaler()
    train_df[features] = scaler.fit_transform(train_df[features])
    test_df[features] = scaler.transform(test_df[features])

    with open("scaler.pkl", "wb") as f:
        pickle.dump(scaler, f)

    def _slide(sub_df):
        """Build (windows, next-step targets) per subdistrict."""
        X, y = [], []
        for _, g in sub_df.groupby('subdistrict'):
            # Windows are only meaningful in chronological order. A stable sort
            # leaves already-ordered groups untouched.
            g = g.sort_values('date', kind='mergesort')
            v = g[features].values
            t = g['target'].values
            # Groups with <= seq_len rows give an empty range and are skipped.
            for i in range(len(v) - seq_len):
                X.append(v[i : i + seq_len])
                y.append(t[i + seq_len])
        return np.array(X), np.array(y)

    X_tr, y_tr = _slide(train_df)
    X_te, y_te = _slide(test_df)

    # Vectorised count of positives; the epsilon guards against division by
    # zero when the training windows contain no positive label.
    n_pos = float(y_tr.sum())
    pos_weight = (len(y_tr) - n_pos) / (n_pos + 1e-5)
    return (X_tr, y_tr), (X_te, y_te), pos_weight, len(features)

In [47]:
"""
src/ds/model.py
---------------
FloodLSTM Model Architecture.
Inherits from PyTorchModelHubMixin for easy Hugging Face integration.
"""
import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin

class FloodLSTM(nn.Module, PyTorchModelHubMixin):
    """Stacked LSTM followed by a single-output linear head."""

    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int, dropout: float):
        """
        Args:
            input_dim (int): Number of input features.
            hidden_dim (int): Number of hidden units in LSTM.
            num_layers (int): Number of LSTM layers.
            dropout (float): Dropout probability.
        """
        super().__init__()
        # The former `self.save_hyperparameters()` call was removed: that is a
        # PyTorch Lightning method which neither nn.Module nor
        # PyTorchModelHubMixin defines, so it raised AttributeError on
        # instantiation.
        # NOTE(review): recent huggingface_hub releases record the __init__
        # arguments for the Hub config automatically — confirm for the pinned
        # version.

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # nn.LSTM only applies dropout between stacked layers; a non-zero
            # value with a single layer has no effect and emits a warning.
            dropout=dropout if num_layers > 1 else 0.0,
        )

        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one value per sequence in the batch.

        Args:
            x: Batch-first input, indexed as (batch, seq_len, feature).

        Returns:
            Tensor of shape (batch,).
        """
        # Forward pass. nn.LSTM defaults the initial hidden and cell states to
        # zeros on the input's device and dtype, so they are not built by hand.
        # out shape: (batch, seq_len, hidden_dim)
        out, _ = self.lstm(x)

        # Decode the hidden state of the LAST time step
        # out[:, -1, :] shape: (batch, hidden_dim)
        out = self.fc(out[:, -1, :])

        # Remove only the trailing size-1 axis: (batch, 1) -> (batch,).
        # A bare .squeeze() would also collapse a batch of one to a 0-d tensor,
        # which no longer matches a (1,)-shaped target.
        return out.squeeze(-1)