### Mount Storage, Import Libraries

In [7]:
import json
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
from dataclasses import dataclass

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
########## For Colab ##########
# !pip install ts2vec
from ts2vec import TS2Vec

########## Personal ##########
# from google.colab import drive
# drive.mount('/content/drive')
# with open('/content/drive/MyDrive/dataset_hist.json') as f:
#     content = f.read()
#     data = json.loads(content)

########## Enterprise ##########
# import gcsfs
# fs = gcsfs.GCSFileSystem()
# with fs.open('gs://modoo-eod/users/datasets/dataset_hist.json') as f:
#     content = f.read()
#     data = json.loads(content)

########## Local ##########
# Parse the JSON directly from the file handle so the raw file text is not kept
# bound at notebook scope, and pin the encoding so the read does not depend on
# the platform's default locale.
with open("../../datasets/dataset_hist.json", encoding="utf-8") as f:
    data = json.load(f)

### Data Cleaning

In [None]:
df = pd.DataFrame.from_records(data)
print(len(df), "Measurements")

# Imputation values: mean of the observed (non-NaN) entries at positions 0 and 1
# of every `static` vector (named age / BMI by the variables below).
# NOTE(review): the means are taken over every record, i.e. before the
# train/test split and including records that are dropped for empty windows —
# confirm that this is intended.
age_mean = np.mean([s[0] for s in df['static'] if pd.notna(s[0])])
bmi_mean = np.mean([s[1] for s in df['static'] if pd.notna(s[1])])

cleaned_data = []
# Iterate the records directly: the previous `for _, m in enumerate(data)` never
# used the index and rebound `_`, which IPython uses for the last cell output.
for m in data:

    # Remove measurements with empty windows
    if len(m['uc_windows']) == 0 or len(m['fhr_windows']) == 0:
        continue

    # Handle NaN values (work on a copy so the raw record stays untouched)
    static = list(m['static'])
    if pd.isna(static[0]):
        static[0] = age_mean
    if pd.isna(static[1]):
        static[1] = bmi_mean

    # Shallow copy of the record with the imputed static vector swapped in.
    cleaned_data.append({**m, 'static': static})

cleaned_df = pd.DataFrame(cleaned_data)
print(len(cleaned_df), "Cleaned Measurements")

### Train-Test Split (Stratified)

In [None]:
# Fixed seed so the split (and everything computed from it) is reproducible
# across kernel restarts.
SPLIT_SEED = 42

# Draw 80% of the rows inside each `gest_age_weeks` group. GroupBy.sample keeps
# the original row labels and every column, and avoids the deprecated
# `include_groups=True` path of GroupBy.apply.
df_train = cleaned_df.groupby("gest_age_weeks").sample(
    frac=0.8,
    random_state=SPLIT_SEED,
)

# Every row that was not drawn for training goes to the test split. This relies
# on the index labels of `cleaned_df` being unique.
df_test = cleaned_df.drop(df_train.index)

In [None]:
# Turn each split into a list of per-row dicts (one dict per measurement).
train, test = (
    split.to_dict(orient='records')
    for split in (df_train, df_test)
)

### Pre-Compute TS2Vec Embeddings

In [None]:
def _stack_series(records, key):
    """Stack `record[key]` of every record into one array with a trailing feature axis.

    The result is laid out as n_instances x n_timestamps x n_features (with a
    single feature). Assumes every series under `key` has the same length, so
    that `np.array` yields a 2-D array — TODO confirm against the dataset.
    """
    return np.expand_dims(np.array([r[key] for r in records]), 2)


def _fit_ts2vec(train_series, device):
    """Create a TS2Vec encoder and fit it on the given training series.

    Uses the library's default training schedule. Without this step the encoder
    keeps its random initial weights and the embeddings carry no learned
    structure.
    """
    encoder = TS2Vec(
        input_dims = 1,
        output_dims = 320,
        device = device,
        batch_size = 32
    )
    encoder.fit(train_series)
    return encoder


# n_instances x n_timestamps x n_features
train_uc = _stack_series(train, 'uc_raw')
train_fhr = _stack_series(train, 'fhr_raw')
test_uc  = _stack_series(test, 'uc_raw')
test_fhr = _stack_series(test, 'fhr_raw')

print("Train has shape", train_fhr.shape)
print("Test has shape ", test_fhr.shape)

# Use the first CUDA device when one exists, otherwise fall back to the CPU
# instead of failing on machines without a GPU.
ts2vec_device = 0 if torch.cuda.is_available() else "cpu"

# One encoder per signal, fitted on the training split only so that no test
# data influences the learned representation.
uc_ts_model  = _fit_ts2vec(train_uc, ts2vec_device)
fhr_ts_model = _fit_ts2vec(train_fhr, ts2vec_device)

train_uc_embed    = uc_ts_model.encode(train_uc, encoding_window="full_series")
train_fhr_embed   = fhr_ts_model.encode(train_fhr, encoding_window="full_series")

test_uc_embed     = uc_ts_model.encode(test_uc, encoding_window="full_series")
test_fhr_embed    = fhr_ts_model.encode(test_fhr, encoding_window="full_series")

# Replace each raw series with its embedding, in place.
# NOTE: this overwrites 'uc_raw' / 'fhr_raw', so the cell must not be re-run
# without first rebuilding `train` / `test` from the data frames.
for records, key, embeddings in (
    (train, 'uc_raw', train_uc_embed),
    (train, 'fhr_raw', train_fhr_embed),
    (test, 'uc_raw', test_uc_embed),
    (test, 'fhr_raw', test_fhr_embed),
):
    for record, embedding in zip(records, embeddings):
        record[key] = embedding

### Aggregate Windows

In [None]:
def aggregate_windows(records):
    """Replace each record's window lists with their per-feature mean, in place.

    For the keys 'uc_windows' and 'fhr_windows', every window (a dict) is turned
    into the list of its values, the windows are stacked into a float32 tensor
    and averaged over the window axis. Entries that are already tensors are
    skipped, so running the cell a second time is a no-op rather than an error.

    Assumes every window dict of a record lists its features in the same order
    — TODO confirm against the dataset.
    """
    for record in records:
        for key in ('uc_windows', 'fhr_windows'):
            windows = record[key]
            if isinstance(windows, torch.Tensor):
                # Already aggregated by an earlier run of this cell.
                continue
            stacked = torch.tensor(
                [list(w.values()) for w in windows],
                dtype=torch.float32,
            )
            record[key] = stacked.mean(dim=0)


aggregate_windows(train)
aggregate_windows(test)

### Dataset, Data Loader

In [1]:
class PatientDataset(Dataset):
    """Thin `Dataset` wrapper around an indexable collection of measurements.

    Items are returned exactly as stored; no conversion or copying happens
    here.
    """

    def __init__(self, measurements):
        """Keep a reference to `measurements` (anything supporting len() and indexing)."""
        self.measurements = measurements

    def __len__(self):
        """Number of stored measurements."""
        return len(self.measurements)

    def __getitem__(self, idx):
        """Return the measurement stored at position `idx`, unchanged."""
        return self.measurements[idx]

NameError: name 'Dataset' is not defined

In [None]:
def patient_collate_fn(batch):
    """Collate a list of measurement dicts into one dict of batched tensors.

    'uc_raw', 'fhr_raw', 'static' and 'target' are converted to float32 tensors
    and stacked along a new leading batch axis. 'uc_windows' and 'fhr_windows'
    are stacked as they are, so they must already be tensors.
    """

    def stack_as_float(key):
        # Convert every sample's entry to float32, then stack along dim 0.
        return torch.stack(
            [torch.tensor(sample[key], dtype=torch.float32) for sample in batch]
        )

    converted = {
        key: stack_as_float(key)
        for key in ('uc_raw', 'fhr_raw', 'static', 'target')
    }
    windows = {
        key: torch.stack([sample[key] for sample in batch])
        for key in ('uc_windows', 'fhr_windows')
    }

    return {
        'uc_raw'      : converted['uc_raw'],
        'fhr_raw'     : converted['fhr_raw'],
        'uc_windows'  : windows['uc_windows'],
        'fhr_windows' : windows['fhr_windows'],
        'static'      : converted['static'],
        'target'      : converted['target'],
    }

### Model Configuration

In [2]:
@dataclass
class ModelCfg:
    """Layer widths for the fused regressor and its sub-encoders.

    Field order is part of the interface (positional construction), so new
    fields should be appended rather than inserted.
    """

    # --- Raw UC/FHR: input / output width of the TS2Vec embeddings ---
    ts2vec_in_dim: int = 1
    ts2vec_out_dim: int = 320

    # --- FHR windows: input, hidden and output width of the encoder ---
    fhr_in_dim: int = 24
    fhr_hidden_dim: int = 64
    fhr_out_dim: int = 32

    # --- UC windows: input, hidden and output width of the encoder ---
    uc_in_dim: int = 20
    uc_hidden_dim: int = 64
    uc_out_dim: int = 32

    # --- Static features: input, hidden and output width of the encoder ---
    stat_in_dim: int = 8
    stat_hidden_dim: int = 64
    stat_out_dim: int = 32

    # --- Fused regressor: width of the first hidden layer of the head ---
    fuse_hidden_dim: int = 512

NameError: name 'dataclass' is not defined

### Model Modules

In [3]:
class StaticEncoder(nn.Module):
    """Two-layer MLP (Linear -> ReLU -> Linear -> ReLU) for the static feature vector."""

    def __init__(self, in_dim, hidden_dim, out_dim):
        """Build the MLP mapping `in_dim` -> `hidden_dim` -> `out_dim`."""
        super().__init__()

        layers = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
            nn.ReLU(),
        ]
        # Positional Sequential keeps the parameter names `net.0.*` / `net.2.*`.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Encode `x` (last axis of size in_dim) into a non-negative (..., out_dim) tensor."""
        encoded = self.net(x)
        return encoded

NameError: name 'nn' is not defined

In [None]:
class WindowsEncoder(nn.Module):
    """Two-layer MLP (Linear -> ReLU -> Linear -> ReLU) for an aggregated window vector."""

    def __init__(self, in_dim, hidden_dim, out_dim):
        """Build the MLP mapping `in_dim` -> `hidden_dim` -> `out_dim`."""
        super().__init__()

        layers = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
            nn.ReLU(),
        ]
        # Positional Sequential keeps the parameter names `mlp.0.*` / `mlp.2.*`.
        self.mlp = nn.Sequential(*layers)

    def forward(self, window):
        """Encode `window` of shape (B, in_dim) into a (B, out_dim) tensor."""
        return self.mlp(window)

### Main Model Class

In [None]:
class FusedRegressor(nn.Module):
    """Regress one scalar per sample from fused raw-signal, window and static features.

    The pre-computed UC / FHR embeddings are concatenated with the encoded UC
    windows, FHR windows and static features, then passed through an MLP head
    that ends in a single output unit.
    """

    def __init__(self, cfg: ModelCfg):
        """Build the three sub-encoders and the fusion head from `cfg`."""

        super().__init__()

        self.cfg = cfg

        # UC Window Encoder: (B, uc_in_dim) -> (B, uc_out_dim)
        self.uc_win_encoder = WindowsEncoder(
            in_dim=self.cfg.uc_in_dim,
            hidden_dim=self.cfg.uc_hidden_dim,
            out_dim=self.cfg.uc_out_dim,
        )

        # FHR Window Encoder: (B, fhr_in_dim) -> (B, fhr_out_dim)
        self.fhr_win_encoder = WindowsEncoder(
            in_dim=self.cfg.fhr_in_dim,
            hidden_dim=self.cfg.fhr_hidden_dim,
            out_dim=self.cfg.fhr_out_dim,
        )

        # Static Encoder: (B, stat_in_dim) -> (B, stat_out_dim)
        self.static_encoder = StaticEncoder(
            in_dim=self.cfg.stat_in_dim,
            hidden_dim=self.cfg.stat_hidden_dim,
            out_dim=self.cfg.stat_out_dim,
        )

        # Width of the concatenated feature vector; the raw UC and FHR
        # embeddings each contribute ts2vec_out_dim columns.
        fused_dim = (
            2 * self.cfg.ts2vec_out_dim
            + self.cfg.uc_out_dim
            + self.cfg.fhr_out_dim
            + self.cfg.stat_out_dim
        )

        # Head that halves its width at every hidden layer and ends in one unit.
        hidden = self.cfg.fuse_hidden_dim
        self.fusion = nn.Sequential(
            nn.Linear(fused_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden//2),
            nn.ReLU(),
            nn.Linear(hidden//2, hidden//4),
            nn.ReLU(),
            nn.Linear(hidden//4, hidden//8),
            nn.ReLU(),
            nn.Linear(hidden//8, 1)
        )

    def forward(self, batch):
        """Predict one value per sample.

        Args:
            batch: dict with tensor entries 'uc_raw', 'fhr_raw', 'uc_windows',
                'fhr_windows' and 'static', each with a leading batch axis.
                Other keys are ignored.

        Returns:
            Tensor of shape (B,) with the predictions.
        """

        # Follow the module's own parameters rather than a notebook-level
        # `device` global, so the model works wherever it has been moved and
        # does not depend on the execution order of other cells.
        device = next(self.parameters()).device

        uc_raw_tensor  = batch['uc_raw'].to(device)
        uc_win_tensor  = batch['uc_windows'].to(device)
        fhr_raw_tensor = batch['fhr_raw'].to(device)
        fhr_win_tensor = batch['fhr_windows'].to(device)
        static_tensor  = batch['static'].to(device)

        # (B, uc_in_dim) -> (B, uc_out_dim)
        uc_win_emb = self.uc_win_encoder(uc_win_tensor)

        # (B, fhr_in_dim) -> (B, fhr_out_dim)
        fhr_win_emb = self.fhr_win_encoder(fhr_win_tensor)

        # (B, stat_in_dim) -> (B, stat_out_dim)
        static_emb = self.static_encoder(static_tensor)

        # (B, fused_dim); all inputs already live on `device`.
        fused = torch.cat(
            [uc_raw_tensor, fhr_raw_tensor, uc_win_emb, fhr_win_emb, static_emb],
            dim=-1,
        )

        # (B, 1)
        preds = self.fusion(fused)

        # Drop the trailing unit axis so the output lines up with (B,) targets.
        return preds.squeeze(1)


### Training and Evaluation Functions

In [4]:
def train_one_epoch(model, loader, optimiser, criterion, device):
    """Run one optimisation pass over `loader` and return the mean batch loss.

    Args:
        model: module called as `model(batch)` to produce predictions.
        loader: iterable of batch dicts; each must contain a 'target' tensor.
        optimiser: optimiser stepping `model`'s parameters.
        criterion: loss called as `criterion(pred, target)`.
        device: device the targets are moved to before computing the loss.

    Returns:
        Unweighted average of the per-batch loss values.

    Raises:
        ZeroDivisionError: if `loader` yields no batches.
    """
    model.train()
    batch_losses = []

    for batch in loader:
        target = batch['target'].to(device)

        optimiser.zero_grad()
        loss = criterion(model(batch), target)
        batch_losses.append(loss.item())

        loss.backward()
        # Cap the global gradient norm at 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimiser.step()

    return sum(batch_losses, 0.0) / len(batch_losses)

In [5]:
@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Compute the mean batch loss of `model` over `loader` without tracking gradients.

    Args:
        model: module called as `model(batch)` to produce predictions.
        loader: iterable of batch dicts; each must contain a 'target' tensor.
        criterion: loss called as `criterion(pred, target)`.
        device: device the targets are moved to before computing the loss.

    Returns:
        Unweighted average of the per-batch loss values.

    Raises:
        ZeroDivisionError: if `loader` yields no batches.
    """
    model.eval()
    batch_losses = []

    for batch in loader:
        target = batch['target'].to(device)
        loss = criterion(model(batch), target)
        batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(batch_losses)

NameError: name 'torch' is not defined

### Main

In [6]:
# Run configuration: everything a reader is likely to tune, in one place.
SEED             = 42
TRAIN_BATCH_SIZE = 64
EVAL_BATCH_SIZE  = 32
NUM_WORKERS      = 4
LEARNING_RATE    = 3e-4
WEIGHT_DECAY     = 1e-5
N_EPOCHS         = 100

# Seed torch's global RNG so weight initialisation and the shuffled batch order
# are the same on every fresh run.
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

# Page-locked host memory only helps host-to-GPU copies; skip it on CPU runs.
use_pinned_memory = device.type == "cuda"

model_cfg = ModelCfg()

model = FusedRegressor(model_cfg).to(device)

train_dataset = PatientDataset(train)

train_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    collate_fn=patient_collate_fn,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory
)

eval_dataset = PatientDataset(test)

eval_loader = DataLoader(
    eval_dataset,
    batch_size=EVAL_BATCH_SIZE,
    shuffle=False,
    collate_fn=patient_collate_fn,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

# Mean absolute error between predictions and targets.
criterion = torch.nn.L1Loss()

for epoch in tqdm(range(N_EPOCHS)):

    train_loss = train_one_epoch(
        model,
        train_loader,
        optimizer,
        criterion,
        device
    )

    val_loss = evaluate(
        model,
        eval_loader,
        criterion,
        device
    )

    print(f"[{epoch+1:02d}] train_loss={train_loss:.4f}  val_loss={val_loss:.4f}")

NameError: name 'torch' is not defined