In [1]:
import pandas as pd
from faker import Faker
import numpy as np

# Fixed seed so a fresh kernel regenerates the same IDs, ages and vitals.
SEED = 42

fake = Faker()
Faker.seed(SEED)      # seeds the random source shared by Faker providers
np.random.seed(SEED)  # seeds the legacy NumPy global RNG used by np.random.* below
num_records = 10000

# One synthetic ICU record per row, with MIMIC-style column names.
# NOTE(review): `fake.unique` makes every SUBJECT_ID distinct, so each subject
# has exactly one row. Any per-subject sequence built from this file therefore
# has length 1 -- confirm that this is intended for a time-series experiment.
# NOTE: ADMITTIME is drawn from a window ending at 'now', so the admission
# timestamps still depend on when the cell is executed.
data = {
    'SUBJECT_ID': [fake.unique.random_number(digits=7) for _ in range(num_records)],
    'HADM_ID': [fake.unique.random_number(digits=7) for _ in range(num_records)],
    'ICUSTAY_ID': [fake.unique.random_number(digits=7) for _ in range(num_records)],
    'ADMITTIME': [fake.date_time_between(start_date='-10y', end_date='now') for _ in range(num_records)],
    'AGE': np.random.randint(18, 90, num_records),  # upper bound is exclusive: ages 18..89
    'heart_rate': np.round(np.random.normal(80, 15, num_records), 2),
    'blood_pressure': np.round(np.random.normal(120, 20, num_records), 2),
    'respiratory_rate': np.round(np.random.normal(18, 5, num_records), 2),
}

df = pd.DataFrame(data)
df['ADMITTIME'] = pd.to_datetime(df['ADMITTIME'])
# Rebind instead of sorting in place so re-running the cell is idempotent.
df = df.sort_values(by=['SUBJECT_ID', 'ADMITTIME'])
df.to_csv('synthetic_mimic.csv', index=False)


In [None]:
from tsdm.utils.data import timeseries
from tsdm.encoders import numerical
import pandas as pd
import numpy as np
import torch

df = pd.read_csv('synthetic_mimic.csv')

# Ensure datetime format
df['ADMITTIME'] = pd.to_datetime(df['ADMITTIME'])
# Per-subject observation counter (0, 1, 2, ...) following the row order of the file.
df['timestamp'] = df.groupby('SUBJECT_ID').cumcount()

vital_cols = ['heart_rate', 'blood_pressure', 'respiratory_rate']

# Simulate irregular sampling: blank out 10% of the rows in each vital-sign
# column. One seeded RandomState is shared across the loop, so each column
# loses a different set of rows while the result stays reproducible.
missing_rng = np.random.RandomState(42)
for col in vital_cols:
    df.loc[df.sample(frac=0.1, random_state=missing_rng).index, col] = np.nan

# Encode and normalize numerical features
# NOTE(review): the recorded output of this cell is an AttributeError about
# `np.singlecomplex` under NumPy 2.0; which import triggers it is not visible
# here -- check that the installed tsdm (and its dependencies) support NumPy 2,
# or pin numpy<2.
# NOTE(review): the columns contain NaNs at this point -- confirm that
# Standardizer ignores them when fitting instead of propagating NaN statistics.
encoder = numerical.Standardizer()
df[vital_cols] = encoder.fit_transform(df[vital_cols])


AttributeError: `np.singlecomplex` was removed in the NumPy 2.0 release. Use `np.complex64` instead.

In [None]:
class SyntheticIrregularDataset(torch.utils.data.Dataset):
    """Per-subject view of a long-format DataFrame of observations.

    Each item is one subject. Indexing returns ``(times, features, mask)``:

    * ``times``    -- float32 tensor holding the subject's ``time_col`` values,
    * ``features`` -- float32 tensor with one row per observation and one
      column per entry of ``features``; NaNs are replaced by 0,
    * ``mask``     -- bool tensor of the same shape as ``features``; True where
      the original value was not NaN.

    Args:
        df: DataFrame containing ``subject_col``, ``time_col`` and all feature columns.
        subject_col: Column that identifies the subject each row belongs to.
        time_col: Column holding the observation time.
        features: Names of the feature columns to return.
    """

    def __init__(
        self,
        df,
        subject_col='SUBJECT_ID',
        time_col='timestamp',
        features=['heart_rate', 'blood_pressure', 'respiratory_rate'],
    ):
        self.subject_col = subject_col
        # Subjects in order of first appearance; this defines the item order.
        self.subject_ids = df[subject_col].unique()
        self.data = df
        self.time_col = time_col
        # Copy so that neither the shared default list nor the caller's list
        # is aliased by this instance.
        self.features = list(features)
        # Row positions of every subject, computed once, so __getitem__ does
        # not have to scan the whole frame on each access.
        self._rows_by_subject = df.groupby(subject_col, sort=False).indices

    def __len__(self):
        """Return the number of distinct subjects."""
        return len(self.subject_ids)

    def __getitem__(self, idx):
        """Return ``(times, features, mask)`` for the ``idx``-th subject."""
        subject_id = self.subject_ids[idx]
        rows = self._rows_by_subject.get(subject_id)
        if rows is None:
            # Keys that groupby does not index (e.g. a NaN subject id) fall
            # back to a direct comparison on the configured subject column.
            subject_data = self.data[self.data[self.subject_col] == subject_id]
        else:
            subject_data = self.data.iloc[rows]

        times = torch.tensor(subject_data[self.time_col].values, dtype=torch.float32)
        features = torch.tensor(subject_data[self.features].values, dtype=torch.float32)

        # Record which entries were observed before the NaNs are zero-filled.
        mask = ~torch.isnan(features)
        features = torch.nan_to_num(features)

        return times, features, mask

# Wrap `df` with the default column names; each dataset item is one subject's
# (times, features, mask) triple.
dataset = SyntheticIrregularDataset(df)


In [None]:
def custom_collate(batch):
    """Pad a list of ``(times, features, mask)`` samples into batch tensors.

    Sequences shorter than the longest one in the batch are right-padded:
    times and features with 0, masks with False. The batch dimension comes
    first in every returned tensor.

    Returns:
        Tuple ``(times_padded, features_padded, masks_padded)``.
    """
    time_seqs, feature_seqs, mask_seqs = zip(*batch)
    fill_values = (0, 0, False)
    return tuple(
        torch.nn.utils.rnn.pad_sequence(seqs, batch_first=True, padding_value=fill)
        for seqs, fill in zip((time_seqs, feature_seqs, mask_seqs), fill_values)
    )

from torch.utils.data import DataLoader

# Shuffled mini-batches of 32 subjects; custom_collate pads the per-subject
# sequences in each batch to a common length.
loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=custom_collate)


In [None]:
from models.fld_icc import FLD

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Constructor arguments for the FLD network.
# NOTE(review): input_dim=3 presumably matches the three vital-sign features --
# confirm against FLD's signature.
fld_kwargs = dict(
    input_dim=3,
    latent_dim=20,
    num_heads=4,
    embed_dim=64,
    function='L',
)
model = FLD(**fld_kwargs).to(device)

# AdamW over all model parameters, trained against a mean-squared-error objective.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss()


In [None]:
# Train for 10 epochs, scoring the model only on entries that were observed
# (mask == True); zero-filled missing values and padding never enter the loss.
model.train()
for epoch in range(10):
    total_loss = 0.0
    num_batches = 0  # batches that actually contributed a loss value
    for times, features, masks in loader:
        times, features, masks = times.to(device), features.to(device), masks.to(device)

        # A batch with no observed entries would make the masked MSE a mean
        # over an empty selection (NaN), and stepping on it would corrupt the
        # weights -- skip such batches.
        if not masks.any():
            continue

        optimizer.zero_grad()
        outputs = model(times, features, masks, times)  # Using same times for simplicity

        # NOTE(review): assumes `outputs` has the same shape as `features` so
        # that both can be indexed by `masks` -- confirm against FLD.forward.
        loss = criterion(outputs[masks], features[masks])
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Average over the batches that were used; NaN if the epoch had none.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")
