### Tutorial for training EHR-M-GAN (Version Corrig√©e)

Reference: "Generating Synthetic Mixed-type Longitudinal Electronic Health Records for Artificial Intelligent". https://arxiv.org/abs/2112.12047

In this work, we propose a generative adversarial network (GAN) entitled EHR-M-GAN which simultaneously synthesizes mixed-type timeseries EHR data (e.g., continuous-valued timeseries and discrete-valued timeseries). EHR-M-GAN is capable of capturing the multidimensional, heterogeneous, and correlated temporal dynamics in patient trajectories.

---
**CORRECTIONS APPORT√âES :**
- ‚úÖ Ajout du chemin du projet au PYTHONPATH
- ‚úÖ Chemins de donn√©es corrig√©s pour Windows
- ‚úÖ Compatibilit√© TensorFlow 1.x/2.x
- ‚úÖ V√©rifications des fichiers avant chargement
- ‚úÖ Nom de fichier corrig√© (statics.pkl)

#### Configuration de l'environnement

In [3]:
import os
import sys

# Naviguer vers le projet
PROJECT_PATH = r"D:\work\ehrMGAN-main"
os.chdir(PROJECT_PATH)

# Ajouter au PYTHONPATH
if PROJECT_PATH not in sys.path:
    sys.path.insert(0, PROJECT_PATH)

# V√©rifier
assert os.path.exists('networks.py'), "‚ùå networks.py introuvable!"
print(f"‚úÖ Projet configur√©: {PROJECT_PATH}")

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\work\\ehrMGAN-main'

In [1]:
import sys
import os

# IMPORTANT: Ajuster ce chemin selon votre installation
# Pour Windows: r"D:\work\ehrMGAN-main"
# Pour Linux/Mac: "/path/to/ehrMGAN-main"
PROJECT_PATH = r"D:\work\ehrMGAN-main"

# Ajouter le chemin du projet au PYTHONPATH pour importer les modules
if PROJECT_PATH not in sys.path:
    sys.path.insert(0, PROJECT_PATH)

print(f"‚úÖ Chemin du projet ajout√©: {PROJECT_PATH}")
print(f"‚úÖ R√©pertoire de travail actuel: {os.getcwd()}")

‚úÖ Chemin du projet ajout√©: D:\work\ehrMGAN-main
‚úÖ R√©pertoire de travail actuel: /content


#### Necessary packages and functions call

In [2]:
import tensorflow as tf
import numpy as np
import pickle

# V√©rifier la version de TensorFlow
print(f"TensorFlow version: {tf.__version__}")

# Import des modules du projet
try:
    from networks import C_VAE_NET, D_VAE_NET, C_GAN_NET, D_GAN_NET
    from m3gan import m3gan
    from utils import renormlizer
    print("‚úÖ Tous les modules ont √©t√© import√©s avec succ√®s!")
except ImportError as e:
    print(f"‚ùå Erreur d'import: {e}")
    print("V√©rifiez que vous √™tes dans le bon r√©pertoire et que tous les fichiers sont pr√©sents.")
    raise

TensorFlow version: 2.19.0
‚ùå Erreur d'import: No module named 'networks'
V√©rifiez que vous √™tes dans le bon r√©pertoire et que tous les fichiers sont pr√©sents.


ModuleNotFoundError: No module named 'networks'

In [None]:
# Configuration des chemins de donn√©es
# IMPORTANT: Ajuster ces chemins selon votre configuration
DATA_PATH = os.path.join(PROJECT_PATH, "data", "real", "eicu")
CHECKPOINT_DIR = os.path.join(PROJECT_PATH, "data", "checkpoint")
OUTPUT_DIR = os.path.join(PROJECT_PATH, "data", "fake")

print(f"üìÅ Chemin des donn√©es: {DATA_PATH}")
print(f"üíæ Chemin des checkpoints: {CHECKPOINT_DIR}")
print(f"üìä Chemin de sortie: {OUTPUT_DIR}")

# Cr√©er les r√©pertoires s'ils n'existent pas
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
print("‚úÖ R√©pertoires v√©rifi√©s/cr√©√©s")

#### Import dataset
  - MIMIC-III: https://physionet.org/content/mimiciii/1.4/
  - eICU-CRD: https://physionet.org/content/eicu-crd/2.0/ (used in this tutorial)
  - HiRID: https://physionet.org/content/hirid/1.1.1/

In [None]:
# V√©rifier l'existence des fichiers avant de les charger
required_files = [
    'vital_sign_24hrs.pkl',
    'med_interv_24hrs.pkl',
    'statics.pkl'  # Corrig√©: √©tait 'statics_cond.pkl'
]

print("V√©rification des fichiers de donn√©es...")
for filename in required_files:
    filepath = os.path.join(DATA_PATH, filename)
    if os.path.exists(filepath):
        print(f"  ‚úÖ {filename} trouv√©")
    else:
        print(f"  ‚ùå {filename} MANQUANT √† {filepath}")
        print(f"\n‚ö†Ô∏è  ERREUR: Fichiers de donn√©es manquants!")
        print(f"Veuillez vous assurer que les fichiers suivants existent dans {DATA_PATH}:")
        for f in required_files:
            print(f"  - {f}")
        raise FileNotFoundError(f"Fichier manquant: {filename}")

In [None]:
# Charger les donn√©es pour l'entra√Ænement du GAN
print("Chargement des donn√©es...")

with open(os.path.join(DATA_PATH, 'vital_sign_24hrs.pkl'), 'rb') as f:
    vital_labs_3D = pickle.load(f)
    print(f"  ‚úÖ vital_sign_24hrs.pkl charg√© - Shape: {vital_labs_3D.shape}")

with open(os.path.join(DATA_PATH, 'med_interv_24hrs.pkl'), 'rb') as f:
    medical_interv_3D = pickle.load(f)
    print(f"  ‚úÖ med_interv_24hrs.pkl charg√© - Shape: {medical_interv_3D.shape}")

with open(os.path.join(DATA_PATH, 'statics.pkl'), 'rb') as f:
    statics = pickle.load(f)
    print(f"  ‚úÖ statics.pkl charg√© - Type: {type(statics)}")

print("\n‚úÖ Toutes les donn√©es ont √©t√© charg√©es avec succ√®s!")

In [None]:
continuous_x = vital_labs_3D
discrete_x = medical_interv_3D

print(f"Donn√©es continues (vital signs): {continuous_x.shape}")
print(f"Donn√©es discr√®tes (interventions m√©dicales): {discrete_x.shape}")

#### Define network parameters

In [None]:
# Param√®tres des s√©ries temporelles
time_steps = continuous_x.shape[1]
c_dim = continuous_x.shape[2]
d_dim = discrete_x.shape[2]
no_gen = continuous_x.shape[0]

print(f"üìä Param√®tres des donn√©es:")
print(f"  - Time steps: {time_steps}")
print(f"  - Continuous dimensions: {c_dim}")
print(f"  - Discrete dimensions: {d_dim}")
print(f"  - Number of samples: {no_gen}")

In [None]:
# Param√®tres de la phase VAE (pr√©-entra√Ænement)
c_noise_dim = 5
c_z_size = 100
d_noise_dim = 5
d_z_size = 100

print(f"üîß Param√®tres VAE:")
print(f"  - C noise dim: {c_noise_dim}")
print(f"  - C latent size: {c_z_size}")
print(f"  - D noise dim: {d_noise_dim}")
print(f"  - D latent size: {d_z_size}")

In [None]:
# Construire les r√©seaux VAE et GAN
print("Construction des r√©seaux neuronaux...")

c_vae = C_VAE_NET(x_dim=c_dim, z_dim=c_z_size, time_steps=time_steps)
print("  ‚úÖ C_VAE_NET construit")

c_gan = C_GAN_NET(x_dim=c_dim, z_dim=c_z_size, time_steps=time_steps)
print("  ‚úÖ C_GAN_NET construit")

d_vae = D_VAE_NET(x_dim=d_dim, z_dim=d_z_size, time_steps=time_steps)
print("  ‚úÖ D_VAE_NET construit")

d_gan = D_GAN_NET(x_dim=d_dim, z_dim=d_z_size, time_steps=time_steps)
print("  ‚úÖ D_GAN_NET construit")

print("\n‚úÖ Tous les r√©seaux ont √©t√© construits avec succ√®s!")

#### Define training parameters

In [None]:
# Param√®tres d'entra√Ænement
batch_size = 128
num_pre_epochs = 100  # Pr√©-entra√Ænement VAE
num_epochs = 500      # Entra√Ænement GAN complet

# Fr√©quences de sauvegarde
epoch_ckpt_freq = 100  # Sauvegarder un checkpoint tous les 100 epochs
epoch_loss_freq = 10   # Afficher les pertes tous les 10 epochs

print(f"‚öôÔ∏è Param√®tres d'entra√Ænement:")
print(f"  - Batch size: {batch_size}")
print(f"  - Pre-training epochs: {num_pre_epochs}")
print(f"  - Training epochs: {num_epochs}")
print(f"  - Checkpoint frequency: {epoch_ckpt_freq}")
print(f"  - Loss display frequency: {epoch_loss_freq}")

In [None]:
# Rounds pour discriminateur, g√©n√©rateur et VAE
d_rounds = 1
g_rounds = 3
v_rounds = 1

print(f"üîÑ Rounds d'entra√Ænement:")
print(f"  - Discriminator rounds: {d_rounds}")
print(f"  - Generator rounds: {g_rounds}")
print(f"  - VAE rounds: {v_rounds}")

In [None]:
# Learning rates
v_lr_pre = 1e-3  # Learning rate pour pr√©-entra√Ænement VAE
v_lr = 1e-4      # Learning rate pour VAE pendant GAN
g_lr = 1e-4      # Learning rate pour g√©n√©rateur
d_lr = 1e-4      # Learning rate pour discriminateur

print(f"üìà Learning rates:")
print(f"  - VAE pre-training: {v_lr_pre}")
print(f"  - VAE: {v_lr}")
print(f"  - Generator: {g_lr}")
print(f"  - Discriminator: {d_lr}")

In [None]:
# Coefficients de perte
alpha_re = 1.0   # Reconstruction loss
alpha_kl = 1.0   # KL divergence
alpha_mt = 1.0   # Marginal temporal loss
alpha_ct = 1.0   # Conditional temporal loss
alpha_sm = 1.0   # Semantic loss

c_beta_adv = 1.0  # Adversarial loss pour continuous
c_beta_fm = 10.0  # Feature matching loss pour continuous
d_beta_adv = 1.0  # Adversarial loss pour discrete
d_beta_fm = 10.0  # Feature matching loss pour discrete

print(f"‚öñÔ∏è Coefficients de perte:")
print(f"  VAE:")
print(f"    - Reconstruction (alpha_re): {alpha_re}")
print(f"    - KL divergence (alpha_kl): {alpha_kl}")
print(f"    - Marginal temporal (alpha_mt): {alpha_mt}")
print(f"    - Conditional temporal (alpha_ct): {alpha_ct}")
print(f"    - Semantic (alpha_sm): {alpha_sm}")
print(f"  GAN Continuous:")
print(f"    - Adversarial (c_beta_adv): {c_beta_adv}")
print(f"    - Feature matching (c_beta_fm): {c_beta_fm}")
print(f"  GAN Discrete:")
print(f"    - Adversarial (d_beta_adv): {d_beta_adv}")
print(f"    - Feature matching (d_beta_fm): {d_beta_fm}")

In [None]:
# Chemin du checkpoint
checkpoint_dir = CHECKPOINT_DIR
print(f"üíæ Checkpoint directory: {checkpoint_dir}")

#### Train the model

**IMPORTANT:** Ce code g√®re automatiquement la compatibilit√© entre TensorFlow 1.x et 2.x

In [None]:
# V√©rifier la version de TensorFlow et configurer en cons√©quence
tf_version = int(tf.__version__.split('.')[0])
print(f"\nüîç TensorFlow version d√©tect√©e: {tf.__version__} (v{tf_version})")

if tf_version == 1:
    print("‚úÖ Utilisation de TensorFlow 1.x")
    # Code pour TensorFlow 1.x (original)
    tf.reset_default_graph()
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True
    
    with tf.Session(config=run_config) as sess:
        model = m3gan(sess=sess,
                      batch_size=batch_size,
                      time_steps=time_steps,
                      num_pre_epochs=num_pre_epochs,
                      num_epochs=num_epochs,
                      checkpoint_dir=checkpoint_dir,
                      epoch_ckpt_freq=epoch_ckpt_freq,
                      epoch_loss_freq=epoch_loss_freq,
                      # params for c
                      c_dim=c_dim, c_noise_dim=c_noise_dim,
                      c_z_size=c_z_size, c_data_sample=continuous_x,
                      c_vae=c_vae, c_gan=c_gan,
                      # params for d
                      d_dim=d_dim, d_noise_dim=d_noise_dim,
                      d_z_size=d_z_size, d_data_sample=discrete_x,
                      d_vae=d_vae, d_gan=d_gan,
                      # params for training
                      d_rounds=d_rounds, g_rounds=g_rounds, v_rounds=v_rounds,
                      v_lr_pre=v_lr_pre, v_lr=v_lr, g_lr=g_lr, d_lr=d_lr,
                      alpha_re=alpha_re, alpha_kl=alpha_kl, alpha_mt=alpha_mt, 
                      alpha_ct=alpha_ct, alpha_sm=alpha_sm,
                      c_beta_adv=c_beta_adv, c_beta_fm=c_beta_fm, 
                      d_beta_adv=d_beta_adv, d_beta_fm=d_beta_fm)
        model.build()
        model.train()
        
elif tf_version == 2:
    print("‚ö†Ô∏è  TensorFlow 2.x d√©tect√©")
    print("Ce code a √©t√© √©crit pour TensorFlow 1.x")
    print("\nOptions:")
    print("  1. Installer TensorFlow 1.15: pip install tensorflow==1.15")
    print("  2. Utiliser le mode de compatibilit√© TF2:")
    print("")
    print("import tensorflow.compat.v1 as tf")
    print("tf.disable_v2_behavior()")
    print("")
    
    # Tentative d'utiliser le mode de compatibilit√©
    try:
        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()
        print("\n‚úÖ Mode de compatibilit√© TF1 activ√©")
        
        tf.reset_default_graph()
        run_config = tf.ConfigProto()
        run_config.gpu_options.allow_growth = True
        
        with tf.Session(config=run_config) as sess:
            model = m3gan(sess=sess,
                          batch_size=batch_size,
                          time_steps=time_steps,
                          num_pre_epochs=num_pre_epochs,
                          num_epochs=num_epochs,
                          checkpoint_dir=checkpoint_dir,
                          epoch_ckpt_freq=epoch_ckpt_freq,
                          epoch_loss_freq=epoch_loss_freq,
                          # params for c
                          c_dim=c_dim, c_noise_dim=c_noise_dim,
                          c_z_size=c_z_size, c_data_sample=continuous_x,
                          c_vae=c_vae, c_gan=c_gan,
                          # params for d
                          d_dim=d_dim, d_noise_dim=d_noise_dim,
                          d_z_size=d_z_size, d_data_sample=discrete_x,
                          d_vae=d_vae, d_gan=d_gan,
                          # params for training
                          d_rounds=d_rounds, g_rounds=g_rounds, v_rounds=v_rounds,
                          v_lr_pre=v_lr_pre, v_lr=v_lr, g_lr=g_lr, d_lr=d_lr,
                          alpha_re=alpha_re, alpha_kl=alpha_kl, alpha_mt=alpha_mt, 
                          alpha_ct=alpha_ct, alpha_sm=alpha_sm,
                          c_beta_adv=c_beta_adv, c_beta_fm=c_beta_fm, 
                          d_beta_adv=d_beta_adv, d_beta_fm=d_beta_fm)
            model.build()
            model.train()
    except Exception as e:
        print(f"\n‚ùå Erreur avec le mode de compatibilit√©: {e}")
        print("\nVeuillez installer TensorFlow 1.15:")
        print("  pip uninstall tensorflow")
        print("  pip install tensorflow==1.15")
else:
    raise ValueError(f"Version TensorFlow non support√©e: {tf.__version__}")

#### V√©rification des r√©sultats

In [None]:
# V√©rifier que les donn√©es synth√©tiques ont √©t√© g√©n√©r√©es
import glob

synthetic_files = glob.glob(os.path.join(OUTPUT_DIR, "epoch*/gen_data.npz"))
print(f"\nüìä Fichiers de donn√©es synth√©tiques g√©n√©r√©s: {len(synthetic_files)}")

if synthetic_files:
    print("\n‚úÖ Entra√Ænement termin√© avec succ√®s!")
    print("\nFichiers g√©n√©r√©s:")
    for f in sorted(synthetic_files):
        print(f"  - {os.path.basename(os.path.dirname(f))}/gen_data.npz")
else:
    print("\n‚ö†Ô∏è  Aucun fichier de donn√©es synth√©tiques trouv√©")
    print("V√©rifiez que l'entra√Ænement s'est termin√© correctement")