In [13]:
"""If the 'o:Creatinine' and 'o:PaO2_FiO2' columns in the raw file for the
configured time step are completely empty, copy their values from the
normalized file and save the result as a new CSV.
"""

from pathlib import Path

import pandas as pd
import numpy as np

In [14]:
# Time-step size used to select the data directory and file names below
# (presumably hours, per the `dt{timestep}h` suffix — confirm).
timestep = 8
# NOTE(review): absolute, machine-specific Windows path; it must be adjusted
# before this notebook can run on another machine.
DIR = Path(rf'F:\time_step\OfflineRL_FactoredActions\RL_mimic_sepsis\data\data_asNormThreshold_dt{timestep}h')
# Normalized file: the source that column values are copied from.
NORM_PATH = DIR / f'sepsis_final_data_RAWNORM_withTimes_dt{timestep}h.csv'
# Raw file: the one whose columns are checked for emptiness and patched.
ORIGINAL_PATH  = DIR / f'sepsis_final_data_withTimes_dt{timestep}h.csv'
# Destination of the patched copy of the raw data.
OUT_PATH  = DIR / f'sepsis_final_data_FILLED_withTimes_dt{timestep}h.csv'

# Columns that are checked and, when empty in the raw file, copied over.
COLS = ['o:Creatinine', 'o:PaO2_FiO2']

# Load both tables fully into memory.
norm_df = pd.read_csv(NORM_PATH)
raw_df  = pd.read_csv(ORIGINAL_PATH)

In [15]:
def column_is_empty(col: pd.Series) -> bool:
    """Return True when a column carries no usable value at all.

    An entry counts as empty when it is missing (NaN/None/NA) or when its
    string form is blank after stripping whitespace. The column is empty only
    if every entry is empty; a zero-length column is therefore empty too.

    Parameters
    ----------
    col : pd.Series
        The column to inspect.

    Returns
    -------
    bool
        True if every entry is missing or blank, False otherwise.
    """
    missing = col.isna()
    if missing.all():
        return True

    # Test blankness only on the entries that are present: casting a missing
    # value to str yields text such as 'nan' or 'None', which would make a
    # column that mixes NaN with blank strings look populated.
    present = col[~missing]
    return bool(present.astype(str).str.strip().eq('').all())


In [16]:
# One emptiness flag per entry of COLS, evaluated on the raw frame and kept
# in the same order as COLS.
raw_columns = (raw_df[name] for name in COLS)
empty_flags = list(map(column_is_empty, raw_columns))

empty_flags

[False, False]

In [17]:
# One emptiness flag per entry of COLS, evaluated on the normalized frame and
# kept in the same order as COLS.
norm_columns = (norm_df[name] for name in COLS)
empty_flags_norm = list(map(column_is_empty, norm_columns))

empty_flags_norm

[False, False]

In [18]:

# Copy every column flagged as empty in the raw frame from the normalized
# frame, then write the patched raw frame to OUT_PATH.

# Whole columns are assigned from norm_df to raw_df, so both frames must have
# the same number of rows. The check does not depend on the column, so it runs
# once up front rather than on every loop iteration.
if len(norm_df) != len(raw_df):
    raise ValueError(
        f'row number does not meet, processed={len(norm_df)}, raw={len(raw_df)}, stop copying.'
    )

filled_any = False
for idx, should_fill in enumerate(empty_flags):
    if should_fill:
        raw_df[COLS[idx]] = norm_df[COLS[idx]]
        print(idx, should_fill)
        filled_any = True

# Write the CSV a single time after all columns are patched; saving inside the
# loop rewrote the same file once per filled column. Nothing is written when no
# column needed filling.
if filled_any:
    raw_df.to_csv(OUT_PATH, index=False)
    print('Filled and saved')
