# 実行前提
[Synthea](https://github.com/synthetichealth/synthea) で生成した CSV データセット(`patients.csv`、`observations.csv`、`conditions.csv`)が `/mnt/sample/csv/` に存在すること。

In [1]:
from datetime import datetime
import polars as pl
from pathlib import Path

from src.preprocess import Patient, Observations, Events, SickStatus, HealthCondition, EventSeq

In [2]:
# Main health-checkup items: the observation names handed to
# Observations.load_csv further down in this notebook.
obs_cols = [
    "Body Height",
    "Pain severity - 0-10 verbal numeric rating [Score] - Reported",
    "Body Weight",
    "Body Mass Index",
    "Diastolic Blood Pressure",
    "Systolic Blood Pressure",
    "Heart rate",
    "Respiratory rate",
    "Tobacco smoking status",
]

# Condition (disorder) names handed to Events.load_csv further down.
events = [
    "Metabolic syndrome X (disorder)",
    "Ischemic heart disease (disorder)",
    "Diabetic renal disease (disorder)",
    "Chronic kidney disease stage 1 (disorder)",
]

In [3]:
# Locations of the CSV inputs read by this notebook.
# NOTE: `condidion_path` (sic) keeps its original spelling because later
# cells refer to it by that exact name.
patient_path, observation_csv, condidion_path = (
    Path("/mnt/sample/csv/patients.csv"),
    Path("/mnt/sample/csv/observations.csv"),
    Path("/mnt/sample/csv/conditions.csv"),
)

In [4]:
# Load the three CSV inputs through the project loaders from src.preprocess.
# The second argument is the list of names defined earlier in the notebook;
# presumably the loaders keep only those observations / conditions —
# TODO confirm against src.preprocess.
pt = Patient.load_csv(patient_path)
obs = Observations.load_csv(observation_csv, obs_cols)
cond = Events.load_csv(condidion_path, events)

In [5]:
# Combine observations with patient data. The return value is discarded, so
# this presumably updates `obs` in place — TODO confirm in src.preprocess.
obs.join_patient(pt)
# Years 2012 through 2021 inclusive (range() excludes its stop value).
st = SickStatus(cond, list(range(2012, 2022)))
hc = HealthCondition(obs, st)
# Last expression of the cell: show the first rows of the resulting table.
hc.get_data().head()

Id,Date,Body Height,Pain severity - 0-10 verbal numeric rating [Score] - Reported,Body Weight,Body Mass Index,Diastolic Blood Pressure,Systolic Blood Pressure,Heart rate,Respiratory rate,Tobacco smoking status,year,BIRTHDATE,DEATHDATE,RACE,GENDER,Metabolic syndrome X (disorder),Ischemic heart disease (disorder),Diabetic renal disease (disorder),Chronic kidney disease stage 1 (disorder)
str,date,f32,f32,f32,f32,f32,f32,f32,f32,str,i32,date,str,str,str,i64,i64,i64,i64
"""a82104f7-4b7a-...",2013-06-20,131.199997,3.0,29.200001,17.0,94.0,131.0,100.0,14.0,"""Never smoked t...",2014,2003-06-12,,"""white""","""M""",0,0,0,0
"""a82104f7-4b7a-...",2014-06-26,135.600006,3.0,32.200001,17.5,89.0,129.0,75.0,14.0,"""Never smoked t...",2015,2003-06-12,,"""white""","""M""",0,0,0,0
"""a82104f7-4b7a-...",2015-07-02,140.699997,3.0,36.799999,18.6,92.0,125.0,99.0,14.0,"""Never smoked t...",2016,2003-06-12,,"""white""","""M""",0,0,0,0
"""a82104f7-4b7a-...",2016-07-07,147.199997,2.0,38.400002,17.700001,90.0,133.0,90.0,13.0,"""Never smoked t...",2017,2003-06-12,,"""white""","""M""",0,0,0,0
"""a82104f7-4b7a-...",2017-07-13,155.100006,0.0,46.599998,19.4,89.0,125.0,64.0,13.0,"""Never smoked t...",2018,2003-06-12,,"""white""","""M""",0,0,0,0


In [6]:
# Build an EventSeq from the same conditions CSV used for `cond` above.
es = EventSeq.load_csv(condidion_path)
# NOTE(review): maxlength=300 presumably caps the per-patient sequence
# length — confirm against EventSeq.preprocess. The return value is not
# captured here.
es.preprocess(maxlength=300)