# Acute Leukemia 30-Day Readmission
Demo: train the 30-day readmission model and score one example patient using `src/readmit_pipeline.py` on the bundled sample data in `data/sample`.

In [None]:
import os, sys, json
import pandas as pd
# Resolve the project root: the parent directory when the kernel's working
# directory is named `notebooks`, otherwise the current working directory.
base = os.path.abspath('..') if os.path.basename(os.getcwd()) == 'notebooks' else os.path.abspath('.')

# Make `src/` importable so the pipeline module can be loaded by name.
src_path = os.path.join(base, 'src')
if src_path not in sys.path:
    sys.path.append(src_path)

# Import only the pipeline entry points this notebook calls, instead of a
# wildcard import, so it is clear where each name comes from.
from readmit_pipeline import build_dataset, predict_one, train_and_save

# Locations of the bundled sample inputs used by the cells below.
data_dir = os.path.join(base, 'data', 'sample')
structured_csv = os.path.join(data_dir, 'structured.csv')
notes_csv = os.path.join(data_dir, 'notes.csv')
structured_csv, notes_csv

In [2]:
# Load the structured sample file and show its first rows.
structured_preview = pd.read_csv(structured_csv)
structured_preview.head()

Unnamed: 0,patient_id,discharge_date,readmit_30d,anc,wbc,hemoglobin,platelets,creatinine,alt,ast,...,prior_30d_admits,transfusion_count,infection_count,age,sex,race,ethnicity,insurance,chemo_regimen,discharge_dayofweek
0,P001,2025-06-01,1,200,12.1,8.2,45,1.1,30,28,...,1,2,1,63,M,White,Non-Hispanic,Medicare,7+3,Mon
1,P002,2025-06-03,0,900,6.4,9.9,120,0.8,22,20,...,0,0,0,55,F,White,Non-Hispanic,Commercial,Consolidation,Wed
2,P003,2025-06-05,1,300,10.2,7.1,38,1.2,40,33,...,2,3,1,71,M,Black,Non-Hispanic,Medicare,Hi-DAC,Fri
3,P004,2025-06-06,0,1200,5.8,10.4,150,0.9,18,19,...,0,0,0,48,F,Asian,Non-Hispanic,Commercial,Maintenance,Thu
4,P005,2025-06-08,0,800,7.3,9.2,110,1.0,25,21,...,0,1,0,60,M,Other,Hispanic,Medicaid,Consolidation,Tue


In [3]:
# Load the notes sample file and show its first rows.
notes_preview = pd.read_csv(notes_csv)
notes_preview.head()

Unnamed: 0,patient_id,note_text
0,P001,Discharged home with services. MRD positive. B...
1,P002,Discharged home. MRD negative. Blasts 1%. Good...
2,P003,Discharged home. Sepsis treated; mucositis not...
3,P004,Discharged home. No complications. MRD negativ...
4,P005,Discharged home. MRD unknown. Low intensity th...


In [11]:
# Build the modelling inputs from the two sample files; the pipeline returns
# a pair that is unpacked into `df` and `y` for the following cells.
dataset = build_dataset(structured_csv, notes_csv)
df, y = dataset

In [12]:
# Show only the first rows rather than rendering the whole frame, so the
# output stays bounded if the input files grow beyond the small sample.
df.head()

Unnamed: 0,patient_id,discharge_date,anc,wbc,hemoglobin,platelets,creatinine,alt,ast,los_days,...,neutropenic_fever,sepsis,mucositis,central_line,discharge_disposition,chemo_intensity,planned_readmission,followup_within_7d,social_support,med_nonadherence
0,P001,2025-06-01,200,12.1,8.2,45,1.1,30,28,9,...,1,0,0,1,home_with_services,unknown,0,1,unknown,0
1,P002,2025-06-03,900,6.4,9.9,120,0.8,22,20,7,...,0,0,0,0,home,standard,0,0,strong,0
2,P003,2025-06-05,300,10.2,7.1,38,1.2,40,33,12,...,0,1,1,1,home,unknown,1,0,unknown,0
3,P004,2025-06-06,1200,5.8,10.4,150,0.9,18,19,6,...,0,0,0,0,home,standard,0,0,strong,0
4,P005,2025-06-08,800,7.3,9.2,110,1.0,25,21,8,...,0,0,0,0,home,low,0,0,limited,0


In [4]:
# Train on the sample files. Per the printed log, this call also runs
# cross-validation and saves the fitted model to disk.
# NOTE(review): these metrics come from the demo sample data; treat them as a
# smoke test rather than a performance estimate.
results = train_and_save(structured_csv, notes_csv)

# Display the cross-validation summary stored in the returned metadata.
cv_stats = results['metadata']['cv_stats']
cv_stats

== Cross-validation ==
[Fold 1] AUROC=1.000 AUPRC=1.000 Brier=0.064
[Fold 2] AUROC=1.000 AUPRC=1.000 Brier=0.036
{
  "cv_auroc_mean": 1.0,
  "cv_auroc_std": 0.0,
  "cv_auprc_mean": 1.0,
  "cv_auprc_std": 0.0,
  "cv_brier_mean": 0.04975140755777983,
  "cv_brier_std": 0.014247655456163168,
  "suggested_threshold": 0.8437016670987321
}
[OK] Saved model to ./artifacts\model.joblib


{'cv_auroc_mean': 1.0,
 'cv_auroc_std': 0.0,
 'cv_auprc_mean': 1.0,
 'cv_auprc_std': 0.0,
 'cv_brier_mean': 0.04975140755777983,
 'cv_brier_std': 0.014247655456163168,
 'suggested_threshold': 0.8437016670987321}

In [5]:
# Structured inputs for one hypothetical discharge, keyed by feature name.
example_structured = dict(
    anc=250,
    wbc=11.2,
    hemoglobin=7.8,
    platelets=32,
    creatinine=1.3,
    alt=36,
    ast=30,
    los_days=11,
    prior_30d_admits=1,
    transfusion_count=3,
    infection_count=1,
    age=66,
    sex='F',
    race='White',
    ethnicity='Non-Hispanic',
    insurance='Medicare',
    chemo_regimen='7+3',
    discharge_dayofweek='Thu',
)

# Free-text discharge note that accompanies the structured inputs.
example_note = 'Discharged home. MRD positive. Blasts 8%. Neutropenic fever and mucositis. Port-a-cath in place. Follow-up within 1 week planned.'

# Keep the combined payload available under the same name and layout.
example = {'structured': example_structured, 'note_text': example_note}

# Score the single example with the pipeline's single-record predictor.
predict_one(example_structured, example_note)

{'prob_readmit_30d': 0.9614119934924037,
 'label_at_thr': 1,
 'threshold': 0.8437016670987321}