In [28]:
import os

import pandas as pd
from sqlalchemy import create_engine

# Database connection string.
# The URL is read from the DATABASE_URL environment variable so real
# credentials never have to be written into the notebook. When the variable
# is not set, the local development URL is used, which keeps the previous
# behaviour unchanged.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://uvg_user:uvg_password@db:5432/health_data",
)
engine = create_engine(DATABASE_URL)

# Simple query to test the foundation layer: a trivial SELECT that succeeds
# only if the database is reachable with the configured URL.
try:
    df = pd.read_sql("SELECT 1 as connection_status", engine)
    print("Connection Successful! Your Biomedical Data Stack is ready.")
    print(df)
except Exception as e:
    # Deliberately broad: this cell is a connectivity check, so any failure
    # is reported to the reader instead of stopping the notebook.
    print(f"Connection Failed: {e}")
    

Connection Successful! Your Biomedical Data Stack is ready.
   connection_status
0                  1


In [30]:
from sqlalchemy import text

# Read the schema script. The encoding is passed explicitly so the result does
# not depend on the platform's default locale; the data queried later in this
# notebook contains accented names, which a non-UTF-8 default would corrupt.
# NOTE(review): assumes the script file is saved as UTF-8 — confirm.
with open('sql/002_ehr_schema.sql', 'r', encoding='utf-8') as f:
    sql_script = f.read()

# engine.begin() wraps both statements in one transaction: it commits when the
# block exits normally and rolls back if either statement raises.
with engine.begin() as conn:
    # Drop the existing tables first so the cell can be re-run from scratch.
    conn.execute(text("DROP TABLE IF EXISTS labevents, d_labitems, diagnoses, admissions, patients CASCADE;"))
    conn.execute(text(sql_script))

# Printed only after the transaction above has been committed.
print("¡Base de datos configurada y datos insertados con éxito!")

¡Base de datos configurada y datos insertados con éxito!


In [31]:
# Admission overview: one row per admission, joined to the patient it belongs
# to and sorted by admission time. The resulting frame is the cell's output.
pd.read_sql(
    sql="""
SELECT
  p.subject_id,
  p.external_id,
  p.full_name,
  a.hadm_id,
  a.admission_type,
  a.admittime,
  a.dischtime,
  a.hospital_expire_flag
FROM patients p
JOIN admissions a ON a.subject_id = p.subject_id
ORDER BY a.admittime;
""",
    con=engine,
)

Unnamed: 0,subject_id,external_id,full_name,hadm_id,admission_type,admittime,dischtime,hospital_expire_flag
0,1,MRN-0001,Ana López,1,Emergency,2101-01-10 08:00:00,2101-01-15 14:00:00,False
1,6,MRN-0006,Lucía Herrera,7,Emergency,2101-02-05 09:30:00,2101-02-08 10:00:00,False
2,2,MRN-0002,Carlos Pérez,3,Emergency,2101-03-20 22:00:00,2101-03-28 10:00:00,True
3,7,MRN-0007,Miguel Castillo,9,Emergency,2101-05-14 07:00:00,2101-05-20 15:00:00,False
4,3,MRN-0003,María Gómez,4,Urgent,2101-07-11 13:00:00,2101-07-14 11:00:00,False
5,8,MRN-0008,Sofía Morales,10,Emergency,2101-08-21 20:00:00,2101-08-24 09:00:00,False
6,4,MRN-0004,José Martínez,5,Emergency,2101-09-02 06:00:00,2101-09-10 15:00:00,False
7,6,MRN-0006,Lucía Herrera,8,Urgent,2101-11-01 16:00:00,2101-11-04 12:00:00,False
8,5,MRN-0005,Alex Rivera,6,Emergency,2101-12-18 19:00:00,2101-12-22 08:00:00,False
9,1,MRN-0001,Ana López,2,Elective,2102-06-01 10:00:00,2102-06-05 09:00:00,False


In [32]:
# Patients with more than one admission: admissions are counted per patient
# and only the groups whose count exceeds 1 are kept.
query_5_1 = """
SELECT 
    p.external_id, 
    p.full_name, 
    COUNT(*) AS n_admissions
FROM patients p
JOIN admissions a ON p.subject_id = a.subject_id
GROUP BY p.subject_id, p.external_id, p.full_name
HAVING COUNT(*) > 1;
"""

# Execute against the shared engine and show the resulting frame.
df_frequent_flyers = pd.read_sql(sql=query_5_1, con=engine)
df_frequent_flyers

Unnamed: 0,external_id,full_name,n_admissions
0,MRN-0006,Lucía Herrera,2
1,MRN-0001,Ana López,2


In [33]:
# Length of stay per admission, in days: the discharge-minus-admission
# interval is converted to seconds (EPOCH) and divided by 86400 seconds/day.
# Rows are sorted with the longest stay first.
query_5_2 = """
SELECT 
    a.hadm_id, 
    p.external_id, 
    p.full_name, 
    EXTRACT(EPOCH FROM (a.dischtime - a.admittime)) / 86400.0 AS length_of_stay_days
FROM patients p
JOIN admissions a ON p.subject_id = a.subject_id
ORDER BY length_of_stay_days DESC;
"""

# Execute against the shared engine and show the resulting frame.
df_los = pd.read_sql(sql=query_5_2, con=engine)
df_los

Unnamed: 0,hadm_id,external_id,full_name,length_of_stay_days
0,5,MRN-0004,José Martínez,8.375
1,3,MRN-0002,Carlos Pérez,7.5
2,9,MRN-0007,Miguel Castillo,6.333333
3,1,MRN-0001,Ana López,5.25
4,2,MRN-0001,Ana López,3.958333
5,6,MRN-0005,Alex Rivera,3.541667
6,7,MRN-0006,Lucía Herrera,3.020833
7,4,MRN-0003,María Gómez,2.916667
8,8,MRN-0006,Lucía Herrera,2.833333
9,10,MRN-0008,Sofía Morales,2.541667


In [34]:
# Highest creatinine value recorded during each admission. Because of the
# inner joins and the label filter, admissions with no 'Creatinine' lab event
# do not appear in the result.
query_5_3 = """
SELECT 
    a.hadm_id, 
    p.external_id, 
    p.full_name, 
    MAX(l.value_num) AS max_creatinine
FROM patients p
JOIN admissions a ON p.subject_id = a.subject_id
JOIN labevents l ON a.hadm_id = l.hadm_id
JOIN d_labitems d ON l.labitem_id = d.labitem_id
WHERE d.label = 'Creatinine'
GROUP BY a.hadm_id, p.external_id, p.full_name;
"""

# Execute against the shared engine and show the resulting frame.
df_creatinine = pd.read_sql(sql=query_5_3, con=engine)
df_creatinine

Unnamed: 0,hadm_id,external_id,full_name,max_creatinine
0,1,MRN-0001,Ana López,1.6
1,3,MRN-0002,Carlos Pérez,3.1
2,7,MRN-0006,Lucía Herrera,0.9
3,8,MRN-0006,Lucía Herrera,1.4
4,9,MRN-0007,Miguel Castillo,1.8


In [35]:
# Admissions whose peak 'White Blood Cells' value is above 12 or whose peak
# 'Lactate' value is above 2. Each CASE keeps only the values of one lab label,
# so the two MAX columns are computed independently; a column is NULL when the
# admission has no lab event with that label.
query_5_4 = """
SELECT 
    a.hadm_id, 
    p.external_id, 
    p.full_name, 
    MAX(CASE WHEN d.label = 'White Blood Cells' THEN l.value_num END) AS max_wbc,
    MAX(CASE WHEN d.label = 'Lactate' THEN l.value_num END) AS max_lactate
FROM patients p
JOIN admissions a ON p.subject_id = a.subject_id
JOIN labevents l ON a.hadm_id = l.hadm_id
JOIN d_labitems d ON l.labitem_id = d.labitem_id
GROUP BY a.hadm_id, p.external_id, p.full_name
HAVING 
    MAX(CASE WHEN d.label = 'White Blood Cells' THEN l.value_num END) > 12 
    OR 
    MAX(CASE WHEN d.label = 'Lactate' THEN l.value_num END) > 2;
"""

# Execute against the shared engine and show the resulting frame.
df_sepsis_proxy = pd.read_sql(sql=query_5_4, con=engine)
df_sepsis_proxy

Unnamed: 0,hadm_id,external_id,full_name,max_wbc,max_lactate
0,5,MRN-0004,José Martínez,14.2,2.2
1,6,MRN-0005,Alex Rivera,12.5,
2,3,MRN-0002,Carlos Pérez,18.4,3.8
3,10,MRN-0008,Sofía Morales,13.0,
