# eICU Database Exploration

This notebook explores the structure and content of the eICU Collaborative Research Database (eICU-CRD) as loaded into a local DuckDB file.

**Objective:** Understand the database schema, table relationships, and data quality for federated learning experiments.

In [None]:
import duckdb
import pandas as pd
import numpy as np
import altair as alt

# Display configuration: render every column, and up to 100 rows, of a DataFrame
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

## Connect to Database

In [None]:
# Open the project's DuckDB file read-only so exploration cannot modify it
db_path = '../../data/duckdb/fedlearn.duckdb'
conn = duckdb.connect(database=db_path, read_only=True)

print(f"Connected to: {db_path}")

## Database Schema Overview

In [None]:
# Ask DuckDB for its table catalog and materialize it as a DataFrame
tables = conn.execute("SHOW TABLES").fetchdf()
table_count = len(tables)

print(f"Number of tables: {table_count}")
print("\nAvailable tables:")
tables

In [None]:
# Get row counts for each table
def get_table_stats(conn, table_name):
    """Return the number of rows in ``table_name``.

    Parameters
    ----------
    conn
        Open connection exposing ``execute(sql)`` whose result supports
        ``fetchone()`` (in this notebook, the DuckDB connection).
    table_name : str
        Bare (unqualified) name of the table to count. It is quoted as a
        SQL identifier, so names that are reserved words or contain
        spaces or quotes are handled correctly.

    Returns
    -------
    The first column of the single row produced by ``SELECT COUNT(*)``.
    """
    # Identifiers cannot be bound as query parameters, so quote the name
    # (doubling any embedded double quotes) instead of splicing it in raw.
    quoted_name = '"' + str(table_name).replace('"', '""') + '"'
    row = conn.execute(f"SELECT COUNT(*) as count FROM {quoted_name}").fetchone()
    return row[0]

# Count rows for every table reported by SHOW TABLES
table_stats = [
    {'table': table, 'row_count': get_table_stats(conn, table)}
    for table in tables['name']
]

# Name the columns explicitly so a database with no tables yields an empty
# frame instead of a KeyError when sorting on 'row_count'.
stats_df = (
    pd.DataFrame(table_stats, columns=['table', 'row_count'])
    .sort_values('row_count', ascending=False)
)
stats_df

## Explore Patient Table

In [None]:
# Preview the patient table. The table name is a guess: adjust it to match
# the SHOW TABLES output above if this database names it differently.
patient_query = """
SELECT * FROM patient 
LIMIT 5
"""

try:
    patients_sample = conn.execute(patient_query).fetchdf()
except duckdb.Error as e:
    # Only database errors (e.g. a missing table) get the rename hint;
    # unrelated failures propagate instead of being misreported.
    print(f"Error: {e}")
    print("Adjust table name based on SHOW TABLES output above")
else:
    print("Patient table sample:")
    display(patients_sample)

    print("\nColumn data types:")
    print(patients_sample.dtypes)

## Summary Statistics

In [None]:
# Add your analysis here
# Example: Distribution of patients by hospital, demographics, etc.

## Next Steps

1. Identify key tables for federated learning
2. Check data quality and missing values
3. Understand hospital distribution for federated clients
4. Define target variable for prediction task

In [None]:
# Release the DuckDB connection once exploration is finished; re-run the
# connection cell above before executing any further queries.
conn.close()