# HDL Cholesterol Visualization (Jupyter Notebook)

This notebook demonstrates:
1. Generating FHIR data for HDL cholesterol observations using `fhir_utils`.
2. Flattening the FHIR data into a pandas DataFrame.
3. Visualizing the HDL values over time using Bokeh.

In [None]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import DatetimeTickFormatter
from bokeh.io import output_notebook
from datetime import datetime, timedelta, date # Added date for consistency
import json

import sys
import os

# Make the `src` package importable: the parent of the kernel's current
# working directory is appended to sys.path unless it is already listed.
# NOTE(review): this helps when the notebook is launched from a folder one
# level below the project root (e.g. notebooks/). If the kernel is started
# from the project root itself, the root's parent is what gets appended --
# confirm the intended launch directory.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from src.medical_data_science.fhir_utils import (
    create_patient_hdl_observation,
    flatten_fhir_bundle,
)

In [None]:
# Cell 2: Initialize Bokeh for Notebook
# Configure Bokeh to render inline in the notebook, so the later show(p)
# call displays the figure in the cell output instead of a separate page.
output_notebook()

In [None]:
# Cell 3: Data Generation
#
# Builds the demo inputs (one patient, four HDL readings), turns them into a
# FHIR bundle via fhir_utils, and flattens that bundle into the DataFrame `df`
# that the following cells inspect and plot.

# Patient demographics
patient_id = "patient-jupyter-01"
patient_family_name = "Jenson"
patient_given_name = "Jupyter"
patient_birth_date_str = "1975-08-20"  # ISO date: YYYY-MM-DD
patient_gender = "male"

# HDL readings, one row per observation: (value, timestamp, observation id).
# Each row is expanded into the dict layout passed to
# create_patient_hdl_observation below.
hdl_observations_data = [
    {
        "value": hdl_value,
        "effective_date_time": measured_at,
        "observation_id": obs_id,
    }
    for hdl_value, measured_at, obs_id in (
        (50.0, "2019-11-01T09:00:00Z", "hdl-obs-jupyter-01"),
        (53.5, "2020-12-05T09:30:00Z", "hdl-obs-jupyter-02"),
        (48.0, "2021-10-15T08:45:00Z", "hdl-obs-jupyter-03"),
        (51.2, "2022-11-20T10:00:00Z", "hdl-obs-jupyter-04"),
    )
]

# Step 1: patient + observations -> FHIR bundle object
print("Generating FHIR Bundle...")
fhir_bundle = create_patient_hdl_observation(
    patient_id=patient_id,
    patient_family_name=patient_family_name,
    patient_given_name=patient_given_name,
    patient_birth_date=patient_birth_date_str,
    patient_gender=patient_gender,
    hdl_observations=hdl_observations_data,
)
print("FHIR Bundle generated.")

# Step 2: bundle object -> JSON-style structure
# (presumably a plain dict; verify against the bundle class used by fhir_utils)
bundle_json = fhir_bundle.as_json()

# Step 3: JSON structure -> flat pandas DataFrame
print("Flattening FHIR Bundle to DataFrame...")
df = flatten_fhir_bundle(bundle_json)
print("DataFrame generated.")

In [None]:
# Cell 4: Data Inspection (Optional)
#
# Prints the first rows of the flattened DataFrame, or a notice when there is
# nothing to show.

print("--- Flattened DataFrame Head ---")
if df is None or df.empty:
    print("DataFrame is empty or None.")
else:
    # to_string() renders the rows as plain text without pandas' display truncation.
    print(df.head().to_string())

# Optional: uncomment to peek at the first three top-level keys of bundle_json.
# print("\n--- Sample of Bundle JSON ---")
# print(json.dumps({k: bundle_json[k] for k in list(bundle_json)[:3]}, indent=2)) # Print first 3 keys

In [None]:
# Cell 5: Data Preparation for Plotting
#
# Coerces the two columns used by the plot to proper dtypes in place:
# the observation timestamp to datetime and the HDL value to numeric.
# A missing column only produces a warning; nothing is raised for it here.

if df is None or df.empty:
    print("DataFrame is empty or None, skipping data preparation.")
else:
    # Timestamp column -> pandas datetime
    if 'Observation_effectiveDateTime' not in df.columns:
        print("Warning: 'Observation_effectiveDateTime' column not found.")
    else:
        df['Observation_effectiveDateTime'] = pd.to_datetime(df['Observation_effectiveDateTime'])
        print("'Observation_effectiveDateTime' converted to datetime.")

    # Value column -> numeric (expected to be numeric already; this is a safeguard)
    if 'Observation_valueQuantity_value' not in df.columns:
        print("Warning: 'Observation_valueQuantity_value' column not found.")
    else:
        df['Observation_valueQuantity_value'] = pd.to_numeric(df['Observation_valueQuantity_value'])
        print("'Observation_valueQuantity_value' ensured as numeric.")

    # Report the resulting dtypes for whichever of the two columns exist.
    print("\nData types after preparation:")
    _available_cols = [
        col
        for col in ('Observation_effectiveDateTime', 'Observation_valueQuantity_value')
        if col in df.columns
    ]
    if _available_cols:
        print(df[_available_cols].dtypes)

In [None]:
# Cell 6: Bokeh Plotting
#
# Draws HDL value against observation time as a line with circular markers.
# The plot is only built when `df` exists, is non-empty, and both plotted
# columns are present with datetime / numeric dtypes (as set up in Cell 5);
# otherwise a short diagnostic message is printed instead.

if (
    df is not None
    and not df.empty
    and 'Observation_effectiveDateTime' in df.columns
    and 'Observation_valueQuantity_value' in df.columns
    and pd.api.types.is_datetime64_any_dtype(df['Observation_effectiveDateTime'])
    and pd.api.types.is_numeric_dtype(df['Observation_valueQuantity_value'])
):
    # The line glyph connects points in row order, so plot from a
    # chronologically sorted copy; `df` itself is left untouched.
    _plot_df = df.sort_values('Observation_effectiveDateTime')

    p = figure(
        x_axis_type="datetime",
        title="HDL Cholesterol Over Time (Jupyter)",
        height=350,
        width=800,
        x_axis_label='Date',
        y_axis_label='HDL (mg/dL)'
    )

    # Trend line through the readings
    p.line(
        x=_plot_df['Observation_effectiveDateTime'],
        y=_plot_df['Observation_valueQuantity_value'],
        legend_label="HDL",
        line_width=2
    )
    # Markers on each reading. scatter() with a screen-unit `size` replaces
    # circle(size=...), which is deprecated since Bokeh 3.4. Sharing the
    # legend_label with the line keeps a single combined legend entry.
    p.scatter(
        x=_plot_df['Observation_effectiveDateTime'],
        y=_plot_df['Observation_valueQuantity_value'],
        marker="circle",
        legend_label="HDL",
        fill_color="white",
        size=8
    )

    # Format the x-axis ticks per zoom level (day / month / year resolution)
    p.xaxis.formatter = DatetimeTickFormatter(
        days="%Y-%m-%d",
        months="%Y-%m",
        years="%Y"
    )
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"

    # Cosmetic tweaks: lighter grid, legend placed in the top-left corner
    p.grid.grid_line_alpha = 0.3
    p.legend.location = "top_left"

    # Render the figure (inline when output_notebook() has been called)
    show(p)
else:
    print("Could not generate plot. Please check if the DataFrame `df` is correctly populated and prepared, ")
    print("and if 'Observation_effectiveDateTime' and 'Observation_valueQuantity_value' columns are present ")
    print("with the correct data types.")