Imports

In [None]:
#Imports
import pandas as pd
import sys
import os
import json

# Make the sibling ``utils`` directory (one level above the working directory)
# importable so the project helper module can be loaded below.
utils_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'utils'))
already_importable = utils_path in sys.path
if not already_importable:
    # Prepend so these helpers win over any same-named installed module.
    sys.path.insert(0, utils_path)

from post_processing_utils import standardize_date, create_interactive_patient_timeline, plot_all_interactive_patient_timelines, get_patient_timeline_summary

Data Loading

In [None]:
# Read the source records (document metadata) from the project's CSV file
# and report how many rows were found.
df = pd.read_csv(filepath_or_buffer="../data/data.csv")
print(f"Loaded {len(df)} records")

In [None]:
# Load the LLM predictions.
# convert_dates=False: by default read_json tries to parse any column named
# "date" into datetimes, and succeeds only when every value happens to parse.
# That would make the type of the `date` column depend on the data; keeping the
# raw values lets standardize_date (applied below) see consistent input.
predictions = pd.read_json("../outputs/llm_predictions.json", convert_dates=False)
print(f"Loaded {len(predictions)} predictions")

In [None]:
# Attach patient_id and document_timestamp to every prediction via doc_id.
# Exact duplicate metadata rows are removed first: in a left join each
# repeated (patient_id, doc_id, document_timestamp) row in the source data
# would otherwise duplicate every predicted event of that document.
timeline_df = pd.merge(
    predictions,
    df[['patient_id', 'doc_id', 'document_timestamp']].drop_duplicates(),
    on='doc_id',
    how='left',
)

In [None]:
# Preview the first five rows of the merged frame
timeline_df.head(n=5)

Data Preparation

In [None]:
# Run the project's standardize_date helper once per row and store the result
# in a new `standardized_date` column.
timeline_df['standardized_date'] = timeline_df.apply(
    standardize_date,
    axis="columns",
)

In [None]:
# Keep only the columns needed for the timeline, in display order
new_order = [
    'patient_id',
    'doc_id',
    'document_timestamp',
    'date_id',
    'date',
    'date_type',
    'standardized_date',
    'entity_id',
    'entity_label',
    'entity_preferred_name',
]
timeline_df = timeline_df.loc[:, new_order]

In [None]:
# Discard, in place, every row that lacks a patient_id or a standardized_date
timeline_df.dropna(
    axis="index",
    how="any",
    subset=['patient_id', 'standardized_date'],
    inplace=True,
)

In [None]:
# Cast patient_id to int (missing ids were dropped above) for cleaner grouping
timeline_df["patient_id"] = timeline_df["patient_id"].astype(dtype=int)

In [None]:
# Report how many events survived cleaning, then preview the first five rows
print(f"Processed {len(timeline_df)} events.")
timeline_df.head(n=5)

Generate Patient Timelines (Single Patient)

In [None]:
# Patient whose timeline is plotted, saved and summarised in the cells below.
# Change this value to inspect a different patient.
patient_id = 1

In [None]:
# Build the interactive timeline for the selected patient with the project
# helper and display it inline.
# NOTE(review): the helper's return type is not visible here; the .show() and
# .write_html() calls in this notebook suggest a Plotly figure — confirm.
fig = create_interactive_patient_timeline(timeline_df, patient_id=patient_id)
fig.show()

In [None]:
# Save the single-patient figure as an HTML file named after the patient id.
# NOTE(review): assuming `fig` is a Plotly figure, include_plotlyjs=True embeds
# the plotly.js library in the file so it can be opened offline — confirm.
fig.write_html(f'../outputs/patient_{patient_id}_timeline.html', include_plotlyjs=True)

In [None]:
# Get the timeline summary for the selected patient from the project helper
# and pretty-print it as JSON with a 2-space indent.
# NOTE(review): this assumes the summary holds only JSON-serialisable values
# (no Timestamps / numpy scalars) — verify against the helper.
summary = get_patient_timeline_summary(timeline_df, patient_id=patient_id)
print(json.dumps(summary, indent=2))

In [None]:
# Save the patient's timeline summary as a JSON file named after the patient id.
# The encoding is given explicitly so the file is written the same way
# regardless of the platform's default locale encoding.
with open(f'../outputs/patient_{patient_id}_timeline.json', 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2)

Generate Patient Timelines (All Patients)

In [None]:
# Build the interactive timelines for all patients with the project helper and
# display each returned figure in turn.
# The loop variable is deliberately not named `fig`: reusing that name would
# overwrite the single-patient figure, so re-running the "Save as HTML" cell
# afterwards would write the wrong figure to patient_{patient_id}_timeline.html.
figs = plot_all_interactive_patient_timelines(timeline_df)
for patient_fig in figs:
    patient_fig.show()