<a href="https://colab.research.google.com/github/ufbfung/mimic-fhir/blob/main/load_and_profile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Overview
In this notebook, I explore the MIMIC-IV demo dataset on FHIR and build some helper functions in Python for working with it.

In [1]:
# Download the MIMIC-IV FHIR demo (v2.0) files from PhysioNet.
# The files end up under ./physionet.org/files/mimic-iv-fhir-demo/2.0/, which is
# the directory tree the later cells read from.
!wget -r -N -c -np https://physionet.org/files/mimic-iv-fhir-demo/2.0/

--2024-01-15 17:19:38--  https://physionet.org/files/mimic-iv-fhir-demo/2.0/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/mimic-iv-fhir-demo/2.0/index.html’

physionet.org/files     [ <=>                ]     584  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2024-01-15 17:19:38 (57.7 MB/s) - ‘physionet.org/files/mimic-iv-fhir-demo/2.0/index.html’ saved [584]

Loading robots.txt; please ignore errors.
--2024-01-15 17:19:38--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 22 [text/plain]
Saving to: ‘physionet.org/robots.txt’


2024-01-15 17:19:38 (6.95 MB/s) - ‘physionet.org/robots.txt’ saved [22/22]

--2024-01-15 17:19:38--  https://physionet.org/files/mimic-iv-fhir-de

In [28]:
# Navigate the file path
# `os` has to be imported before the first os.path.join call; otherwise this
# cell raises NameError on a fresh kernel.
import os

base_path = 'physionet.org/files/mimic-iv-fhir-demo/2.0/mimic-fhir'
patient_path = os.path.join(base_path, 'Patient.ndjson')
condition_path = os.path.join(base_path, 'Condition.ndjson')

print(patient_path)

# List every entry found in the download directory
list_of_fhir_files = os.listdir(base_path)

for file in list_of_fhir_files:
    print(file)

physionet.org/files/mimic-iv-fhir-demo/2.0/mimic-fhir/Patient.ndjson
MedicationAdministration.ndjson
Condition.ndjson
ObservationMicroSusc.ndjson
Organization.ndjson
ObservationMicroOrg.ndjson
ObservationDatetimeevents.ndjson
ObservationChartevents.ndjson
ObservationOutputevents.ndjson
MedicationRequest.ndjson
SpecimenLab.ndjson
Medication.ndjson
Specimen.ndjson
Procedure.ndjson
Location.ndjson
EncounterICU.ndjson
ObservationLabevents.ndjson
Encounter.ndjson
ObservationMicroTest.ndjson
MedicationDispense.ndjson
MedicationAdministrationICU.ndjson
Patient.ndjson
ProcedureICU.ndjson
index.html


In [31]:
!pip install fhir.resources
from fhir.resources.patient import Patient
from fhir.resources.observation import Observation
from fhir.resources.R4B.condition import Condition



In [35]:
import json
import pandas as pd
from datetime import datetime

def create_fhir_object(resource_data):
    """Build the FHIR model object that matches a raw resource dict.

    Parameters:
    resource_data (dict): A decoded FHIR resource; its "resourceType" entry
        selects the model class.

    Returns:
    A Patient, Condition or Observation instance created with ``parse_obj``,
    or None when the resource type is missing or not one of those three.
    """
    kind = resource_data.get("resourceType")

    # Pick the model class first; only the matching class name is ever touched.
    if kind == "Patient":
        model = Patient
    elif kind == "Condition":
        model = Condition
    elif kind == "Observation":
        model = Observation
    else:
        # Unsupported or absent resource type
        return None

    return model.parse_obj(resource_data)

def parse_fhir_records(file_path):
    """Read an NDJSON file and turn every line into a FHIR object.

    Parameters:
    file_path (str): Path to a file holding one JSON resource per line.

    Returns:
    list: One entry per non-blank line, in file order, as returned by
    ``create_fhir_object`` (which yields None for unsupported resource types).

    Raises:
    json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    fhir_objects = []

    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing empty line) are not records; skip
            # them instead of letting json.loads fail on them.
            if not line.strip():
                continue
            resource_data = json.loads(line)
            fhir_objects.append(create_fhir_object(resource_data))

    return fhir_objects

def calculate_age_with_offset(birthdate, offset_years=150, today=None):
    """Compute an age in whole years and subtract a fixed number of years.

    Parameters:
    birthdate (date): Date of birth; only year, month and day are read.
    offset_years (int): Number of years subtracted from the computed age.
    today (date, optional): Reference date for the calculation. Defaults to
        the current local date, which makes the result time-dependent; pass
        an explicit date for reproducible output.

    Returns:
    int: Completed years between ``birthdate`` and ``today``, minus
    ``offset_years``.
    """
    if today is None:
        today = datetime.now().date()

    # One year is deducted when the birthday has not yet occurred this year.
    birthday_passed = (today.month, today.day) >= (birthdate.month, birthdate.day)
    age = today.year - birthdate.year - (0 if birthday_passed else 1)

    # Apply the offset
    return age - offset_years

# Usage: parse the Patient file first, then the Condition file
patients, conditions = [
    parse_fhir_records(ndjson_path)
    for ndjson_path in (patient_path, condition_path)
]

# Show the first parsed Condition as the cell output
conditions[0]

Condition(resource_type='Condition', fhir_comments=None, id='0002fff8-11c5-5d6d-975a-b926a13bb02b', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=datetime.datetime(2022, 5, 24, 15, 51, 35, 263000, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), lastUpdated__ext=None, profile=['http://fhir.mimic.mit.edu/StructureDefinition/mimic-condition'], profile__ext=None, security=None, source='#CexDBHtjfcg8Ti57', source__ext=None, tag=None, versionId='1', versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, abatementAge=None, abatementDateTime=None, abatementDateTime__ext=None, abatementPeriod=None, abatementRange=None, abatementString=None, abatementString__ext=None, asserter=None, bodySite=None, category=[CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(reso

In [63]:
from collections import Counter

def profile_conditions(conditions):
    """Summarise a collection of Condition resources.

    Parameters:
    conditions (iterable): Condition objects exposing ``code.coding[0]``
        (with ``code`` and ``display``), ``subject.reference`` and an
        ``encounter`` that is either None or has a ``reference``.

    Returns:
    tuple: ``(top_10_codes, avg_conditions_per_patient, avg_codes_per_encounter)``
        - top_10_codes (DataFrame): Columns 'Code', 'Display', 'Count' for the
          ten most frequent (code, display) pairs; empty when there is no input.
        - avg_conditions_per_patient (float): Mean number of distinct codes per
          patient reference, or 0 when there are no patients.
        - avg_codes_per_encounter (float): Mean number of distinct codes per
          encounter reference, or 0 when there are no encounters.
    """
    code_counts = Counter()
    patient_conditions = {}
    encounter_conditions = {}

    for condition in conditions:
        # Only the first coding of each condition is profiled
        coding = condition.code.coding[0]
        code, display = coding.code, coding.display
        code_counts[(code, display)] += 1

        # Distinct codes per patient
        patient_id = condition.subject.reference
        patient_conditions.setdefault(patient_id, set()).add(code)

        # Distinct codes per encounter; a condition without an encounter is
        # left out of the encounter statistics rather than raising.
        if condition.encounter is not None:
            encounter_id = condition.encounter.reference
            encounter_conditions.setdefault(encounter_id, set()).add(code)

    # Build the table directly with its final columns so that an empty input
    # still produces a well-formed (empty) frame.
    top_10_codes = pd.DataFrame(
        [(code, display, count) for (code, display), count in code_counts.most_common(10)],
        columns=['Code', 'Display', 'Count'],
    )

    def mean_set_size(groups):
        # Average size of the code sets; 0 when there are no groups at all
        return sum(len(codes) for codes in groups.values()) / len(groups) if groups else 0

    avg_conditions_per_patient = mean_set_size(patient_conditions)
    avg_codes_per_encounter = mean_set_size(encounter_conditions)

    return top_10_codes, avg_conditions_per_patient, avg_codes_per_encounter

# Example usage: profile the Condition resources parsed earlier in the notebook
# and keep the three results for display in the next cell.
top_10_codes, avg_conditions_per_patient, avg_codes_per_encounter = profile_conditions(conditions)

In [64]:
# Render the table with the notebook's rich display and label the two averages
# so the numbers can be read without looking at the code.
display(top_10_codes)
print(f"Average distinct condition codes per patient: {avg_conditions_per_patient}")
print(f"Average distinct condition codes per encounter: {avg_codes_per_encounter}")

     Code                                            Display  Count
0    4019                 Unspecified essential hypertension     67
1    E785                        Hyperlipidemia, unspecified     54
2    2724               Other and unspecified hyperlipidemia     51
3  Z87891            Personal history of nicotine dependence     35
4   I2510  Atherosclerotic heart disease of native corona...     34
5     I10                   Essential (primary) hypertension     33
6   42731                                Atrial fibrillation     33
7    E039                        Hypothyroidism, unspecified     33
8   25000  Diabetes mellitus without mention of complicat...     32
9    Z794                 Long term (current) use of insulin     32
30.52
15.897338403041825


In [56]:
# Find all conditions related to a specific patient

# Specify the patient
pat_id = 'Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2'

# Conditions whose subject is the selected patient
pat_conditions = [
    condition for condition in conditions
    if condition.subject.reference == pat_id
]

# Create a list of conditions for that patient.
# 'Count' holds the 1-based position of the condition within `conditions`;
# enumerate supplies it directly instead of searching the list for every match.
conditions_data = [
    {
        'Count': position,
        'Display': condition.code.coding[0].display,
        'Code': condition.code.coding[0].code,
        'Subject Reference': condition.subject.reference
    }
    for position, condition in enumerate(conditions, start=1)
    if condition.subject.reference == pat_id
]

# Convert to df
conditions_df = pd.DataFrame(conditions_data)

# Show the first rows
conditions_df.head()

Unnamed: 0,Count,Display,Code,Subject Reference
0,1,Personal history of malignant neoplasm of pros...,Z8546,Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2
1,42,Personal history of hodgkin's disease,V1072,Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2
2,73,Long term (current) use of antithrombotics/ant...,Z7902,Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2
3,105,"Asthma, unspecified type, unspecified",49390,Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2
4,152,Other and unspecified hyperlipidemia,2724,Patient/b410dd44-7d65-56f9-974f-2751e8aa80e2


In [None]:
import pandas as pd
import json

# Function to parse a single patient record from FHIR format
def parse_patient_record(record):
    """Flatten a raw FHIR Patient dict into a single-level dict.

    Parameters:
    record (dict): A decoded Patient resource.

    Returns:
    dict: Keys 'id', 'gender', 'birthDate', 'maritalStatus', 'race',
    'ethnicity' and 'birthSex'. Any value that is absent from the record is
    returned as None instead of raising.
    """
    def first_coding_display(extension):
        # Display text of the first nested extension that carries a valueCoding
        for nested in extension.get('extension') or []:
            coding = nested.get('valueCoding')
            if coding:
                return coding.get('display')
        return None

    # `or` fallbacks cover missing keys, explicit nulls and empty lists alike
    marital_codings = (record.get('maritalStatus') or {}).get('coding') or [{}]

    patient_data = {
        'id': record.get('id'),
        'gender': record.get('gender'),
        'birthDate': record.get('birthDate'),
        'maritalStatus': marital_codings[0].get('code'),
        'race': None,
        'ethnicity': None,
        'birthSex': None
    }

    # Parsing extensions for race, ethnicity, and birthSex
    for extension in record.get('extension') or []:
        url = extension.get('url')
        if url == 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race':
            patient_data['race'] = first_coding_display(extension)
        elif url == 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity':
            patient_data['ethnicity'] = first_coding_display(extension)
        elif url == 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex':
            patient_data['birthSex'] = extension.get('valueCode')

    return patient_data

# Function to parse a single observation record
def parse_observation_record(record):
    """Flatten a raw FHIR Observation dict into a single-level dict.

    Parameters:
    record (dict): A decoded Observation resource.

    Returns:
    dict: Keys 'id', 'status', 'category_code', 'test_code', 'test_name',
    'patient_reference', 'encounter_reference', 'effectiveDateTime', 'value'
    and 'unit'. Any value that is absent from the record is returned as None.
    """
    # `or` fallbacks cover missing keys, explicit nulls and empty lists alike,
    # so indexing [0] can never hit an empty list.
    first_category = (record.get('category') or [{}])[0]
    category_coding = (first_category.get('coding') or [{}])[0]
    code_coding = ((record.get('code') or {}).get('coding') or [{}])[0]

    # Value and unit are only present for quantity-valued observations
    quantity = record.get('valueQuantity') or {}

    observation_data = {
        'id': record.get('id'),
        'status': record.get('status'),
        'category_code': category_coding.get('code'),
        'test_code': code_coding.get('code'),
        'test_name': code_coding.get('display'),
        'patient_reference': (record.get('subject') or {}).get('reference'),
        'encounter_reference': (record.get('encounter') or {}).get('reference'),
        'effectiveDateTime': record.get('effectiveDateTime'),
        'value': quantity.get('value'),
        'unit': quantity.get('unit')
    }

    return observation_data

def find_observations_for_patient(df, patient_ref):
    """
    Select the observation rows that belong to one patient.

    Parameters:
    df (DataFrame): Observation data with a 'patient_reference' column.
    patient_ref (str): The patient reference to match.

    Returns:
    DataFrame: The rows of ``df`` whose 'patient_reference' equals
    ``patient_ref``, with their original index labels.
    """
    belongs_to_patient = df['patient_reference'].eq(patient_ref)
    return df.loc[belongs_to_patient]

# Read the file for patients (blank lines are skipped)
with open('Patient.ndjson', 'r', encoding='utf-8') as file:
    patients = [parse_patient_record(json.loads(line)) for line in file if line.strip()]

# Reading the file for observations (blank lines are skipped)
with open('ObservationLabevents.ndjson', 'r', encoding='utf-8') as file:
    observations = [parse_observation_record(json.loads(line)) for line in file if line.strip()]

# Converting both lists to a dataframe
patient_df = pd.DataFrame(patients)
observation_df = pd.DataFrame(observations)

# Using the main web app

# Identify the patient of interest
specific_patient_ref = "Patient/9c3ebb7e-d087-519e-bea4-31c3d4aac7ff"

# Get all observations for that patient.
# The lookup runs only after observation_df exists; an earlier lookup against
# an undefined `df` was removed because it fails on a fresh kernel.
observations_for_patient = find_observations_for_patient(observation_df, specific_patient_ref)

# To view the results
observations_for_patient

# Goals for turning this into a web app

- Upload the patient and lab files to GitHub
- Organize all the functions into a single section in this Colab notebook
- Remove any unnecessary artifacts in the notebook to keep it organized
- Send all the code to ChatGPT to convert to Streamlit so I can deploy it as a web app