In [None]:
#----Patient ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
#from tqdm import tqdm
from datetime import datetime, timezone
#import datetime  # module
#from datetime import datetime as dt  # class, aliased to avoid conflict
#from datetime import datetime as tz
#import datetime
import json

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json" 

# Create credentials and BigQuery client
#credentials = service_account.Credentials.from_service_account_file(key_path)
credentials = service_account.Credentials.from_service_account_file(key_path)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}
#records =[]

# BigQuery config
#BQ_PROJECT = "your-gcp-project"
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
#client = bigquery.Client(project=BQ_PROJECT)
client = bigquery.Client(project="fhir-synthea-data", credentials=credentials)
#client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)
dataset_ref = bigquery.Dataset(f"{BQ_PROJECT}.{BQ_DATASET}")

# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        #cur.execute(f"SELECT * FROM fhir_staging.{table}")
        cur.execute("SELECT * FROM fhir_staging.patients_fhir_raw LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            print(f"rows: {rows}")
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")
        


# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()  # wait for completion

# Example: Transform & load Patients
def transform_patients(rows):
    #print("transforming data")
    records = []
    print(f"rows: {len(rows)}")
    for r in rows:
        print("inside loop")
        rid, resource = r[1], r[2] # adjust index if needed
        #print(f"rid: {rid}")
        #print(f"resource: {resource}")
        #birth_date = resource.get("birthdate")
        records.append({
            "patient_id": rid,
            "first_name": resource.get("name", [{}])[0].get("given", [""])[0],
            "last_name": resource.get("name", [{}])[0].get("family", ""),
            #"birth_date": datetime.date.fromisoformat(resource.get("birthDate"))
                #if resource.get("birthDate") else None,
            "birth_date": datetime.fromisoformat(resource.get("birthDate"))
                if resource.get("birthDate") else None,
            "gender": resource.get("gender"),
            #"load_timestamp" : datetime.datetime.utcnow()
            "load_timestamp": datetime.now(timezone.utc)
        })
    return pd.DataFrame(records)

# Main ETL loop
def etl_patients():
    try:
        for batch in tqdm(fetch_staged_data("patients_fhir_raw")):
            df = transform_patients(batch)
            if not df.empty:
                insert_to_bq(df, "patients")
                print("***Inserting***")
    except Exception as e:
        print(f"Error in patients ETL: {e}")

if __name__ == "__main__":
    etl_patients()




In [None]:
#---Practitioner ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
from datetime import datetime, timezone
import json
import pprint

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json"

# Create credentials and BigQuery client
credentials = service_account.Credentials.from_service_account_file(key_path)
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}

# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging.{table} LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")

# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()

# Transform Practitioners
def transform_practitioners(rows):
    records = []
    for r in rows:
        rid, resource = r[1], r[2]  # adjust index if needed

        #Initialize variables so they exist even if not found
        npi = None
        license_number = None
        other_ids = []

    for ident in resource.get("identifier", []):
        # Debugging: show the whole identifier object
        pprint.pprint(ident)

        system = ident.get("system")
        value = ident.get("value")

        print(f"system: {system}")
        print(f"value: {value}")

        if system == "http://hl7.org/fhir/sid/us-npi":
            npi = value
        elif system == "http://example.org/license-number":
            license_number = value
        else:
            other_ids.append(value)


        print("---- Results ----")
        print(f"NPI: {npi}")
        print(f"License Number: {license_number}")
        print(f"Other IDs: {other_ids}")
        name_info = resource.get("name", [{}])[0]

        records.append({
            "practitioner_id": rid,
            "first_name": name_info.get("given", [""])[0],
            "last_name": name_info.get("family", ""),
            "prefix": name_info.get("prefix", [""])[0] if name_info.get("prefix") else None,
            "gender": resource.get("gender"),
            #"birth_date": datetime.fromisoformat(resource.get("birthDate")) if resource.get("birthDate") else None,
            "npi": npi,
            "license_number": license_number,
            "primary_email": next((t.get("value") for t in resource.get("telecom", []) if t.get("system") == "email"), None),
            "primary_phone": next((t.get("value") for t in resource.get("telecom", []) if t.get("system") == "phone"), None),
            "load_timestamp": datetime.now(timezone.utc)
        })
    return pd.DataFrame(records)

# Main ETL loop
def etl_practitioners():
    try:
        for batch in fetch_staged_data("practitioners_fhir_raw"):
            df = transform_practitioners(batch)
            if not df.empty:
                insert_to_bq(df, "practitioners")
                print("***Inserted batch***")
    except Exception as e:
        print(f"Error in practitioners ETL: {e}")

if __name__ == "__main__":
    etl_practitioners()


{'system': 'http://hl7.org/fhir/sid/us-npi', 'value': '9999928192'}
system: http://hl7.org/fhir/sid/us-npi
value: 9999928192
---- Results ----
NPI: 9999928192
License Number: None
Other IDs: []
***Inserted batch***


In [None]:
#---Practitioner Roles ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
from datetime import datetime, timezone
import json
import pprint

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json"

# Create credentials and BigQuery client
credentials = service_account.Credentials.from_service_account_file(key_path)
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}

def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()

def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging.{table} LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")

# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()

# Transform Practitioners
def transform_practitioner_roles(rows):
    records = []
    for r in rows:
        rid, resource = r[1], r[2]

        # Initialize columns
        specialty_text = resource.get("specialty", {})[0].get("text")
        #specialty_code = resource.get("specialty", {})[0].get("code",[{}][0].get("coding",[{}][0].get("code")))
        specialty_code = resource.get("specialty", [{}])[0].get("coding", [{}])[0].get("code")
        role_text = resource.get("code", [{}])[0].get("text")
        role_code = resource.get("code", [{}])[0].get("coding", [{}])[0].get("code")
        
        print(specialty_code)
        print(specialty_text)
        print(role_text)
        print(role_code)

        

        records.append({
            "practitioner_role_id": rid,
            "practitioner_npi": resource.get("practitioner", {}).get("identifier").get("value"),
            "organization_id": resource.get("organization", {}).get("identifier").get("value"),
            "specialty_code": specialty_code,
            "specialty_text": specialty_text,
            "role_text" : role_text, 
            "role_code": role_code, 
            #                 if resource.get("telecom") and resource["telecom"][0].get("system")=="email" else None,
            #"role_text": resource.get("telecom", [{}])[0].get("value") 
                              #if resource.get("telecom") and resource["telecom"][0].get("system")=="phone" else None,
            "load_timestamp": datetime.now(timezone.utc)
        })

    return pd.DataFrame(records)


# Main ETL loop
def etl_practitioner_roles():
    try:
        for batch in fetch_staged_data("practitioner_roles_fhir_raw"):
            df = transform_practitioner_roles(batch)
            if not df.empty:
                insert_to_bq(df, "practitioner_roles")
                print("***Inserted batch***")
    except Exception as e:
        print(f"Error in practitioner roles ETL: {e}")

if __name__ == "__main__":
    etl_practitioner_roles()


208D00000X
General Practice Physician
General Practice Physician
208D00000X
208D00000X
General Practice Physician
General Practice Physician
208D00000X
208D00000X
General Practice Physician
General Practice Physician
208D00000X
208D00000X
General Practice Physician
General Practice Physician
208D00000X
208D00000X
General Practice Physician
General Practice Physician
208D00000X
***Inserted batch***


In [None]:
#---Observations ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
from datetime import datetime, timezone
import json
import pprint
import dateutil.parser  # optional, for robust ISO parsing

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json"

# Create credentials and BigQuery client
credentials = service_account.Credentials.from_service_account_file(key_path)
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}

def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()
    print(f"Loaded {job.output_rows} rows to {table_id}")



# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging.{table} LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")

# Transform Practitioners
def transform_observations(rows):
    records = []
    for r in rows:
        rid, resource = r[1], r[2]

        # Initialize columns
        codings  = resource.get("code", {}).get("coding", [])
        code_text = resource.get("code", {}).get("text")

        status = resource.get("status") if codings else None
        system = codings[0].get("system")if codings else None
        code = codings[0].get("code") if codings else None
        
        codings_struct = [
            {
                "system" : c.get("system"), 
                "code" : c.get("code"), 
                "display": c.get("display")
                
            }for c in codings
        ]

        value_numeric = None
        unit = None
        value_text = None
        value_codings = []

        if "valueQuantity" in resource:
            q = resource["valueQuantity"]
            value_numeric = q.get("value")
            unit = q.get("unit")
            # system/code available but usually redundant here

        elif "valueString" in resource:
            value_text = resource["valueString"]

        elif "valueCodeableConcept" in resource:
            cc = resource["valueCodeableConcept"]
            # Store text in value_text for quick querying
            value_text = cc.get("text")
            # Capture codings for full fidelity
            for c in cc.get("coding", []):
                value_codings.append({
                "system": c.get("system"),
                "code": c.get("code"),
                "display": c.get("display")
             })

        elif "valueDateTime" in resource:
            value_text = resource["valueDateTime"]  # or a dedicated column

        elif "valuePeriod" in resource:
            value_text = json.dumps(resource["valuePeriod"])  # or expand into start/end columns

        patient_id_ref = resource.get("subject", {}).get("reference")
        patient_id = patient_id_ref.split(":")[-1]

        encounter_id_ref = resource.get("encounter",{}).get("reference")
        encounter_id = encounter_id_ref.split(":")[-1]

        effective_date_str = resource.get("effectiveDateTime")
        effective_date = None

        if effective_date_str:
            effective_datetime = dateutil.parser.isoparse(effective_date_str)
        
        records.append({
            "observation_id": rid,
            "status" : status,
            "obs_code": code,
            "system": system,
            "obs_code_text": code_text,
            "codings": codings_struct,
            "value_numeric": value_numeric, 
            "value_text" : value_text,
            "unit" : unit,
            "value_codings" : value_codings, 
            "patient_id" : patient_id,
            "encounter_id" : encounter_id,
            "effective_datetime": effective_date, #not inserting
            "load_timestamp": datetime.now(timezone.utc) 
        })

    return pd.DataFrame(records)


# Main ETL loop
def etl_observations():
    try:
        for batch in fetch_staged_data("observations_fhir_raw"):
            df = transform_observations(batch)
            if not df.empty:
                insert_to_bq(df, "observations")
                print("***Inserted batch***")
    except Exception as e:
        print(f"Error in observations ETL: {e}")

if __name__ == "__main__":
    etl_observations()


Loaded 5 rows to fhir-synthea-data.fhir_curated.observations
***Inserted batch***


In [21]:
#---Conditions ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
from datetime import datetime, timezone
import json
import pprint
import dateutil.parser  # optional, for robust ISO parsing

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json"

# Create credentials and BigQuery client
credentials = service_account.Credentials.from_service_account_file(key_path)
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}

def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()
    print(f"Loaded {job.output_rows} rows to {table_id}")



# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging.{table} LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")

# Transform Practitioners
def transform_conditions(rows):
    records = []
    for r in rows:
        rid, resource = r[1], r[2]

        # Initialize columns
        codings  = resource.get("code", {}).get("coding", [])
        code_text = resource.get("code", {}).get("text")

        status = resource.get("status") if codings else None
        system = codings[0].get("system")if codings else None
        code = codings[0].get("code") if codings else None
        
        codings_struct = [
            {
                "system" : c.get("system"), 
                "code" : c.get("code"), 
                "display": c.get("display")
                
            }for c in codings
        ]

        category = None
        category_text = None

        if resource.get("category"):
            first_category = resource["category"][0]["coding"][0]  # first category -> first coding
            category = first_category.get("code")
            category_text = first_category.get("display")
        else:
            category = None
            category_text = None

        # Nested array for full fidelity
        category_codings = []
        for cat in resource.get("category", []):
            for coding in cat.get("coding", []):
                category_codings.append({
                "system": coding.get("system"),
                "code": coding.get("code"),
                "display": coding.get("display")
             })

    
        patient_id_ref = resource.get("subject", {}).get("reference")
        patient_id = patient_id_ref.split(":")[-1]

        encounter_id_ref = resource.get("encounter",{}).get("reference")
        encounter_id = encounter_id_ref.split(":")[-1]

        onset_date_str = resource.get("effectiveDateTime")
        onset_date_time = None

        if onset_date_str:
            onset_date_time = dateutil.parser.isoparse(onset_date_str)
        
        records.append({
            "condition_id": rid,
            "clinical_status" : status,
            "code": code,
            "code_system": system,
            "code_text": code_text,
            "codings": codings_struct,
            "category_code": category,
            "category": category_text,
            "category_codings": category_codings,
            "patient_id" : patient_id,
            "encounter_id" : encounter_id,
            "onset_date": onset_date_time, #not inserting
            "load_timestamp": datetime.now(timezone.utc) 
        })

    return pd.DataFrame(records)


# Main ETL loop
def etl_conditions():
    try:
        for batch in fetch_staged_data("conditions_fhir_raw"):
            df = transform_conditions(batch)
            if not df.empty:
                insert_to_bq(df, "conditions")
                print("***Inserted batch***")
    except Exception as e:
        print(f"Error in conditions ETL: {e}")

if __name__ == "__main__":
    etl_conditions()


Loaded 5 rows to fhir-synthea-data.fhir_curated.conditions
***Inserted batch***


In [27]:
#---Claims ETL
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
from datetime import datetime, timezone
import json
import pprint
import dateutil.parser  # optional, for robust ISO parsing

# Path to your service account JSON key
key_path = "/Users/toniventura/keys/bq_key.json"

# Create credentials and BigQuery client
credentials = service_account.Credentials.from_service_account_file(key_path)
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)

# Postgres config
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "fhir",
    "user": "toniventura",
    "password": "fhir_project"
}

def insert_to_bq(df, table_name):
    table_id = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    job = client.load_table_from_dataframe(df, table_id)
    job.result()
    print(f"Loaded {job.output_rows} rows to {table_id}")



# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging.{table} LIMIT 5;")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            yield rows
        cur.close()
        conn.close()
    except Exception as e:
        print(f"Postgres connection or query failed: {e}")

# Transform Practitioners
def transform_claims(rows):
    records = []
    for r in rows:
        rid, resource = r[1], r[2]

        use = resource.get("use")
        print(f"use: {use}")

        status = resource.get("status")
        print(f"status: {status}")

        patient_id = resource.get("patient", {}).get("reference").split(":")[-1]
        print(f"patient_id: {patient_id}")

        type_info = resource.get("type", {}).get("code")
        type_info = type_info[0] if type_info else None

        print(f"type_info: {type_info}")

        total_value = resource.get("total",{}).get("value")
        total_currency = resource.get("total", {}).get("currency")

        billable_start = resource.get("billable_period", {}).get("start")
        billable_end = resource.get("billable_period", {}).get("end")

        date_created = resource.get("created")

        billing_provider = []
        provider_reference = resource.get("provider", {}).get("reference")
        provider_reference_id = provider_reference.split("|")[-1]
        provider_reference_type = provider_reference.split("?")[0]
        provider_display = resource.get("provider", {}).get("display")
        billing_provider.append({
            "provider_reference_id": provider_reference_id,
            "provider_reference_type": provider_reference_type,
            "provider_display": provider_display 
        })

        priority_code = (resource.get("priority", {}).get("code") or [None])[0]

        facility_json = resource.get("facility")
        facility = []
        facility_reference_complete = facility_json.get("reference")
        facility_reference = facility_reference_complete.split("|")[0]
        facility_id = facility_reference_complete.split("|")[-1]
    
        facility.append({
            "facility_reference": facility_reference,
            #"facility_reference": facility_json.get("reference"),
            "facility_id": facility_id,
            "facility_display" : facility_json.get("display")   
        })

        insurances = resource.get("insurance",[{}])
        all_insurances = []

        for insurance in insurances:
            print(insurance.get("sequence"))
            print(insurance.get("focal"))
            print(insurance.get("coverage"))
            all_insurances.append({
                "sequence": insurance.get("sequence"),
                "focal": insurance.get("focal"),
                "coverage": insurance.get("coverage", {}).get("display")
            })
        
        
        items = resource.get("items", [{}])
        all_items = []

        for item in items:
            sequence = "None"
            sequence = item.get("sequence")
            item_type = "None"
            diagnosis_seq = item.get("diagnosisSequence")
            information_seq = item.get("informationSequence")
            procedure_seq = item.get("procedureSequence")
            productOrService = item.get("productOrService")
            system = "None"
            code = "None"
            display = "None"
            coding = productOrService.get("coding",[0])
            system = coding.get("system")
            code = coding.get("code")
            display = coding.get("display")
            service_period = item.get("servicePeriod",{})
            start = service_period.get("start")
            end = service_period.get("end")

            if diagnosis_seq:
                item_type = "diagnosis sequence"  
            if information_seq:
                item_type = "information sequence"
            if procedure_seq:
                item_type = "procedure sequence"  

            location = []

            location_coding = item.get("locationCodeableConcept").get("coding",[0])
            location_system = location_coding.get("system")
            location_code = location_coding.get("code")
            location_display = location_coding.get("display")

            location.append({
                "facility_id": facility_id,
                "system": location_system,
                "code": location_code,
                "display": location_display
            })

            encounter = item.get("encounter",[0]).get("reference").split(":")[-1]

            net = item.get("net",{})
            net_value = net.get("value")
            net_currency = net.get("currency")

            item_text =item.get("text")

            all_items.append({
                "sequence": sequence,
                "item_type": item_type,
                "system": system,
                "code": code,
                "display": display,
                "service_start":start,
                "service_end": end,
                "net_value": net_value,
                "net_currency": net_currency,
                "location": location,
                "encounter": encounter,
                "start_period": start,
                "end_period": end,
                "item_text": item_text
            })

        # Initialize columns
        all_diagnosis = []
        diagnoses = resource.get("diagnosis",[])

        for diagnosis in diagnoses:
            sequence = diagnosis.get("sequence")
            diagnosis_id = diagnosis.get("diagnosisReference").get("reference").split(":")[-1] #conditions
            all_diagnosis.append({
                "sequence": sequence,
                "diagnosis": "diagnosis_id"    
            })

        records.append({
            "claim_id": rid,
            "status" : status,
            "use": use,
            "status": status,
            "patient_id": patient_id,
            "claim_type_info": type_info,
            "total_value": total_value,
            "total_currency": total_currency,
            "billable_start": billable_start,
            "billable_end" : billable_end,
            "created": date_created,
            "billing_provider" : billing_provider,
            "priority_code": priority_code,
            "date_created": date_created,
            "facility" : facility,
            "all_insurances": all_insurances,
            "diagnoses": all_diagnosis,
            "items":all_items,
            "load_timestamp": datetime.now(timezone.utc) 
        })

    return pd.DataFrame(records)


# Main ETL loop
def etl_claims():
    try:
        for batch in fetch_staged_data("claims_fhir_raw"):
            df = transform_claims(batch)
            if not df.empty:
                insert_to_bq(df, "claims")
                print("***Inserted batch***")
    except Exception as e:
        print(f"Error in claims ETL: {e}")

if __name__ == "__main__":
    etl_claims()


use: claim
status: active
patient_id: 9cfbd2b4-5eff-91db-1a93-777a9b86738f
type_info: None
Error in claims ETL: 'NoneType' object has no attribute 'get'
