In [2]:
#----DB config, fetch and insert
import json
import logging
import os
import pprint
from datetime import datetime, timezone
#import datetime  # module
#from datetime import datetime as dt  # class, aliased to avoid conflict
#from datetime import datetime as tz
#import datetime

import dateutil.parser
import pandas as pd
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
#from tqdm import tqdm

# Path to the service-account JSON key. Set the BQ_KEY_PATH environment
# variable to override; the literal is only the local-development default.
key_path = os.environ.get("BQ_KEY_PATH", "C:\\Users\\tonim\\keys\\bq_key.json")

# Credentials handed to the BigQuery client below.
credentials = service_account.Credentials.from_service_account_file(key_path)

# Postgres connection settings; unpacked as keyword arguments into
# psycopg2.connect() by fetch_staged_data().
PG_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "database": "FHIR_staging",
    "user": "postgres",
    # NOTE(review): secrets should not live in the notebook. Set
    # FHIR_PG_PASSWORD in the environment; the fallback literal is kept only so
    # existing local runs keep working and should be removed once that is done.
    "password": os.environ.get("FHIR_PG_PASSWORD", "new_password"),
}

# BigQuery config
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
# Reuse BQ_PROJECT so the client and the table ids cannot drift apart.
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)
dataset_ref = bigquery.Dataset(f"{BQ_PROJECT}.{BQ_DATASET}")

# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    """Yield the rows of ``fhir_staging_sample.<table>`` in batches.

    Args:
        table: Name of the staging table. It is interpolated directly into the
            SQL text, so it must come from trusted code, never from user input.
        batch_size: Maximum number of rows per yielded batch.

    Yields:
        The non-empty result of each ``cursor.fetchmany(batch_size)`` call.

    Connection or query errors are logged at ERROR level and end the iteration
    instead of propagating (best-effort behaviour kept from the original), so a
    failure looks like an early end of data to the caller.
    """
    conn = None
    cur = None
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        # NOTE(review): this is a default (unnamed) cursor; per the psycopg2
        # docs such a cursor transfers the whole result set to the client on
        # execute(), so batching here bounds per-batch work but not memory.
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging_sample.{table}")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            # Log the batch size only: dumping the rows themselves would write
            # complete patient payloads into the log.
            logging.info(f"Fetched {len(rows)} rows from {table}")
            yield rows
    except Exception as e:
        logging.error(f"Postgres connection or query failed: {e}")
    finally:
        # Runs on normal exhaustion, on error, and when the consumer abandons
        # the generator early, so the cursor/connection are never leaked.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
        


# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    """Load ``df`` into ``BQ_PROJECT.BQ_DATASET.<table_name>`` and block until the job ends."""
    destination = "{}.{}.{}".format(BQ_PROJECT, BQ_DATASET, table_name)
    load_job = client.load_table_from_dataframe(df, destination)
    # result() waits for completion of the load job.
    load_job.result()


def safe_bq_timestamp(dt):
    """Format a datetime as a UTC ``YYYY-MM-DD HH:MM:SS`` string.

    Falsy input (e.g. ``None``) yields ``None``; any other value is converted
    to UTC with ``astimezone`` before being formatted.
    """
    if dt:
        as_utc = dt.astimezone(timezone.utc)
        return as_utc.strftime("%Y-%m-%d %H:%M:%S")
    return None






In [None]:
#ETL for EOB's 

# Transform ExplanationOfBenefit (EOB) resources
def _get_nested(mapping, *keys):
    """Follow ``keys`` through nested dicts; return None if a level is absent or not a dict."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _extract_eob_fields(resource):
    """Collect the scalar EOB attributes of one resource, using None for anything missing."""
    if not isinstance(resource, dict):
        return {}

    # The original code read these fields from resource["resourceType"]["Coverage"];
    # that path is still honoured when it exists.
    # NOTE(review): in FHIR R4 ``resourceType`` is a plain string and
    # billablePeriod/created/insurer/provider/referral/facility/outcome are
    # top-level elements of ExplanationOfBenefit, hence the fallback to the
    # resource itself -- confirm against the staged payload.
    detail = _get_nested(resource, "resourceType", "Coverage")
    if not isinstance(detail, dict):
        detail = resource

    # NOTE(review): the original read ``subject``; FHIR R4 ExplanationOfBenefit
    # names this element ``patient`` -- both are tried, confirm which applies.
    patient_ref = (_get_nested(resource, "subject", "reference")
                   or _get_nested(resource, "patient", "reference"))
    # Keep only the part after the last ":" of the reference string.
    patient_id = patient_ref.split(":")[-1] if isinstance(patient_ref, str) else None

    return {
        "eob_id": resource.get("eob_id"),
        "status": resource.get("status"),
        "type_code": resource.get("type_code"),
        "type_system": resource.get("type_system"),
        "type_display": resource.get("type_display"),
        "use": resource.get("use"),
        "patient_id": patient_id,
        "billable_start": _get_nested(detail, "billablePeriod", "start"),
        "billable_end": _get_nested(detail, "billablePeriod", "end"),
        "create_date": detail.get("created"),
        "insurer": _get_nested(detail, "insurer", "display"),
        "provider": _get_nested(detail, "provider", "reference"),
        "referral_reference": _get_nested(detail, "referral", "reference"),
        "facility": _get_nested(detail, "facility", "display"),
        "outcome": detail.get("outcome"),
    }


def transform_eob_bq(rows):
    """Turn staged EOB rows into records for the BigQuery ``eobs`` load.

    Args:
        rows: Iterable of row sequences; index 1 is used as the record id and
            index 2 as the resource payload (a dict, or a JSON string that is
            decoded here).

    Returns:
        A list with one dict per input row holding ``eob_id`` and
        ``load_timestamp`` -- the two columns declared in the load schema of
        ``etl_EOBs``. The remaining attributes are parsed and logged at DEBUG
        level only, so the mapping can be checked before the schema is widened.

    Raises:
        ValueError: if a string payload is not valid JSON.
    """
    records = []
    for r in rows:
        rid, resource = r[1], r[2]

        # NOTE(review): psycopg2 decodes json/jsonb columns to Python objects,
        # but a text column would arrive as a string -- handle both.
        if isinstance(resource, (str, bytes, bytearray)):
            resource = json.loads(resource)

        parsed = _extract_eob_fields(resource)
        logging.debug(f"Transforming EOB {rid}: {parsed}")

        # TODO: item-level columns planned for the curated table (not yet emitted):
        #   eob_id STRING NOT NULL, sequence INT64, diagnosis_sequence ARRAY<INT64>,
        #   category_system / category_code / category_display STRING,
        #   product_system / product_code / product_display / product_text STRING,
        #   service_start / service_end TIMESTAMP,
        #   location_system / location_code / location_display STRING,
        #   encounter STRING, net_value FLOAT64, net_currency STRING,
        #   adjudication ARRAY<STRUCT<code STRING, display STRING,
        #                             value FLOAT64, currency STRING>>

        load_timestamp = safe_bq_timestamp(datetime.now(timezone.utc))
        records.append({
            # Was "encounter_id" (copy/paste from the encounter ETL); the load
            # schema requires "eob_id".
            "eob_id": rid,
            "load_timestamp": load_timestamp,
        })
    return records

# Main ETL loop
def etl_EOBs(load_to_bq=False):
    """Run the EOB pipeline: fetch staged rows, transform them, optionally load them.

    Args:
        load_to_bq: When False (the default, matching the previously
            commented-out load) batches are only transformed and counted.
            When True each non-empty batch is loaded into the ``eobs`` table
            with ``client.load_table_from_json`` and the job is awaited.

    Any exception is logged with its traceback and swallowed, so the function
    always returns None.
    """
    table_id = "fhir-synthea-data.fhir_curated_sample.eobs"
    try:
        # Built once: the schema does not change between batches.
        job_config = bigquery.LoadJobConfig(
            schema=[
                bigquery.SchemaField("eob_id", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("load_timestamp", "TIMESTAMP", mode="REQUIRED"),
            ]
        )

        for batch in fetch_staged_data("explanationofbenefits_fhir_raw"):
            records = transform_eob_bq(batch)
            if load_to_bq and records:
                job = client.load_table_from_json(records, table_id, job_config=job_config)
                job.result()  # wait for completion
                logging.info(f"Loaded {len(records)} rows to {table_id}")
            else:
                # Do not claim a load that did not happen.
                logging.info(f"Transformed {len(records)} rows for {table_id} (load skipped)")

    except Exception as e:
        # Was ``logging(...)``, which calls the module object and raises
        # TypeError inside the handler, hiding the original error.
        logging.exception(f"Error in EOB's ETL: {e}")

if __name__ == "__main__":
    # Entry point: run the EOB pipeline when this cell/script is executed directly.
    etl_EOBs()



In [1]:
# Scratch cell: prints a fixed greeting; it uses nothing defined in the cells above.
print("hello world")

hello world
