In [3]:
# Scratch cell: prints a fixed greeting; nothing below depends on it.
print("Hello World")

Hello World


In [4]:
#----DB config, fetch and insert
import json
import logging
import os
import pprint
import traceback
from datetime import datetime, timezone
#import datetime  # module
#from datetime import datetime as dt  # class, aliased to avoid conflict
#from datetime import datetime as tz
#import datetime

import dateutil.parser
import pandas as pd
import psycopg2
from google.cloud import bigquery
from google.oauth2 import service_account
#from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
# Machine-specific paths and credentials can be overridden through environment
# variables so the notebook does not have to be edited (or secrets committed)
# per machine. The defaults are the values this notebook previously hard-coded.

# Path to the service account JSON key used to authenticate against BigQuery.
key_path = os.environ.get("BQ_KEY_PATH", "C:\\Users\\tonim\\keys\\bq_key.json")

# Credentials built from the key file; handed to the BigQuery client below.
credentials = service_account.Credentials.from_service_account_file(key_path)

# Postgres config (keyword arguments for psycopg2.connect).
# NOTE(review): the password default is kept only for backward compatibility;
# prefer setting FHIR_PG_PASSWORD and removing the literal.
PG_CONFIG = {
    "host": os.environ.get("FHIR_PG_HOST", "localhost"),
    "port": int(os.environ.get("FHIR_PG_PORT", "5432")),
    "database": os.environ.get("FHIR_PG_DATABASE", "FHIR_staging"),
    "user": os.environ.get("FHIR_PG_USER", "postgres"),
    "password": os.environ.get("FHIR_PG_PASSWORD", "new_password"),
}

# BigQuery config
BQ_PROJECT = "fhir-synthea-data"
BQ_DATASET = "fhir_curated"
# Use BQ_PROJECT rather than repeating the literal so the two cannot drift.
client = bigquery.Client(project=BQ_PROJECT, credentials=credentials)
dataset_ref = bigquery.Dataset(f"{BQ_PROJECT}.{BQ_DATASET}")

# Send all log records (INFO and above) to a file next to the notebook.
logging.basicConfig(
    filename="etl_eobs.log",
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)

# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    """Yield the rows of a staging table from Postgres in batches.

    Args:
        table: Name of a table in the ``fhir_staging_sample`` schema. It is
            interpolated directly into the SQL text, so it must come from
            trusted code, never from user input.
        batch_size: Maximum number of rows per yielded batch.

    Yields:
        Non-empty batches of rows, exactly as returned by
        ``cursor.fetchmany(batch_size)``.

    Connection and query errors are logged and end the stream instead of
    propagating, so a caller observes a failure as "no (more) batches".
    """
    conn = None
    cur = None
    try:
        conn = psycopg2.connect(**PG_CONFIG)
        cur = conn.cursor()
        cur.execute(f"SELECT * FROM fhir_staging_sample.{table}")
        while True:
            rows = cur.fetchmany(batch_size)
            if not rows:
                break
            # Log only the batch size: dumping the rows themselves would write
            # the raw staged records into the log file on every batch.
            logging.info(f"Fetched {len(rows)} rows from {table}")
            yield rows
    except Exception as e:
        logging.error(f"Postgres connection or query failed: {e}")
    finally:
        # Runs on exhaustion, on error, and when the consumer stops iterating
        # early (generator close), so the cursor and connection never leak.
        for resource in (cur, conn):
            if resource is not None:
                resource.close()
        


# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    """Load a dataframe into ``BQ_PROJECT.BQ_DATASET.<table_name>``.

    Submits a load job through the module-level BigQuery ``client`` and blocks
    until that job has finished. Returns nothing.
    """
    destination = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    # .result() waits for the load job to complete before returning.
    client.load_table_from_dataframe(df, destination).result()


def safe_bq_timestamp(dt):
    """Format a datetime as a UTC ``YYYY-MM-DD HH:MM:SS`` string.

    Falsy input (e.g. ``None``) yields ``None``. Otherwise the value is
    converted to UTC with ``astimezone`` and formatted down to whole seconds,
    so any fractional part is not included in the result.
    """
    if dt:
        as_utc = dt.astimezone(timezone.utc)
        return as_utc.strftime("%Y-%m-%d %H:%M:%S")
    return None






In [5]:
# Main ETL loop
def etl_EOBs():
    """Iterate over the staged ExplanationOfBenefit batches.

    Reads ``explanationofbenefits_fhir_raw`` through ``fetch_staged_data`` and
    logs one line per batch. Any exception is logged together with its
    traceback and then suppressed, so this function always returns ``None``.

    TODO: the batches are not yet transformed or loaded; ``job_config`` is
    prepared for that step but nothing is written to BigQuery so far.
    """
    # NOTE(review): this targets the `fhir_curated_sample` dataset while
    # BQ_DATASET is "fhir_curated" -- confirm which one is intended.
    table_id = "fhir-synthea-data.fhir_curated_sample.eobs"
    try:
        # The load configuration is the same for every batch, so build it once.
        # TODO: add the remaining EOB columns to the schema.
        job_config = bigquery.LoadJobConfig(
            schema=[
                bigquery.SchemaField("eob_id", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("load_timestamp", "TIMESTAMP", mode="REQUIRED"),
            ]
        )
        for batch in fetch_staged_data("explanationofbenefits_fhir_raw"):
            logging.info(f"Processing batch for table: {table_id}")
    except Exception as e:
        # Bind the exception: the previous bare `except:` formatted an unbound
        # `e`, which raised NameError from inside the handler.
        logging.error(f"Error in EOB's ETL: {e}")
        logging.error(traceback.format_exc())
if __name__ == "__main__":
    # Entry point: bracket the ETL run with start/finish log lines.
    # The finish line is written whenever etl_EOBs() returns.
    logging.info("Starting EOB ETL...")
    etl_EOBs()
    logging.info("Finished EOB ETL")