In [None]:
import psycopg2
from google.cloud import bigquery
import pandas as pd
from tqdm import tqdm
import datetime

# Postgres config
# Keyword arguments that are unpacked straight into psycopg2.connect().
# NOTE(review): the password is hard-coded in source; consider loading it
# from the environment or a secrets store instead.
PG_CONFIG = dict(
    host="localhost",
    port=5432,
    database="FHIR_staging",
    user="postgres",
    password="new_password",
)

# BigQuery config
# Destination project and dataset. insert_to_bq() joins these with a table
# name to build the fully qualified id "<project>.<dataset>.<table>".
# NOTE(review): "your-gcp-project" looks like a placeholder — confirm it is
# replaced with the real project id before running.
BQ_PROJECT = "your-gcp-project"
BQ_DATASET = "fhir_curated"
# Single shared client, constructed as soon as this code is executed.
client = bigquery.Client(project=BQ_PROJECT)

# Helper: fetch staged data
def fetch_staged_data(table, batch_size=10000):
    """Yield the rows of ``fhir_staging.<table>`` in batches.

    This is a generator: the connection is opened on the first ``next()``
    and is closed when the generator is exhausted, closed, garbage
    collected, or when an error is raised while it is running.

    Args:
        table: Name of the table inside the ``fhir_staging`` schema. It is
            interpolated directly into the SQL text, so only pass trusted,
            hard-coded names.
        batch_size: Maximum number of rows in each yielded batch.

    Yields:
        The non-empty result of each ``cursor.fetchmany(batch_size)`` call.
    """
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            # NOTE(review): this is a default (unnamed) cursor; per the
            # psycopg2 docs such cursors pull the full result set to the
            # client on execute(), so batching here bounds the size of each
            # yielded chunk, not peak memory. A named cursor would stream.
            cur.execute(f"SELECT * FROM fhir_staging.{table}")
            while True:
                rows = cur.fetchmany(batch_size)
                if not rows:
                    break
                yield rows
        finally:
            # Runs even if the consumer stops iterating early or raises,
            # so the cursor and connection are never leaked.
            cur.close()
    finally:
        conn.close()

# Helper: insert dataframe into BigQuery
def insert_to_bq(df, table_name):
    """Load a DataFrame into a table of the configured BigQuery dataset.

    Args:
        df: DataFrame handed to the module-level BigQuery ``client``.
        table_name: Table name; it is qualified with ``BQ_PROJECT`` and
            ``BQ_DATASET`` to form the destination id.

    The call blocks until the load job has finished. Nothing is caught here,
    so any exception from starting or waiting on the job reaches the caller.
    """
    destination = f"{BQ_PROJECT}.{BQ_DATASET}.{table_name}"
    # result() waits for the load job to complete before returning.
    client.load_table_from_dataframe(df, destination).result()

# Example: Transform & load Patients
def transform_patients(rows):
    """Flatten staged FHIR Patient rows into a DataFrame of curated columns.

    Args:
        rows: Iterable of row sequences in which index 0 is the patient id
            and index 1 is the Patient resource as a dict.

    Returns:
        A pandas DataFrame with the columns ``patient_id``, ``first_name``,
        ``last_name``, ``birth_date``, ``gender`` and ``load_timestamp``,
        one row per input row. Empty input gives an empty frame that still
        carries these columns. Only the first entry of ``name`` and the
        first entry of its ``given`` list are used; when either is missing,
        null or empty the corresponding value is ``""``.
    """
    columns = [
        "patient_id",
        "first_name",
        "last_name",
        "birth_date",
        "gender",
        "load_timestamp",
    ]
    records = []
    for r in rows:
        rid, resource = r[0], r[1]  # adjust index if needed
        # "name" and "given" may be absent, null, or an empty list. Fall
        # back to a placeholder so that indexing [0] cannot raise.
        names = resource.get("name") or [{}]
        primary_name = names[0]
        given_names = primary_name.get("given") or [""]
        records.append({
            "patient_id": rid,
            "first_name": given_names[0],
            "last_name": primary_name.get("family", ""),
            "birth_date": resource.get("birthDate"),
            "gender": resource.get("gender"),
            # Same naive-UTC value utcnow() produced, without calling the
            # deprecated API; kept naive so the column dtype is unchanged.
            "load_timestamp": datetime.datetime.now(
                datetime.timezone.utc
            ).replace(tzinfo=None),
        })
    return pd.DataFrame(records, columns=columns)

# Main ETL loop
def etl_patients():
    """Run the patient ETL: read staged batches, transform, load.

    Each batch from the ``patients_fhir_raw`` staging table is converted
    with ``transform_patients`` and loaded into the ``patients`` table via
    ``insert_to_bq``. Progress is shown with ``tqdm``, one tick per batch.
    """
    staged_batches = fetch_staged_data("patients_fhir_raw")
    for raw_rows in tqdm(staged_batches):
        insert_to_bq(transform_patients(raw_rows), "patients")
