In [4]:
#--------------------------09/10/2025-------------------------#
#--------------------------Patients---------------------------#
# runs 218 min 21.5 sec
import os
import io
import json
import glob
import logging
import psycopg2
from psycopg2.extras import Json, execute_values
import orjson  # add this at the top of your file
import csv
import time
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
#from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import tempfile
import shutil
import collections
from collections import Counter
from collections import defaultdict

def logger(name, log_file, level=logging.DEBUG):
    """Return the named logger, attaching a file handler on first use.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Path of the log file; opened in ``'w'`` mode (truncated)
            the first time a handler is attached.
        level: Level applied to the logger on every call, and to the
            handler when it is created.

    Returns:
        The configured ``logging.Logger``.
    """
    log = logging.getLogger(name)
    log.setLevel(level)

    # A logger that already owns a handler is returned untouched so that
    # repeated calls do not produce duplicate log lines.
    if log.handlers:
        return log

    handler = logging.FileHandler(log_file, mode='w', encoding='utf-8')
    handler.setLevel(level)
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    log.addHandler(handler)
    return log


# -----------------------------
# DB config
# -----------------------------
# Keyword arguments handed to psycopg2.connect(**DB_CONFIG).
# NOTE(review): the password is hard-coded in source; consider loading it
# from the environment or a secrets store.
DB_CONFIG = dict(
    host="localhost",
    port=5432,
    database="FHIR_staging",
    user="postgres",
    password="new_password",
)

# -----------------------------
# FHIR staging map
# -----------------------------

# FHIR resource → staging table mapping
# resourceType -> (staging table name, id column name).
# Resource types missing from this map are skipped by the loaders below.
# NOTE(review): "patients_id" is the only plural id column; confirm it
# matches the actual column name in patients_fhir_raw.
FHIR_STAGING_MAP = {
    "Practitioner": ("practitioners_fhir_raw", "practitioner_id"),
    "PractitionerRole": ("practitioner_roles_fhir_raw", "practitioner_role_id"),
    "Patient": ("patients_fhir_raw", "patients_id"),
    "Encounter": ("encounters_fhir_raw", "encounter_id"),
    "Observation": ("observations_fhir_raw", "observation_id"),
    "Condition": ("conditions_fhir_raw", "condition_id"),
    "Claim": ("claims_fhir_raw", "claim_id"),
    "DiagnosticReport": ("diagnostics_fhir_raw", "diagnostic_id"),
    "DocumentReference": ("document_references_fhir_raw", "document_reference_id"),
    "ExplanationOfBenefit": ("explanationofbenefits_fhir_raw", "explanationofbenefit_id"),
    "CarePlan": ("careplans_fhir_raw", "careplan_id"),
    "Immunization": ("immunizations_fhir_raw", "immunization_id"),
    "Device": ("devices_fhir_raw", "device_id"),
    "SupplyDelivery": ("supplydeliveries_fhir_raw", "supplydelivery_id"),
    "Medication": ("medications_fhir_raw", "medication_id"),
    "MedicationRequest": ("medicationrequests_fhir_raw", "medicationrequest_id"),
    "MedicationAdministration": ("medicationadministrations_fhir_raw", "medicationadministration_id"),
    "ImagingStudy": ("imagingstudies_fhir_raw", "imagingstudy_id"),
    "Procedure": ("procedures_fhir_raw", "procedure_id"),
    "Organization": ("organizations_fhir_raw", "organization_id"),
    "Provenance": ("provenances_fhir_raw", "provenance_id"),
    "CareTeam": ("careteams_fhir_raw", "careteam_id"),
    "AllergyIntolerance": ("allergyintolerances_fhir_raw", "allergyintolerance_id"),
}

# Running totals per resourceType; process_folder adds to these in place.
global_counters = dict.fromkeys(FHIR_STAGING_MAP, 0)

#-----------------------------
# Chunk file helper
#-----------------------------

def chunk_files(file_list, chunk_size):
    """Yield consecutive slices of ``file_list`` holding at most ``chunk_size`` items."""
    yield from (
        file_list[start:start + chunk_size]
        for start in range(0, len(file_list), chunk_size)
    )

#---------------------------------------------------
# Process multiple files in one thread (one DB connection per chunk)
#----------------------------------------------------
def process_file_chunk(file_chunk):
    """Load every file in ``file_chunk`` through one shared DB connection.

    Args:
        file_chunk: Iterable of bundle file paths handled by this worker.

    Returns:
        Dict keyed by resourceType with the summed per-file insert counts.
        Files for which ``process_file`` returns a falsy result contribute
        nothing to the totals.
    """
    totals = dict.fromkeys(FHIR_STAGING_MAP, 0)
    # One connection per worker call; it is closed in the finally below.
    connection = psycopg2.connect(**DB_CONFIG)
    try:
        for path in file_chunk:
            try:
                summary = process_file(path, conn=connection) or {}
                for rtype, inserted in summary.items():
                    totals[rtype] += inserted
            except Exception as e:
                logger.error(f"Error processing file {path}: {e}")
                connection.rollback()  # rollback on file-level errors
    finally:
        connection.close()
    return totals


#import io

def insert_batch_copy(conn, table, id_field, rows, schema="fhir_staging_sample"):
    """Bulk-load ``(id, resource)`` pairs into a staging table via COPY ... CSV.

    Args:
        conn: Open psycopg2 connection; this function commits on success
            and rolls back on failure.
        table: Target table name (taken from FHIR_STAGING_MAP by callers).
        id_field: Name of the id column in ``table``.
        rows: Sequence of ``(resource_id, resource)`` pairs; each resource
            is serialized with ``orjson.dumps``.
        schema: Schema that owns ``table``. Defaults to the sample schema
            that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        ``len(rows)`` when the COPY succeeds, ``0`` when ``rows`` is empty
        or the COPY raises (the error is logged, not re-raised).

    NOTE(review): COPY has no ON CONFLICT handling, so a duplicate key fails
    the whole batch for this table.
    """
    if not rows:
        return 0

    # Build the CSV payload in memory; csv.writer takes care of quoting the
    # commas, quotes and newlines found inside the JSON text.
    buffer = io.StringIO()
    writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
    writer.writerows(
        (rid, orjson.dumps(resource_json).decode())
        for rid, resource_json in rows
    )
    buffer.seek(0)

    # Identifiers come from module constants, not user input.
    sql = f"COPY {schema}.{table} ({id_field}, resource) FROM STDIN WITH CSV"

    try:
        with conn.cursor() as cur:
            cur.copy_expert(sql, buffer)
        conn.commit()
        return len(rows)
    except Exception as e:
        conn.rollback()
        logger.error(f"COPY error for {table}: {e}")
        return 0



# -----------------------------
# Insert batch helper
# -----------------------------
def insert_batch(conn, table, id_field, rows, schema="fhir_staging_sample"):
    """Insert ``(id, resource)`` pairs with INSERT ... ON CONFLICT DO NOTHING.

    Args:
        conn: Open psycopg2 connection. This function does not commit; the
            caller owns the transaction. It rolls back on failure.
        table: Target table name.
        id_field: Name of the id column; also the ON CONFLICT target.
        rows: Sequence of ``(resource_id, resource)`` pairs. A resource may
            be a plain ``dict`` or an already-wrapped ``Json`` value.
        schema: Schema that owns ``table``. Defaults to the sample schema
            that was previously hard-coded.

    Returns:
        Number of rows submitted (rows skipped by ON CONFLICT are still
        counted), or ``0`` when ``rows`` is empty or the insert raises.
    """
    if not rows:
        return 0

    sql = f"""
        INSERT INTO {schema}.{table} ({id_field}, resource)
        VALUES %s
        ON CONFLICT ({id_field}) DO NOTHING
    """

    # psycopg2 cannot adapt a plain dict, so wrap those in Json; values that
    # are already adapted (e.g. Json instances) pass through unchanged.
    adapted = [
        (rid, Json(resource) if isinstance(resource, dict) else resource)
        for rid, resource in rows
    ]

    try:
        with conn.cursor() as cur:
            execute_values(cur, sql, adapted)
        return len(adapted)
    except Exception as e:
        conn.rollback()
        logger.error(f"Error inserting batch into {table}: {e}")
        return 0

# -----------------------------
# Process one JSON file
# -----------------------------
def process_file(file_path,  conn=None):
    """Load one FHIR bundle file into the staging tables.

    Args:
        file_path: Path to a JSON bundle containing an ``entry`` list.
        conn: Optional open psycopg2 connection to reuse. When omitted, a
            connection is opened from DB_CONFIG and closed before returning.

    Returns:
        Dict keyed by resourceType with the number of rows reported by
        ``insert_batch_copy``, or ``None`` if reading or loading the file
        raised (the error is logged and the connection rolled back).
    """
    inserted_summary = {k: 0 for k in FHIR_STAGING_MAP}

    close_conn = conn is None
    if close_conn:
        conn = psycopg2.connect(**DB_CONFIG)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Group rows by resourceType in a single pass. Each type maps to
        # exactly one staging table, so no reverse lookup is needed later.
        rows_by_type = defaultdict(list)
        for entry in data.get("entry") or []:
            resource = entry.get("resource")
            if not resource:
                continue

            rtype = resource.get("resourceType")
            rid = resource.get("id")
            if not rtype or not rid:
                continue

            if rtype not in FHIR_STAGING_MAP:
                logger.warning(f"Skipping unsupported resourceType: {rtype}")
                continue

            rows_by_type[rtype].append((rid, resource))

        # Insert per table
        for rtype, rows in rows_by_type.items():
            table, id_field = FHIR_STAGING_MAP[rtype]
            inserted_summary[rtype] += insert_batch_copy(conn, table, id_field, rows)

        # insert_batch_copy already commits per table; this final commit is
        # kept so the connection is left with no open transaction.
        conn.commit()

        logger.info(f"Processed {os.path.basename(file_path)}: {inserted_summary}")
        return inserted_summary

    except Exception as e:
        logger.error(f"Failed to process {file_path}: {e}")
        conn.rollback()
        return None
    finally:
        if close_conn:
            conn.close()

def process_file_for_batch(file_path):
    """Parse one bundle file and group its resources by staging table.

    No database access happens here.

    Args:
        file_path: Path to a JSON bundle file.

    Returns:
        Dict mapping ``(table, id_field)`` to a list of ``(id, resource)``
        pairs. An empty dict is returned when the file cannot be read or
        parsed, or when it holds no supported resources.
    """
    print(f"Processing {file_path}")
    grouped = defaultdict(list)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            bundle = json.load(f)
        for entry in bundle.get("entry", []):
            resource = entry.get("resource")
            if not resource:
                continue
            rtype, rid = resource.get("resourceType"), resource.get("id")
            # Skip entries without a type or id, and unsupported types.
            if not (rtype and rid and rtype in FHIR_STAGING_MAP):
                continue
            # The map value is already the (table, id_field) grouping key.
            grouped[FHIR_STAGING_MAP[rtype]].append((rid, resource))
    except Exception as e:
        print(f"Failed to parse {file_path}: {e}")
        return {}
    return dict(grouped)

# -----------------------------
# Folder processing with threads
# -----------------------------
# JSON file holding the list of bundle paths already loaded; read by
# load_checkpoint and rewritten by save_checkpoint.
CHECKPOINT_FILE = r"C:\Users\tonim\Downloads\output\fhir\processed_files.json"

def load_checkpoint(checkpoint_file=None):
    """Load the set of already-processed file paths.

    Args:
        checkpoint_file: Path of the checkpoint JSON file. Defaults to the
            module-level CHECKPOINT_FILE.

    Returns:
        Set of path strings. An empty set is returned when the file does
        not exist, is not valid JSON, or does not contain a JSON list.
        List entries that are not strings are dropped with a warning.
    """
    path = checkpoint_file or CHECKPOINT_FILE
    # Open directly instead of checking os.path.exists first, so there is no
    # window in which the file can vanish between the check and the read.
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return set()
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupted JSON: ignore and start fresh
        logger.warning(f"{path} is corrupted. Starting fresh.")
        return set()

    if not isinstance(data, list):
        # Corrupted format: ignore and start fresh
        logger.warning(f"{path} is not a list. Starting fresh.")
        return set()

    # Callers pass every entry to os.path.abspath, so keep strings only.
    # Unhashable entries (nested lists/dicts) would also break set().
    entries = {item for item in data if isinstance(item, str)}
    dropped = sum(1 for item in data if not isinstance(item, str))
    if dropped:
        logger.warning(f"{path} contained {dropped} non-string entries; ignoring them.")
    return entries

def save_checkpoint(processed_files, checkpoint_file=None):
    """Write the set of processed file paths to the checkpoint file.

    The data is written to a temporary file in the same directory as the
    target and then swapped in with ``os.replace``. Readers therefore never
    see a half-written checkpoint, and the swap also works when the target
    already exists on Windows.

    Args:
        processed_files: Iterable of path strings to persist.
        checkpoint_file: Destination path. Defaults to the module-level
            CHECKPOINT_FILE.

    Failures are logged and swallowed; a leftover temporary file is removed.
    """
    target = os.path.abspath(checkpoint_file or CHECKPOINT_FILE)
    tmp_path = None
    try:
        # Same directory as the target => same filesystem, so the final
        # rename cannot degrade into a copy-then-delete.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=os.path.dirname(target),
            suffix=".tmp",
            delete=False,
        ) as tmp:
            tmp_path = tmp.name
            json.dump(list(processed_files), tmp, indent=2)
        # The handle is closed here, which Windows requires before renaming.
        os.replace(tmp_path, target)
    except Exception as e:
        logger.error(f"Failed to save checkpoint: {e}")
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)

#def process_folder(folder_path, max_workers=4, files_per_worker=50):
def process_folder(folder_path, max_workers=8, files_per_worker=50, checkpoint_interval=5):
    """Load every not-yet-processed ``*.json`` bundle in ``folder_path``.

    Files are split into chunks of ``files_per_worker`` and each chunk is
    handed to a worker thread. Per-resourceType totals are added to the
    module-level ``global_counters`` and written to the log and stdout.

    Args:
        folder_path: Directory containing the bundle files.
        max_workers: Size of the thread pool.
        files_per_worker: Number of files per submitted chunk.
        checkpoint_interval: Write the checkpoint after this many completed
            chunks. Values below 1 are treated as 1. Any unsaved progress is
            flushed when the pool finishes or an exception propagates.

    NOTE(review): a chunk is recorded as processed as soon as its worker
    returns, even if individual files inside it failed to load; those
    failures are only visible in the log.
    """
    files = [os.path.abspath(f) for f in glob.glob(os.path.join(folder_path, "*.json"))]
    if not files:
        logger.info("No files found.")
        return

    processed_files = set(os.path.abspath(f) for f in load_checkpoint())
    remaining_files = [f for f in files if f not in processed_files]

    logger.info(f"Starting processing {len(remaining_files)} files...")

    file_chunks = list(chunk_files(remaining_files, files_per_worker))

    save_every = max(1, checkpoint_interval)
    unsaved_chunks = 0

    try:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(process_file_chunk, chunk): chunk for chunk in file_chunks}

            for future in tqdm(as_completed(futures), total=len(file_chunks)):
                chunk = futures[future]
                try:
                    res = future.result()
                except Exception as e:
                    logger.error(f"Error processing chunk {chunk}: {e}")
                    continue
                if not res:
                    continue

                for k, v in res.items():
                    global_counters[k] += v

                processed_files.update(chunk)
                unsaved_chunks += 1
                # Rewriting the whole checkpoint is proportional to the
                # number of files seen so far, so only do it every N chunks.
                if unsaved_chunks >= save_every:
                    save_checkpoint(processed_files)
                    unsaved_chunks = 0
    finally:
        # Flush the tail (or whatever completed before an interruption).
        if unsaved_chunks:
            save_checkpoint(processed_files)

    logger.info("Folder processing completed.")
    logger.info("All files processed. Totals by resourceType:")
    for rtype, count in global_counters.items():
        logger.info(f"{rtype}: {count}")

    print("Totals by resourceType:")
    for rtype, count in global_counters.items():
        print(f"{rtype}: {count}")

# -----------------------------
# Alternative process_folder (disabled: wrapped in a string literal below, never executed)
# -----------------------------
'''def process_folder(folder_path, max_workers=1):
    
    files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".json")]
    
    if not files:
        print("No files found.")
        return

    processed_files = set(os.path.abspath(f) for f in load_checkpoint())
    remaining_files = [os.path.abspath(f) for f in files if os.path.abspath(f) not in processed_files]

    #logger.info(f"Starting processing {len(remaining_files)} files...")

    all_batches = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        
        futures = {executor.submit(process_file_for_batch, f): f for f in files}
        
        for future in as_completed(futures):
            try: 
                batch_map = future.result()
                if batch_map:
                    all_batches.append(batch_map)
            except Exception as e:
                print(f"Error processing a file: {e}")
            processed_files.add(futures[future])
            save_checkpoint(processed_files)

    # Insert batches in main thread using your insert_batch_copy
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        global_counters = {k: 0 for k in FHIR_STAGING_MAP.keys()}
        for batch_map in all_batches:
            for (table, id_field), rows in batch_map.items():
               
                inserted = insert_batch_copy(conn, table, id_field, rows)
                rtype = next((k for k, v in FHIR_STAGING_MAP.items() if v[0] == table), table)
                global_counters[rtype] += inserted
        #conn.commit()
    finally:
        conn.close()

    
    print("Totals by resourceType:")
    for rtype, count in global_counters.items():
        print(f"{rtype}: {count}")'''

# -----------------------------
# Main
# -----------------------------

def main():
    """Run the folder loader against the Batch_1 Patients directory."""
    patients_dir = r"C:\Users\tonim\Downloads\output\fhir\Batch_1\metadata\Patients"
    settings = {
        "max_workers": 8,
        "files_per_worker": 50,
        "checkpoint_interval": 5,
    }
    process_folder(patients_dir, **settings)

if __name__ == "__main__":
    # Rebinds the module-level name `logger` from the factory function to
    # the Logger instance it returns. The functions above call
    # logger.info/.warning/.error, so this assignment must run before main().
    # NOTE(review): after this line the factory is no longer reachable as
    # `logger`, and importing this file as a module (without running this
    # block) leaves `logger` bound to the function.
    logger = logger(
        "staging_test", 
        "C:\\Users\\tonim\\Downloads\\output\\fhir\\staging_logger.log"
    )
    #folder = r"C:\Users\tonim\Downloads\output\fhir\Test_Files"
    #C:\Users\tonim\Downloads\output\fhir\Test_Files
    #process_folder(folder, max_workers=8)
    main()


100%|██████████| 117/117 [16:28<00:00,  8.45s/it]

Totals by resourceType:
Practitioner: 0
PractitionerRole: 0
Patient: 5834
Encounter: 350515
Observation: 3065994
Condition: 214665
Claim: 653713
DiagnosticReport: 696976
DocumentReference: 350515
ExplanationOfBenefit: 653713
CarePlan: 19438
Immunization: 83349
Device: 33816
SupplyDelivery: 154592
Medication: 106672
MedicationRequest: 303198
MedicationAdministration: 106672
ImagingStudy: 27895
Procedure: 967105
Organization: 0
Provenance: 5834
CareTeam: 19438
AllergyIntolerance: 5163





In [6]:
#-------09/10/2025--------------------------#
#       Organizations-----------------------#
# 4.9 sec to run
import os
import glob
import json
import logging
import psycopg2
from datetime import datetime
from psycopg2.extras import execute_batch
import pandas as pd

# ----------------------------
# Logger setup
# ----------------------------
#staging_logger = logging.getLogger("staging_logger")
#logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
def logger(name, log_file, level=logging.DEBUG):
    """Fetch the named logger and give it a file handler if it has none.

    The log file is opened in write mode, so it is truncated the first time
    a handler is attached for ``name``.
    """
    named_logger = logging.getLogger(name)
    named_logger.setLevel(level)

    needs_handler = not named_logger.handlers
    if needs_handler:
        file_handler = logging.FileHandler(log_file, mode='w', encoding='utf-8')
        file_handler.setLevel(level)
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        )
        named_logger.addHandler(file_handler)

    return named_logger

# Shared logger for this cell; every function below writes through it.
staging_logger = logger(
    name="staging_test",
    log_file="C:\\Users\\tonim\\Downloads\\output\\fhir\\staging_logger.log",
)
#staging_logger = logging.getLogger("staging")

# NOTE(review): this repeats the definition of logger() found earlier in
# this cell; one of the two can be removed.
def logger(name, log_file, level=logging.DEBUG):
    """Return the logger called ``name``, adding a file handler on first use."""
    result = logging.getLogger(name)
    result.setLevel(level)
    if result.handlers:
        # Already configured: do not attach a second handler.
        return result

    fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    sink = logging.FileHandler(log_file, 'w', 'utf-8')
    sink.setLevel(level)
    sink.setFormatter(fmt)
    result.addHandler(sink)
    return result


# ----------------------------
# DB Connection
# ----------------------------
def get_db_connection():
    """Open and return a new psycopg2 connection to the FHIR_staging database."""
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment.
    params = {
        "host": "localhost",
        "dbname": "FHIR_staging",
        "user": "postgres",
        "password": "new_password",
        "port": 5432,
    }
    return psycopg2.connect(**params)

# ----------------------------
# Batch insert
# ----------------------------
#df = pd.DataFrame()
# Module-level accumulator: insert_organization_batch appends one
# {"file": ..., "id": ...} dict for every resource that has an id.
rows = []
# Placeholder; reassigned in the __main__ block to the per-id list of files.
id_file_map = pd.DataFrame()
def insert_organization_batch(batch, file_path):
    """Insert a batch of resources into organizations_fhir_raw.

    Each call opens its own connection, inserts with ON CONFLICT DO NOTHING,
    commits, and closes the connection. Insert errors are logged and rolled
    back, not raised.

    Args:
        batch: List of resource dicts taken from a bundle's entries.
        file_path: Source file of the batch. Its basename is recorded next
            to each resource id in the module-level ``rows`` list.

    Resources without an ``id`` are skipped with a warning, so one id-less
    resource cannot send a NULL key to the database and fail the batch.
    """
    if not batch:
        return

    sql = """
        INSERT INTO fhir_staging_sample.organizations_fhir_raw (organization_id, resource)
        VALUES (%s, %s)
        ON CONFLICT (organization_id) DO NOTHING
    """

    file_name = os.path.basename(file_path)
    values = []
    for resource in batch:
        resource_id = resource.get("id")
        if not resource_id:
            continue
        values.append((resource_id, json.dumps(resource)))
        rows.append({"file": file_name, "id": resource_id})
        staging_logger.info(f"resource.getid: {resource_id}")

    skipped = len(batch) - len(values)
    if skipped:
        staging_logger.warning(f"Skipped {skipped} resources without id in {file_path}")
    if not values:
        return

    conn = get_db_connection()
    try:
        # The cursor context manager closes the cursor even if the insert raises.
        with conn.cursor() as cur:
            execute_batch(cur, sql, values, page_size=100)
        conn.commit()
        staging_logger.info(f"Inserted {len(values)} organization resources into DB")
    except Exception as e:
        conn.rollback()
        staging_logger.error(f"DB insert failed: {e}")
    finally:
        conn.close()

# ----------------------------
# Process a single hospital file
# ----------------------------
def process_hospital_file(file_path, batch_size=100):
    """Read one bundle file and insert its resources in batches.

    Args:
        file_path: Path to the JSON bundle.
        batch_size: Number of resources handed to
            ``insert_organization_batch`` per call.

    Returns:
        Number of resources read from the file. ``0`` is returned when the
        file has no ``entry`` key, is malformed JSON, or any other error
        occurs while processing it.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if "entry" not in data:
            staging_logger.warning(f"No entries in {file_path}")
            return 0

        total = 0
        pending = []
        for item in data["entry"]:
            res = item.get("resource")
            if res:
                pending.append(res)
                total += 1
                # Flush as soon as the batch is full.
                if len(pending) >= batch_size:
                    insert_organization_batch(pending, file_path)
                    pending = []

        # Flush whatever is left after the last full batch.
        if pending:
            insert_organization_batch(pending, file_path)

        staging_logger.info(f"Processed {total} resources from {file_path}")
        return total

    except json.JSONDecodeError as e:
        staging_logger.error(f"Malformed JSON in {file_path}: {e}")
        return 0
    except Exception as e:
        staging_logger.exception(f"Error processing file {file_path}")
        return 0

# ----------------------------
# Sequential loader for all Hospital files
# ----------------------------
def load_hospital_files_sequential(hospital_folder):
    """Process every ``*.json`` file in ``hospital_folder`` one after another."""
    pattern = os.path.join(hospital_folder, "*.json")
    files = glob.glob(pattern)
    if not files:
        staging_logger.warning(f"No Hospital JSON files found in {hospital_folder}")
        return

    # Sum the per-file resource counts reported by process_hospital_file.
    total_resources = sum(
        process_hospital_file(path, batch_size=100) for path in files
    )

    staging_logger.info(f"Finished processing Hospital files. Total resources: {total_resources}")

# ----------------------------
# Main execution
# ----------------------------
if __name__ == "__main__":
    fhir_folder = r"C:\Users\tonim\Downloads\output\fhir"
    hospital_folder = os.path.join(fhir_folder, "Hospital")
    load_hospital_files_sequential(hospital_folder)

    # Naming the columns keeps the frame well-formed when nothing was
    # collected (no files found, or no resource had an id); without them the
    # column selection below raises KeyError on an empty frame.
    df = pd.DataFrame(rows, columns=["file", "id"])

    # One row per (file, id) pair that was seen.
    df[["file", "id"]].to_csv(r"C:\Users\tonim\Downloads\output\fhir\org_id_1_file.csv", index=False)
    # How many times each id occurred across all files.
    id_counts = df.groupby("id").size().reset_index(name="count")
    id_counts.to_csv(r"C:\Users\tonim\Downloads\output\fhir\count_ids_1_file.csv")
    # For each id, the list of files it appeared in.
    id_file_map = df.groupby("id")["file"].agg(list).reset_index()
    id_file_map.to_csv(r"C:\Users\tonim\Downloads\output\fhir\total_ids_1_file.csv")
    
    #file = r"C:\Users\tonim\Downloads\output\fhir\Hospital\hospitalInformation1755716738658.json"
    #load_hospital_files_sequential(file)

In [7]:
#---------------09/10/2025--------------------------#
#---------------Practitioners-----------------------#
# .9 sec to run
import os
import glob
import json
import logging
import psycopg2
from datetime import datetime
from psycopg2.extras import execute_batch
from psycopg2.extras import Json, execute_values
import pandas as pd
from collections import defaultdict

# ----------------------------
# Logger setup
# ----------------------------
#staging_logger = logging.getLogger("staging_logger")
#logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
def logger(name, log_file, level=logging.DEBUG):
    """Return the named logger, creating its file handler when it has none.

    The file handler opens ``log_file`` in write mode with UTF-8 encoding.
    """
    target = logging.getLogger(name)
    target.setLevel(level)

    if len(target.handlers) == 0:
        file_sink = logging.FileHandler(log_file, mode='w', encoding='utf-8')
        file_sink.setLevel(level)
        line_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_sink.setFormatter(line_format)
        target.addHandler(file_sink)

    return target

# Shared logger for this cell; every function below writes through it.
staging_logger = logger(
    name="staging_test",
    log_file="C:\\Users\\tonim\\Downloads\\output\\fhir\\staging_logger.log",
)
#staging_logger = logging.getLogger("staging")

# NOTE(review): this repeats the definition of logger() found earlier in
# this cell; one of the two can be removed.
def logger(name, log_file, level=logging.DEBUG):
    """Return the logger called ``name``; attach a file handler only once."""
    instance = logging.getLogger(name)
    instance.setLevel(level)
    if instance.handlers:
        return instance

    writer = logging.FileHandler(log_file, 'w', 'utf-8')
    writer.setLevel(level)
    writer.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    instance.addHandler(writer)
    return instance

# FHIR resource → staging table mapping
# resourceType -> (staging table name, id column name) for the two resource
# types this cell loads; any other type is skipped with a warning.
FHIR_STAGING_PRACT_MAP = dict(
    Practitioner=("practitioners_fhir_raw", "practitioner_id"),
    PractitionerRole=("practitioner_roles_fhir_raw", "practitioner_role_id"),
)
# -----------------------------
# DB config
# -----------------------------
# Keyword arguments handed to psycopg2.connect(**DB_CONFIG).
# NOTE(review): the password is hard-coded in source; consider loading it
# from the environment or a secrets store.
DB_CONFIG = dict(
    host="localhost",
    port=5432,
    database="FHIR_staging",
    user="postgres",
    password="new_password",
)

# ----------------------------
# DB Connection
# ----------------------------
def get_db_connection():
    """Open and return a new psycopg2 connection to the FHIR_staging database."""
    # NOTE(review): this repeats the settings held in DB_CONFIG, and the
    # functions visible in this cell connect through DB_CONFIG instead.
    params = {
        "host": "localhost",
        "dbname": "FHIR_staging",
        "user": "postgres",
        "password": "new_password",
        "port": 5432,
    }
    return psycopg2.connect(**params)

# ----------------------------
# Batch insert
# ----------------------------
#df = pd.DataFrame()
# NOTE(review): neither module-level name is read by the code visible in
# this cell (`rows` is only used below as a parameter / local name); they
# look like leftovers from the Organizations cell.
rows = []
id_file_map = pd.DataFrame()

def insert_practitioner_batch(conn, table, id_field, rows):
    """Insert ``(id, resource)`` pairs with INSERT ... ON CONFLICT DO NOTHING.

    Args:
        conn: Open psycopg2 connection. Nothing is committed here; the
            connection is rolled back if the insert raises.
        table: Target table inside the fhir_staging_sample schema.
        id_field: Name of the id column; also the ON CONFLICT target.
        rows: Sequence of ``(resource_id, resource)`` value tuples.

    Returns:
        ``len(rows)`` when the statement succeeds, otherwise ``0``.
    """
    if not rows:
        return 0

    sql = f"""
        INSERT INTO fhir_staging_sample.{table} ({id_field}, resource)
        VALUES %s
        ON CONFLICT ({id_field}) DO NOTHING
    """

    submitted = 0
    try:
        with conn.cursor() as cur:
            execute_values(cur, sql, rows)
        submitted = len(rows)
    except Exception as e:
        conn.rollback()
        staging_logger.error(f"Error inserting batch into {table}: {e}")
        submitted = 0
    return submitted
# Audit trail: process_practitioner_file appends one
# {"resource_type": ..., "id": ...} dict for every entry that has a resource.
records =[]
# Placeholder; rebuilt from `records` in the __main__ block.
df = pd.DataFrame()
# ----------------------------
# Process a single practitioner file
# ----------------------------
def process_practitioner_file(file_path):
    """Load the Practitioner / PractitionerRole resources of one bundle file.

    Every entry that carries a resource is recorded in the module-level
    ``records`` list. Resources whose type is listed in
    FHIR_STAGING_PRACT_MAP are inserted into the matching table, and the
    transaction is committed once per file.

    Args:
        file_path: Path to the JSON bundle.

    Returns:
        Number of resources queued for insertion. ``0`` is returned when
        the file has no ``entry`` key, is malformed JSON, or any other
        error occurs.
    """
    count = 0
    batch_map = defaultdict(list)
    conn = None

    try:
        conn = psycopg2.connect(**DB_CONFIG)
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if "entry" not in data:
            staging_logger.warning(f"No entries in {file_path}")
            return 0

        for entry in data["entry"]:
            resource = entry.get("resource")
            if not resource:
                continue

            rtype = resource.get("resourceType")
            rid = resource.get("id")
            # Recorded before validation, so the audit CSV also lists
            # resources that are skipped below.
            records.append({
                "resource_type" : rtype,
                "id" : rid
            })

            if not rtype or not rid:
                continue

            if rtype not in FHIR_STAGING_PRACT_MAP:
                staging_logger.warning(f"Skipping unsupported resourceType: {rtype}")
                continue

            table, id_field = FHIR_STAGING_PRACT_MAP[rtype]
            batch_map[(table, id_field)].append((rid, Json(resource)))
            count += 1

        # Insert per table
        for (table, id_field), table_rows in batch_map.items():
            inserted = insert_practitioner_batch(conn, table, id_field, table_rows)
            staging_logger.info(f"Inserted {inserted} into {table}")

        conn.commit()

        staging_logger.info(f"Processed {count} resources from {file_path}")
        return count

    except json.JSONDecodeError as e:
        staging_logger.error(f"Malformed JSON in {file_path}: {e}")
        return 0
    except Exception as e:
        staging_logger.error(f"Error processing file {file_path}: {e}")
        return 0
    finally:
        # Close on every path: the early "no entries" return and both error
        # branches used to leave the connection open.
        if conn is not None:
            conn.close()


# ----------------------------
# Sequential loader for all Practitioner files
# ----------------------------
def load_practioner_files_sequential(practitioner_folder):
    """Process every ``*.json`` file in ``practitioner_folder`` one after another.

    Args:
        practitioner_folder: Directory holding the practitioner bundle files.

    The summed per-file resource count is written to the log; nothing is
    returned.
    """
    files = glob.glob(os.path.join(practitioner_folder, "*.json"))
    if not files:
        # The message used to say "Hospital", copied from the Organizations loader.
        staging_logger.warning(f"No Practitioner JSON files found in {practitioner_folder}")
        return

    total_resources = 0
    for file_path in files:
        total_resources += process_practitioner_file(file_path)

    staging_logger.info(f"Finished processing Practitioner files. Total resources: {total_resources}")

# ----------------------------
# Main execution
# ----------------------------
if __name__ == "__main__":
    fhir_folder = r"C:\Users\tonim\Downloads\output\fhir"
    practitioner_folder = os.path.join(fhir_folder, "Practitioner")
    load_practioner_files_sequential(practitioner_folder)

    # Naming the columns keeps the frame well-formed when no records were
    # collected; without them groupby("resource_type") raises KeyError on an
    # empty frame.
    df = pd.DataFrame(records, columns=["resource_type", "id"])
    df.to_csv(r"C:\Users\tonim\Downloads\output\fhir\pract2_file.csv", index=False)

    # Number of distinct ids seen per resource type.
    counts_df = df.groupby("resource_type")["id"].nunique().reset_index(name="unique_count")
    counts_df.to_csv(r"C:\Users\tonim\Downloads\output\fhir\count_Pract_role2_file.csv")
    #id_counts.to_csv(r"C:\Users\tonim\Downloads\output\fhir\count_ids_file.csv")
    #id_file_map = df.groupby("id")["file"].agg(list).reset_index()
    #id_file_map.to_csv(r"C:\Users\tonim\Downloads\output\fhir\total_ids_file.csv")
    #file = r"C:\Users\tonim\Downloads\output\fhir\Hospital\hospitalInformation1755716738658.json"
    #load_hospital_files_sequential(file)