In [0]:
import json
import os
def replace_keys(data, key_mapping):
    """Return a copy of a nested JSON structure with dictionary keys renamed.

    Dicts and lists are rebuilt at every depth, including lists nested
    directly inside other lists. A key found in ``key_mapping`` is replaced by
    its mapped name; all other keys are kept. The input is not mutated.

    Args:
        data: Parsed JSON value (dict, list, or scalar).
        key_mapping: Dict mapping an old key name to its new key name.

    Returns:
        A new structure of the same shape with the keys renamed. Values that
        are neither dict nor list are returned unchanged.
    """
    if isinstance(data, dict):
        return {
            key_mapping.get(key, key): replace_keys(value, key_mapping)
            for key, value in data.items()
        }
    if isinstance(data, list):
        # Recurse into every element so dicts inside nested lists are renamed too.
        return [replace_keys(item, key_mapping) for item in data]
    return data

def remove_file_path(data, keys_names):
    """
    Recursively reduce path strings to their last component, in place.

    Every dict reachable through nested dicts and lists is visited. For each
    key listed in ``keys_names`` whose value is a string, the value is
    replaced by the last path component. Other values are left untouched.

    Args:
        data: Parsed JSON value (dict, list, or scalar); mutated in place.
        keys_names: Collection of key names whose string values are paths.

    Returns:
        The same ``data`` object that was passed in.
    """
    # os.path.basename("a/b/") is "", so trailing separators are dropped first
    # to keep the final directory name instead of an empty string.
    separators = os.sep + (os.altsep or "")

    if isinstance(data, dict):
        for key, value in data.items():
            if key in keys_names and isinstance(value, str):
                # Assigning to an existing key is safe while iterating.
                data[key] = os.path.basename(value.rstrip(separators))
            elif isinstance(value, (dict, list)):
                remove_file_path(value, keys_names)
    elif isinstance(data, list):
        for item in data:
            # Scalars fall straight through, so lists nested in lists are
            # traversed as well.
            remove_file_path(item, keys_names)
    return data

def prefix_folder_path(data, folder_path, table_name):
    """
    Recursively rewrite path values as ``<folder>/<table_name>``, in place.

    Every dict reachable through nested dicts and lists is visited. For each
    key that is present in ``folder_path`` and whose current value is a
    string, the value is replaced by
    ``os.path.join(folder_path[key], table_name)``. The previous value is not
    part of the result.

    Args:
        data: Parsed JSON value (dict, list, or scalar); mutated in place.
        folder_path: Dict mapping a key name to the folder used for that key.
        table_name: Name appended to the folder for every rewritten key.

    Returns:
        The same ``data`` object that was passed in.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            if key in folder_path and isinstance(value, str):
                # Assigning to an existing key is safe while iterating.
                data[key] = os.path.join(folder_path[key], table_name)
            elif isinstance(value, (dict, list)):
                prefix_folder_path(value, folder_path, table_name)
    elif isinstance(data, list):
        for item in data:
            # Scalars fall straight through, so lists nested in lists are
            # traversed as well.
            prefix_folder_path(item, folder_path, table_name)
    return data

def correct_onboard_json(volume_path):
    """Build the combined onboarding config from the JSON files in a folder.

    For every ``*.json`` file directly under ``volume_path`` the first entry
    of the file's top-level list is processed:

    1. ``*_dev`` keys listed in the key mapping are renamed for the
       environment returned by the ``environment`` widget.
    2. The table name is taken from the last path component of
       ``source_details.source_path_<environment>``.
    3. String values of the keys in ``keys_to_modify`` are cut down to their
       last path component.
    4. String values of the keys in ``folder_path`` are replaced by
       ``<widget value>/<table name>``.

    ``dbutils`` is not defined in this file; it is presumably the global
    provided by the Databricks notebook runtime.

    Args:
        volume_path: Directory containing the onboarding JSON files.

    Returns:
        List with one processed entry per JSON file, ordered by file name.

    Raises:
        IndexError: If a file holds an empty top-level list.
        KeyError: If the first entry lacks ``source_details`` or the
            environment-specific source path.
    """
    widgets = dbutils.widgets
    # Read once: the environment does not change while the files are processed.
    env = widgets.get("environment")

    # Keys whose string values are reduced to their last path component.
    keys_to_modify = {
        "source_path_dev",
        "bronze_table_path_dev",
        "bronze_data_quality_expectations_json_dev",
        "bronze_data_quality_expectations_json_prod",
        "bronze_table_path_prod",
        "bronze_quarantine_table_path_prod",
        "bronze_quarantine_table_path_dev",
        "silver_table_path_dev",
    }

    # "<name>_dev" keys are renamed to "<name>_<environment>".
    renamed_keys = (
        "bronze_database",
        "silver_database",
        "silver_transformation_json",
        "source_path",
        "bronze_database_quarantine",
        "silver_table_path",
    )
    key_mapping = {f"{name}_dev": f"{name}_{env}" for name in renamed_keys}

    # Key -> folder; matching string values become "<folder>/<table name>".
    # NOTE(review): "bronze_database_<env>" is rewritten like a path here;
    # confirm that this is intended for a database name.
    folder_path = {
        f"source_path_{env}": widgets.get("source_path_dev"),
        "bronze_table_path_dev": widgets.get("bronze_table_path_dev"),
        f"silver_table_path_{env}": widgets.get("silver_table_path_dev"),
        "filestore": widgets.get("filestore"),
        "DDL_file_path": widgets.get("DDL_file_path"),
        f"bronze_database_{env}": widgets.get("bronze_database"),
        "bronze_data_quality_expectations_json_dev": widgets.get("bronze_data_quality_expectations_json_dev"),
        "bronze_data_quality_expectations_json_prod": widgets.get("bronze_data_quality_expectations_json_prod"),
        "bronze_table_path_prod": widgets.get("bronze_table_path_prod"),
        "bronze_quarantine_table_path_prod": widgets.get("bronze_quarantine_table_path_prod"),
        "bronze_quarantine_table_path_dev": widgets.get("bronze_quarantine_table_path_dev"),
    }

    source_key = f"source_path_{env}"
    separators = os.sep + (os.altsep or "")

    combined_json = []
    # os.listdir returns names in arbitrary order; sort so the combined
    # output is reproducible between runs.
    for file_name in sorted(os.listdir(volume_path)):
        if not file_name.endswith('.json'):
            continue

        with open(os.path.join(volume_path, file_name), encoding="utf-8") as json_file:
            entries = json.load(json_file)

        # NOTE(review): only the first entry of each file is used; any
        # further entries in the same file are ignored.
        entry = replace_keys(entries[0], key_mapping)

        # Take the table name before paths are stripped, and drop trailing
        # separators first because os.path.basename("a/b/") is "".
        source_path = entry['source_details'][source_key]
        table_name = os.path.basename(source_path.rstrip(separators))

        entry = remove_file_path(entry, keys_to_modify)
        combined_json.append(prefix_folder_path(entry, folder_path, table_name))

    return combined_json


In [0]:
# Build the combined config from <volume_path>/<bronze_database> and write it
# as JSON to <volume_path>/<combined_json_file_name>.
volume_root = dbutils.widgets.get("volume_path")
onboarding_dir = os.path.join(volume_root, dbutils.widgets.get("bronze_database"))
combined_json = correct_onboard_json(onboarding_dir)

output_path = os.path.join(volume_root, dbutils.widgets.get("combined_json_file_name"))
with open(output_path, 'w') as outfile:
    json.dump(combined_json, outfile)

    