## Check for NaN

In [None]:
import json
import math

def load_json_file(path):
    """Read the UTF-8 encoded file at *path* and return its parsed JSON content."""
    with open(path, encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

def find_nan_paths(obj, path=""):
    """Return the paths of containers that directly hold a float NaN.

    The structure is walked recursively. Dict keys are joined with ``.`` and
    list positions are written as ``[index]``. For every NaN found, the path
    of its *parent* container is recorded (the final key/index is dropped).
    A NaN stored directly under a top-level dict key has no parent path, so
    the key itself is recorded; a NaN directly inside a top-level list is
    recorded as the empty string.

    Args:
        obj: The value to inspect; only dicts and lists are traversed.
        path: Path of ``obj`` within the overall structure ("" at the root).

    Returns:
        A list of path strings. A container holding several NaN values is
        recorded once per call, whether it is a dict or a list.
    """
    paths_with_nan = []

    def _record(parent_path):
        # Record each parent only once, for dicts and lists alike.
        if parent_path not in paths_with_nan:
            paths_with_nan.append(parent_path)

    if isinstance(obj, dict):
        for key, value in obj.items():
            if isinstance(value, (dict, list)):
                new_path = f"{path}.{key}" if path else key
                paths_with_nan.extend(find_nan_paths(value, new_path))
            elif isinstance(value, float) and math.isnan(value):
                # Drop the final key; fall back to the key itself at the root.
                _record(path if path else key)
    elif isinstance(obj, list):
        for index, item in enumerate(obj):
            if isinstance(item, (dict, list)):
                paths_with_nan.extend(find_nan_paths(item, f"{path}[{index}]"))
            elif isinstance(item, float) and math.isnan(item):
                # Drop the index; the list's own path is the parent.
                _record(path)

    return paths_with_nan



# Convert "NaN" strings to actual NaN if needed
def convert_to_nan(obj):
    """Return a copy of *obj* with every "nan" string replaced by float NaN.

    Dicts and lists are rebuilt recursively; the string comparison is
    case-insensitive. Any other value is returned unchanged.
    """
    if isinstance(obj, str):
        return float('nan') if obj.lower() == "nan" else obj
    if isinstance(obj, list):
        return list(map(convert_to_nan, obj))
    if isinstance(obj, dict):
        converted = {}
        for name, child in obj.items():
            converted[name] = convert_to_nan(child)
        return converted
    return obj

# Directory (relative to the working directory) holding the files to scan.
file_dir = "test_compare_base_disagg/test"
file_names = [
    "mseg_res_com_emm_allfuel_tech.json",
    "mseg_res_com_emm_allfuel_eu.json",
    "mseg_res_com_emm_elec_tech.json",
    "mseg_res_com_emm_elec_eu.json",
    "mseg_res_com_state_allfuel_tech.json",
    "mseg_res_com_state_allfuel_eu.json",
    "mseg_res_com_state_elec_tech.json",
    "mseg_res_com_state_elec_eu.json",
]

for file_name in file_names:
    file_path = f"./{file_dir}/{file_name}"
    # Load the file, then turn "NaN" strings into float NaN so that both
    # spellings are picked up by the search below.
    json_data = convert_to_nan(load_json_file(file_path))
    nan_paths = find_nan_paths(json_data)

    # Report each distinct parent path once, in sorted order.
    print(f"\n{file_name}: Paths leading to NaN values (excluding final key):")
    if not nan_paths:
        print("No NaN values found.")
        continue
    for p in sorted(set(nan_paths)):
        print(p)


## Compare two JSON files

In [None]:
import json

def load_json_file(path):
    """Open *path* as UTF-8 text and return the decoded JSON document."""
    with open(path, mode='r', encoding='utf-8') as source:
        document = json.load(source)
    return document

def _is_nan(value):
    """Return True when *value* is a float NaN."""
    # NaN is the only float that is not equal to itself.
    return isinstance(value, float) and value != value


def compare_json(a, b, path=""):
    """Recursively compare two parsed JSON values and describe the differences.

    Dicts are compared key by key and lists element by element; anything else
    is compared with ``!=``. Two NaN values are treated as equal, so files
    that contain NaN in the same places are not reported as different.

    Args:
        a: Value from the first file.
        b: Value from the second file.
        path: Path of the values being compared ("" at the root). Dict keys
            are joined with ``.`` and list positions written as ``[index]``.

    Returns:
        A list of strings of the form ``"<path> - <description>"``. Dict keys
        are visited in a deterministic order: the keys of ``a`` in insertion
        order, followed by the keys that appear only in ``b``.
    """
    differences = []

    if isinstance(a, dict) and isinstance(b, dict):
        # Insertion order instead of a set keeps the report stable across runs.
        all_keys = list(a) + [key for key in b if key not in a]
        for key in all_keys:
            new_path = f"{path}.{key}" if path else key
            if key not in a:
                differences.append(f"{new_path} - missing in first file")
            elif key not in b:
                differences.append(f"{new_path} - missing in second file")
            else:
                differences.extend(compare_json(a[key], b[key], new_path))
    elif isinstance(a, list) and isinstance(b, list):
        # zip stops at the shorter list; the length difference is reported below.
        for i, (item_a, item_b) in enumerate(zip(a, b)):
            differences.extend(compare_json(item_a, item_b, f"{path}[{i}]"))
        if len(a) > len(b):
            differences.append(f"{path} - extra elements in first file")
        elif len(b) > len(a):
            differences.append(f"{path} - extra elements in second file")
    elif a != b and not (_is_nan(a) and _is_nan(b)):
        # NaN != NaN is always True, so matching NaNs need the explicit check.
        differences.append(f"{path} - value mismatch")

    return differences

# Example usage
# Location and names of the two files to compare.
file_dir = "test_compare_base_disagg/test"
file1 = "mseg_res_com_state_allfuel_tech.json"
file2 = "mseg_res_com_state_allfuel_tech_1.json"

json1 = load_json_file(f"./{file_dir}/{file1}")
json2 = load_json_file(f"./{file_dir}/{file2}")

# Collect every difference, then print them one per line.
diffs = compare_json(json1, json2)

if diffs:
    print("Differences found:")
    for diff in diffs:
        print(diff)
else:
    print("Two files are identical.")
