In [1]:
from IPython.display import display, HTML
# Inject a CSS rule so the notebook's `.container` element uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
from itertools import product
import glob
from time import sleep

In [20]:
from datetime import datetime

In [34]:
def load_data(metrics_paths, ipcc_regions_path, lat_lon_gdf_path, population_data_path):
    """
    Load the metric dictionaries, IPCC regions, grid cells and population weights.

    Parameters:
        metrics_paths (list): Paths to the pickled metrics dictionary ``.npy`` files.
            Each path must contain ``f1_<metric>_p``; the text between the first
            ``f1_`` and the following ``_p`` becomes that metric's key.
        ipcc_regions_path (str): Path to the IPCC regions GeoJSON file.
        lat_lon_gdf_path (str): Path to the latitude-longitude GeoDataFrame saved as GeoJSON.
        population_data_path (str): Path to the gridded population NetCDF file.

    Returns:
        metrics_dicts (dict): Maps metric name -> the object stored in the matching file
            (presumably a ``{percentile: 2-D array}`` dict, given how it is queried
            downstream -- TODO confirm).
        ipcc_regions (GeoDataFrame): IPCC regions as a GeoDataFrame.
        lat_lon_gdf (GeoDataFrame): Latitude-longitude grid with an added ``population``
            column holding the flattened population grid.

    Raises:
        IndexError: If a metrics path does not contain ``f1_``.
    """
    # Load metrics dictionaries, keyed by the metric name embedded in each file name.
    # NOTE: allow_pickle=True executes arbitrary pickled code; only load trusted files.
    metrics_dicts = {}
    for path in metrics_paths:
        metric_name = path.split('f1_')[1].split('_p')[0]
        metrics_dicts[metric_name] = np.load(path, allow_pickle=True).item()

    # Load IPCC regions
    ipcc_regions = gpd.read_file(ipcc_regions_path)

    # Load latitude-longitude grid
    lat_lon_gdf = gpd.read_file(lat_lon_gdf_path)

    # Load population data. The context manager closes the NetCDF file handle as soon
    # as the values have been read into memory.
    population_var_name = 'Population Count, v4.11 (2000, 2005, 2010, 2015, 2020): 1 degree'
    with xr.open_dataset(population_data_path) as population_data_xr:
        # First slice along the leading dimension (presumably the year 2000 layer -- TODO confirm).
        population_array = population_data_xr[population_var_name][0, :, :].values
    # Flip the array vertically (presumably to match the row order of the grid file -- TODO confirm).
    population_array = np.flipud(population_array)

    # Add population data to lat_lon_gdf; the flattened grid must have one value per row.
    lat_lon_gdf['population'] = population_array.flatten()

    return metrics_dicts, ipcc_regions, lat_lon_gdf

In [35]:
def assign_regions_to_grid(lat_lon_gdf, ipcc_regions):
    """
    Label each grid cell with the IPCC region that contains it.

    For every row of ``ipcc_regions`` the cells lying within that row's geometry get
    the region's ``Name`` written to ``Region`` and its ``Acronym`` written to
    ``Acronym``. Regions are processed in row order, so a later region overwrites an
    earlier one for any cell that falls within both.

    The grid is modified in place and the same object is returned.
    """
    column_sources = (('Region', 'Name'), ('Acronym', 'Acronym'))
    for _, region_row in ipcc_regions.iterrows():
        inside_region = lat_lon_gdf.within(region_row.geometry)
        for target_column, source_field in column_sources:
            lat_lon_gdf.loc[inside_region, target_column] = region_row[source_field]
    return lat_lon_gdf

In [128]:
def get_rid_of_NaNs(lat_lon_gdf):
    """
    Return an independent copy of the grid without the rows whose ``population`` is missing.

    The input frame is left untouched; the original row labels are kept on the result.
    """
    has_population = lat_lon_gdf['population'].notna()
    return lat_lon_gdf.loc[has_population].copy()

In [36]:
def select_scenario_array(scenario_percentiles, metrics_dicts):
    """
    Pick, for each metric, the entry stored under that metric's scenario percentile.

    Parameters:
        scenario_percentiles (sequence): One percentile per metric (normally the 'elbow'
            percentiles). Entry ``i`` is applied to the ``i``-th key of ``metrics_dicts``
            in iteration order.
        metrics_dicts (dict): Maps metric name -> mapping of percentile -> array.

    Returns:
        selected_arrays (dict): Maps metric name -> the entry found for its percentile,
            or None when that percentile is not present for the metric.
    """
    return {
        metric_name: metrics_dicts[metric_name].get(scenario_percentiles[position], None)
        for position, metric_name in enumerate(metrics_dicts)
    }

In [37]:
def calculate_cell_failure_ratio(selected_arrays):
    """
    Average the selected metric arrays cell by cell.

    Parameters:
        selected_arrays (dict): Maps metric name -> array chosen for the scenario
            percentile; all arrays must share one shape.

    Returns:
        failure_ratios (numpy.ndarray): Per-cell sum over the metrics divided by the
            number of metrics. A NaN in any metric makes that cell NaN, because a plain
            (not NaN-skipping) sum is used.
    """
    per_metric_grids = [grid for grid in selected_arrays.values()]
    # The metrics become the trailing axis, which is then collapsed by the sum.
    metric_cube = np.stack(per_metric_grids, axis=-1)
    metric_count = len(selected_arrays)
    return metric_cube.sum(axis=-1) / metric_count

In [67]:
def calculate_weighted_regional_average(failure_ratios, lat_lon_gdf, scenario_percentiles):
    """
    Build a one-row table of per-region mean and standard deviation of the failure ratios.

    The regional "mean" is the sum of the cell failure ratios divided by the region's
    total population, i.e. the ratios are assumed to be population-weighted per cell
    already (TODO confirm against the input files). The standard deviation is a
    population-weighted deviation around that mean with the ``(M - 1) / M`` correction,
    where ``M`` is the number of cells with a non-zero, non-NaN population.

    Parameters:
        failure_ratios (numpy.ndarray): Failure ratio for each cell; flattened, it must
            have one value per row of ``lat_lon_gdf``, in row order.
        lat_lon_gdf (GeoDataFrame): Latitude-longitude grid with ``Acronym`` and
            ``population`` columns. It is not modified.
        scenario_percentiles (iterable): Percentile combination that produced
            ``failure_ratios``; used, as a tuple, for the row label.

    Returns:
        regional_stats_df (DataFrame): One float row labelled ``tuple(scenario_percentiles)``
            with MultiIndex columns ``(region, 'Mean')`` and ``(region, 'Std')``. ``Std`` is
            NaN for regions with fewer than two valid-population cells.
    """
    # Work on a slim private copy so the caller's (possibly shared) frame is never mutated.
    cells = lat_lon_gdf[['Acronym', 'population']].copy()
    cells['failure_ratios'] = np.asarray(failure_ratios).flatten()

    # Cells without a population value (e.g. ocean) take no part in the statistics.
    land_cells = cells.dropna(subset=['population'])
    regional_grouped = land_cells.groupby('Acronym')

    # Sum of the (already weighted) ratios, normalised by total population in the region.
    regional_sum = regional_grouped['failure_ratios'].sum()
    regional_population = regional_grouped['population'].sum()
    regional_weighted_average = regional_sum / regional_population

    # Weighted standard deviation per region, computed with an explicit loop over groups.
    std_by_region = {}
    for region, group in regional_grouped:
        weights = group['population']
        deviation = group['failure_ratios'] - regional_weighted_average.loc[region]
        weighted_diff_sq = (deviation ** 2) * weights
        # Count the number of non-zero and non-NaN weights
        valid_count = int(((weights != 0) & weights.notna()).sum())
        if valid_count > 1:
            std_by_region[region] = np.sqrt(
                weighted_diff_sq.sum() / ((valid_count - 1) / valid_count * weights.sum())
            )
        else:
            std_by_region[region] = np.nan  # undefined with one valid cell or fewer
    regional_std = pd.Series(std_by_region, dtype=float).reindex(regional_weighted_average.index)

    # Interleave the values as Mean, Std, Mean, Std, ... to match the column order.
    columns = pd.MultiIndex.from_product([regional_weighted_average.index, ['Mean', 'Std']])
    row_values = np.column_stack([
        regional_weighted_average.to_numpy(dtype=float),
        regional_std.to_numpy(dtype=float),
    ]).reshape(1, -1)
    regional_stats_df = pd.DataFrame(row_values, columns=columns)

    # Label the row with the percentile combination (as a tuple).
    regional_stats_df.index = [tuple(scenario_percentiles)]

    return regional_stats_df

In [129]:
### Returns individual pd.Series of the means and stds rather than constructing a one-row DataFrame

def calculate_weighted_regional_average_nondf(failure_ratios, lat_lon_gdf, land_based_gdf, scenario_percentiles):
    """
    Compute per-region mean and standard deviation of the failure ratios as pandas Series.

    The regional "mean" is the sum of the cell failure ratios divided by the region's
    total population, i.e. the ratios are assumed to be population-weighted per cell
    already (TODO confirm against the input files).

    Parameters:
        failure_ratios (numpy.ndarray): Failure ratio for each cell of the full grid.
        lat_lon_gdf (GeoDataFrame): Unused; accepted so existing keyword calls keep working.
        land_based_gdf (GeoDataFrame): Grid rows that have a population value, with
            ``Acronym`` and ``population`` columns. Its index labels are used as
            positions into the flattened ``failure_ratios``, so they must be the row
            numbers of the full grid. It is not modified.
        scenario_percentiles (iterable): Unused; accepted so existing keyword calls keep working.

    Returns:
        regional_weighted_average (Series): Mean per region, indexed by acronym.
        regional_std (Series): Weighted standard deviation per region, indexed by
            acronym; NaN where a region has fewer than two valid-population cells.
    """
    # Pick the ratios of the land-based cells only (index labels act as flat positions).
    cell_ratios = np.asarray(failure_ratios).flatten()[land_based_gdf.index.to_numpy()]

    # Attach them to a private frame so the shared land_based_gdf is not rewritten per call.
    land_cells = land_based_gdf[['Acronym', 'population']].assign(failure_ratios=cell_ratios)
    regional_grouped = land_cells.groupby('Acronym')

    # Sum of the (already weighted) ratios, normalised by total population in the region.
    regional_sum = regional_grouped['failure_ratios'].sum()
    regional_population = regional_grouped['population'].sum()
    regional_weighted_average = regional_sum / regional_population

    # Weighted standard deviation per region, computed with an explicit loop over groups.
    std_by_region = {}
    for region, group in regional_grouped:
        weights = group['population']
        deviation = group['failure_ratios'] - regional_weighted_average.loc[region]
        weighted_diff_sq = (deviation ** 2) * weights
        # Count the number of non-zero and non-NaN weights
        valid_count = int(((weights != 0) & weights.notna()).sum())
        if valid_count > 1:
            std_by_region[region] = np.sqrt(
                weighted_diff_sq.sum() / ((valid_count - 1) / valid_count * weights.sum())
            )
        else:
            std_by_region[region] = np.nan  # undefined with one valid cell or fewer
    regional_std = pd.Series(std_by_region, dtype=float).reindex(regional_weighted_average.index)

    return regional_weighted_average, regional_std

In [138]:
### returns numpy arrays rather than pd.series

def calculate_weighted_regional_average_nondf(failure_ratios, lat_lon_gdf, land_based_gdf, scenario_percentiles):
    """
    Compute per-region mean and standard deviation of the failure ratios as numpy arrays.

    NOTE: this cell redefines a function of the same name from an earlier cell; whichever
    cell ran last wins. This variant returns plain arrays ordered by sorted region acronym.

    The regional "mean" is the sum of the cell failure ratios divided by the region's
    total population, i.e. the ratios are assumed to be population-weighted per cell
    already (TODO confirm against the input files).

    Parameters:
        failure_ratios (numpy.ndarray): Failure ratio for each cell of the full grid.
        lat_lon_gdf (GeoDataFrame): Unused; accepted so existing keyword calls keep working.
        land_based_gdf (GeoDataFrame): Grid rows that have a population value, with
            ``Acronym`` and ``population`` columns. Its index labels are used as
            positions into the flattened ``failure_ratios``, so they must be the row
            numbers of the full grid. It is not modified.
        scenario_percentiles (iterable): Unused; accepted so existing keyword calls keep working.

    Returns:
        regional_weighted_average (numpy.ndarray): Mean per region.
        regional_std (numpy.ndarray): Weighted standard deviation per region, in the same
            order; NaN where a region has fewer than two valid-population cells.
    """
    # Pick the ratios of the land-based cells only (index labels act as flat positions).
    cell_ratios = np.asarray(failure_ratios).flatten()[land_based_gdf.index.to_numpy()]

    # Attach them to a private frame so the shared land_based_gdf is not rewritten per call.
    land_cells = land_based_gdf[['Acronym', 'population']].assign(failure_ratios=cell_ratios)
    regional_grouped = land_cells.groupby('Acronym')

    # Sum of the (already weighted) ratios, normalised by total population in the region.
    regional_sum = regional_grouped['failure_ratios'].sum()
    regional_population = regional_grouped['population'].sum()
    regional_weighted_average = regional_sum / regional_population

    # Weighted standard deviation per region, computed with an explicit loop over groups.
    std_by_region = {}
    for region, group in regional_grouped:
        weights = group['population']
        deviation = group['failure_ratios'] - regional_weighted_average.loc[region]
        weighted_diff_sq = (deviation ** 2) * weights
        # Count the number of non-zero and non-NaN weights
        valid_count = int(((weights != 0) & weights.notna()).sum())
        if valid_count > 1:
            std_by_region[region] = np.sqrt(
                weighted_diff_sq.sum() / ((valid_count - 1) / valid_count * weights.sum())
            )
        else:
            std_by_region[region] = np.nan  # undefined with one valid cell or fewer
    regional_std = pd.Series(std_by_region, dtype=float).reindex(regional_weighted_average.index)

    return regional_weighted_average.to_numpy(), regional_std.to_numpy()

In [39]:
def hadi_calculate_results_for_thresholds(id):
    """
    Process one batch of percentile combinations and pickle the resulting table.

    Batches are numbered from 1: batch ``id`` covers
    ``shared_all_combinations[(id - 1) * shared_bundle : id * shared_bundle]``.
    The function reads the module-level names ``shared_all_combinations``,
    ``shared_bundle``, ``shared_metrics_dicts``, ``shared_lat_lon_gdf`` and
    ``shared_output_dir``, which are not defined in this cell (presumably set by a
    worker initialiser -- TODO confirm).

    Parameters:
        id (int): 1-based batch number.

    Returns:
        str: The literal ``"job done"`` once the batch pickle has been written.
    """
    batch_stop = id * shared_bundle
    batch_start = batch_stop - shared_bundle

    batch_frames = []
    for combo in shared_all_combinations[batch_start:batch_stop]:
        # Arrays for this combination of percentiles, one per metric
        arrays_for_combo = select_scenario_array(scenario_percentiles=combo,
                                                 metrics_dicts=shared_metrics_dicts)

        # Per-cell failure ratio across the metrics
        cell_ratios = calculate_cell_failure_ratio(selected_arrays=arrays_for_combo)

        # One-row table of regional statistics
        stats_row = calculate_weighted_regional_average(failure_ratios=cell_ratios,
                                                        lat_lon_gdf=shared_lat_lon_gdf,
                                                        scenario_percentiles=combo)

        # Row label is the combination itself
        stats_row.index = [tuple(combo)]
        batch_frames.append(stats_row)

    # One pickle per batch, named after the first and last combination positions it spans
    batch_table = pd.concat(batch_frames)
    batch_table.to_pickle(f'{shared_output_dir}partial_results_{batch_start}_to_{batch_stop - 1}.pkl')

    return "job done"

In [26]:
# Input and output locations for this run.
root = "C:/Users/ci1twx/DATA/CDS_indices/OUTPUT/population_weighted/"
output_dir = 'C:/Users/ci1twx/DATA/CDS_indices/OUTPUT/test_output/'

# Sorted so the metric order is reproducible: glob returns matches in arbitrary order,
# and percentile combinations are later matched to metrics by position in this list.
metrics_paths = sorted(glob.glob(f'{root}*'))
ipcc_regions_path = 'C:/Users/ci1twx/DATA/geojson/IPCC-WGI-reference-regions-v4.geojson'
lat_lon_gdf_path = 'C:/Users/ci1twx/DATA/geojson/lat_lon_gdf.geojson'
population_data_path = 'C:/Users/ci1twx/DATA/Population Data/gpw_v4_population_count_rev11_1_deg.nc'

In [28]:
# Load the metric dictionaries, the region polygons and the grid cells;
# load_data also attaches the 'population' column to lat_lon_gdf.
metrics_dicts, ipcc_regions, lat_lon_gdf = load_data(metrics_paths, ipcc_regions_path, lat_lon_gdf_path,
                                                     population_data_path)

In [130]:
# Tag each grid cell with the name and acronym of the IPCC region it lies within
# (the function writes 'Region' and 'Acronym' columns into lat_lon_gdf in place).
lat_lon_gdf = assign_regions_to_grid(lat_lon_gdf, ipcc_regions)

In [131]:
# Copy of the grid restricted to cells that have a population value.
land_based_gdf = get_rid_of_NaNs(lat_lon_gdf)

In [18]:
# Candidate percentile thresholds: start at 5, step 10, stop before 16.
percentiles_range = [*range(5, 16, 10)]
print(f'percentiles_range:  \b{percentiles_range[:5]}')

percentiles_range:  [5, 15]


In [19]:
# Every way of choosing one percentile per metric, for 7 metrics
# (Cartesian product of percentiles_range with itself, 7 times).
all_combinations = [combo for combo in product(percentiles_range, repeat=7)]
print(f'combinations:  {all_combinations[:5]}')
print(len(all_combinations))

combinations:  [(5, 5, 5, 5, 5, 5, 5), (5, 5, 5, 5, 5, 5, 15), (5, 5, 5, 5, 5, 15, 5), (5, 5, 5, 5, 5, 15, 15), (5, 5, 5, 5, 15, 5, 5)]
128


In [64]:
# Alphabetical list of region acronyms present on the grid; non-string entries
# (e.g. the NaN of cells that belong to no region) are left out.
unique_regions = sorted(
    acronym for acronym in lat_lon_gdf['Acronym'].unique() if isinstance(acronym, str)
)


In [66]:
len(unique_regions)

58

In [132]:
# Serial reference run: build one row of regional statistics per percentile
# combination and pickle the combined table.
now_ST = datetime.now()
current_time = now_ST.strftime("%H:%M:%S")
print(f'PROCESS START TIME: {current_time}')

# Created once, before the loop: re-creating the list inside the loop would throw away
# every row except the one from the last combination.
frames = []
for combo in all_combinations:
    now_st = datetime.now()
    current_time = now_st.strftime("%H:%M:%S")
    print(f'{combo} START TIME: {current_time}')

    selected_scenario_arrays = select_scenario_array(scenario_percentiles=combo, metrics_dicts=metrics_dicts)
    failure_ratios = calculate_cell_failure_ratio(selected_arrays=selected_scenario_arrays)
    regional_stats_df = calculate_weighted_regional_average(failure_ratios=failure_ratios,
                                                            lat_lon_gdf=lat_lon_gdf,
                                                            scenario_percentiles=combo)
    # Set the row index based on the current combination of percentiles
    regional_stats_df.index = [tuple(combo)]
    frames.append(regional_stats_df)

    now_end = datetime.now()
    elapsed = now_end - now_st
    print(f'completed in:  {elapsed}')

results = pd.concat(frames)
results.to_pickle(f'{output_dir}test.pkl')
print(f'{output_dir}test.pkl  ... SAVED!')

frames = []   # Clear the list to save memory
results = []  # Clear the list to save memory

now_END = datetime.now()
current_time = now_END.strftime("%H:%M:%S")
total = now_END - now_ST
print(f'PROCESS END TIME: {current_time}')
print(f'TOTAL TIME:  {total}')

PROCESS START TIME: 12:22:15
(5, 5, 5, 5, 5, 5, 5) START TIME: 12:22:15


TypeError: calculate_weighted_regional_average() got an unexpected keyword argument 'land_based_gdf'

In [84]:
def create_df(means, stds, combos):
    """
    Assemble per-scenario regional means and stds into one table, one row per scenario.

    Parameters:
        means (list): One pandas Series per scenario, indexed by region, holding the means.
        stds (list): One pandas Series per scenario, indexed by region, holding the stds.
        combos (list): The scenario (percentile combination) for each entry; used as the
            row label.

    Returns:
        final_df (DataFrame): One row per scenario. Columns are a MultiIndex: all
            ``(region, 'mean')`` columns first, followed by all ``(region, 'std')`` columns.

    Raises:
        AssertionError: If the three lists differ in length.
        ValueError: If the lists are empty (nothing to concatenate).
    """
    # Ensure that the lengths of the lists are equal
    assert len(combos) == len(means) == len(stds), "List lengths must be equal"

    # One single-row frame per scenario
    dfs = []
    for combo, mean_series, std_series in zip(combos, means, stds):
        # Each value is wrapped in a one-element list so every region contributes a
        # scalar to a single row, rather than a whole Series per column.
        mean_df = pd.DataFrame({(region, 'mean'): [mean_series[region]] for region in mean_series.index})
        std_df = pd.DataFrame({(region, 'std'): [std_series[region]] for region in std_series.index})
        df = pd.concat([mean_df, std_df], axis=1)

        # Label the row with the scenario tuple
        df.index = [combo]

        dfs.append(df)

    # Concatenate all scenario rows vertically
    final_df = pd.concat(dfs)

    return final_df

In [88]:
## not ready yet

def create_multi_index_df(scenarios, means_list, stds_list):
    """
    Assemble per-scenario regional means and stds into one table, one row per scenario.

    Parameters:
        scenarios (list): The scenario (percentile combination) for each entry; used as
            the row label.
        means_list (list): One pandas Series per scenario, indexed by region (means).
        stds_list (list): One pandas Series per scenario, indexed by region (stds).

    Returns:
        final_df (DataFrame): One row per scenario. Columns are a MultiIndex: all
            ``(region, 'mean')`` columns first, followed by all ``(region, 'std')`` columns.

    Raises:
        AssertionError: If the three lists differ in length.
        ValueError: If the lists are empty (nothing to concatenate).
    """
    assert len(scenarios) == len(means_list) == len(stds_list), "List lengths must be equal"

    dfs = []  # List to hold the DataFrames for each scenario

    for scenario, means, stds in zip(scenarios, means_list, stds_list):
        # Create single-row DataFrames for means and stds and concatenate them horizontally
        mean_df = pd.DataFrame({(region, 'mean'): [means[region]] for region in means.index})
        std_df = pd.DataFrame({(region, 'std'): [stds[region]] for region in stds.index})
        df = pd.concat([mean_df, std_df], axis=1)

        # Set the index to the scenario tuple
        df.index = [scenario]

        dfs.append(df)

    # Concatenate all scenario DataFrames vertically
    final_df = pd.concat(dfs)

    return final_df


# ---------------------------
# Scratch fragment kept for reference only. It is commented out because, as indented
# top-level code after the function, it stopped the whole cell from parsing; it also
# refers to names (regional_weighted_average, regional_std, scenario_percentiles)
# that are not defined at this level.
#
#     # Create an empty DataFrame with MultiIndex structure
#     columns = pd.MultiIndex.from_product([means[0].index, ['Mean', 'Std']])
#     regional_stats_df = pd.DataFrame(columns=columns)
#
#     # Fill in the DataFrame
#     for region in regional_weighted_average.index:
#         regional_stats_df.loc[0, (region, 'Mean')] = regional_weighted_average[region]
#         regional_stats_df.loc[0, (region, 'Std')] = regional_std[region]
#
#     # Set the index name based on elbow_percentiles
#     ### as string...
#     # percentile_str = ','.join(map(str, elbow_percentiles))
#     # regional_stats_df.index = [percentile_str]
#     ### as tuple...
#     regional_stats_df.index = [tuple(scenario_percentiles)]

In [121]:
#save as numpy array rather than dataframe:

def create_structured_array(scenarios, mean_series_list, std_series_list, region_acronyms):
    """
    Pack per-scenario regional means and stds into a numpy structured array.

    Values are matched to regions by position: element ``j`` of every mean/std entry is
    stored under ``region_acronyms[j]``. Entries may be numpy arrays, lists or pandas
    Series; Series labels are ignored.

    Parameters:
        scenarios (sequence): One item per scenario; only its length is used.
        mean_series_list (sequence): Per-scenario 1-D collections of regional means.
        std_series_list (sequence): Per-scenario 1-D collections of regional stds.
        region_acronyms (sequence): Region acronyms, in the same order as the values.

    Returns:
        structured_array (numpy.ndarray): Shape ``(len(scenarios),)`` with float64 fields
            ``<region>_mean`` for every region followed by ``<region>_std`` for every region.

    Raises:
        ValueError: If the lists do not have one entry per scenario, or an entry does not
            have exactly one value per region.
    """
    region_names = [f'{region}' for region in region_acronyms]

    # Define a structured dtype with a field for the mean and std for each region acronym
    dtype = np.dtype([(f'{region}_mean', 'f8') for region in region_names] +
                     [(f'{region}_std', 'f8') for region in region_names])

    num_scenarios = len(scenarios)
    if not (len(mean_series_list) == len(std_series_list) == num_scenarios):
        # np.empty leaves unfilled rows holding arbitrary memory, so refuse short inputs.
        raise ValueError(
            f"Expected {num_scenarios} mean and std entries, got "
            f"{len(mean_series_list)} and {len(std_series_list)}"
        )

    structured_array = np.empty(num_scenarios, dtype=dtype)
    if num_scenarios == 0:
        return structured_array

    # Convert to plain float matrices (scenario x region); this reads Series by position.
    mean_matrix = np.stack([np.asarray(row, dtype='f8') for row in mean_series_list])
    std_matrix = np.stack([np.asarray(row, dtype='f8') for row in std_series_list])
    expected_shape = (num_scenarios, len(region_names))
    if mean_matrix.shape != expected_shape or std_matrix.shape != expected_shape:
        raise ValueError(
            f"Expected value matrices of shape {expected_shape}, got "
            f"{mean_matrix.shape} (means) and {std_matrix.shape} (stds)"
        )

    # Fill one whole field (column) at a time
    for column, region in enumerate(region_names):
        structured_array[f'{region}_mean'] = mean_matrix[:, column]
        structured_array[f'{region}_std'] = std_matrix[:, column]

    return structured_array

In [141]:
# Region acronyms, later passed to create_structured_array to name the fields.
# NOTE(review): 'acronym_list.npy' is written by an np.save cell that appears further
# down this notebook, so a fresh top-to-bottom run fails here unless the file already
# exists on disk.
region_acronyms = np.load(f'{output_dir}acronym_list.npy', allow_pickle=True)

In [143]:
# Serial run over every percentile combination: collect the per-region means and
# standard deviations, then pack them into one structured array.
now_ST = datetime.now()
current_time = now_ST.strftime("%H:%M:%S")
print(f'PROCESS START TIME: {current_time}')

means = []
stds = []
for combo in all_combinations:
    now_st = datetime.now()
    current_time = now_st.strftime("%H:%M:%S")
    print(f'{combo} START TIME: {current_time}')

    # Each step is timed separately. The labels use __name__ so the log shows the
    # function's name instead of its repr ("<function ... at 0x...>").
    st = datetime.now()
    selected_scenario_arrays = select_scenario_array(scenario_percentiles=combo, metrics_dicts=metrics_dicts)
    et = datetime.now()
    print(f'{select_scenario_array.__name__} takes {et-st}')

    st = datetime.now()
    failure_ratios = calculate_cell_failure_ratio(selected_arrays=selected_scenario_arrays)
    et = datetime.now()
    print(f'{calculate_cell_failure_ratio.__name__} takes {et-st}')

    st = datetime.now()
    regional_mean, regional_std = calculate_weighted_regional_average_nondf(failure_ratios=failure_ratios,
                                                                            lat_lon_gdf=lat_lon_gdf,
                                                                            land_based_gdf=land_based_gdf,
                                                                            scenario_percentiles=combo)
    et = datetime.now()
    print(f'{calculate_weighted_regional_average_nondf.__name__} takes {et-st}')

    means.append(regional_mean)
    stds.append(regional_std)

    now_end = datetime.now()
    elapsed = now_end - now_st
    print(f'completed in:  {elapsed}')

st = datetime.now()
structured_array = create_structured_array(all_combinations, means, stds, region_acronyms)
et = datetime.now()
print(f'{create_structured_array.__name__} takes {et-st}')

now_END = datetime.now()
current_time = now_END.strftime("%H:%M:%S")
total = now_END - now_ST
print(f'PROCESS END TIME: {current_time}')
print(f'TOTAL TIME:  {total}')

PROCESS START TIME: 12:28:13
(5, 5, 5, 5, 5, 5, 5) START TIME: 12:28:13
<function select_scenario_array at 0x0000020D26CA3DC0> takes 0:00:00
<function calculate_cell_failure_ratio at 0x0000020D2133EDC0> takes 0:00:00.004987
<function calculate_weighted_regional_average_nondf at 0x0000020D42AAD790> takes 0:00:00.054888
completed in:  0:00:00.059875
(5, 5, 5, 5, 5, 5, 15) START TIME: 12:28:13
<function select_scenario_array at 0x0000020D26CA3DC0> takes 0:00:00
<function calculate_cell_failure_ratio at 0x0000020D2133EDC0> takes 0:00:00.003007
<function calculate_weighted_regional_average_nondf at 0x0000020D42AAD790> takes 0:00:00.049886
completed in:  0:00:00.052893
(5, 5, 5, 5, 5, 15, 5) START TIME: 12:28:13
<function select_scenario_array at 0x0000020D26CA3DC0> takes 0:00:00
<function calculate_cell_failure_ratio at 0x0000020D2133EDC0> takes 0:00:00.003006
<function calculate_weighted_regional_average_nondf at 0x0000020D42AAD790> takes 0:00:00.049854
completed in:  0:00:00.052860
(5, 5,

In [147]:
structured_array.shape

(128,)

In [149]:
structured_array['ARO_mean']

array([0.00253758, 0.00253758, 0.00253758, 0.00253758, 0.00484531,
       0.00484531, 0.00484531, 0.00484531, 0.00253758, 0.00253758,
       0.00253758, 0.00253758, 0.00484531, 0.00484531, 0.00484531,
       0.00484531, 0.00255596, 0.00255596, 0.00255596, 0.00255596,
       0.00486369, 0.00486369, 0.00486369, 0.00486369, 0.00255596,
       0.00255596, 0.00255596, 0.00255596, 0.00486369, 0.00486369,
       0.00486369, 0.00486369, 0.00253758, 0.00253758, 0.00253758,
       0.00253758, 0.00484531, 0.00484531, 0.00484531, 0.00484531,
       0.00253758, 0.00253758, 0.00253758, 0.00253758, 0.00484531,
       0.00484531, 0.00484531, 0.00484531, 0.00255596, 0.00255596,
       0.00255596, 0.00255596, 0.00486369, 0.00486369, 0.00486369,
       0.00486369, 0.00255596, 0.00255596, 0.00255596, 0.00255596,
       0.00486369, 0.00486369, 0.00486369, 0.00486369, 0.00272734,
       0.00272734, 0.00272734, 0.00272734, 0.00503507, 0.00503507,
       0.00503507, 0.00503507, 0.00272734, 0.00272734, 0.00272

In [None]:
# Extract the region acronyms from the first mean series in the list
# NOTE(review): `.astype(str)` converts the *values* of the first entry to strings, not
# its labels. If the acronyms are meant to come from the index (as in the
# `means[0].index.to_list()` save cell), this line does not do that -- confirm intent.
# NOTE(review): `mean_series_list`, `scenarios` and `std_series_list` are not defined in
# any visible cell, and this cell has no execution count.
region_acronyms = mean_series_list[0].astype(str)  # Convert to string to handle NaN values

# Creating the structured array using the loaded data
structured_array = create_structured_array(scenarios, mean_series_list, std_series_list, region_acronyms)

# Return the shape and dtype of the created structured array as well as a sample of the data
structured_array.shape, structured_array.dtype, structured_array[:2]

In [108]:
# Persist the per-combination regional means collected in `means`.
np.save(f'{output_dir}test_means.npy', means)

In [109]:
# Persist the per-combination regional standard deviations collected in `stds`.
np.save(f'{output_dir}test_stds.npy', stds)

In [110]:
# Persist the percentile combinations, in the order they were processed.
np.save(f'{output_dir}test_combinations.npy', all_combinations)

In [112]:
output_dir

'C:/Users/ci1twx/DATA/CDS_indices/OUTPUT/test_output/'

In [119]:
# Save the region acronyms taken from the index of the first entry of `means`.
# NOTE(review): this needs `means[0]` to be a pandas Series (the Series-returning
# variant of calculate_weighted_regional_average_nondf); with the ndarray-returning
# variant there is no `.index`. The file written here is read by the
# `region_acronyms = np.load(...)` cell that appears earlier in the notebook.
np.save(f'{output_dir}acronym_list.npy', means[0].index.to_list())

In [None]:
    
    # Fragment of a multiprocessing driver: runs hadi_calculate_results_for_thresholds
    # over `filtered_ids` in a process pool and prints the start and end times.
    # NOTE(review): the cell is indented as if lifted from inside a function, and
    # `Pool`, `init_worker`, `num_workers`, `chunk_size` and `filtered_ids` are neither
    # imported nor defined in any visible cell -- it cannot run as-is.
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print(f'START TIME: {current_time}')
    print(f'num_workers = {num_workers}')
    # create and configure the process pool
    with Pool(processes=num_workers, initializer=init_worker, initargs=(metrics_dicts, ipcc_regions, lat_lon_gdf, all_combinations, chunk_size)) as pool:
        # issue tasks into the process pool
        #pool.map(hadi_calculate_results_for_thresholds, range(100))
        pool.map(hadi_calculate_results_for_thresholds, filtered_ids)

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print(f'END TIME: {current_time}')