## Global Sensitivity and Uncertainty Analysis

The GSUA approach is based on the work of A. Carmona-Cabrero and R. Muñoz-Carpena, University of Florida. The paper can be found in JASSS: [(Carmona-Cabrero et al., 2024)](https://doi.org/10.18564/jasss.5174)



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from SALib.sample import sobol
from ring_plot_funct import ring_plot

In [2]:
import json
import pandas as pd
from pathlib import Path

# Folder containing the JSON config files that are scanned for parameters.
# NOTE(review): absolute, user-specific path - the notebook only runs where this
# directory exists; consider making it configurable.
CONFIG_DIR = Path("/blue/carpena/haasehelen/ifwaste/bash-scripts/configuration")

def extract_parameters(d, path=""):
    """Flatten a nested config dict into ``{"Section:sub:key": spec}`` entries.

    A nested dict counts as a parameter entry when it has both a ``"value"``
    and a ``"layer"`` key, or when it has a ``"distribution"`` key. Any other
    nested dict is descended into, with its key appended to the colon-separated
    path. Values that are not dicts are ignored.

    Args:
        d: (Nested) configuration dictionary.
        path: Colon-separated prefix for the keys of ``d``; empty at top level.

    Returns:
        Dict mapping the full path of every parameter entry to a spec dict with
        the keys ``value``, ``distribution``, ``bounds``, ``layer``, ``dtype``,
        ``length``, ``options`` and ``decimals``. Missing fields are filled
        with ``"NAN"`` (``options`` with ``[]``); a missing ``layer`` falls
        back to the entry's ``distribution``.
    """
    flat = {}
    for key, node in d.items():
        if not isinstance(node, dict):
            continue
        qualified = key if not path else f"{path}:{key}"

        is_entry = ("value" in node and "layer" in node) or "distribution" in node
        if not is_entry:
            # Plain container: its children may still be parameter entries.
            flat.update(extract_parameters(node, qualified))
            continue

        # An entry without "layer" necessarily has "distribution" (see is_entry).
        layer = node["layer"] if "layer" in node else node["distribution"]
        flat[qualified] = {
            "value": node.get("value", "NAN"),
            "distribution": node.get("distribution", "NAN"),
            "bounds": node.get("bounds", "NAN"),
            "layer": layer,
            "dtype": node.get("dtype", "NAN"),
            "length": node.get("length", "NAN"),
            "options": node.get("options", []),
            "decimals": node.get("decimals", "NAN"),
        }
    return flat

# Collect all JSON config files. Sorted so that the row order of `df` (and with
# it the column order of the sample matrix built from it) does not depend on
# the order in which the file system lists the directory.
all_json_files = sorted(CONFIG_DIR.glob("*.json"))
if not all_json_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No *.json configuration files found in {CONFIG_DIR}")

# One flattened parameter table per config file
df_list = []
for json_file in all_json_files:
    with open(json_file, encoding="utf-8") as f:
        config = json.load(f)
    file_params = pd.DataFrame.from_dict(extract_parameters(config), orient="index")
    file_params.index.name = "name"
    # Empty strings are treated like missing entries ("NAN")
    df_list.append(file_params.reset_index().replace("", "NAN"))

# Combine all into one big DataFrame
df = pd.concat(df_list, ignore_index=True)

# Entries with a concrete value are constants ...
constants = df[df["value"] != "NAN"]

# ... everything whose value is "NAN" is a parameter
parameter = df[df["value"] == "NAN"]

# "triang" and "unif" are used for sampling as they are; the remaining
# distributions are kept separately.
mask = (parameter["distribution"] == "triang") | (parameter["distribution"] == "unif")
simple_distributions = parameter[mask]

other_distributions = parameter[~mask]
other_distributions

Unnamed: 0,name,value,distribution,bounds,layer,dtype,length,options,decimals
10,Premium_retailer:Sales:high_stock_discount_int...,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
13,Premium_retailer:Sales:seasonal_discount,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
18,Premium_retailer:Sales:clearance_interval_1_di...,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
24,Convenience_store:Sales:high_stock_discount_in...,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT20, DISCOUNT30, DISCOUNT40, DISCOUNT5...",NAN
27,Convenience_store:Sales:seasonal_discount,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT10, DISCOUNT20, DISCOUNT30, DISCOUNT4...",NAN
32,Convenience_store:Sales:clearance_interval_1_d...,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT10, DISCOUNT20, DISCOUNT30]",NAN
37,Neighborhood:nh_store_amounts,NAN,vector,"[0, 1]",neighborhood,int,3,[],NAN
94,Household:hh_shopping_frequency,NAN,subgroup,"{'1-2': 0.08, '3-6': 0.12, '7-10': 0.8}",household,int,NAN,[],NAN
96,Household:hh_pay_day_interval,NAN,subgroup,"{'14': 0.628, '30': 0.103, '7': 0.269}",household,int,NAN,[],NAN
107,Adult:adult_preference_vector,NAN,vector,"[0, 1]",household,float,7,[],2


In [3]:
# Display the entries that carry a fixed value (everything whose "value" is not "NAN").
constants

Unnamed: 0,name,value,distribution,bounds,layer,dtype,length,options,decimals
0,Simulation:runs,1,NAN,NAN,simulation,NAN,NAN,[],NAN
1,Simulation:total_days,100,NAN,NAN,simulation,NAN,NAN,[],NAN
2,Simulation:name,debug,NAN,NAN,simulation,NAN,NAN,[],NAN
3,Simulation:output_folder,data,NAN,NAN,simulation,NAN,NAN,[],NAN
4,Simulation:write_to_file_interval,1,NAN,NAN,simulation,NAN,NAN,[],NAN
...,...,...,...,...,...,...,...,...,...
149,Discount_retailer:Sales:clearance_interval_1_e...,3,NAN,NAN,simulation,NAN,NAN,[],NAN
150,Discount_retailer:Sales:clearance_interval_2_e...,,NAN,NAN,simulation,NAN,NAN,[],NAN
151,Discount_retailer:Sales:clearance_interval_3_e...,,NAN,NAN,simulation,NAN,NAN,[],NAN
153,Discount_retailer:Sales:clearance_interval_2_d...,,NAN,NAN,simulation,NAN,NAN,[],NAN


In [4]:
# Display the parameters whose distribution is neither "triang" nor "unif".
other_distributions

Unnamed: 0,name,value,distribution,bounds,layer,dtype,length,options,decimals
10,Premium_retailer:Sales:high_stock_discount_int...,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
13,Premium_retailer:Sales:seasonal_discount,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
18,Premium_retailer:Sales:clearance_interval_1_di...,NAN,sales,NAN,neighborhood,NAN,NAN,[DISCOUNT10],NAN
24,Convenience_store:Sales:high_stock_discount_in...,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT20, DISCOUNT30, DISCOUNT40, DISCOUNT5...",NAN
27,Convenience_store:Sales:seasonal_discount,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT10, DISCOUNT20, DISCOUNT30, DISCOUNT4...",NAN
32,Convenience_store:Sales:clearance_interval_1_d...,NAN,sales,NAN,neighborhood,NAN,NAN,"[DISCOUNT10, DISCOUNT20, DISCOUNT30]",NAN
37,Neighborhood:nh_store_amounts,NAN,vector,"[0, 1]",neighborhood,int,3,[],NAN
94,Household:hh_shopping_frequency,NAN,subgroup,"{'1-2': 0.08, '3-6': 0.12, '7-10': 0.8}",household,int,NAN,[],NAN
96,Household:hh_pay_day_interval,NAN,subgroup,"{'14': 0.628, '30': 0.103, '7': 0.269}",household,int,NAN,[],NAN
107,Adult:adult_preference_vector,NAN,vector,"[0, 1]",household,float,7,[],2


In [6]:
# Convert the other distributions to Sobol-sampleable values: every "subgroup",
# "vector" and "sales" parameter is represented by uniform [0, 1] stand-ins
# (one per element for vectors).
def _unit_uniform_spec(name, dtype, layer):
    """Return the row describing one uniform [0, 1] stand-in parameter."""
    return {"name": name, "distribution": "unif", "bounds": [0, 1], "dtype": dtype,
            "decimals": 2, "layer": layer, "value": "NAN", "options": "NAN"}


sampleable_rows = []
for _, row in other_distributions.iterrows():
    kind = row["distribution"]
    if kind == "subgroup":
        sampleable_rows.append(_unit_uniform_spec(row["name"], "float", row["layer"]))
    elif kind == "vector":
        # One stand-in per vector element, named <name>_0, <name>_1, ...
        for i in range(int(row["length"])):
            sampleable_rows.append(
                _unit_uniform_spec(row["name"] + "_" + str(i), row["dtype"], row["layer"])
            )
    elif kind == "sales":
        # The sales options are then picked front to back - not randomised again.
        sampleable_rows.append(_unit_uniform_spec(row["name"], "float", row["layer"]))

# Skip names that are already present so that re-running this cell does not
# append the same rows a second time.
already_present = set(simple_distributions["name"])
sampleable_rows = [spec for spec in sampleable_rows if spec["name"] not in already_present]

# Append everything with a single concat instead of once per row.
if sampleable_rows:
    simple_distributions = pd.concat(
        [simple_distributions, pd.DataFrame(sampleable_rows)], ignore_index=True
    )
    

In [7]:
# Display the table of parameters that will be passed to the Sobol sampler.
simple_distributions

Unnamed: 0,name,value,distribution,bounds,layer,dtype,length,options,decimals
0,Premium_retailer:Sales:high_stock_interval_1,NAN,unif,"[1.3, 1.5]",neighborhood,float,NAN,[],2
1,Premium_retailer:Sales:seasonal_likelihood,NAN,unif,"[0, 0.05]",neighborhood,float,NAN,[],2
2,Convenience_store:Sales:high_stock_interval_1,NAN,unif,"[1, 1.5]",neighborhood,float,NAN,[],2
3,Convenience_store:Sales:seasonal_likelihood,NAN,unif,"[0, 0.25]",neighborhood,float,NAN,[],2
4,Neighborhood:Grid:travel_time_per_cell,NAN,triang,"[3, 7, 0.5]",neighborhood,float,NAN,[],1
...,...,...,...,...,...,...,...,...,...
94,Child:child_preference_vector_5,NAN,unif,"[0, 1]",household,float,,NAN,2
95,Child:child_preference_vector_6,NAN,unif,"[0, 1]",household,float,,NAN,2
96,Discount_retailer:Sales:high_stock_discount_in...,NAN,unif,"[0, 1]",neighborhood,float,,NAN,2
97,Discount_retailer:Sales:seasonal_discount,NAN,unif,"[0, 1]",neighborhood,float,,NAN,2


In [8]:
#sample other distributions by hand: subgroup
#import random

#def get_value_for_subgroup(bounds):
## Step 1: Extract ranges and probabilities
#    ranges = []
#    probs = []
#    for key, prob in bounds.items():
#        start, end = map(int, key.split('-'))
#        ranges.append((start, end))
#        probs.append(prob)
#    chosen_range = random.choices(ranges, weights=probs, k=1)[0]
#    sampled_value = random.randint(chosen_range[0], chosen_range[1])

#for row in other_distributions.rows: 
#    if row["distribution"] == "subgroup":
#        row["value"] = get_value_for_subgroup(row["bounds"])
#    elif row["distribution"] == "vector": 
#        if row["name"].contains["store_amounts"]: 
            

In [9]:
# Disabled draft: the helper below is wrapped in a string literal, so nothing in
# it runs; the cell merely evaluates to (and displays) that string.
'''import random

def get_value_for_subgroup(bounds):
    ranges = []
    probs = []

    for key, prob in bounds.items():
        if '-' in key:
            start, end = map(int, key.split('-'))
        else:
            start = end = int(key)  # Single value case
        ranges.append((start, end))
        probs.append(prob)

    # Choose a range/value based on probability
    chosen_range = random.choices(ranges, weights=probs, k=1)[0]
    sampled_value = random.randint(chosen_range[0], chosen_range[1])
    
    return sampled_value
print(get_value_for_subgroup({'14': 0.628, '30': 0.103, '7': 0.269}))'''

"import random\n\ndef get_value_for_subgroup(bounds):\n    ranges = []\n    probs = []\n\n    for key, prob in bounds.items():\n        if '-' in key:\n            start, end = map(int, key.split('-'))\n        else:\n            start = end = int(key)  # Single value case\n        ranges.append((start, end))\n        probs.append(prob)\n\n    # Choose a range/value based on probability\n    chosen_range = random.choices(ranges, weights=probs, k=1)[0]\n    sampled_value = random.randint(chosen_range[0], chosen_range[1])\n    \n    return sampled_value\nprint(get_value_for_subgroup({'14': 0.628, '30': 0.103, '7': 0.269}))"

In [10]:
l = 4  # Sampling intensity: passed to sobol.sample as the number of base samples
N = 10  # Number of times each sampled parameter set is repeated in the realizations matrix


In [11]:
# Kept for per-layer problem definitions; nothing populates it yet.
problems = {}

# The problem spans every sampleable parameter and does not depend on the
# layer, so it is built and sampled exactly once. (Looping over the layers
# without filtering by layer would only rebuild and re-draw this identical
# problem once per layer and keep the last draw.)
names = simple_distributions["name"].to_list()
distributions = simple_distributions["distribution"].to_list()
bounds = simple_distributions["bounds"].to_list()
problem = {
    'num_vars': len(names),
    'names': names,
    'bounds': bounds,
    'dists': distributions,
}

# calc_second_order=True would add the rows needed for second-order interactions.
# NOTE(review): no seed is passed, so the draw presumably differs between runs;
# consider passing `seed=` for reproducibility - confirm against the SALib docs.
param_values = pd.DataFrame(sobol.sample(problem, l, calc_second_order=False), columns=names)
print("GSA sample matrix size: {}".format(param_values.shape))

# Later cells use `layer` when naming the output files; keep it bound to the
# last layer, i.e. the value a loop over the layers would leave behind.
layer = simple_distributions["layer"].unique()[-1]


GSA sample matrix size: (404, 99)
GSA sample matrix size: (404, 99)


In [12]:
# Display the sample matrix (one column per sampled parameter).
param_values

Unnamed: 0,Premium_retailer:Sales:high_stock_interval_1,Premium_retailer:Sales:seasonal_likelihood,Convenience_store:Sales:high_stock_interval_1,Convenience_store:Sales:seasonal_likelihood,Neighborhood:Grid:travel_time_per_cell,Neighborhood:BasketCurator:increment_likelihood,Neighborhood:BasketCurator:max_items_quickshop,Neighborhood:Food:FGMeat:expiration,Neighborhood:Food:FGMeat:impulse_buy_likelihood,Neighborhood:Food:FGDairy:expiration,...,Child:child_preference_vector_0,Child:child_preference_vector_1,Child:child_preference_vector_2,Child:child_preference_vector_3,Child:child_preference_vector_4,Child:child_preference_vector_5,Child:child_preference_vector_6,Discount_retailer:Sales:high_stock_discount_interval_1,Discount_retailer:Sales:seasonal_discount,Discount_retailer:Sales:clearance_interval_1_discount
0,1.453692,0.013536,1.011261,0.059033,4.392566,0.943491,5.256637,5.553667,0.396931,9.557176,...,0.053811,0.695255,0.291455,0.600928,0.023446,0.110840,0.761810,0.554469,0.366080,0.500096
1,1.311823,0.013536,1.011261,0.059033,4.392566,0.943491,5.256637,5.553667,0.396931,9.557176,...,0.053811,0.695255,0.291455,0.600928,0.023446,0.110840,0.761810,0.554469,0.366080,0.500096
2,1.453692,0.015112,1.011261,0.059033,4.392566,0.943491,5.256637,5.553667,0.396931,9.557176,...,0.053811,0.695255,0.291455,0.600928,0.023446,0.110840,0.761810,0.554469,0.366080,0.500096
3,1.453692,0.013536,1.023733,0.059033,4.392566,0.943491,5.256637,5.553667,0.396931,9.557176,...,0.053811,0.695255,0.291455,0.600928,0.023446,0.110840,0.761810,0.554469,0.366080,0.500096
4,1.453692,0.013536,1.011261,0.027259,4.392566,0.943491,5.256637,5.553667,0.396931,9.557176,...,0.053811,0.695255,0.291455,0.600928,0.023446,0.110840,0.761810,0.554469,0.366080,0.500096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,1.402689,0.043948,1.340484,0.207384,4.473596,0.553472,4.806584,4.380087,0.566567,20.668627,...,0.300447,0.101941,0.222342,0.022912,0.681660,0.363306,0.169553,0.778634,0.975433,0.224844
400,1.402689,0.043948,1.340484,0.207384,4.473596,0.553472,4.806584,4.380087,0.566567,20.668627,...,0.300447,0.101941,0.222342,0.022912,0.681660,0.363306,0.335326,0.008597,0.975433,0.224844
401,1.402689,0.043948,1.340484,0.207384,4.473596,0.553472,4.806584,4.380087,0.566567,20.668627,...,0.300447,0.101941,0.222342,0.022912,0.681660,0.363306,0.335326,0.778634,0.805258,0.224844
402,1.402689,0.043948,1.340484,0.207384,4.473596,0.553472,4.806584,4.380087,0.566567,20.668627,...,0.300447,0.101941,0.222342,0.022912,0.681660,0.363306,0.335326,0.778634,0.975433,0.542821


In [13]:
# Convert to the correct dtype with the correct rounding.
# Look-up table name -> spec row, built once; keep="first" mirrors taking the
# first matching row when a name occurs more than once.
param_specs = simple_distributions.drop_duplicates("name", keep="first").set_index("name")

for param_name in param_values.columns:
    if param_name not in param_specs.index:
        # Column without a spec (e.g. a helper column present when this cell
        # is re-run): leave it untouched instead of failing.
        continue
    spec = param_specs.loc[param_name]
    if spec["dtype"] == "int":
        # NOTE(review): values are rounded but stay float64 (5.0, not 5);
        # add .astype(int) if the consumer needs integer literals - confirm.
        param_values[param_name] = param_values[param_name].round()
    elif spec["dtype"] == "float":
        try:
            decimals = int(spec["decimals"])
        except (TypeError, ValueError):
            # No usable "decimals" entry (e.g. "NAN"): keep full precision.
            continue
        param_values[param_name] = param_values[param_name].astype("float").round(decimals)
        

In [14]:
# Display the current state of the sample matrix.
param_values

Unnamed: 0,Premium_retailer:Sales:high_stock_interval_1,Premium_retailer:Sales:seasonal_likelihood,Convenience_store:Sales:high_stock_interval_1,Convenience_store:Sales:seasonal_likelihood,Neighborhood:Grid:travel_time_per_cell,Neighborhood:BasketCurator:increment_likelihood,Neighborhood:BasketCurator:max_items_quickshop,Neighborhood:Food:FGMeat:expiration,Neighborhood:Food:FGMeat:impulse_buy_likelihood,Neighborhood:Food:FGDairy:expiration,...,Child:child_preference_vector_0,Child:child_preference_vector_1,Child:child_preference_vector_2,Child:child_preference_vector_3,Child:child_preference_vector_4,Child:child_preference_vector_5,Child:child_preference_vector_6,Discount_retailer:Sales:high_stock_discount_interval_1,Discount_retailer:Sales:seasonal_discount,Discount_retailer:Sales:clearance_interval_1_discount
0,1.45,0.01,1.01,0.06,4.4,0.94,5.0,6.0,0.40,10.0,...,0.05,0.70,0.29,0.60,0.02,0.11,0.76,0.55,0.37,0.50
1,1.31,0.01,1.01,0.06,4.4,0.94,5.0,6.0,0.40,10.0,...,0.05,0.70,0.29,0.60,0.02,0.11,0.76,0.55,0.37,0.50
2,1.45,0.02,1.01,0.06,4.4,0.94,5.0,6.0,0.40,10.0,...,0.05,0.70,0.29,0.60,0.02,0.11,0.76,0.55,0.37,0.50
3,1.45,0.01,1.02,0.06,4.4,0.94,5.0,6.0,0.40,10.0,...,0.05,0.70,0.29,0.60,0.02,0.11,0.76,0.55,0.37,0.50
4,1.45,0.01,1.01,0.03,4.4,0.94,5.0,6.0,0.40,10.0,...,0.05,0.70,0.29,0.60,0.02,0.11,0.76,0.55,0.37,0.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,1.40,0.04,1.34,0.21,4.5,0.55,5.0,4.0,0.57,21.0,...,0.30,0.10,0.22,0.02,0.68,0.36,0.17,0.78,0.98,0.22
400,1.40,0.04,1.34,0.21,4.5,0.55,5.0,4.0,0.57,21.0,...,0.30,0.10,0.22,0.02,0.68,0.36,0.34,0.01,0.98,0.22
401,1.40,0.04,1.34,0.21,4.5,0.55,5.0,4.0,0.57,21.0,...,0.30,0.10,0.22,0.02,0.68,0.36,0.34,0.78,0.81,0.22
402,1.40,0.04,1.34,0.21,4.5,0.55,5.0,4.0,0.57,21.0,...,0.30,0.10,0.22,0.02,0.68,0.36,0.34,0.78,0.98,0.54


In [15]:

# For the HPC runs every row is prefixed with a constant "sens_class" column
# (column label 0). Guarded so that re-running the cell does not add a second
# copy; the explicit index keeps the rows aligned even after filtering.
if 0 not in param_values.columns:
    tag_column = pd.DataFrame(['sens_class'] * param_values.shape[0], index=param_values.index)
    param_values = pd.concat([tag_column, param_values], axis=1)

# Filter out unperformable sets (no stores in the simulation): all store-amount
# elements are picked up by prefix instead of hard-coding three of them.
store_amount_columns = [
    col for col in param_values.columns
    if str(col).startswith('Neighborhood:nh_store_amounts_')
]
if not store_amount_columns:
    raise KeyError("no 'Neighborhood:nh_store_amounts_*' columns in param_values")

print(len(param_values))
# skipna=False: a missing value yields NaN, which is not == 0, so the row is kept.
has_store = param_values[store_amount_columns].sum(axis=1, skipna=False) != 0
# NOTE(review): dropping rows leaves an incomplete Sobol design; confirm that
# the later sensitivity analysis can handle the reduced sample.
param_values = param_values[has_store]
print(len(param_values))

404
305


In [None]:

# Repeat every sampled parameter set N times, one row per realization.
realization_rows = np.repeat(param_values.to_numpy(), repeats=N, axis=0)
sample_matrix_realizations = pd.DataFrame(realization_rows)
print(f"GSA sample matrix size: {sample_matrix_realizations.shape}")

# Write the sample matrix (tab separated, no header, no index) to configure the model.
sample_path = f"{layer}_ifwaste_sample.txt"
header_path = f"{layer}_ifwaste_sample_header.txt"
sample_matrix_realizations.to_csv(sample_path, header=False, index=False, sep='\t')
# NOTE(review): `df` holds every entry read from the config files (constants
# included), not the columns of the sample matrix, so this header presumably
# does not line up with the sample file - confirm what the consumer expects.
df["name"].to_csv(header_path, index=False, header=False)


GSA sample matrix size: (4040, 100)


Generating the GSA sample matrix

In [None]:
# Draw the Sobol sample for `problem`
# (calc_second_order=True would add the second-order interactions).
# NOTE(review): this rebinds `param_values` to a fresh draw.
sobol_draw = sobol.sample(problem, l, calc_second_order=False)
param_values = pd.DataFrame(sobol_draw, columns=names)
print(f"GSA sample matrix size: {param_values.shape}")

# For the HPC runs: prepend the constant "sens_class" column.
sens_class_column = pd.DataFrame(['sens_class'] * param_values.shape[0])
sample_matrix = pd.concat([sens_class_column, param_values], axis=1)

# Repeat every sampled parameter set N times, one row per realization.
repeated_rows = np.repeat(sample_matrix.to_numpy(), repeats=N, axis=0)
sample_matrix_realizations = pd.DataFrame(repeated_rows)
print(f"GSA sample matrix size: {sample_matrix_realizations.shape}")

# Writing the sample matrix to csv is disabled in this cell:
#sample_matrix_realizations.to_csv('{}_layer.txt'.format(layer), header=False, index=False, sep='\t')


GSA sample matrix size: (404, 99)
GSA sample matrix size: (4040, 100)
