In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from SALib.sample import saltelli
from SALib.analyze import sobol
from ring_plot_funct import ring_plot
REALIZATIONS = 16

In [34]:
import pandas as pd
import os

def load_scenario_data(scenarios: list) -> pd.DataFrame:
    """
    Loads 'aggregated_outputs.csv' files from a list of scenarios into a single DataFrame.

    Args:
        scenarios (list): A list of scenario names (e.g., ['01_scenario0_no_promotions', '02_scenario1_promotions']).

    Returns:
        pd.DataFrame: A DataFrame with combined data and a unique identifier column.
    """
    all_data = []

    for scenario_dir in scenarios:
        scenario_id = scenario_dir.split('_')[1][-1]
        
        # Walk through the directory to find 'aggregated_outputs.csv' files
        for root, dirs, files in os.walk(scenario_dir):
            if 'aggregated_outputs.csv' in files:
                run_dir = os.path.basename(root)
                if run_dir.startswith('run_'):
                    run_id = run_dir.split('_')[1]
                    file_path = os.path.join(root, 'aggregated_outputs.csv')
                    
                    try:
                        df = pd.read_csv(file_path)
                        df['household'] = df['household'].astype(int)
                        
                        #df['id'] = f"{scenario_id}_{run_id}_" + df['household'].astype(str)
                        df["scenario"] = scenario_id
                        df["run"] = run_id

                        all_data.append(df)
                    except Exception as e:
                        print(f"Could not load {file_path}: {e}")
                        
    if not all_data:
        print("No data found. Check your file paths and scenario list.")
        return pd.DataFrame()
        
    return pd.concat(all_data, ignore_index=True)

In [35]:
PATH = "/home/haasehelen/haasehelen/ifwaste/output/"
#scenarios = [PATH+'01_scenario0_no_promotions', PATH+'01_scenario1_bogos_only', PATH+'01_scenario2_sales_only', PATH+"01_scenario3_both"]
scenarios = [PATH+'01_scenario0_no_promotions'] 
outputs = load_scenario_data(scenarios)

In [36]:
outputs

Unnamed: 0,household,FGMEAT,FGDAIRY,FGVEGETABLE,FGDRYFOOD,FGSNACKS,FGBAKED,FGSTOREPREPARED,Inedible Parts,Plate Waste,...,Preprepared,n_quickcook,n_cook,n_attempted_cook,n_leftovers,n_shop,n_quickshop,n_attempted_shop,scenario,run
0,191,621.570,213.199,373.678,130.187,226.891,419.675,686.494,389.425,534.023,...,686.493676,8.0,92.0,0.0,100.0,9.0,26.0,4.0,0,0
1,82,348.928,477.117,472.963,181.319,408.171,316.341,187.881,445.448,1826.918,...,187.880536,2.0,98.0,0.0,100.0,9.0,34.0,2.0,0,0
2,283,125.221,391.134,137.080,234.755,404.800,81.831,246.072,205.970,1414.923,...,246.071975,0.0,100.0,0.0,100.0,0.0,100.0,10.0,0,0
3,488,86.767,45.231,21.091,22.278,41.947,3.338,141.099,73.891,138.149,...,141.099035,4.0,96.0,0.0,100.0,11.0,24.0,2.0,0,0
4,422,219.914,143.399,81.020,93.443,208.197,37.955,175.779,158.288,710.964,...,175.778834,4.0,96.0,0.0,100.0,10.0,14.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,144,111.280,178.070,326.562,109.527,172.635,245.296,146.042,241.311,725.287,...,146.042000,15.0,85.0,0.0,100.0,15.0,2.0,0.0,0,9
7996,121,292.903,539.281,236.299,337.314,553.135,191.951,356.702,296.168,2011.647,...,356.702000,0.0,100.0,0.0,100.0,1.0,92.0,10.0,0,9
7997,298,278.841,482.357,232.912,334.087,626.511,285.374,404.369,295.761,2007.719,...,404.369000,12.0,88.0,0.0,100.0,4.0,76.0,9.0,0,9
7998,80,221.362,355.507,253.976,211.218,342.624,168.442,204.782,294.975,1411.136,...,204.782000,1.0,99.0,0.0,100.0,4.0,69.0,7.0,0,9


In [37]:
import pandas as pd
import os

def load_household_configs(scenarios: list) -> pd.DataFrame:
    """
    Loads household and neighborhood configuration data by applying the first
    row of neighborhood parameters to all households within each scenario.

    Args:
        scenarios (list): A list of scenario directory names (e.g., ['01_scenario0_no_promotions']).

    Returns:
        pd.DataFrame: A DataFrame with combined household and neighborhood data,
                      including 'scenario' and 'household_id' columns.
    """
    for scenario_dir in scenarios:
        scenario_id = scenario_dir.split('_')[2][-1]
        
        # Define file paths
        household_file = os.path.join(scenario_dir, 'household_ifwaste_sample_df.txt')
        bounds_file = os.path.join(scenario_dir, 'bounds_df.csv')
        neighborhood_file = os.path.join(scenario_dir, 'neighborhood_ifwaste_sample_df.txt')
        
        if not os.path.exists(household_file) or not os.path.exists(neighborhood_file):
            print(f"Skipping scenario '{scenario_dir}': Missing one or both configuration files.")
            continue
        # Load household and neighborhood data
        household_df = pd.read_csv(household_file, sep="\t")
        bounds_df = pd.read_csv(bounds_file, sep="\t")
        neighborhood_df = pd.read_csv(neighborhood_file, sep="\t")
        # Select the first row of neighborhood data and convert it to a dictionary
        # The .iloc[0] selects the first row, and .to_dict() converts it to a dictionary
        first_neighborhood_row = neighborhood_df.iloc[0].to_dict()
        # Add new columns to the household DataFrame based on the first row's values
        for col, value in first_neighborhood_row.items():
            household_df[col] = value
        
        
        # Add scenario and household ID columns
        household_df['scenario'] = scenario_id
        household_df['household_id'] = household_df.index
        return (household_df, bounds_df)
            


In [38]:
INPUT_PATH = "/home/haasehelen/haasehelen/ifwaste/input/gsua_based_configuration/samples/shopping/gsua_test/"
input_df, bounds_df = load_household_configs(scenarios=[INPUT_PATH])

In [39]:
input_df.head()

Unnamed: 0,Household:hh_amount_children,Household:hh_amount_adults,Household:hh_level_of_concern,Household:hh_impulse_buy_likelihood,Household:hh_daily_budget,Household:hh_min_time_to_cook,Household:hh_time_per_store,Household:hh_price_sensitivity,Household:hh_brand_sensitivity,Household:hh_quality_sensitivity,...,Neighborhood:Food:FGVegetable:impulse_buy_likelihood,Neighborhood:Food:FGDryFood:expiration,Neighborhood:Food:FGDryFood:impulse_buy_likelihood,Neighborhood:Food:FGSnacks:expiration,Neighborhood:Food:FGSnacks:impulse_buy_likelihood,Neighborhood:Food:FGStorePrepared:expiration,Neighborhood:Food:FGStorePrepared:impulse_buy_likelihood,Neighborhood:nh_store_amounts,scenario,household_id
0,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]",a,0
1,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]",a,1
2,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]",a,2
3,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]",a,3
4,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]",a,4


In [45]:
input_df = input_df.drop(columns=['scenario', 'household_id'])
input_df.head()

Unnamed: 0,Household:hh_amount_children,Household:hh_amount_adults,Household:hh_level_of_concern,Household:hh_impulse_buy_likelihood,Household:hh_daily_budget,Household:hh_min_time_to_cook,Household:hh_time_per_store,Household:hh_price_sensitivity,Household:hh_brand_sensitivity,Household:hh_quality_sensitivity,...,Neighborhood:Food:FGBaked:impulse_buy_likelihood,Neighborhood:Food:FGVegetable:expiration,Neighborhood:Food:FGVegetable:impulse_buy_likelihood,Neighborhood:Food:FGDryFood:expiration,Neighborhood:Food:FGDryFood:impulse_buy_likelihood,Neighborhood:Food:FGSnacks:expiration,Neighborhood:Food:FGSnacks:impulse_buy_likelihood,Neighborhood:Food:FGStorePrepared:expiration,Neighborhood:Food:FGStorePrepared:impulse_buy_likelihood,Neighborhood:nh_store_amounts
0,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.54,7,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]"
1,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.54,7,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]"
2,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.54,7,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]"
3,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.54,7,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]"
4,3,1,0.28,0.79,73.35,18,14,0.35,0.6,0.16,...,0.54,7,0.32,293,0.17,195,0.89,5,0.63,"[0, 0, 1]"


In [40]:
bounds_df.head()

Unnamed: 0.1,Unnamed: 0,name,distribution,bounds,dtype,decimals
0,0,Household:hh_amount_children,unif,"[0, 6]",int,NAN
1,1,Household:hh_amount_adults,unif,"[1, 2]",int,NAN
2,2,Household:hh_level_of_concern,unif,"[0, 1]",float,2
3,3,Household:hh_impulse_buy_likelihood,unif,"[0, 1.6]",float,2
4,4,Household:hh_daily_budget,unif,"[8.36, 79.5]",float,2


In [47]:
columns = list(input_df.columns)


In [48]:
import ast
bounds_df["bounds"] = bounds_df["bounds"].apply(ast.literal_eval)
bounds_df


Unnamed: 0.1,Unnamed: 0,name,distribution,bounds,dtype,decimals
0,0,Household:hh_amount_children,unif,"[0, 6]",int,NAN
1,1,Household:hh_amount_adults,unif,"[1, 2]",int,NAN
2,2,Household:hh_level_of_concern,unif,"[0, 1]",float,2
3,3,Household:hh_impulse_buy_likelihood,unif,"[0, 1.6]",float,2
4,4,Household:hh_daily_budget,unif,"[8.36, 79.5]",float,2
...,...,...,...,...,...,...
91,91,Household:hh_shopping_frequency,subgroup,"{'1-2': 0.08, '3-6': 0.12, '7-10': 0.8}",int,NAN
92,92,Household:hh_pay_day_interval,subgroup,"{'14': 0.628, '30': 0.103, '7': 0.269}",int,NAN
93,93,Adult:adult_preference_vector,vector,"[0, 1]",float,2
94,94,Child:child_preference_vector,vector,"[0, 1]",float,2


In [49]:
bounds_df["bounds"]

0                                      [0, 6]
1                                      [1, 2]
2                                      [0, 1]
3                                    [0, 1.6]
4                                [8.36, 79.5]
                       ...                   
91    {'1-2': 0.08, '3-6': 0.12, '7-10': 0.8}
92     {'14': 0.628, '30': 0.103, '7': 0.269}
93                                     [0, 1]
94                                     [0, 1]
95                                     [0, 1]
Name: bounds, Length: 96, dtype: object

In [50]:
for i, bound in bounds_df["bounds"].items():
    if len(bound) == 3 and not isinstance(bound, dict): 
        bounds_df.at[i, "bounds"] =  [bound[0], bound[1]]

def normalize_bound(bound):
    if isinstance(bound, dict):
        mins, maxs = [], []
        for key in bound.keys():
            if "-" in key:  # range like "7-10"
                start, end = map(int, key.split("-"))
                mins.append(start)
                maxs.append(end)
            else:  # single number like "10"
                val = int(key)
                mins.append(val)
                maxs.append(val)
        return [min(mins), max(maxs)]

    return bound

bounds_df["bounds"] = bounds_df["bounds"].apply(normalize_bound)

In [51]:
import pandas as pd

def expand_list_columns(df):
    out = df.copy()
    
    for col in out.columns:
        if not col == "scenario":
            # Step 1: convert string to list of floats
            def to_list(x):
                if isinstance(x, str):
                    x = x.strip()
                    if x.startswith("[") and x.endswith("]"):
                        # list string like "[1,2,3]" or "[0.1,0.2]"
                        return [float(n.strip()) for n in x[1:-1].split(",")]
                    else:
                        # single number string like "5" or "0.39"
                        return [float(x)]
                return [float(x)] if isinstance(x, (int, float)) else [x]  # wrap single numbers
            out[col] = out[col].apply(to_list)

        # Step 2: expand list columns
        if out[col].apply(lambda v: isinstance(v, list)).any():
            expanded = out[col].apply(pd.Series)
            expanded.columns = [f"{col}_{i}" for i in expanded.columns]
            out = out.drop(columns=[col]).join(expanded)

    return out

input_df = expand_list_columns(input_df)

In [52]:
len(input_df.columns)

90

In [53]:
bounds_df

Unnamed: 0.1,Unnamed: 0,name,distribution,bounds,dtype,decimals
0,0,Household:hh_amount_children,unif,"[0, 6]",int,NAN
1,1,Household:hh_amount_adults,unif,"[1, 2]",int,NAN
2,2,Household:hh_level_of_concern,unif,"[0, 1]",float,2
3,3,Household:hh_impulse_buy_likelihood,unif,"[0, 1.6]",float,2
4,4,Household:hh_daily_budget,unif,"[8.36, 79.5]",float,2
...,...,...,...,...,...,...
91,91,Household:hh_shopping_frequency,subgroup,"[1, 10]",int,NAN
92,92,Household:hh_pay_day_interval,subgroup,"[7, 30]",int,NAN
93,93,Adult:adult_preference_vector,vector,"[0, 1]",float,2
94,94,Child:child_preference_vector,vector,"[0, 1]",float,2


In [54]:
input_df.columns

Index(['Household:hh_amount_children_0', 'Household:hh_amount_adults_0',
       'Household:hh_level_of_concern_0',
       'Household:hh_impulse_buy_likelihood_0', 'Household:hh_daily_budget_0',
       'Household:hh_min_time_to_cook_0', 'Household:hh_time_per_store_0',
       'Household:hh_price_sensitivity_0', 'Household:hh_brand_sensitivity_0',
       'Household:hh_quality_sensitivity_0',
       'Household:hh_availability_sensitivity_0',
       'Household:hh_deal_sensitivity_0', 'Household:hh_planner_0',
       'Household:hh_impulsivity_0', 'Household:hh_brand_preference_0',
       'Adult:adult_plate_waste_0', 'Adult:male_veg_servings_0',
       'Adult:male_baked_servings_0', 'Adult:male_dry_food_servings_0',
       'Adult:male_dairy_servings_0', 'Adult:male_meat_servings_0',
       'Adult:male_snacks_servings_0', 'Adult:male_store_prepared_ratio_0',
       'Adult:female_veg_servings_0', 'Adult:female_baked_servings_0',
       'Adult:female_dry_food_servings_0', 'Adult:female_dairy_se

In [57]:
bounds_df = bounds_df[~bounds_df["name"].isin([
    "Adult:adult_preference_vector",
    "Child:child_preference_vector",
    "Neighborhood:nh_store_amounts", 
    "Household:hh_max_avail_time_per_day",
    "scenario",
    "household",
    "run"
])].reset_index(drop=True)


In [60]:
bounds_df["name"]

0            Household:hh_amount_children
1              Household:hh_amount_adults
2           Household:hh_level_of_concern
3     Household:hh_impulse_buy_likelihood
4               Household:hh_daily_budget
                     ...                 
87        Neighborhood:nh_store_amounts_0
88        Neighborhood:nh_store_amounts_1
89        Neighborhood:nh_store_amounts_2
90        Household:hh_shopping_frequency
91          Household:hh_pay_day_interval
Name: name, Length: 92, dtype: object

In [58]:
print(len(bounds_df) == len(input_df.columns))
print(len(bounds_df))
print(len(input_df.columns))

False
92
90


In [19]:
problem = { "num_vars" : len(input_df.columns), 
            "names" : list(input_df.columns), 
            "bounds" : list(bounds_df["bounds"])}

In [20]:
print(len(input_df))
input_df = pd.concat([input_df]*REALIZATIONS, ignore_index=True)
len(input_df)

500


8000

In [21]:
len(outputs) == len(input_df)

True

In [None]:
outputs = outputs.sort_values(
    by=['scenario', 'run', 'household'],
    ascending=[True, True, True]  # optional, True is default
).reset_index(drop=True)
outputs = outputs.drop(columns=['scenario', 'run', 'household'])

## DATA is ready now


### Deterministic Variance

In [23]:
exp = pd.DataFrame(outputs.values.reshape(-1, REALIZATIONS, outputs.shape[1]).mean(1))  # Mean of the model realizations, E[Y|X] = Y_d
exp.columns = outputs.columns
exp.head()

Unnamed: 0,FGMEAT,FGDAIRY,FGVEGETABLE,FGDRYFOOD,FGSNACKS,FGBAKED,FGSTOREPREPARED,Inedible Parts,Plate Waste,Spoiled Food,Prepared,Unprepared,Preprepared,n_quickcook,n_cook,n_attempted_cook,n_leftovers,n_shop,n_quickshop,n_attempted_shop
0,187.004687,316.736437,77.990437,176.752125,244.751,63.267,286.672438,226.741437,968.134125,158.298563,819.298437,247.203125,286.67245,12.625,87.125,0.25,99.8125,23.75,68.375,27.0625
1,294.091813,500.238563,221.480812,301.984063,284.582438,104.239937,512.595312,346.375688,1171.918187,700.919,1247.735563,458.882,512.595397,1.5,98.1875,0.3125,99.875,5.5625,67.75,5.75
2,358.820375,498.054188,199.846125,292.806062,424.78975,141.313,492.126313,365.806938,1527.693625,514.2555,1508.123312,407.506375,492.126328,5.1875,94.4375,0.375,99.875,13.125,58.9375,11.5625
3,211.110563,391.513375,163.952562,212.284,308.040125,113.493313,364.974438,272.953375,1019.972437,472.442375,1021.935438,378.45875,364.974372,3.875,95.875,0.25,99.875,6.4375,52.0625,5.3125
4,241.848562,346.079938,133.4695,254.011,356.893375,195.51625,451.975063,235.382937,1199.030313,545.38025,1141.986938,385.8315,451.975112,2.125,97.75,0.125,99.9375,6.375,54.875,4.375


In [24]:
var = pd.DataFrame(outputs.values.reshape(-1, REALIZATIONS, outputs.shape[1]).var(1))  # Variance of the model realizations, V[Y|X] = \Psi_s
var.columns = outputs.columns
var.head()

Unnamed: 0,FGMEAT,FGDAIRY,FGVEGETABLE,FGDRYFOOD,FGSNACKS,FGBAKED,FGSTOREPREPARED,Inedible Parts,Plate Waste,Spoiled Food,Prepared,Unprepared,Preprepared,n_quickcook,n_cook,n_attempted_cook,n_leftovers,n_shop,n_quickshop,n_attempted_shop
0,19971.791936,14198.043224,1386.532758,6706.484378,16117.207736,2377.563198,26216.553285,5844.424005,137310.554158,43902.459286,114367.819443,12338.715401,26216.554727,338.484375,336.359375,0.1875,0.152344,1094.4375,695.234375,550.308594
1,32388.969822,95530.610987,30376.328957,51581.232224,26682.858757,5113.905414,90868.696645,25902.176445,455798.633723,326823.617198,610259.426912,49097.055007,90868.743743,1.875,2.277344,0.214844,0.109375,14.746094,424.0625,10.6875
2,21809.369356,22950.128151,31234.798842,24149.656687,26741.240474,17251.841707,57176.377732,14020.620469,711061.878949,289224.970372,437167.961533,20571.613082,57176.367817,33.277344,31.496094,0.359375,0.109375,137.984375,633.558594,137.246094
3,5205.11916,18211.132677,18973.72991,10332.713881,18731.474928,15307.26664,62254.23054,10778.811555,122445.907196,175428.947734,128986.415155,41519.355968,62254.186341,20.109375,18.609375,0.1875,0.109375,5.996094,522.058594,5.714844
4,25349.941009,13853.98704,11085.727709,9536.820119,12725.57109,41041.832376,65723.741312,10250.138522,288866.557877,204395.678103,129975.323575,101562.029069,65723.720643,0.859375,0.9375,0.109375,0.058594,6.859375,317.734375,8.609375


In [25]:
v_exp = exp.var()
e_var = var.mean()
for variable in outputs.columns:
    print('Output {}: V(Y) = {}. Vd + Es = {}'.format(variable, outputs.var()[variable], v_exp[variable] + e_var[variable]))

Output FGMEAT: V(Y) = 33527.02444266754. Vd + Es = 33540.73616730531
Output FGDAIRY: V(Y) = 57603.257701987546. Vd + Es = 57628.64992098168
Output FGVEGETABLE: V(Y) = 37162.61058224042. Vd + Es = 37170.97544861002
Output FGDRYFOOD: V(Y) = 32913.97371007092. Vd + Es = 32926.32907670202
Output FGSNACKS: V(Y) = 39656.05939983954. Vd + Es = 39670.20696956386
Output FGBAKED: V(Y) = 28529.414258330937. Vd + Es = 28535.10618943945
Output FGSTOREPREPARED: V(Y) = 95227.53329715962. Vd + Es = 95263.35358293286
Output Inedible Parts: V(Y) = 22872.96006484993. Vd + Es = 22882.749007807764
Output Plate Waste: V(Y) = 562196.3241106762. Vd + Es = 562438.3147766865
Output Spoiled Food: V(Y) = 324958.4454837955. Vd + Es = 325044.27322585613
Output Prepared: V(Y) = 534389.065498561. Vd + Es = 534630.0216590546
Output Unprepared: V(Y) = 87518.46605642165. Vd + Es = 87540.71879186563
Output Preprepared: V(Y) = 95227.5342241869. Vd + Es = 95263.35450836702
Output n_quickcook: V(Y) = 69.03156338292285. Vd +

In [26]:
vy = outputs.var()  # Total variance, V(Y)
v_exp = exp.var()  # Deterministic component, V_s
s_exp = v_exp/vy 
print('Fraction of deterministic variance:\n', s_exp)

Fraction of deterministic variance:
 FGMEAT              0.266988
FGDAIRY             0.282906
FGVEGETABLE         0.175044
FGDRYFOOD           0.250192
FGSNACKS            0.240878
FGBAKED             0.162255
FGSTOREPREPARED     0.250577
Inedible Parts      0.276485
Plate Waste         0.277719
Spoiled Food        0.194560
Prepared            0.287950
Unprepared          0.189632
Preprepared         0.250577
n_quickcook         0.208528
n_cook              0.207706
n_attempted_cook    0.266151
n_leftovers         0.186926
n_shop              0.203250
n_quickshop         0.141688
n_attempted_shop    0.186772
dtype: float64


### Second step: Decomposition of the deterministic component, $V_{d}$


In [32]:
print(len(input_df.columns))

92


In [28]:
print(len(input_df))
print(len(outputs))


8000
8000


In [29]:
keys = ['S1', 'ST', 'S1_conf', 'ST_conf']  # Keys in the SALib returned dictionary
# Initialize the DataFrame containing our stochastic sensitivity indices
df_exp = pd.DataFrame()

for out in outputs.columns:
    # Analyze the GSA outputs
    s_indices = sobol.analyze(problem, np.array(exp[out]), calc_second_order=False)

    # Use a dictionary to store the sensitivity indices and their confidence intervals
    s_dict = {x: s_indices[x] for x in keys}
    # Save the original sensitivity indices
    s_dict['oSi'] = s_dict['S1']
    s_dict['oST'] = s_dict['ST']
    # Scale the sensitivity indices with the fraction of deterministic variance
    s_dict['S1'] = s_dict['S1'] * s_exp[out]
    s_dict['ST'] = s_dict['ST'] * s_exp[out]
    # Name the inputs and outputs
    s_dict['input'] = problem['names']
    s_dict['output'] = [out] * len(s_indices['S1'])
    # Append the results for each of the outputs to the same dataframe
    temp = pd.DataFrame(s_dict)
    df_exp = pd.concat([df_exp, temp], ignore_index=1)

# Rename the DataFrame columns, changing the notation from S1 to Si
df_exp.columns = ['Si', 'ST', 'Si_conf', 'ST_conf', 'oSi', 'oST', 'input', 'output']

df_exp.head()

  names = list(pd.unique(groups))


RuntimeError: 
        Incorrect number of samples in model output file.
        Confirm that calc_second_order matches option used during sampling.

In [None]:
# Initialize a column using the name of the output
df_exp['S_exp'] = df_exp['output'].values
# Substitute that column with the corresponding fraction of variance due to deterministic effects based on the output
df_exp['S_exp'] = df_exp['S_exp'].map(s_exp.to_dict())
print(df_exp.head())

# Replace negative values with 0
df_exp[df_exp.select_dtypes("float64").columns] = df_exp.select_dtypes("float64").clip(lower=0)
df_exp.to_csv('det_S_ind.csv', index=False)  # Save deterministic sensitivity indices
df_exp.head()

ring_plot(df_exp, det_sto='det')  # The det_sto argument indicates whether our sensitivity indices are deterministic or stochastic

## 2. Decomposition of the Stochastic Variance: Stochastic Sensitivity Indice

In [None]:
sensitivity_results = {}

for i, col in enumerate(outputs.columns):
    Y = outputs[:, i]  # select one output column
    s_indices = sobol.analyze(problem, Y, calc_second_order=False)
    sensitivity_results[col] = s_indices
