# Step 1: Identify sources of uncertainty in the composite indicator pipeline

# 1. List methodological stages
stages = [
    "Indicator selection",        # Which variables are included
    "Missing data imputation",    # How gaps are filled
    "Normalization method",       # Z-score vs. Min-Max vs. others
    "Thematic grouping",          # How indicators map to themes
    "Weighting scheme",           # Equal vs. custom vs. PCA-based
    "Aggregation method",         # Arithmetic vs. geometric mean
    "Final scaling",              # Sigmoid vs. exclusive percentile
    "Outlier treatment"           # Shifts or caps applied
]

# 2. Display the list
for i, stage in enumerate(stages, 1):
    print(f"{i}. {stage}")


In [1]:
import pandas as pd
import numpy as np
from scipy.special import expit
from scipy.stats import spearmanr

# 1. Load your normalized Z-score data
df_z = pd.read_csv("tech_adoption_zscore.csv", index_col=0)

# 2. Define thematic groups and equal weights (1/3 each)
groups = {
    'infrastructure_access': ['Internet_Users_Pct', 'Fixed_Broadband_per_100', 'Mobile_Cellular_per_100'],
    'innovation_capability':  ['R&D_Expenditure_Pct_GDP', 'High_Technology_Exports_USD', 'ICT_Graduates_Pct'],
    'policy_readiness':       ['Problem_Solving_Tech_Level3_Pct', 'B2C_Index_2020', 'DAI_Business', 'DAI_People', 'DAI_Gov']
}
weights = {theme: 1/3 for theme in groups}

# 3. Function to compute composite sigmoid index
def compute_sigmoid_index(df_norm):
    epsilon = 1e-6
    sub = {}
    for theme, cols in groups.items():
        block = df_norm[cols]
        shift = (-block.min()) + epsilon
        pos = block + shift
        sub[theme] = np.exp(np.log(pos).mean(axis=1))
    raw = sum(sub[t] * weights[t] for t in groups)
    return expit(raw)

# 4. Baseline index
baseline = compute_sigmoid_index(df_z)

# 5. Drop‐one analysis
results = []
for col in df_z.columns:
    df_drop = df_z.drop(columns=[col])                              # remove one variable
    # adjust groups by dropping the column
    groups_adj = {t: [c for c in cols if c != col] for t, cols in groups.items()}
    # compute index on reduced data
    def comp_adj(df_in):
        sub_adj = {}
        for theme, cols in groups_adj.items():
            block = df_in[cols]
            shift = (-block.min()) + epsilon
            pos = block + shift
            sub_adj[theme] = np.exp(np.log(pos).mean(axis=1))
        raw_adj = sum(sub_adj[t] * weights[t] for t in groups_adj)
        return expit(raw_adj)
    sig_drop = comp_adj(df_drop)
    # Spearman rank correlation with baseline
    corr, _ = spearmanr(baseline, sig_drop)
    results.append((col, corr))

# 6. Collect and display results
var_sensitivity = pd.DataFrame(results, columns=['Variable','Spearman_Corr']).set_index('Variable')
print(var_sensitivity.sort_values('Spearman_Corr'))


NameError: name 'epsilon' is not defined