In [None]:
from MDP_functions import get_data, find_next_state, transition_probabilities_faster
from utils import *
from MDP_generator import *
np.random.seed(0)



['loan_amount', 'credit_score', 'debt_to_income', 'purpose', 'num_calls', 'num_offers', 'offer_amount', 'customer_response_to_call', 'customer_acceptance_of_offer', 'customer_cancellation', 'last_action']
% of unseen state: 26.19272307853623%


In [None]:
# Simulator instance; its `activity_meanings` mapping is inverted further down
# to turn action names into integer codes.
env = InterpretableLoanMDP()
# Dataset identifier handed to get_data(...) and embedded in the result file names.
dataset = "simulation"

# Bisimulation loss

## Estimated stochastic bisimulation metrics

In [None]:
# Abstractions to evaluate, as (method name, k) pairs; each pair is forwarded to
# define_state_cols_sim(...) and get_data(...) by the loop below.
state_abstractions = [
    ('k_means', 500),
    ('k_means_features', 200),
    # NOTE(review): the saved output of this cell reports this entry as
    # 'approximate_structural' — confirm which name is the intended one.
    ('structural', 1),
    ('k_means_features', 50),
    ('k_means_features', 300),
    ('k_means_features', 10),
    ('k_means', 3000),
    ('k_means', 200),
    ('k_means', 1000),
    ('k_means_features', 500),
    ('k_means', 50),
    ('k_means', 300),
    ('k_means', 10),
]
# Only used as a suffix of the pickle file name written per abstraction.
ground_thruth = False
# Fraction of all state pairs inside a cluster that is sampled (at least one pair).
pair_sample_ratio = 0.1
# Mixing weight of the distance: (1 - c) * reward difference + c * transition distance.
c = 0.6

# For every state abstraction: estimate how different (one-step reward difference
# plus Wasserstein distance between next-state distributions) the original states
# merged into the same abstract state are, and pickle the result.

# Seed for the stdlib RNG that samples state pairs. np.random.seed(...) does not
# seed the `random` module, so without this the sampled pairs (and therefore the
# reported metric) change from run to run.
pair_sampling_seed = 0
# When an action is available in only one state of a pair, the other state is
# modelled as a deterministic self-loop with this reward.
missing_transition_reward = -100.0

for state_abstraction, k in state_abstractions:
    print(f"=== State Abstraction: {state_abstraction} with k: {k} ===")
    state_cols, state_cols_simulation, terminal_actions = define_state_cols_sim(state_abstraction, k)
    df, df_success, all_cases, all_actions, activity_index_df, n_actions, budget = get_data(dataset, k, state_abstraction)
    # Inverse of env.activity_meanings (value -> key); used to encode 'last_action'.
    activity_index = {meaning: code for code, meaning in env.activity_meanings.items()}

    # --- Original (unabstracted) state space ---
    all_states_unabs = df[state_cols_simulation].drop_duplicates().reset_index(drop=True)
    n_states_unabs = len(all_states_unabs)
    print(f"Simulating the environment with {n_states_unabs} distinct original states")
    transition_proba = transition_probabilities_faster_2(df, state_cols_simulation, all_states_unabs, activity_index, n_actions)
    # NOTE(review): the call above presumably adds a "state_index" column to
    # all_states_unabs in place; it is removed again so rows are pure state tuples.
    all_states_unabs = all_states_unabs.drop(columns=["state_index"])

    # --- Abstracted state space ---
    all_states = df[state_cols].drop_duplicates().reset_index(drop=True)
    n_states = len(all_states)
    print(f"Training with {n_states} distinct states ({n_states / n_states_unabs * 100:.2f}% of original state space) (k={k})")

    # Row tuple -> row position, for both state spaces.
    all_state_index = {tuple(row): idx for idx, row in all_states.iterrows()}
    all_state_unabs_index = {tuple(row): idx for idx, row in all_states_unabs.iterrows()}

    # Original state tuple -> index of the abstract state it falls into (None if unknown).
    unabs_to_abs_state = {
        tuple(row[state_cols_simulation]): all_state_index.get(tuple(row[state_cols]), None)
        for _, row in df.iterrows()
    }

    # Encode 'last_action' as its integer code; the remaining columns are used as-is.
    all_states_unabs_encoded = all_states_unabs.copy()
    all_states_unabs_encoded['last_action'] = all_states_unabs_encoded['last_action'].map(activity_index)

    # === Precompute transitions and encoded states ===
    print("Precomputing transitions and encoded states...")
    precomputed_transitions = {}
    actions_by_state = {}  # state index -> set of actions with a usable transition
    for s in tqdm(range(n_states_unabs), desc="States"):
        for a in range(n_actions):
            result = get_transitions_and_rewards(s, a, transition_proba)
            # Keep only (state, action) pairs for which every returned component is set.
            if not any(x is None for x in result):
                precomputed_transitions[(s, a)] = result
                actions_by_state.setdefault(s, set()).add(a)

    # Standardise the encoded states so every feature weighs equally in the
    # Wasserstein ground distance.
    scaler = StandardScaler()
    encoded_state_array = scaler.fit_transform(np.array(all_states_unabs_encoded.values))

    # === Group unabstracted states by abstracted state ===
    abs_to_unabs = {}
    for unabs_state, abs_state in unabs_to_abs_state.items():
        if abs_state is not None:
            abs_to_unabs.setdefault(abs_state, []).append(unabs_state)

    # ===== Computing bisimulation distance ============
    total_bisimilarity_distance = 0
    total_weight = 0
    bisim_distances = dict()
    no_actions = frozenset()
    # Fresh, seeded generator per abstraction: results do not depend on the order
    # of `state_abstractions` nor on earlier use of the global `random` state.
    rng = random.Random(pair_sampling_seed)

    with tqdm(abs_to_unabs.items(), desc="Computing cluster distances", unit="cluster") as pbar:
        for abs_state, unabs_states in pbar:
            # NOTE(review): clusters holding a single original state are skipped,
            # so they carry no weight in the weighted average below.
            if len(unabs_states) < 2:
                continue
            indices = [all_state_unabs_index[s] for s in unabs_states if s in all_state_unabs_index]
            all_pairs = list(combinations(indices, 2))

            # Sample a subset of pairs (at least one, at most all of them).
            n_sample = min(len(all_pairs), max(1, int(len(all_pairs) * pair_sample_ratio)))
            sampled_pairs = rng.sample(all_pairs, n_sample)
            if len(sampled_pairs) == 0:
                # Only reachable when fewer than two states of the cluster were found
                # in all_state_unabs_index.
                raise ValueError(f"No pairs sampled for abstracted state {abs_state}. Check the pair_sample_ratio or the number of unabstracted states.")

            bisimilarity_sum = 0
            state_size = len(indices)  # cluster size, used as the cluster's weight
            cluster_distances = dict()

            for s_i, s_j in sampled_pairs:
                # Union of the actions available in either state (can be empty,
                # e.g. for two terminal states -> distance 0).
                all_possible_actions = actions_by_state.get(s_i, no_actions) | actions_by_state.get(s_j, no_actions)

                max_action_dist = 0
                for action in all_possible_actions:
                    next_vecs_i, prob_i, r_i = precomputed_transitions.get(
                        (s_i, action), ([s_i], [1.0], missing_transition_reward))
                    next_vecs_j, prob_j, r_j = precomputed_transitions.get(
                        (s_j, action), ([s_j], [1.0], missing_transition_reward))

                    # Next states given as integer indices are replaced by their
                    # standardised feature vectors.
                    if isinstance(next_vecs_i[0], (int, np.integer)):
                        next_vecs_i = encoded_state_array[next_vecs_i]
                    if isinstance(next_vecs_j[0], (int, np.integer)):
                        next_vecs_j = encoded_state_array[next_vecs_j]

                    reward_diff = abs(r_i - r_j)
                    trans_dist = wasserstein_distance_nd(u_values=next_vecs_i, v_values=next_vecs_j, u_weights=prob_i, v_weights=prob_j)
                    action_dist = (1 - c) * reward_diff + (c * trans_dist)
                    max_action_dist = max(max_action_dist, action_dist)

                bisimilarity_sum += max_action_dist
                cluster_distances[(s_i, s_j)] = max_action_dist

            bisim_distances[abs_state] = cluster_distances

            # Average distance over the sampled pairs of this cluster.
            avg_bisimilarity_distance = bisimilarity_sum / len(sampled_pairs)

            # Accumulate the cluster-size-weighted total.
            total_bisimilarity_distance += avg_bisimilarity_distance * state_size
            total_weight += state_size

            pbar.set_postfix(avg_bisimilarity_distance=f"{avg_bisimilarity_distance:.2f}")

    # Weighted average over all evaluated clusters. If no abstract state merges two
    # or more original states, nothing was aggregated and the distance is 0.
    if total_weight > 0:
        weighted_avg_bisimilarity_distance = total_bisimilarity_distance / total_weight
    else:
        weighted_avg_bisimilarity_distance = 0.0
    print(f"Weighted Bisimilarity Distance of {state_abstraction} with {n_states} blocks: {weighted_avg_bisimilarity_distance:.2f}")

    results = {
        'weighted_avg_bisimilarity_distance': weighted_avg_bisimilarity_distance,
        'bisim_distances': bisim_distances,
        'n_states': n_states,
        'ratio': pair_sample_ratio
    }

    filename = f"bisim_metrics_{dataset}_{state_abstraction}_{k}_{ground_thruth}.pkl"
    with open(filename, 'wb') as f:
        pickle.dump(results, f)

=== State Abstraction: k_means with k: 500 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:16<00:00, 1798.18it/s]


Training with 3161 distinct states (6.03%) of original state space) (k=500)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 23393.21it/s]
Computing cluster distances: 100%|██████████| 3161/3161 [00:25<00:00, 125.15cluster/s, avg_bisimilarity_distance=0.00] 


Weighted Bisimilarity Distance of k_means with 3161 blocks: 12.86
=== State Abstraction: k_means_features with k: 200 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1727.46it/s]


Training with 25034 distinct states (47.72%) of original state space) (k=200)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 21782.19it/s]
Computing cluster distances: 100%|██████████| 25034/25034 [00:08<00:00, 2840.34cluster/s, avg_bisimilarity_distance=0.00] 


Weighted Bisimilarity Distance of k_means_features with 25034 blocks: 7.43
=== State Abstraction: approximate_structural with k: 1 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1717.63it/s]


Training with 38652 distinct states (73.68%) of original state space) (k=1)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 22186.77it/s]
Computing cluster distances: 100%|██████████| 38652/38652 [03:33<00:00, 180.91cluster/s, avg_bisimilarity_distance=0.00]


Weighted Bisimilarity Distance of approximate_structural with 38652 blocks: 0.00
=== State Abstraction: k_means_features with k: 50 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1698.15it/s]


Training with 10224 distinct states (19.49%) of original state space) (k=50)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 22306.55it/s]
Computing cluster distances: 100%|██████████| 10224/10224 [00:10<00:00, 938.76cluster/s, avg_bisimilarity_distance=0.00] 


Weighted Bisimilarity Distance of k_means_features with 10224 blocks: 7.81
=== State Abstraction: k_means_features with k: 300 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:18<00:00, 1662.26it/s]


Training with 31329 distinct states (59.72%) of original state space) (k=300)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 23438.20it/s]
Computing cluster distances: 100%|██████████| 31329/31329 [00:10<00:00, 3105.65cluster/s, avg_bisimilarity_distance=0.00]  


Weighted Bisimilarity Distance of k_means_features with 31329 blocks: 7.42
=== State Abstraction: k_means_features with k: 10 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1702.78it/s]


Training with 2994 distinct states (5.71%) of original state space) (k=10)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 22379.85it/s]
Computing cluster distances: 100%|██████████| 2994/2994 [00:46<00:00, 64.78cluster/s, avg_bisimilarity_distance=0.00]   


Weighted Bisimilarity Distance of k_means_features with 2994 blocks: 8.29
=== State Abstraction: k_means with k: 3000 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1675.93it/s]


Training with 16180 distinct states (30.84%) of original state space) (k=3000)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 21606.08it/s]
Computing cluster distances: 100%|██████████| 16180/16180 [00:09<00:00, 1738.32cluster/s, avg_bisimilarity_distance=40.64]


Weighted Bisimilarity Distance of k_means with 16180 blocks: 13.72
=== State Abstraction: k_means with k: 200 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1699.62it/s]


Training with 1280 distinct states (2.44%) of original state space) (k=200)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 22553.35it/s]
Computing cluster distances: 100%|██████████| 1280/1280 [01:07<00:00, 18.86cluster/s, avg_bisimilarity_distance=0.00]  


Weighted Bisimilarity Distance of k_means with 1280 blocks: 12.88
=== State Abstraction: k_means with k: 1000 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1698.64it/s]


Training with 6215 distinct states (11.85%) of original state space) (k=1000)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 21971.72it/s]
Computing cluster distances: 100%|██████████| 6215/6215 [00:16<00:00, 385.27cluster/s, avg_bisimilarity_distance=0.00]  


Weighted Bisimilarity Distance of k_means with 6215 blocks: 13.03
=== State Abstraction: k_means_features with k: 500 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1691.90it/s]


Training with 41973 distinct states (80.01%) of original state space) (k=500)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 23336.62it/s]
Computing cluster distances: 100%|██████████| 41973/41973 [00:06<00:00, 6118.50cluster/s, avg_bisimilarity_distance=0.54]  


Weighted Bisimilarity Distance of k_means_features with 41973 blocks: 7.21
=== State Abstraction: k_means with k: 50 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1695.81it/s]


Training with 335 distinct states (0.64%) of original state space) (k=50)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 23206.32it/s]
Computing cluster distances: 100%|██████████| 335/335 [04:33<00:00,  1.22cluster/s, avg_bisimilarity_distance=0.00] 


Weighted Bisimilarity Distance of k_means with 335 blocks: 13.18
=== State Abstraction: k_means with k: 300 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1681.26it/s]


Training with 1929 distinct states (3.68%) of original state space) (k=300)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 23149.61it/s]
Computing cluster distances: 100%|██████████| 1929/1929 [00:46<00:00, 41.73cluster/s, avg_bisimilarity_distance=0.00]  


Weighted Bisimilarity Distance of k_means with 1929 blocks: 12.88
=== State Abstraction: k_means with k: 10 ===
Simulating the environment with 52458 distinct original states


Processing Cases: 100%|██████████| 30000/30000 [00:17<00:00, 1691.89it/s]


Training with 90 distinct states (0.17%) of original state space) (k=10)
Precomputing transitions and encoded states...


States: 100%|██████████| 52458/52458 [00:02<00:00, 21950.36it/s]
Computing cluster distances: 100%|██████████| 90/90 [14:49<00:00,  9.89s/cluster, avg_bisimilarity_distance=0.00]  


Weighted Bisimilarity Distance of k_means with 90 blocks: 13.74
