In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from gurobipy import Model, GRB, quicksum
import networkx as nx
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import random

In [17]:
# LOAD DATA
# Compatibility matrix: rows and columns are relabelled 0..n-1 so that labels
# and integer positions coincide.
compatibility = pd.read_csv(
    '/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/Datas_simulaciones/compatibilidad_total.csv',
    index_col=0,
)
compatibility.index = range(compatibility.shape[0])
compatibility.columns = range(compatibility.shape[1])

# Pairs, recipients and donors (the first CSV column becomes the index).
pairs = pd.read_csv(
    '/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/Datas_simulaciones/parejas.csv',
    index_col=0,
)
recipients = pd.read_csv(
    '/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/Datas_simulaciones/receptores.csv',
    index_col=0,
)
donors = pd.read_csv(
    '/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/Datas_simulaciones/donantes.csv',
    index_col=0,
)

# Weight matrices. NOTE(review): "hr"/"lr" presumably stand for high/low HLA
# resolution (allele/antigen level) - confirm against the data files.
hla_hr = pd.read_csv(
    "/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/KEP_DRDQB/datos/peso_HR.csv",
    index_col=0,
)
hla_lr = pd.read_csv(
    "/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/KEP_DRDQB/datos/peso_lr.csv",
    index_col=0,
)
# CSV headers are read as strings; cast them to integers so columns can be
# addressed with integer labels.
for weights in (hla_hr, hla_lr):
    weights.columns = weights.columns.astype(int)

In [18]:
# Data of each locus
# Folder holding one mismatch matrix per HLA locus.
base = '/Users/valentina/Library/CloudStorage/OneDrive - UAI/TESIS/CODIGO/Datas_simulaciones/locis_por_separado/BR/'
mats = {
    'B': 'mismatch_BR_B.csv',
    'DQ': 'mismatch_BR_DQ.csv',
    'DR': 'mismatch_BR_DR.csv'
}

# Read every matrix once, keyed by locus name.
matrix_mismatch = {
    locus: pd.read_csv(base + archive, index_col=0)
    for locus, archive in mats.items()
}

# Convenience aliases, one per locus.
mismatch_B, mismatch_DQ, mismatch_DR = (
    matrix_mismatch[name] for name in ('B', 'DQ', 'DR')
)

In [19]:
# Mismatch to HLA score: element-wise "2 - mismatch" for every locus matrix.
HLA_DR = mismatch_DR.rsub(2)
HLA_DQ = mismatch_DQ.rsub(2)
HLA_B = mismatch_B.rsub(2)

In [20]:
# Creation of initial graph with graph resolution and a minimum of quality (k)
def create_graph(pairs, compatibility, hla1, k):
    """Build the directed graph of admissible arcs between the given pairs.

    For every ordered couple of labels ``(i, j)`` taken from ``pairs.index``
    an arc ``j -> i`` is added when ``compatibility.at[i, j] == 1`` and
    ``hla1.at[i, j] >= k``. The arc stores ``hla1.at[i, j]`` as ``weight``.

    Parameters
    ----------
    pairs : pandas.DataFrame
        Only its index is used; it supplies the node labels.
    compatibility : pandas.DataFrame
        Matrix addressed by label; the value 1 marks an admissible arc.
    hla1 : pandas.DataFrame
        Weight matrix addressed by the same labels.
    k : number
        Minimum weight an arc needs in order to be kept.

    Returns
    -------
    networkx.DiGraph
        Graph containing only the nodes that take part in at least one arc.
    """
    G = nx.DiGraph()
    labels = list(pairs.index)
    for i in labels:
        for j in labels:
            # Check compatibility first so the weight is only looked up (once)
            # for compatible couples.
            if compatibility.at[i, j] != 1:
                continue
            weight = hla1.at[i, j]
            if weight >= k:
                G.add_edge(j, i, weight=weight)
    return G

In [21]:
# Changing initial weights to optimization weights on the initial graph arcs
def changing_resolution_weights(G, hla2):
    """Overwrite the ``weight`` of every arc of ``G`` with values from ``hla2``.

    For an arc ``(u, v)`` the new weight is ``hla2.iloc[int(v), int(u)]``,
    i.e. node labels are used as integer positions in ``hla2``. The graph is
    modified in place and nothing is returned.

    Problems are reported on stdout and the affected arc keeps its previous
    weight: a position that does not exist in ``hla2`` prints the
    "no se encontró peso" message, any other failure prints the generic one.
    """
    for u, v, data in G.edges(data=True):
        try:
            data['weight'] = hla2.iloc[int(v), int(u)]
        except (KeyError, IndexError):
            # Positional access signals a missing row/column with IndexError,
            # so it must be caught here for this message to be reachable.
            print(f"No se encontró peso para el arco ({u}, {v})")
        except Exception as e:
            print(f"Error al actualizar peso para el arco ({u}, {v}): {str(e)}")


In [22]:
# Optimization function 
def optimization(G, l=3, k=3):
    """Select a set of node-disjoint cycles of ``G`` with a binary program.

    Parameters
    ----------
    G : networkx.DiGraph
        Graph whose arcs carry a ``weight`` attribute.
    l : int, default 3
        Maximum number of nodes allowed in a cycle.
    k : number, default 3
        Minimum ``weight`` every arc of a cycle must have.

    Returns
    -------
    (networkx.DiGraph, list)
        A graph holding the arcs (with their weights) of the selected cycles,
        and the selected cycles as lists of nodes. Both are empty when the
        solver does not finish with status ``GRB.OPTIMAL``.
    """
    def cycle_arcs(cycle):
        # Consecutive (u, v) couples of the cycle, including the closing arc.
        return list(zip(cycle, cycle[1:] + cycle[:1]))

    # Enumerate cycles up to the requested length. The bound follows ``l``
    # (it used to be fixed at 3, which silently ignored any larger ``l``).
    # A non-positive ``l`` admits no cycle at all.
    total_cycles = nx.simple_cycles(G, length_bound=l) if l > 0 else []
    valid_cycles = [
        cycle for cycle in total_cycles
        if len(cycle) <= l and all(G[u][v]['weight'] >= k for u, v in cycle_arcs(cycle))
    ]

    P = len(G.nodes())  # "m" of the paper
    Z = 6  # 6 for antigen and allele level

    # Optimization model
    m = Model("optimization")
    m.setParam('OutputFlag', 0)

    # One binary variable per admissible cycle.
    x = {
        tuple(cycle): m.addVar(vtype=GRB.BINARY, name=f"x_{'_'.join(map(str, cycle))}")
        for cycle in valid_cycles
    }

    def cycle_value(cycle):
        # Cycle length plus a smaller quality term (weights scaled by Z and P),
        # all normalised by P.
        quality = sum(G[u][v]['weight'] / Z for u, v in cycle_arcs(cycle))
        return (len(cycle) + (1 / P) * quality) / P

    m.setObjective(
        quicksum(x[tuple(cycle)] * cycle_value(cycle) for cycle in valid_cycles),
        GRB.MAXIMIZE
    )

    # Restriction: every node takes part in at most one selected cycle.
    # The variables touching each node are collected in a single pass instead
    # of scanning all cycles once per node.
    vars_by_node = {node: [] for node in G.nodes()}
    for cycle in valid_cycles:
        for node in cycle:
            vars_by_node[node].append(x[tuple(cycle)])
    for node, node_vars in vars_by_node.items():
        m.addConstr(quicksum(node_vars) <= 1, name=f"node_usage_{node}")

    m.optimize()

    G_optimal = nx.DiGraph()
    selected_cycles = []

    if m.status == GRB.OPTIMAL:
        for cycle in valid_cycles:
            if x[tuple(cycle)].X > 0.5:
                selected_cycles.append(cycle)
                for u, v in cycle_arcs(cycle):
                    G_optimal.add_edge(u, v, weight=G[u][v]['weight'])

    return G_optimal, selected_cycles

In [25]:
# Complete definition of the simulation

def update_matrices(indexes, df):
    """Return the sub-matrix of ``df`` at the integer positions ``indexes``.

    The same positions are applied to the rows and to the columns.
    """
    selected_rows = df.iloc[indexes]
    return selected_rows.iloc[:, indexes]
# NOTE(review): module-level dict that no visible cell reads or fills afterwards.
# The name looks like a typo for "frequency"; left as is in case other code uses it.
frequiency_waiting_nodes = {}

def run_simulation(total_time, arrival_rate, departure_rate, match_run, pairs, compatibility, hla_lr, hla_hr):
    """Simulate the exchange pool month by month and summarise the outcome per ETHCAT.

    Each month:

    1. A Poisson(``arrival_rate``) number of pairs is drawn without
       replacement from the labels of ``pairs.index`` not used yet and is
       appended to the waiting list.
    2. A Poisson(``departure_rate``) number of waiting pairs is drawn and
       leaves the list unmatched.
    3. Every ``match_run`` months a match run is executed on the waiting
       list: the graph is built with ``create_graph`` (``hla_lr`` weights,
       minimum weight 2), re-weighted with ``changing_resolution_weights``
       (``hla_lr``) and solved with ``optimization`` (``l=3``, ``k=2``).
       Pairs in the selected cycles leave the list.

    Besides its arguments the function reads the notebook globals
    ``recipients``, ``HLA_B``, ``HLA_DR`` and ``HLA_DQ`` and uses NumPy's
    global random state.

    Parameters
    ----------
    total_time : int
        Number of simulated months.
    arrival_rate, departure_rate : float
        Poisson means of monthly arrivals and unmatched departures.
    match_run : int
        A match run takes place whenever ``(month + 1) % match_run == 0``.
    pairs : pandas.DataFrame
        Its index provides the pool of node labels.
    compatibility, hla_lr, hla_hr : pandas.DataFrame
        Square matrices; they are addressed by integer position with node
        labels, so labels are assumed to coincide with positions.

    Returns
    -------
    dict
        ``historial_cycles``, ``historial_departures``, ``entries_nodes``,
        ``arrivals_by_ethcat``, and for every ETHCAT code e in 1, 2, 4, 5, 6, 7:
        ``waiting_time_mean{e}``, ``lr_quality_ethcat{e}``,
        ``hr_quality_ethcat{e}``, ``avg_{B,DR,DQ}_HLA_ethcat{e}``; plus the
        pooled ``HLA_{B,DR,DQ}_total``, ``F_per_ethcat``, ``L_per_ethcat``,
        ``F_total``, ``lr_quality_total``, ``hr_quality_total`` and
        ``Waiting_time_mean``. Means over empty groups are NaN.
    """
    ethcats = (1, 2, 4, 5, 6, 7)
    locus_scores = {'B': HLA_B, 'DR': HLA_DR, 'DQ': HLA_DQ}

    # Per-ETHCAT accumulators (one list per code) for matched recipients.
    lr_quality = {e: [] for e in ethcats}
    hr_quality = {e: [] for e in ethcats}
    locus_quality = {locus: {e: [] for e in ethcats} for locus in locus_scores}
    departures_by_ethcat = {e: [] for e in ethcats}

    waiting_list = []
    waiting_times = {}
    arrivals_by_ethcat = {}
    historial_cycles = []
    historial_departures = []
    cont = 0
    available_indexes = set(pairs.index)

    # Simulation per month
    for month in range(total_time):
        # --- Arrivals: never draw more pairs than are still unused.
        new_entries = np.random.poisson(arrival_rate)
        new_entries = min(new_entries, len(available_indexes))
        new_index = np.random.choice(list(available_indexes), size=new_entries, replace=False)
        cont += len(new_index)
        waiting_list.extend(new_index)
        available_indexes.difference_update(new_index)

        for idx in new_index:
            waiting_times[idx] = {'arrival': month}
            # NOTE(review): arrivals look the recipient up through the 'Nodo'
            # column while every other lookup below uses the index label;
            # both are kept as written - confirm that they agree.
            ethnicity = recipients.loc[recipients['Nodo'] == idx, 'ETHCAT'].iloc[0]
            arrivals_by_ethcat[ethnicity] = arrivals_by_ethcat.get(ethnicity, 0) + 1

        # --- Unmatched departures.
        departure = np.random.poisson(departure_rate)
        departure_indexes = []
        if departure:
            departure = min(departure, len(waiting_list))
            departure_indexes = np.random.choice(waiting_list, size=departure, replace=False)
            departed = set(departure_indexes)
            waiting_list = [idx for idx in waiting_list if idx not in departed]
            historial_departures.extend(departure_indexes)

        for idx in departure_indexes:
            departures_by_ethcat[recipients.loc[idx, 'ETHCAT']].append(idx)

        # --- Match run (only every ``match_run`` months).
        if (month + 1) % match_run != 0:
            continue

        waiting_list_index = waiting_list.copy()
        df_waiting_list = pairs.loc[waiting_list_index]

        filtered_compatibility = update_matrices(waiting_list_index, compatibility)
        filtered_weight = update_matrices(waiting_list_index, hla_lr)  # Insert the resolution data that you want for create the graph

        G = create_graph(df_waiting_list, filtered_compatibility, filtered_weight, k=2)  # Insert the minimum weight for create the graph
        changing_resolution_weights(G, hla_lr)  # Insert the resolution data that you want for the optimization

        G_optimal, selected_cycles = optimization(G, l=3, k=2)  # Insert the minimum quality that you want for the cycles

        # Record the quality of every selected arc under the ETHCAT of its head node v.
        for u, v, _ in G_optimal.edges(data=True):
            node_ethcat = recipients.loc[v, 'ETHCAT']
            for locus, scores in locus_scores.items():
                locus_quality[locus][node_ethcat].append(scores.iloc[v, u])
            lr_quality[node_ethcat].append(hla_lr.iloc[v, u])
            hr_quality[node_ethcat].append(hla_hr.iloc[v, u])

        historial_cycles.extend(selected_cycles)

        matched_now = {node for cycle in selected_cycles for node in cycle}
        waiting_list = [idx for idx in waiting_list if idx not in matched_now]
        for idx in matched_now:
            waiting_times[idx]['departure'] = month

    # --- Waiting time of every matched node.
    matched_nodes = [node for cycle in historial_cycles for node in cycle]
    matched_ethcats = [recipients.loc[node, 'ETHCAT'] for node in matched_nodes]

    data = [
        {
            'node': idx,
            'waiting time (months)': waiting_times[idx]['departure'] - waiting_times[idx]['arrival'],
            'ethnicity': ethnicity,
        }
        for idx, ethnicity in zip(matched_nodes, matched_ethcats)
        if idx in waiting_times
    ]

    # Explicit columns and dtypes keep this working when no cycle was ever
    # selected (an empty frame would otherwise have no 'ethnicity' column).
    df_ethnicity = pd.DataFrame(data, columns=['node', 'waiting time (months)', 'ethnicity'])
    df_ethnicity = df_ethnicity.astype({'waiting time (months)': float, 'ethnicity': int})
    waiting_by_ethcat = {
        e: df_ethnicity.loc[df_ethnicity['ethnicity'] == e, 'waiting time (months)']
        for e in ethcats
    }

    # --- Shares of arrivals that left unmatched (L) or were matched (F).
    # ETHCATs without arrivals use a denominator of 1.
    outgoing_per_ethcat = {e: len(departures_by_ethcat[e]) for e in ethcats}
    L_per_ethcat = {e: outgoing_per_ethcat[e] / arrivals_by_ethcat.get(e, 1) for e in ethcats}
    F_per_ethcat = {
        e: sum(1 for ethnicity in matched_ethcats if ethnicity == e) / arrivals_by_ethcat.get(e, 1)
        for e in ethcats
    }

    total_trasplants = len(matched_nodes)
    total_entries = sum(arrivals_by_ethcat.values())
    F_total = total_trasplants / total_entries if total_entries > 0 else 0

    def pooled(per_ethcat):
        # All values of a per-ETHCAT accumulator, concatenated in ETHCAT order.
        return [value for e in ethcats for value in per_ethcat[e]]

    results = {
        'historial_cycles': historial_cycles,
        'historial_departures': historial_departures,
        'entries_nodes': cont,
        'arrivals_by_ethcat': arrivals_by_ethcat,
    }
    results.update({f'waiting_time_mean{e}': waiting_by_ethcat[e].mean() for e in ethcats})
    results.update({f'lr_quality_ethcat{e}': np.mean(lr_quality[e]) for e in ethcats})
    results.update({f'hr_quality_ethcat{e}': np.mean(hr_quality[e]) for e in ethcats})
    for locus in locus_scores:
        results.update({
            f'avg_{locus}_HLA_ethcat{e}': np.mean(locus_quality[locus][e]) for e in ethcats
        })
        results[f'HLA_{locus}_total'] = np.mean(pooled(locus_quality[locus]))
    results['F_per_ethcat'] = F_per_ethcat
    results['L_per_ethcat'] = L_per_ethcat
    results['F_total'] = F_total
    results['lr_quality_total'] = np.mean(pooled(lr_quality))
    results['hr_quality_total'] = np.mean(pooled(hr_quality))
    results['Waiting_time_mean'] = pd.concat([waiting_by_ethcat[e] for e in ethcats]).mean()
    return results

In [26]:
# Running the simulations

# Result keys that are copied unchanged from run_simulation's output into each
# record, listed in the column order of the final results table.
ethcat_codes = (1, 2, 4, 5, 6, 7)
copied_keys = (
    ['arrivals_by_ethcat']
    + [
        f'{prefix}{e}'
        for prefix in ('waiting_time_mean', 'lr_quality_ethcat', 'hr_quality_ethcat')
        for e in ethcat_codes
    ]
    + [
        key
        for locus_name in ('B', 'DR', 'DQ')
        for key in [
            *(f'avg_{locus_name}_HLA_ethcat{e}' for e in ethcat_codes),
            f'HLA_{locus_name}_total',
        ]
    ]
    + [
        'F_per_ethcat',
        'L_per_ethcat',
        'F_total',
        'lr_quality_total',
        'hr_quality_total',
        'Waiting_time_mean',
    ]
)

simulations_results = []
for i in range(100):
    print(f"Runing simulation {i + 1} of 100")
    results = run_simulation(
        total_time=10*12,
        arrival_rate=(990/(10*12)),
        departure_rate=((990/(10*12))*0.29),
        match_run=3,
        pairs=pairs,
        compatibility=compatibility,
        hla_lr = hla_lr,
        hla_hr=hla_hr
    )

    # Run identifier and derived totals first, then the copied metrics.
    record = {
        'simulacion': i + 1,
        'cycles': results['historial_cycles'],
        'total_cycles': len(results['historial_cycles']),
        'total_departures': len(results['historial_departures']),
        'total_entries': results['entries_nodes'],
    }
    record.update((key, results[key]) for key in copied_keys)
    simulations_results.append(record)
    

Runing simulation 1 of 100
Runing simulation 2 of 100
Runing simulation 3 of 100
Runing simulation 4 of 100
Runing simulation 5 of 100
Runing simulation 6 of 100
Runing simulation 7 of 100
Runing simulation 8 of 100
Runing simulation 9 of 100
Runing simulation 10 of 100
Runing simulation 11 of 100
Runing simulation 12 of 100
Runing simulation 13 of 100
Runing simulation 14 of 100
Runing simulation 15 of 100
Runing simulation 16 of 100
Runing simulation 17 of 100
Runing simulation 18 of 100
Runing simulation 19 of 100
Runing simulation 20 of 100
Runing simulation 21 of 100
Runing simulation 22 of 100
Runing simulation 23 of 100
Runing simulation 24 of 100
Runing simulation 25 of 100
Runing simulation 26 of 100
Runing simulation 27 of 100
Runing simulation 28 of 100
Runing simulation 29 of 100
Runing simulation 30 of 100
Runing simulation 31 of 100
Runing simulation 32 of 100
Runing simulation 33 of 100
Runing simulation 34 of 100
Runing simulation 35 of 100
Runing simulation 36 of 100
R

In [27]:
# One row per simulation run.
df_results = pd.DataFrame(simulations_results)

# Pooled per-locus averages, one value per run.
list_HLA_B_total = df_results['HLA_B_total'].tolist()
list_HLA_DR_total = df_results['HLA_DR_total'].tolist()
list_HLA_DQ_total = df_results['HLA_DQ_total'].tolist()

# Per-ETHCAT averages of each locus; runs without a value (NaN) are dropped.
lists_B = {
    code: df_results[f'avg_B_HLA_ethcat{code}'].dropna().tolist()
    for code in (1, 2, 4, 5, 6, 7)
}

lists_DR = {
    code: df_results[f'avg_DR_HLA_ethcat{code}'].dropna().tolist()
    for code in (1, 2, 4, 5, 6, 7)
}

lists_DQ = {
    code: df_results[f'avg_DQ_HLA_ethcat{code}'].dropna().tolist()
    for code in (1, 2, 4, 5, 6, 7)
}



In [28]:
import scipy.stats as st

# ETHCAT codes reported in the summary table, in row order.
ethnicity = [1, 2, 4, 5, 6, 7]
# Number of simulation runs (one row of df_results per run).
n_sim = df_results.shape[0]

def media_ci(list):
    """Mean and 95% Student-t confidence interval of a sample.

    NaN entries are discarded before anything is computed, so the mean, the
    standard deviation and the sample size all refer to the same valid
    observations.

    Parameters
    ----------
    list : sequence or pandas.Series of numbers
        The sample. (The name shadows the builtin; it is kept so existing
        callers keep working.)

    Returns
    -------
    (float, str)
        The mean and the text ``"mean [low; high]"`` with three decimals.
        The bounds are NaN when fewer than two valid observations exist,
        and the mean is NaN when there is none.
    """
    values = np.asarray(list, dtype=float).ravel()
    values = values[~np.isnan(values)]
    n = values.size

    m = values.mean() if n > 0 else np.nan
    if n >= 2:
        s = values.std(ddof=1)
        low, high = st.t.interval(0.95, n - 1, loc=m, scale=s / np.sqrt(n))
    else:
        # A sample standard deviation needs at least two observations.
        low = high = np.nan
    return m, f"{m:.3f} [{low:.3f}; {high:.3f}]"

rows = []
# Reporting per ethnicity

for e in ethnicity:
    # Average number of arrivals and of transplanted nodes per run.
    arrivals_e = df_results['arrivals_by_ethcat'].apply(lambda d: d.get(e, 0)).sum() / n_sim
    transplants_e = df_results['cycles'].apply(
        lambda cycles: sum(1 for c in cycles for n in c if recipients.loc[n, 'ETHCAT'] == e)
    ).sum() / n_sim

    # Matched (F) and left-unmatched (L) shares, one value per run.
    F_vals = df_results['F_per_ethcat'].apply(lambda d: d[e])
    L_vals = df_results['L_per_ethcat'].apply(lambda d: d[e])
    f_mean, txt_F = media_ci(F_vals)
    l_mean, txt_L = media_ci(L_vals)
    still_in_kep = round(1 - f_mean - l_mean, 3)

    _, txt_antigen = media_ci(df_results[f'lr_quality_ethcat{e}'])
    _, txt_allele = media_ci(df_results[f'hr_quality_ethcat{e}'])

    waiting = df_results[f'waiting_time_mean{e}'].mean()

    _, txt_B = media_ci(lists_B[e])
    _, txt_dr = media_ci(lists_DR[e])
    _, txt_dq = media_ci(lists_DQ[e])

    rows.append({
        'Ethnicity(s)': e,
        'Arrivals': round(arrivals_e, 2),
        'Transplants': round(transplants_e, 2),
        'F(s) (Matched)': txt_F,
        'HLA(s) Antigen': txt_antigen,
        'HLA(s) Allele': txt_allele,
        'Waiting Time': round(waiting, 3),
        'L(s) (Left Unmatched)': txt_L,
        '1-F(s)-L(s) (Still in KEP)': still_in_kep,
        'HLA B': txt_B,
        'HLA DR': txt_dr,
        'HLA DQ': txt_dq,

    })

# Reporting the totals

total_arrivals = sum(df_results['arrivals_by_ethcat'].apply(lambda d: sum(d.values()))) / n_sim
total_transplants = df_results['cycles'].apply(lambda cycles: sum(len(c) for c in cycles)).sum() / n_sim

F_s_total, txt_F_total = media_ci(df_results['F_total'])

# Share of arrivals that left unmatched, one value per run.
L_vals_flat = (df_results['total_departures'] / df_results['total_entries']).tolist()
L_s_total, txt_L_total = media_ci(L_vals_flat)

_, txt_total_lr_quality = media_ci(df_results['lr_quality_total'])
_, txt_total_hr_quality = media_ci(df_results['hr_quality_total'])

# Rounded like the per-ethnicity rows.
txt_waiting_time_mean = round(np.mean(df_results['Waiting_time_mean']), 3)

# The former alternative "still in KEP" total summed the per-ETHCAT L ratios
# and divided them by the arrivals; it was never shown in the table and has
# been removed together with the unused standard deviations of F.

_, txt_B_total = media_ci(list_HLA_B_total)
_, txt_dr_total = media_ci(list_HLA_DR_total)
_, txt_dq_total = media_ci(list_HLA_DQ_total)

rows.append({
    'Ethnicity(s)': 'Entire Population',
    'Arrivals': round(total_arrivals, 2),
    'Transplants': round(total_transplants, 2),
    'F(s) (Matched)': txt_F_total,
    'HLA(s) Antigen': txt_total_lr_quality,
    'HLA(s) Allele': txt_total_hr_quality,
    'Waiting Time': txt_waiting_time_mean,
    'L(s) (Left Unmatched)': txt_L_total,
    # Numeric and rounded, consistent with the per-ethnicity rows.
    '1-F(s)-L(s) (Still in KEP)': round(1 - L_s_total - F_s_total, 3),
    'HLA B': txt_B_total,
    'HLA DR': txt_dr_total,
    'HLA DQ': txt_dq_total
})

df_final_table = pd.DataFrame(rows)
display(df_final_table)

Unnamed: 0,Ethnicity(s),Arrivals,Transplants,F(s) (Matched),HLA(s) Antigen,HLA(s) Allele,Waiting Time,L(s) (Left Unmatched),1-F(s)-L(s) (Still in KEP),HLA B,HLA DR,HLA DQ
0,1,590.74,409.91,0.694 [0.690; 0.698],3.064 [3.055; 3.073],2.147 [2.137; 2.157],3.089,0.274 [0.271; 0.278],0.032,0.703 [0.698; 0.708],1.063 [1.058; 1.068],1.298 [1.294; 1.303]
1,2,169.59,111.03,0.654 [0.647; 0.662],2.756 [2.741; 2.771],1.631 [1.615; 1.646],3.184,0.305 [0.298; 0.312],0.041,0.543 [0.532; 0.553],0.943 [0.933; 0.952],1.270 [1.261; 1.279]
2,4,146.89,92.63,0.630 [0.623; 0.638],2.817 [2.798; 2.837],1.714 [1.694; 1.733],3.779,0.326 [0.319; 0.332],0.044,0.573 [0.562; 0.584],1.010 [1.000; 1.021],1.234 [1.225; 1.243]
3,5,55.29,34.17,0.618 [0.608; 0.628],2.602 [2.575; 2.629],1.423 [1.393; 1.453],4.102,0.337 [0.327; 0.348],0.045,0.485 [0.466; 0.504],0.881 [0.863; 0.899],1.236 [1.217; 1.255]
4,6,7.84,5.81,0.741 [0.713; 0.770],2.764 [2.700; 2.828],1.683 [1.599; 1.767],2.099,0.242 [0.215; 0.270],0.016,0.572 [0.531; 0.612],0.929 [0.882; 0.976],1.263 [1.223; 1.303]
5,7,5.9,2.81,0.478 [0.440; 0.516],2.757 [2.653; 2.860],1.377 [1.241; 1.513],4.889,0.450 [0.413; 0.487],0.072,0.516 [0.443; 0.589],0.889 [0.819; 0.959],1.352 [1.290; 1.413]
6,Entire Population,976.25,656.36,0.672 [0.668; 0.676],2.949 [2.942; 2.955],1.953 [1.946; 1.961],3.259109,0.292 [0.289; 0.295],0.036,0.644 [0.640; 0.648],1.024 [1.020; 1.028],1.281 [1.277; 1.285]


In [None]:
# Write the summary table (with its row index) to an Excel workbook in the
# current working directory.
df_final_table.to_excel(excel_writer="ofresults_BRBR.xlsx", index=True)