# ABM_Tutorial_Wellcome_EBEC [see latest version on GitHub]
### File: 2_notebook_exploration.ipynb
### Date: 2025.06.24

### Description 

- Exemplar simulation: Dropdown - choose model condition; Slider - choose time point 
    - load a big dataframe including ONE simulation per model condition, for ALL time points
    - show scatter plot of cells, histogram of tumour sizes

- Results from replicate simulations:
    - APP1: Dropdown - choose model type; Dropdown - choose seeding density
        - histogram of tumour sizes, colour coded by model condition (time point)
        - scatter plot of time-averaged tumour count vs time, size vs time; curves corresponding to replicate simulations

    - APP2: Dropdown - choose time point; Dropdown - choose seeding density
        - histogram of tumour sizes, colour coded by model condition (model type)
        - scatter plot of tumour size mean vs std, marker size = count 


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from dash import Dash, callback, Input, Output, State, html, dcc

In [None]:
plt.rcParams.update(
    {"pdf.fonttype":42, "font.family":"Arial", "font.size": 10}
)

In [None]:
from typing import Dict, Tuple

# Functions 

In [None]:
def get_cell_configurations() -> Tuple[
    Dict[int, str], Dict[int, str], Dict[str, str], Dict[str, float]
]:
    """Return the lookup tables used to label and draw lattice sites.

    Returns:
        A 4-tuple ``(site_types, sites_states, color_map, markersize_map)``:
            site_types: Dict[int, str] maps integer codes to site type names
            sites_states: Dict[int, str] maps integer codes to site state names
            color_map: Dict[str, str] maps site type names to plot colours
            markersize_map: Dict[str, float] maps site type names to marker sizes
    """

    # integer code -> short name of the site type
    site_types = {
        0: "CV",   # central vein
        1: "PT",   # portal triad
        2: "HEP",  # hepatocyte
        3: "NO",   # not occupied
        4: "CC",   # cancer cell
        5: "ECM",  # presumably extracellular matrix -- TODO confirm
    }

    # integer code -> name of the site state
    sites_states = {
        0: "quiescent",
        1: "proliferative",
        2: "apoptotic",
    }

    # site type name -> colour (CSS colour name or hex string)
    color_map = {
        "CV": "blue",
        "PT": "red",
        "HEP": "lightgreen",
        "NO": "#EEEEEE",
        "CC": "#525100",
        "ECM": "magenta",
    }

    # site type name -> marker size
    markersize_map = {
        "CV": 2,
        "PT": 2,
        "HEP": 0.75,
        "NO": 0.75,
        "CC": 1.25,
        "ECM": 1.25,
    }

    return (site_types, sites_states, color_map, markersize_map)


### [1] Exemplar simulation

In [None]:
# unpack the site lookup tables (type names, state names, colours, marker sizes)
site_types, sites_states, color_map, markersize_map = get_cell_configurations()

In [None]:
# local run: read the combined snapshots table from the relative ./files folder
path_to_combined_snapshots = "./files/combined_simulation_snapshots_at_40.csv"
combined_snapshots = pd.read_csv(filepath_or_buffer=path_to_combined_snapshots)

# Google Colab run
# [TO UPDATE HERE]

In [None]:
# preview the first five rows of the snapshots table
combined_snapshots.head(n=5)

In [None]:
# 'model_condition' is an underscore-separated label: the first two tokens
# form the model type, the remaining tokens form the seeding density
_snapshot_condition_tokens = combined_snapshots['model_condition'].map(
    lambda condition: condition.split('_')
)
combined_snapshots['model_type'] = _snapshot_condition_tokens.map(
    lambda tokens: '_'.join(tokens[:2])
)
combined_snapshots['seeding_density'] = _snapshot_condition_tokens.map(
    lambda tokens: '_'.join(tokens[2:])
)

# working copy used by the cells below
data = combined_snapshots.copy()

In [None]:
# ===== plotly dash app: exemplar simulation snapshots =====

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(
    children=[
        # top row: the two selectors, side by side
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - model type
                        dcc.Dropdown(
                            options=sorted(combined_snapshots['model_type'].unique()),
                            value=combined_snapshots['model_type'].iloc[0],
                            placeholder="Select model type...",
                            id='dropdown-model-type',
                        ),
                    ],
                ),
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - seeding density
                        dcc.Dropdown(
                            options=sorted(combined_snapshots['seeding_density'].unique()),
                            value=combined_snapshots['seeding_density'].iloc[0],
                            placeholder="Select seeding density...",
                            id='dropdown-seeding-density',
                        ),
                    ],
                ),
            ],
        ),

        # Scatter plot of the sites, filled in by the callback
        html.Div(
            children=[
                html.Div(children=[dcc.Graph(id='scatter1')]),
            ],
        ),
    ],
)


# Register on `app` itself: the module-level `callback` decorator is not bound
# to a particular Dash instance, and this notebook creates several apps.
@app.callback(
    [
        Output('scatter1', 'figure'),
    ],
    [
        Input('dropdown-model-type', 'value'),
        Input('dropdown-seeding-density', 'value')
    ]
)
def update_plot(value1, value2):
    """Redraw the site scatter plot for the selected model condition.

    Args:
        value1: model type chosen in 'dropdown-model-type'.
        value2: seeding density chosen in 'dropdown-seeding-density'.

    Returns:
        list: a one-element list holding the figure for 'scatter1'.
    """

    # Read the snapshots table directly: the global name `data` is rebound to
    # a different table further down the notebook.
    df_plot = combined_snapshots.loc[
        (combined_snapshots['model_type'] == value1)
        & (combined_snapshots['seeding_density'] == value2)
    ].copy()

    # translate integer site type codes into names, so that the legend and
    # color_map (keyed by name) can be used
    df_plot["site_type_name"] = df_plot["site_type"].map(
        lambda code: site_types[code]
    )

    # ===== scatter plot: one panel per 'pid', two panels per row =====
    scatter1 = px.scatter(
        data_frame=df_plot,
        x='x', y='y',
        color='site_type_name',
        facet_col='pid', facet_col_wrap=2,
        color_discrete_map=color_map,
    )

    # customize the figure
    scatter1.update_layout(
        template='simple_white', width=1200, height=1200
    )
    scatter1.update_traces(
        marker=dict(size=1.5)
    )
    scatter1.update_xaxes(title=dict(text="x", font_family="Arial", font_size=14))
    scatter1.update_yaxes(
        title=dict(text="y", font_family="Arial", font_size=14),
        # equal scaling of both axes
        scaleanchor="x", scaleratio=1
    )

    return [scatter1]


if __name__ == '__main__':
    app.run(debug=True)

In [None]:
# inspect the rows of one model condition (model_3, SeedDen_1)
df_plot = data[
    (data['model_type'] == 'model_3') & (data['seeding_density'] == 'SeedDen_1')
].copy()
df_plot

### [2] Results from replicate simulations

In [None]:
# local run: read the combined tumour size table from the relative ./files folder
path_to_combined_results = "./files/combined_results_tumour_sizes.csv"
combined_results = pd.read_csv(filepath_or_buffer=path_to_combined_results)

# Google Colab run
# [TO UPDATE HERE]

In [None]:
# 'model_condition' is an underscore-separated label: the first two tokens
# form the model type, the remaining tokens form the seeding density
_results_condition_tokens = combined_results['model_condition'].map(
    lambda condition: condition.split('_')
)
combined_results['model_type'] = _results_condition_tokens.map(
    lambda tokens: '_'.join(tokens[:2])
)
combined_results['seeding_density'] = _results_condition_tokens.map(
    lambda tokens: '_'.join(tokens[2:])
)

# working copy used by the cells below
data = combined_results.copy()

In [None]:
# distinct model types present in the working table
data['model_type'].unique()

In [None]:
# ===== plotly dash APP1: replicate results per model type / seeding density =====

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(
    children=[
        # top row: the two selectors, side by side
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - model type
                        dcc.Dropdown(
                            options=sorted(combined_results['model_type'].unique()),
                            value=combined_results['model_type'].iloc[0],
                            placeholder="Select model type...",
                            id='dropdown-model-type',
                        ),
                    ],
                ),
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - seeding density
                        dcc.Dropdown(
                            options=sorted(combined_results['seeding_density'].unique()),
                            value=combined_results['seeding_density'].iloc[0],
                            placeholder="Select seeding density...",
                            id='dropdown-seeding-density',
                        ),
                    ],
                ),
            ],
        ),

        # Histograms
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[
                        html.H3('Area'),
                        dcc.Graph(id='histogram1'),
                    ],
                ),
                html.Div(
                    className="six columns",
                    children=[
                        html.H3('Area scaled by median'),
                        dcc.Graph(id='histogram2'),
                    ],
                ),
            ],
        ),

        # Scatter plots
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[dcc.Graph(id='scatter1')],
                ),
                html.Div(
                    className="six columns",
                    children=[dcc.Graph(id='scatter2')],
                ),
            ],
        ),
    ],
)


def _step_histogram_trace(values, name, bins=30):
    """Build a step-line trace of the density histogram of *values*.

    Args:
        values: 1-D array of finite numbers to bin.
        name: legend label of the trace.
        bins: number of equal-width bins passed to ``np.histogram``.

    Returns:
        go.Scatter: one point per bin, placed at the bin centre.
    """
    # density=True so the values match the "Probability density" axis label
    density, edges = np.histogram(values, bins=bins, density=True)
    # np.histogram returns bins + 1 edges; use the bin centres so x and y have
    # the same length and the 'hvh' steps change exactly at the bin edges
    centres = (edges[:-1] + edges[1:]) / 2
    return go.Scatter(
        x=centres, y=density,
        line=dict(width=1, shape='hvh'),
        name=name,
    )


# Register on `app` itself: the module-level `callback` decorator is not bound
# to a particular Dash instance, and this notebook creates several apps.
@app.callback(
    [
        Output('histogram1', 'figure'),
        Output('histogram2', 'figure'),
        Output('scatter1', 'figure'),
        Output('scatter2', 'figure'),
    ],
    [
        Input('dropdown-model-type', 'value'),
        Input('dropdown-seeding-density', 'value')
    ]
)
def update_plot(value1, value2):
    """Redraw the APP1 figures for one model type and seeding density.

    Args:
        value1: model type chosen in 'dropdown-model-type'.
        value2: seeding density chosen in 'dropdown-seeding-density'.

    Returns:
        list: ``[histogram1, histogram2, scatter1, scatter2]`` where
            histogram1 - log10 tumour size histogram, one trace per time point
            histogram2 - same, with sizes divided by their median first
            scatter1 - mean log10 tumour size vs time, one line per 'pid'
            scatter2 - number of tumours vs time, one line per 'pid'
    """

    # Read the results table directly instead of the rebindable global `data`.
    data_subset = combined_results.loc[
        (combined_results['model_type'] == value1)
        & (combined_results['seeding_density'] == value2)
    ].copy()

    # ===== histograms of tumour areas (per DBSCAN cluster) =====
    histogram1_data = []
    histogram2_data = []
    for t in sorted(data_subset['time'].unique()):

        areas = data_subset.loc[data_subset['time'] == t, 'size'].values
        # scale by the median, as stated in the panel title and axis label
        areas_scaled = areas / np.median(areas)

        histogram1_data.append(
            _step_histogram_trace(np.log10(areas), name=f"t={t}")
        )
        histogram2_data.append(
            _step_histogram_trace(np.log10(areas_scaled), name=f"t={t}")
        )

    # ===== scatter plots: one curve per 'pid' =====
    scatter1_data = []
    scatter2_data = []
    for pid in data_subset['pid'].unique():

        replicate = data_subset.loc[data_subset['pid'] == pid].copy()
        replicate['log10_areas'] = np.log10(replicate['size'].values)

        # per time point: number of tumours and mean log10 size
        summary = replicate.groupby('time')['log10_areas'].agg(['count', 'mean'])
        times = summary.index.values

        # scatter1 - mean log10 tumour size over time
        scatter1_data.append(
            go.Scatter(
                x=times,
                y=summary['mean'].values,
                name=f"pid={pid}",
                mode="lines",
            )
        )

        # scatter2 - tumour count over time, marker size follows the count
        scatter2_data.append(
            go.Scatter(
                x=times,
                y=summary['count'].values,
                name=f"pid={pid}",
                mode="lines+markers",
                marker_size=summary['count'].values,
            )
        )

    # customize the figures

    # histograms
    histogram1 = go.Figure(data=histogram1_data)
    histogram2 = go.Figure(data=histogram2_data)
    histogram1.update_layout(template='simple_white', width=700, height=400)
    histogram2.update_layout(template='simple_white', width=700, height=400)
    histogram1.update_xaxes(title=dict(text="Log10 (tumour size)", font_family="Arial", font_size=14))
    histogram1.update_yaxes(title=dict(text="Probability density", font_family="Arial", font_size=14))
    histogram2.update_xaxes(title=dict(text="Log10 (tumour size scaled by median)", font_family="Arial", font_size=14))
    histogram2.update_yaxes(title=dict(text="Probability density", font_family="Arial", font_size=14))

    # scatter plots
    scatter1 = go.Figure(data=scatter1_data)
    scatter2 = go.Figure(data=scatter2_data)
    scatter1.update_layout(template='simple_white', width=700, height=400)
    scatter2.update_layout(template='simple_white', width=700, height=400)
    scatter1.update_xaxes(title=dict(text="Time", font_family="Arial", font_size=14))
    scatter1.update_yaxes(title=dict(text="Log10 (tumour size) mean", font_family="Arial", font_size=14))
    scatter2.update_xaxes(title=dict(text="Time", font_family="Arial", font_size=14))
    scatter2.update_yaxes(title=dict(text="Number of tumours observed", font_family="Arial", font_size=14))

    return [histogram1, histogram2, scatter1, scatter2]


if __name__ == '__main__':
    app.run(debug=True)

In [None]:
# ===== plotly dash APP2: replicate results per time point / seeding density =====

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app2 = Dash(__name__, external_stylesheets=external_stylesheets)

app2.layout = html.Div(
    children=[
        # top row: the two selectors, side by side
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - time point
                        dcc.Dropdown(
                            options=sorted(combined_results['time'].unique()),
                            value=combined_results['time'].iloc[0],
                            placeholder="Select time point...",
                            id='dropdown-time',
                        ),
                    ],
                ),
                html.Div(
                    className="six columns",
                    children=[
                        # Dropdown - seeding density
                        dcc.Dropdown(
                            options=sorted(combined_results['seeding_density'].unique()),
                            value=combined_results['seeding_density'].iloc[0],
                            placeholder="Select seeding density...",
                            id='dropdown-seeding-density',
                        ),
                    ],
                ),
            ],
        ),

        # Histograms
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[
                        html.H3('Area'),
                        dcc.Graph(id='histogram1'),
                    ],
                ),
                html.Div(
                    className="six columns",
                    children=[
                        html.H3('Area scaled by median'),
                        dcc.Graph(id='histogram2'),
                    ],
                ),
            ],
        ),

        # Scatter plots
        html.Div(
            className="row",
            children=[
                html.Div(
                    className="six columns",
                    children=[dcc.Graph(id='scatter1')],
                ),
                html.Div(
                    className="six columns",
                    children=[dcc.Graph(id='scatter2')],
                ),
            ],
        ),
    ],
)


def _step_histogram_trace(values, name, bins=30):
    """Build a step-line trace of the density histogram of *values*.

    Args:
        values: 1-D array of finite numbers to bin.
        name: legend label of the trace.
        bins: number of equal-width bins passed to ``np.histogram``.

    Returns:
        go.Scatter: one point per bin, placed at the bin centre.
    """
    # density=True so the values match the "Probability density" axis label
    density, edges = np.histogram(values, bins=bins, density=True)
    # np.histogram returns bins + 1 edges; use the bin centres so x and y have
    # the same length and the 'hvh' steps change exactly at the bin edges
    centres = (edges[:-1] + edges[1:]) / 2
    return go.Scatter(
        x=centres, y=density,
        line=dict(width=1, shape='hvh'),
        name=name,
    )


# Register on `app2` itself: the module-level `callback` decorator is not bound
# to a particular Dash instance, and this notebook creates several apps.
@app2.callback(
    [
        Output('histogram1', 'figure'),
        Output('histogram2', 'figure'),
        Output('scatter1', 'figure'),
        Output('scatter2', 'figure'),
    ],
    [
        Input('dropdown-time', 'value'),
        Input('dropdown-seeding-density', 'value')
    ]
)
def update_plot(value1, value2):
    """Redraw the APP2 figures for one time point and seeding density.

    Args:
        value1: time point chosen in 'dropdown-time'.
        value2: seeding density chosen in 'dropdown-seeding-density'.

    Returns:
        list: ``[histogram1, histogram2, scatter1, scatter2]`` where
            histogram1 - log10 tumour size histogram, one trace per model type
            histogram2 - same, with sizes divided by their median first
            scatter1 - per-'pid' std vs mean of log10 size, marker size = count
            scatter2 - same as scatter1 for the median-scaled sizes
    """

    # Read the results table directly instead of the rebindable global `data`.
    data_subset = combined_results.loc[
        (combined_results['time'] == value1)
        & (combined_results['seeding_density'] == value2)
    ].copy()

    histogram1_data = []
    histogram2_data = []
    scatter1_data = []
    scatter2_data = []

    for model_type in sorted(data_subset['model_type'].unique()):

        model_rows = data_subset.loc[data_subset['model_type'] == model_type].copy()

        # ===== histograms of tumour areas (per DBSCAN cluster) =====
        areas = model_rows['size'].values
        areas_scaled = areas / np.median(areas)

        model_rows['log10_areas'] = np.log10(areas)
        model_rows['log10_areas_scaled'] = np.log10(areas_scaled)

        histogram1_data.append(
            _step_histogram_trace(model_rows['log10_areas'].values, name=model_type)
        )
        histogram2_data.append(
            _step_histogram_trace(model_rows['log10_areas_scaled'].values, name=model_type)
        )

        # ===== scatter plots of areas | mean, std (per simulation) =====
        per_pid = model_rows.groupby('pid')[
            ['log10_areas', 'log10_areas_scaled']
        ].agg(['mean', 'std', 'count'])

        # scatter1 - raw log10 sizes
        raw_summary = per_pid['log10_areas']
        scatter1_data.append(
            go.Scatter(
                x=raw_summary['mean'].values,
                y=raw_summary['std'].values,
                name=model_type,
                mode="markers",
                marker_size=raw_summary['count'].values,
            )
        )

        # scatter2 - median-scaled log10 sizes
        scaled_summary = per_pid['log10_areas_scaled']
        scatter2_data.append(
            go.Scatter(
                x=scaled_summary['mean'].values,
                y=scaled_summary['std'].values,
                name=model_type,
                mode="markers",
                marker_size=scaled_summary['count'].values,
            )
        )

    # customize the figures

    histogram1 = go.Figure(data=histogram1_data)
    histogram2 = go.Figure(data=histogram2_data)
    histogram1.update_layout(template='simple_white', width=700, height=400)
    histogram2.update_layout(template='simple_white', width=700, height=400)
    histogram1.update_xaxes(title=dict(text="Log10 (tumour size)", font_family="Arial", font_size=14))
    histogram1.update_yaxes(title=dict(text="Probability density", font_family="Arial", font_size=14))
    histogram2.update_xaxes(title=dict(text="Log10 (tumour size scaled by median)", font_family="Arial", font_size=14))
    histogram2.update_yaxes(title=dict(text="Probability density", font_family="Arial", font_size=14))

    scatter1 = go.Figure(data=scatter1_data)
    scatter2 = go.Figure(data=scatter2_data)
    scatter1.update_layout(template='simple_white', width=700, height=400)
    scatter2.update_layout(template='simple_white', width=700, height=400)
    scatter1.update_xaxes(title=dict(text="Log10 (tumour size) mean", font_family="Arial", font_size=14))
    scatter1.update_yaxes(title=dict(text="Log10 (tumour size) std", font_family="Arial", font_size=14))
    scatter2.update_xaxes(title=dict(text="Log10 (tumour size scaled by median) mean", font_family="Arial", font_size=14))
    scatter2.update_yaxes(title=dict(text="Log10 (tumour size scaled by median) std", font_family="Arial", font_size=14))

    return [histogram1, histogram2, scatter1, scatter2]


if __name__ == '__main__':
    app2.run(debug=True)