# Statistics in Tables
Generate statistics over all datapoints and write them into LaTeX tables.

In [None]:
import pickle
import warnings

import numpy as np
from IPython.display import HTML, display  # Remove blank lines from tqdm

from ad_sensitivity_analysis.plot.latexify import cites_to_citep
from ad_sensitivity_analysis.printer.order_top_params import get_ordered_top_dic
from ad_sensitivity_analysis.printer.process_description import get_process_desc_table
from ad_sensitivity_analysis.printer.top_params import top_to_table
from ad_sensitivity_analysis.statistics.top_params import traj_get_top_params
from ad_sensitivity_analysis.statistics.stats_overall import get_sums_phase, get_phase_flow_statistics


# Ignore every warning of category RuntimeWarning.
warnings.simplefilter(action="ignore", category=RuntimeWarning)


def rm_out_padding():
    """Display a CSS snippet that unsets the padding of `div.jp-OutputPrompt`.

    Used to remove the blank lines that otherwise appear with tqdm output.
    """
    css = "<style>div.jp-OutputPrompt { padding:unset;}</style>"
    display(HTML(css))


rm_out_padding()

## First, we calculate some statistics
This may take a long time.

In [None]:
file_path = "data/simulation"
# NOTE(review): the second argument looks like a path prefix for stored
# statistics - confirm against get_sums_phase.
statistics_prefix = "data/statistics_"
sums_phase = get_sums_phase(
    file_path,
    statistics_prefix,
    only_asc600=True,
    inoutflow_time=240,
)

## Load the data
Use this cell if you have already executed the cell above in a previous run.

In [None]:
# Restore the pickled statistics from disk.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
with open("data/statistics_phase.pkl", "rb") as pickle_file:
    sums_phase = pickle.load(pickle_file)

## Get the top parameters
We get the top 10 parameters and the parameters with an impact within one order of magnitude.

In [None]:
# n=10: top 10 parameters; orders=1: top parameters within one magnitude.
top_params = traj_get_top_params(sums_phase, sums_phase.keys(), n=10, orders=1)
(
    top_magn_set_phase,
    top10_set_phase,
    top_magn_sens_dic_phase,
    top_sens_dic_phase,
) = top_params

## A table with the processes from the top parameters

In [None]:
# Build the LaTeX table describing the processes of the top parameters.
caption = r"The processes where model parameters with a large impact are involved. \textbf{TODO Maicon: This is an auto-generated table. The text needs some work.}"
label = "tab:processes"
table = get_process_desc_table(
    list(top_magn_set_phase),
    caption=caption,
    label=label,
    breakup=True,
)
# Pass the generated table text (not the function object itself) through
# cites_to_citep before printing.
table = cites_to_citep(table)
print(table)

## Get the number of parameters and the table for each model state variable
The parameter with the largest impact is listed first in each table.

In [None]:
# Render the ordered top-magnitude parameters as a table without caption or
# label, then strip every "\partial" from the resulting text.
ordered_top = get_ordered_top_dic(top_magn_sens_dic_phase, sums_phase)
text = top_to_table(ordered_top, caption="", label="")
text = text.replace(r"\partial", "")
print(text)

## Get the top parameters for each model state variable and phase

In [None]:
# Same table as before, but with a caption, a label and parse=True.
caption = (
    "The model parameters with sensitivity in the top magnitude "
    "for each model state variable and phase."
)
label = "tab:analysis:top_magn_phase"
ordered_top = get_ordered_top_dic(top_magn_sens_dic_phase, sums_phase)
table = top_to_table(ordered_top, caption, label, parse=True).replace(
    r"\partial", ""
)
print(table)

## Number of datapoints in each phase/flow and average temperatures

In [None]:
# The call yields four results: counts per phase, counts per flow, and the
# temperature statistics per phase and per flow.
phase_flow_stats = get_phase_flow_statistics(
    file_path="data/simulation/", inoutflow_time=240
)
n_phases, n_flows, temp_phases, temp_flows = phase_flow_stats

In [None]:
# One row per phase. The entry at index 3 is passed through np.sqrt before it
# is printed in the "Std" column.
print("Phase: Min, Max, Mean, Std Temperature")
gap = " " * 5  # spacing between the columns
for key in temp_phases:
    stats = temp_phases[key]
    values = [stats[0], stats[1], stats[2], np.sqrt(stats[3])]
    row = f"{key:<20}:" + gap + ("," + gap).join(f"{v:3.2f}" for v in values)
    print(row)

In [None]:
# One row per flow. The last column is np.sqrt of the entry at index 3, so the
# header is labelled "Std" (it previously said "Var", which did not match the
# printed value).
print("Flow: Min, Max, Mean, Std Temperature")
gap = " " * 5  # spacing between the columns
for key in temp_flows:
    stats = temp_flows[key]
    values = [stats[0], stats[1], stats[2], np.sqrt(stats[3])]
    row = f"{key:<20}:" + gap + ("," + gap).join(f"{v:3.2f}" for v in values)
    print(row)

In [None]:
# Counts and percentages per phase; keys containing "neutral" are skipped and
# do not contribute to the total.
print("Number of datapoints in each phase")
kept_keys = [key for key in n_phases if "neutral" not in key]
for key in kept_keys:
    print(f"{key}: {n_phases[key]}")
total = sum(n_phases[key] for key in kept_keys)
print("Percentage")
for key in kept_keys:
    share = n_phases[key] / total * 100
    print(f"{key}: {share:3.2f}")

In [None]:
# Counts and percentages per flow; keys containing "neutral" are skipped and
# do not contribute to the total.
print("Number of datapoints in each flow")
kept_keys = [key for key in n_flows if "neutral" not in key]
for key in kept_keys:
    print(f"{key}: {n_flows[key]}")
total = sum(n_flows[key] for key in kept_keys)
print("Percentage")
for key in kept_keys:
    share = n_flows[key] / total * 100
    print(f"{key}: {share:3.2f}")

## Calculate the statistics for each phase and flow combination

In [None]:
# NOTE(review): this call is identical to the earlier get_phase_flow_statistics
# call that unpacks four values, yet only two names are unpacked here. Unless
# the return shape differs between the calls, one of the two cells raises a
# ValueError - confirm which function/arguments this cell was meant to use.
phase_flows, temp_dict = get_phase_flow_statistics(
    file_path="data/simulation/",
    inoutflow_time=240,
)

In [None]:
# One row per key of temp_dict. The entry at index 3 is passed through np.sqrt
# before it is printed in the "Std" column.
print("Phase: Min, Max, Mean, Std Temperature")
gap = " " * 5  # spacing between the columns
for key in temp_dict:
    stats = temp_dict[key]
    values = [stats[0], stats[1], stats[2], np.sqrt(stats[3])]
    row = f"{key:<20}:" + gap + ("," + gap).join(f"{v:3.2f}" for v in values)
    print(row)

In [None]:
# Counts and percentages per key of phase_flows; keys containing "neutral" are
# skipped and do not contribute to the total.
print("Number of datapoints")
kept_keys = [key for key in phase_flows if "neutral" not in key]
for key in kept_keys:
    print(f"{key}: {phase_flows[key]}")
total = sum(phase_flows[key] for key in kept_keys)
print("Percentage")
for key in kept_keys:
    share = phase_flows[key] / total * 100
    print(f"{key}: {share:3.2f}")