In [None]:
import numpy as np
import pandas as pd
import wandb
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm

In [None]:
api = wandb.Api()
entity, project = "wwjbrugger", "Test_NGSR_10_05_21_ckpt"  # Test_NGSR_10_05_21_ckpt  Test_NGSR_12_05
runs = api.runs(f"{entity}/{project}")

# Reference count that len(summary['states']) is subtracted from to get 'not_found'.
# NOTE(review): presumably the number of test tasks per experiment -- confirm.
NUM_EXPECTED_RESULTS = 500

means_sim = []
stds_sim = []
means_states = []
stds_states = []
experiment_name = []
not_found = []
# experiment name -> raw 'sim' / 'states' lists plus their median / mean / std
results_of_experiment_dict = {}
for run in tqdm(runs):
    # Drop W&B-internal config entries (keys starting with "_").
    config_dict = {k: v for k, v in run.config.items() if not k.startswith("_")}
    # .summary holds the output keys/values of the run;
    # ._json_dict is used to omit large files.
    result_dict = run.summary._json_dict

    if 'sim' not in result_dict:
        # Fall back to the W&B run name so that a run without an
        # 'experiment_name' config entry does not abort the whole loop here.
        print(f"{config_dict.get('experiment_name', run.name)} no sim object")
        continue

    name = config_dict['experiment_name']
    if np.any(np.array(result_dict["sim"]) < 0):
        # Negative values are unexpected; report which experiment has them.
        print(f"{name}: 'sim' contains negative values")

    experiment_name.append(name)
    # A later run with the same experiment name overwrites the earlier entry.
    experiment_stats = {}
    for metric in ('sim', 'states'):
        values = result_dict[metric]
        experiment_stats[metric] = values
        experiment_stats[f'{metric}_median'] = np.median(values)
        experiment_stats[f'{metric}_mean'] = np.mean(values)
        experiment_stats[f'{metric}_std'] = np.std(values)
    experiment_stats['not_found'] = NUM_EXPECTED_RESULTS - len(result_dict["states"])
    results_of_experiment_dict[name] = experiment_stats
print('end')

In [None]:
# List all experiments ordered by ascending mean of 'sim'.
_experiment_name = list(results_of_experiment_dict)
_sim_median = [stats['sim_median'] for stats in results_of_experiment_dict.values()]
_sim_mean = [stats['sim_mean'] for stats in results_of_experiment_dict.values()]
# argsort yields the positions that sort the means from smallest to largest.
arg_sort = np.argsort(_sim_mean)
for i in arg_sort:
    print(f"{_experiment_name[i]:<120} sim_mean: {np.round(_sim_mean[i], 2):<10}")

In [None]:
def get_steps(experiment_name):
    """Derive the step setting encoded in an experiment name.

    Returns 'supervised' when the name contains 'None' or 'supervised',
    otherwise the integer (500, 250, 125, 50 or 5) whose marker occurs in
    the name, or None when no marker matches.
    """
    if any(tag in experiment_name for tag in ('None', 'supervised')):
        return 'supervised'

    # Order matters: shorter markers are substrings of longer ones
    # ('50__' occurs inside '250__', '5__E' inside '125__E'), so the
    # larger values have to be tested first.
    marker_to_steps = (
        ('500__', 500),
        ('250__', 250),
        ('125__', 125),
        ('50__', 50),
        ('5__E', 5),
    )
    for marker, steps in marker_to_steps:
        if marker in experiment_name:
            return steps
    return None


def get_class_equation_encoder(string):
    """Return the equation-encoder class name contained in ``string``.

    'EquationEncoderDummy' is checked before 'Transformer_Encoder_String';
    None is returned when neither occurs.
    """
    for encoder_name in ('EquationEncoderDummy', 'Transformer_Encoder_String'):
        if encoder_name in string:
            return encoder_name
    return None


def get_prior_source(settings):
    """Return the prior source named in ``settings['path_to_complete_model']``.

    The candidates 'neural_net', 'uniform' and 'grammar' are tried in that
    order; the first one occurring in the path is returned, or None when
    none of them occurs.
    """
    model_path = settings['path_to_complete_model']
    candidates = ('neural_net', 'uniform', 'grammar')
    return next((source for source in candidates if source in model_path), None)


def get_class_measurement_encoder(string):
    """Return the measurement-encoder class name contained in ``string``.

    A class name only counts when it is delimited by double underscores
    on both sides (``__<name>__``).

    Raises:
        AssertionError: if none of the known encoder names is found.
    """
    known_encoders = (
        'MeasurementEncoderDummy',
        'Bi_LSTM_Measurement_Encoder',
        'LSTM_Measurement_Encoder',
        'MLP_Measurement_Encoder',
        'DatasetTransformer',
        'MeasurementEncoderPicture',
        'TextTransformer',
    )
    for encoder_name in known_encoders:
        if f'__{encoder_name}__' in string:
            return encoder_name
    raise AssertionError(f" {string} Could not be passed")

In [None]:
from itertools import product

# Separator written between mean and std in every table cell.
# NOTE(review): 'ä' looks like a placeholder for (or a mis-encoded) '±' --
# confirm before changing, since table.tex consumers may depend on it.
MEAN_STD_SEPARATOR = ' ä '


def _format_mean_std(stats):
    """Format one experiment's 'sim' mean and std (rounded to 0 decimals) as a table cell."""
    return f"{round(stats['sim_mean'], 0)}{MEAN_STD_SEPARATOR}{round(stats['sim_std'], 0)}"


# Rows: measurement encoders.
panda_index = [
    'Bi_LSTM_Measurement_Encoder',
    'LSTM_Measurement_Encoder',
    'MeasurementEncoderPicture',
    'MLP_Measurement_Encoder',
    'DatasetTransformer',
    'TextTransformer',
    'MeasurementEncoderDummy',
]
# Columns: every <equation encoder><steps> combination plus the two baselines.
syntax_tree = ['Transformer_Encoder_String', 'EquationEncoderDummy', ]
mcts = ['250', '125', '50', '5', 'supervised']
panda_column = [''.join(tup) for tup in product(syntax_tree, mcts)]
panda_column.append('grammar')
panda_column.append('uniform')
table = pd.DataFrame(index=panda_index, columns=panda_column)

# The baseline experiments are stored in the 'MeasurementEncoderDummy' row.
baseline_columns = {
    'test_model__grammar': 'grammar',
    'test_model__uniform': 'uniform',
}
# The loop variable is deliberately not called `experiment_name`, so the
# list of that name built while fetching the runs is not overwritten.
for run_name, run_stats in results_of_experiment_dict.items():
    if run_name in baseline_columns:
        row_label = 'MeasurementEncoderDummy'
        column_label = baseline_columns[run_name]
    else:
        steps = get_steps(run_name)
        measurement_encoder = get_class_measurement_encoder(run_name)
        equation_encoder = get_class_equation_encoder(run_name)
        row_label = measurement_encoder
        column_label = ''.join((equation_encoder, str(steps)))
    # .loc adds the column if the combination is not part of panda_column.
    table.loc[row_label, column_label] = _format_mean_std(run_stats)

# Plain-text rendering of the table, taken before missing cells are filled.
latex_table = table.to_string()
table = table.fillna('-')
table = table.drop(columns=['EquationEncoderDummy125', 'EquationEncoderDummy250', 'grammar'])
with open('table.tex', 'w') as f:
    # '_' has no special meaning in a regex, so it needs no backslash; the
    # former '\_' was an invalid string escape that triggers a warning.
    f.write(table.filter(regex='Transformer_Encoder_String').to_latex())
    f.write('\n')
    f.write(table.filter(regex='EquationEncoderDummy|uniform').to_latex())

# for experiment in results_of_experiment_dict:
#     steps = str(get_steps(experiment_name))
#     equation_encoder = get_class_equation_encoder(experiment_name)
#     measurement_encoder = get_class_measurement_encoder(experiment_name)

In [None]:
## plot histogram
import matplotlib.pyplot as plt

# Experiments to plot (keys of results_of_experiment_dict) mapped to the
# title of their panel.
# NOTE(review): the '...String5__Endgame__' experiment is titled '10 MCTS' --
# confirm that the label is intended.
experiments = {
    'test_model__neural_net__data_grammar_8_run_1__Bi_LSTM_Measurement_Encoder__Transformer_Encoder_String250__Endgame__': '250 MCTS',
    'test_model__neural_net__data_grammar_8_run_1__Bi_LSTM_Measurement_Encoder__Transformer_Encoder_String5__Endgame__': '10 MCTS',
    'test_model__uniform': 'Uniform',
    'test_model__neural_net__data_grammar_8_run_1__Bi_LSTM_Measurement_Encoder__Transformer_Encoder_Stringsupervised__Endgame__': ' Supervised',
    'test_model__neural_net__data_grammar_8_run_1__Bi_LSTM_Measurement_Encoder__EquationEncoderDummysupervised__Endgame__': ' Supervised No Syntax Tree',
    'test_model__neural_net__data_grammar_8_run_1__MeasurementEncoderDummy__Transformer_Encoder_Stringsupervised__Endgame__': ' Supervised No Dataset',
}

fig, axs = plt.subplots(
    nrows=2,
    ncols=3,
    figsize=(8, 4),
    sharey=True,
    # sharex=True,
    tight_layout=True,
    dpi=300,
)
# experiment name -> return value of Axes.hist for that experiment's panel
counts = {}
for i, (experiment, panel_title) in enumerate(experiments.items()):
    # Fill the 2x3 grid row by row.
    row, column = divmod(i, 3)
    ax = axs[row, column]

    counts[experiment] = ax.hist(
        results_of_experiment_dict[experiment]['sim'],
        bins=50,
        range=(0, 2500),
        log=True,
    )
    # Dashed vertical reference line at x = 300.
    ax.axvline(300, linestyle='--', color='k', linewidth=0.5)

    # Hide the frame around each panel.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    ax.grid(visible=False, axis='x', which='both')
    # Only the left-most panel of each row carries a y label.
    if column == 0:
        ax.set_ylabel('# Runs')

    ax.set_xlabel('# Simulation ')
    ax.set_title(panel_title)
fig.tight_layout()
fig.savefig('histogram_sim_to_perfect_fit.pdf')
plt.show()

### Get number of elements bigger than threshold  

In [None]:
from IPython.display import display

# For every threshold, sum the histogram counts of all bins whose left edge
# is at or above the threshold, per experiment.
thresholds = [100, 150, 200, 250, 300, 350]
# The bin edges of the first histogram are used for all experiments; this
# relies on every entry of `counts` having been binned identically.
example_key = list(counts.keys())[0]
bin_edges = np.asarray(counts[example_key][1])

outliers_per_threshold = {}
for threshold in thresholds:
    # The cut is made at a bin boundary, so the threshold has to be an edge.
    edge_matches = np.flatnonzero(np.isclose(bin_edges, threshold))
    if edge_matches.size != 1:
        raise ValueError(
            f"threshold {threshold} is not a bin edge of the histograms; "
            f"available edges: {bin_edges.tolist()}"
        )
    cut_of_index = int(edge_matches[0])
    outliers_per_threshold[threshold] = {
        experiments[experiment]: np.sum(np.asarray(hist_result[0])[cut_of_index:])
        for experiment, hist_result in counts.items()
    }

# Build the frame in one go (rows: experiment labels, columns: thresholds)
# instead of growing it cell by cell.
threshold_frame = pd.DataFrame(outliers_per_threshold)

# Styler.format returns a new Styler and leaves the frame untouched, so the
# Styler itself has to be displayed for the number format to take effect.
display(threshold_frame.style.format(precision=3, thousands=".", decimal=","))