In [None]:
import numpy as np
import pandas as pd
import wandb
import seaborn as sns
import matplotlib.pyplot as plt

def get_title(experiment, dataset):
    """Build the (experiment label, dataset) pair used in subplot titles.

    Args:
        experiment: Experiment key such as ``'results_grammar_1_AmEx'`` or
            ``'results_nn_5'``.
        dataset: Dataset name; returned unchanged.

    Returns:
        Tuple ``(label, dataset)`` where ``label`` is ``'Grammar'`` if the key
        contains ``'grammar'``, ``'Uniform'`` if it contains ``'uniform'``,
        and otherwise the key with every ``'results_'`` occurrence removed.
    """
    if 'grammar' in experiment:
        exp = 'Grammar'
    elif 'uniform' in experiment:
        exp = 'Uniform'
    else:
        # str.replace returns a new string; the result must be kept,
        # otherwise the 'results_' prefix is never stripped.
        exp = experiment.replace('results_', '')
    return exp, dataset


class Results():
    """Per-target, per-experiment tables of run outcomes.

    ``result_dict[target][experiment]`` is a DataFrame with one row per seed
    (1..15) and one column per entry of ``columns``. Every cell starts at
    ``0.0``; ``fill_results`` overwrites a cell with the value reported in a
    run summary, or with NaN when the summary lacks that target.
    """

    # Summary keys that are collected for every run.
    _TARGETS = (
        'num_states_to_perfect_fit_test',
        'time to perfect fit',
        'num simulation to perfect fit',
    )

    # Experiment tables created for every target, in creation order.
    _EXPERIMENTS = (
        'results_grammar_1_Normal', 'results_uniform_1_Normal',
        'results_grammar_1_AmEx', 'results_uniform_1_AmEx',
        'results_grammar_2_Normal', 'results_uniform_2_Normal',
        'results_grammar_2_AmEx', 'results_uniform_2_AmEx',
        'results_nn_1', 'results_nn_5', 'results_nn_15',
        'results_nn_30', 'results_nn_45', 'results_nn_60',
    )

    # Suffix used in the experiment key for each MCTS engine value.
    _ENGINE_SUFFIX = (('Endgame', 'AmEx'), ('Normal', 'Normal'))

    # Training-step counts that have a dedicated 'results_nn_<steps>' table.
    _NN_TRAINING_STEPS = (1, 5, 15, 30, 45, 60)

    def __init__(self, columns):
        """Create one zero-filled table per (target, experiment) pair.

        Args:
            columns: Column labels (dataset names) for every table.
        """
        num_seeds = 15
        seeds = list(range(1, num_seeds + 1, 1))
        # The tables are float from the start: fill_results stores NaN (and
        # possibly fractional values), which an integer column cannot hold
        # without an implicit upcast.
        self.result_dict = {
            target: {
                experiment: pd.DataFrame(0.0, index=seeds, columns=columns)
                for experiment in self._EXPERIMENTS
            }
            for target in self._TARGETS
        }

    def _experiment_for(self, data_path, engine, prior, mcts_steps_training):
        """Return the experiment key a run belongs to, or None if it has none."""
        if prior in ('grammar', 'uniform'):
            for engine_name, suffix in self._ENGINE_SUFFIX:
                if engine != engine_name:
                    continue
                # Grammar 1 is checked before grammar 2 (substring match on
                # the data path).
                for grammar_id in (1, 2):
                    if f'data_grammar_{grammar_id}' in data_path:
                        return f'results_{prior}_{grammar_id}_{suffix}'
        if (engine == 'Endgame' and prior == 'neural_net'
                and mcts_steps_training in self._NN_TRAINING_STEPS):
            return f'results_nn_{mcts_steps_training}'
        return None

    def fill_results(self, config, summary):
        """Record one run's summary values in the matching experiment table.

        Args:
            config: Run configuration. Reads ``seed``, ``data_path``,
                ``MCTS_engine``, ``prior_source`` and
                ``path_to_complete_model``; ``experiment_name`` is read only
                when the model path is longer than 10 characters.
            summary: Mapping from target name to value. Targets that are
                absent are stored as NaN.

        Runs that match no experiment table are ignored. This includes
        neural-net runs for which no training-step count could be derived;
        those no longer fail on an unbound local variable.

        Raises:
            KeyError: If a required ``config`` key is missing.
            IndexError, ValueError: If ``experiment_name`` does not carry an
                integer in its eighth ``'__'``-separated field.
        """
        seed = config['seed']
        data_path = config['data_path']
        engine = config['MCTS_engine']
        prior = config['prior_source']
        dataset = data_path.split('/')[1]

        # Always bound, so the neural-net routing below can never hit an
        # undefined name.
        mcts_steps_training = None
        # NOTE(review): presumably a path longer than 10 characters means a
        # trained model was loaded - confirm the threshold.
        if len(config['path_to_complete_model']) > 10:
            mcts_steps_training = int(config['experiment_name'].split('__')[7])

        experiment = self._experiment_for(data_path, engine, prior, mcts_steps_training)
        if experiment is None:
            return

        for target in self._TARGETS:
            value = summary[target] if target in summary else np.nan
            self.result_dict[target][experiment].loc[seed, dataset] = value

# Pull the tagged runs from Weights & Biases and aggregate them per experiment.
api = wandb.Api()
entity, project = "jgu-wandb", "neural_guided_symbolic_regression_final"

# Only runs carrying at least one of these tags are aggregated.
runs = api.runs(entity + "/" + project, filters={"tags": {"$in": ["no_net", "no_net_2" , "neural_net", "neural_net_2"]}})

summary_list, config_list, name_list = [], [], []
columns_nguyen = [f"nguyen_{i}" for i in range(1, 13, 1)]
columns_self = [f"self_{i}" for i in range(0, 10, 1)]
columns = columns_nguyen + columns_self
results_obj = Results(columns)

# Runs that could not be aggregated, as (run name, error) pairs. They are
# reported after the loop instead of being dropped silently.
failed_runs = []
for run in runs:
    try:
        # .summary holds the run's output metrics; ._json_dict omits large files.
        summary_dict = run.summary._json_dict
        # Keys starting with "_" are skipped.
        config_dict = {k: v for k, v in run.config.items() if not k.startswith("_")}
        results_obj.fill_results(config_dict, summary_dict)
    except Exception as e:
        # Best effort: one malformed run must not abort the aggregation.
        failed_runs.append((run.name, repr(e)))

if failed_runs:
    max_reported = 10
    print(f'{len(failed_runs)} run(s) could not be aggregated; showing up to {max_reported}:')
    for run_name, error in failed_runs[:max_reported]:
        print(f'  {run_name}: {error}')

print('end')

# Experiments (subplot columns) and datasets (subplot rows) to plot.
experiments = ['results_grammar_1_AmEx', 'results_uniform_1_AmEx',
               'results_nn_1', 'results_nn_5', 'results_nn_15',
               'results_nn_30', 'results_nn_45', 'results_nn_60']
columns_to_show=[
    'nguyen_1', 'nguyen_2', 'nguyen_3', 'nguyen_4', 'nguyen_5', 'nguyen_6',
    'nguyen_7', 'nguyen_8', 'nguyen_9', 'nguyen_10', 'nguyen_11',
    'nguyen_12', 'self_0', 'self_1', 'self_2', 'self_3', 'self_4', 'self_5',
    'self_6', 'self_7', 'self_8', 'self_9'
]

In [None]:
def set_all_plots_in_a_row_to_same_y_values( axs, experiments, row, dataset):
    """Give every subplot in one grid row the same y-range ``[0, top]``.

    ``top`` is the largest current upper y-limit among the row's axes, and
    never less than 0. The first column is labelled with ``dataset``; the
    y-axis of every other column is hidden.

    Args:
        axs: Grid of axes indexable as ``axs[row, col]``.
        experiments: One entry per column; only the count is used.
        row: Index of the row to adjust.
        dataset: Text for the y-label of the first column.
    """
    # Read all upper limits before any axis is changed.
    upper_limits = [axs[row, col].get_ylim()[1] for col, _ in enumerate(experiments)]
    shared_top = max([0, *upper_limits])

    for col, _ in enumerate(experiments):
        axs[row, col].set_ylim([0, shared_top])
        if col != 0:
            axs[row, col].get_yaxis().set_visible(False)
            continue
        axs[row, 0].set_ylabel(dataset)
    
# Grid of scatter plots: one row per dataset, one column per experiment.
# Each panel shows the non-zero, non-NaN values recorded for `target`.
target = 'num_states_to_perfect_fit_test'
n_rows, n_cols = len(columns_to_show), len(experiments)
fig, axs = plt.subplots(n_rows, n_cols, figsize=[20, 44.8], sharex=True)

for row, dataset in enumerate(columns_to_show):
    for col, experiment in enumerate(experiments):
        ax = axs[row, col]
        df = results_obj.result_dict[target][experiment]
        seed_values = df.loc[:, dataset]

        # Nothing to draw when every seed is NaN.
        if seed_values.notna().any():
            # Cells still at 0 were never written, so they are not plotted.
            nonzero_values = [val for val in seed_values.dropna().tolist() if val != 0]
            if nonzero_values:
                # All points share x = 0, so each panel is a vertical strip.
                ax.scatter(x=np.zeros(shape=(len(nonzero_values))), y=nonzero_values)

        title_exp, title_data = get_title(experiment, dataset)
        n_nan = seed_values.isna().sum()
        n_zero = (seed_values == 0).sum()
        ax.set_title(f'{title_exp} {title_data}\nnot fit: {n_nan} \nmissing: {n_zero} ')

    # Runs once the whole row is drawn, so the shared limit covers every panel.
    set_all_plots_in_a_row_to_same_y_values( axs, experiments, row, dataset)

fig.tight_layout()
fig.savefig('plot.svg')
fig.show()
print('end')


