In [None]:
import pandas as pd
import wandb
from datetime import datetime
import numpy as np


class Results():
    """Collect per-run metrics into one DataFrame per (target, approach).

    ``result_dict[target][approach]`` is a DataFrame indexed by seed
    (``1..num_seeds``) with one column per ``nguyen_1`` ... ``nguyen_12``.
    Approach keys have the form ``results_<prior>_<grammar>_<engine label>``,
    e.g. ``results_uniform_2_AmEx``. Cells stay NaN until a run fills them.
    """

    # Summary keys collected for every run.
    _TARGETS = (
        'num_states_to_perfect_fit_test',
        'time to perfect fit',
        'num simulation to perfect fit',
    )
    # config['MCTS_engine'] value -> label used in the approach key.
    # The order of the values fixes the key order inside result_dict.
    _ENGINE_LABELS = {'Normal': 'Normal', 'Endgame': 'AmEx'}
    _PRIORS = ('grammar', 'uniform')
    _GRAMMARS = ('1', '2')

    def __init__(self, num_seeds=5):
        """Create the empty result frames.

        Args:
            num_seeds: number of seeds; the frames are indexed 1..num_seeds.
        """
        seeds = list(range(1, num_seeds + 1))
        columns = [f"nguyen_{i}" for i in range(1, 13)]
        # Loop nesting (grammar, then engine label, then prior) keeps the
        # insertion order of the approach keys stable for anything that
        # iterates over result_dict[target].
        self.result_dict = {
            target: {
                f'results_{prior}_{grammar}_{label}': pd.DataFrame(index=seeds, columns=columns)
                for grammar in self._GRAMMARS
                for label in self._ENGINE_LABELS.values()
                for prior in self._PRIORS
            }
            for target in self._TARGETS
        }

    def fill_results(self, config, summary):
        """Store the metrics of one run in the frame of its approach.

        Args:
            config: mapping with the keys ``'seed'``, ``'data_path'``,
                ``'MCTS_engine'`` and ``'prior_source'``. The second
                ``'/'``-separated component of ``data_path`` is used as the
                column label.
            summary: mapping that holds a value for every collected target.

        A run whose data path, engine or prior matches none of the known
        approaches is skipped without an error. Note that ``.loc`` assignment
        adds a new row/column when ``seed`` or the dataset label is not
        already present in the frame.

        Raises:
            KeyError: a required key is missing from ``config`` or ``summary``.
            IndexError: ``data_path`` contains no ``'/'``.
        """
        seed = config['seed']
        data_path = config['data_path']
        engine = config['MCTS_engine']
        prior = config['prior_source']
        dataset = data_path.split('/')[1]

        # First grammar whose marker occurs in the path; 'data_grammar_1'
        # takes precedence over 'data_grammar_2' when both are present.
        grammar = next(
            (g for g in self._GRAMMARS if f'data_grammar_{g}' in data_path),
            None,
        )
        label = self._ENGINE_LABELS.get(engine)
        matched = grammar is not None and label is not None and prior in self._PRIORS
        approach = f'results_{prior}_{grammar}_{label}' if matched else None

        for target in self._TARGETS:
            # Read the value even for unmatched runs so that a missing
            # summary key is still reported as a KeyError.
            value = summary[target]
            if approach is not None:
                self.result_dict[target][approach].loc[seed, dataset] = value


# Query Weights & Biases for the project's runs, restricted by a tag filter
# (presumably: runs whose tags include "no_net" -- confirm against the wandb
# filter syntax).
api = wandb.Api()
entity, project = "jgu-wandb", "neural_guided_symbolic_regression_final"
runs = api.runs(f"{entity}/{project}", filters={"tags": {"$in": ["no_net"]}})

summary_list, config_list, name_list = [], [], []
results_obj = Results()

for run in runs:
    # run.summary holds the output metrics of the run; per the original
    # author's note, `_json_dict` is used to leave out large files.
    result_dict = run.summary._json_dict
    try:
        summary_dict = run.summary._json_dict
        # Drop config entries whose key starts with an underscore.
        config_dict = {
            key: value
            for key, value in run.config.items()
            if not key.startswith("_")
        }
        results_obj.fill_results(config_dict, summary_dict)
    except Exception as e:
        # Best effort: report the run that could not be processed and go on.
        print(f"{e} their is a problem with {run.name} ")

print('end')


In [None]:
def fill_final_result(target, approach, final_result):
    """Write the aggregate of one (target, approach) frame into ``final_result``.

    Reads the frame ``results_obj.result_dict[target][approach]`` from the
    module-level ``results_obj`` and updates row ``approach`` of
    ``final_result`` in place:

    * column ``target`` gets the mean of the per-column means of the frame,
      rounded to 0 decimals;
    * column ``'unsuccessful_fits'`` gets the number of NaN cells of the
      frame. This column is not per target, so a later call for the same
      approach overwrites it.

    Args:
        target: key of the metric in ``results_obj.result_dict``.
        approach: key of the approach; also the row label in ``final_result``.
        final_result: DataFrame that receives the values.
    """
    df = results_obj.result_dict[target][approach]
    nan_values = df.isna().sum().sum()
    mean_values = df.mean(axis=0).mean(axis=0).round(0)
    # Assign with a single .loc[row, column] call. The chained form
    # `.loc[row][column] = ...` writes into a temporary Series and is not
    # guaranteed to reach the frame (it never does under Copy-on-Write).
    final_result.loc[approach, target] = mean_values
    final_result.loc[approach, 'unsuccessful_fits'] = nan_values


# Summary table: one row per uniform-prior approach, one column per metric
# plus the 'unsuccessful_fits' count.
final_result = pd.DataFrame(
    index=[
        'results_uniform_1_Normal',
        'results_uniform_2_Normal',
        'results_uniform_1_AmEx',
        'results_uniform_2_AmEx',
    ],
    columns=[
        'num simulation to perfect fit',
        'num_states_to_perfect_fit_test',
        'time to perfect fit',
        'unsuccessful_fits',
    ],
)

# Fill every (approach, metric) cell from the collected per-seed frames.
for approch in final_result.index:
    for target in ['num_states_to_perfect_fit_test',
                   'time to perfect fit',
                   'num simulation to perfect fit']:
        fill_final_result(target=target, approach=approch, final_result=final_result)

# Bare expression so the notebook renders the table as the cell output.
final_result

### Average over seeds

In [None]:
from IPython.display import display, HTML
# Raise pandas' display limits to 500 rows / 500 columns so that the result
# tables below are shown in full rather than truncated.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

def highlight_min(s):
    """Return one CSS string per element of ``s``.

    Elements equal to ``s.min()`` get ``'background-color: yellow'``; all
    other elements get an empty string.
    """
    smallest = s.min()
    styles = []
    for matches in (s == smallest):
        if matches:
            styles.append('background-color: yellow')
        else:
            styles.append('')
    return styles

# For every metric, build a table with one row per approach holding the
# per-dataset mean over seeds, then render it in the notebook.
for target in ['num simulation to perfect fit', 'num_states_to_perfect_fit_test', 'time to perfect fit']:
    result_table = pd.DataFrame(columns=[f"nguyen_{i}" for i in range(1, 13)])
    for approch, frame in results_obj.result_dict[target].items():
        # Column-wise mean of the per-seed frame; appended as a new row.
        frame_mean = frame.mean(axis=0)
        result_table.loc[approch] = frame_mean

    print(target)
    # NOTE(review): styled_df (custom yellow highlight) is built but never
    # displayed; the table shown below uses the Styler's own highlight_min.
    styled_df = result_table.style.apply(highlight_min)
    # axis=0: the minimum of each column is highlighted.
    display(result_table.style.highlight_min(color='lightgreen', axis=0))

print('end')
