In [None]:
import pandas as pd
import wandb
from datetime import datetime
import numpy as np


class Results():
    """Collects per-seed metrics of experiment runs into pandas tables.

    ``result_dict[target][table_name]`` is a DataFrame with one row per seed
    (``0 .. NUM_SEEDS - 1``) and one column per dataset
    (``nguyen_1 .. nguyen_12``).  Table names follow the pattern
    ``results_<prior>_<grammar level>_<engine label>``, e.g.
    ``results_uniform_2_AmEx``.
    """

    NUM_SEEDS = 5
    # Summary keys that are collected.  The last key is reproduced verbatim,
    # including the trailing "]" -- presumably a typo; confirm against the
    # metric name that is actually logged before changing it.
    TARGETS = (
        'num_states_to_0_999',
        'num simulation to 0_999',
        '_runtime',
        'num_productions 0_999]',
    )
    GRAMMAR_LEVELS = ('1', '2')
    PRIORS = ('grammar', 'uniform')
    # Maps the ``MCTS_engine`` config value to the label used in table names.
    ENGINE_LABELS = {'Normal': 'Normal', 'Endgame': 'AmEx'}

    def __init__(self):
        """Create one empty (all-NaN) table per target and approach."""
        columns = [f"nguyen_{i}" for i in range(1, 13)]
        seeds = list(range(self.NUM_SEEDS))
        self.result_dict = {}
        for target in self.TARGETS:
            tables = {}
            # Nesting order keeps the insertion order of the tables:
            # grammar/uniform Normal, grammar/uniform AmEx, level 1 before 2.
            for level in self.GRAMMAR_LEVELS:
                for engine_label in self.ENGINE_LABELS.values():
                    for prior in self.PRIORS:
                        name = f'results_{prior}_{level}_{engine_label}'
                        tables[name] = pd.DataFrame(index=seeds, columns=columns)
            self.result_dict[target] = tables

    @classmethod
    def _table_name(cls, grammar_search, engine, prior):
        """Return the table name for a run configuration, or None if it matches no table."""
        # Level '1' takes precedence over '2' when both characters occur.
        level = next((lvl for lvl in cls.GRAMMAR_LEVELS if lvl in grammar_search), None)
        engine_label = cls.ENGINE_LABELS.get(engine)
        if level is None or engine_label is None or prior not in cls.PRIORS:
            return None
        return f'results_{prior}_{level}_{engine_label}'

    @staticmethod
    def _positive_or_nan(raw):
        """Return ``raw`` if it is a value greater than zero, otherwise NaN.

        Values that cannot be compared with a number (``None``, strings, ...)
        are treated as missing instead of aborting the aggregation.
        """
        try:
            return raw if raw > 0 else np.nan
        except TypeError:
            return np.nan

    def fill_results(self, config, summary):
        """Store the metrics of one run in the table matching its configuration.

        Args:
            config: Mapping with the keys ``seed``, ``data_path``,
                ``grammar_search``, ``MCTS_engine`` and ``prior_source``.
                The dataset name is the second ``/``-separated component of
                ``data_path``.
            summary: Mapping from metric name to the value reported by the run.

        For every target the value is written to ``.loc[seed, dataset]`` of the
        matching table.  Missing, non-positive or non-numeric values are stored
        as NaN.  Runs whose configuration matches no table are ignored.
        """
        seed = config['seed']
        grammar_search = config['grammar_search']
        engine = config['MCTS_engine']
        prior = config['prior_source']
        dataset = config['data_path'].split('/')[1]

        table_name = self._table_name(grammar_search, engine, prior)
        if table_name is None:
            return

        for target in self.TARGETS:
            if target in summary:
                value = self._positive_or_nan(summary[target])
            else:
                value = np.nan
            self.result_dict[target][table_name].loc[seed, dataset] = value


# Fetch every run of the project from Weights & Biases and aggregate it.
api = wandb.Api()
entity = "wwjbrugger"
project = "Test_NGSR_10_05_Nguyen_with_3_constant"
runs = api.runs(f"{entity}/{project}")

# These lists are initialised here but not filled by this cell.
summary_list = []
config_list = []
name_list = []
results_obj = Results()

for run in runs:
    # ``run.summary`` holds the output metrics of the run; per the original
    # note, going through ``._json_dict`` omits large files.
    summary_dict = run.summary._json_dict
    # Drop config entries whose name starts with an underscore.
    config_dict = {
        key: value
        for key, value in run.config.items()
        if not key.startswith("_")
    }
    results_obj.fill_results(config_dict, summary_dict)

print('end')


In [None]:
# Results are only recorded while the current seed is at most this value.
_DSO_MAX_SEED = 5
# A simulation count equal to this value is stored as NaN -- presumably the
# full simulation budget, i.e. the run never reached the target; confirm.
_DSO_SIMULATION_CAP = 2_000_000


def _collect_dso_values(path_to_log, dso_dict, marker, token_index, convert=None):
    """Scan one DSO log and store one integer per seed in ``dso_dict``.

    Args:
        path_to_log: Path of the log file to read.
        dso_dict: Dict that is updated in place.  ``dso_dict[dataset]`` is
            (re)set to an empty dict when the dataset line is found and then
            filled as ``dso_dict[dataset][seed] = value``.
        marker: Substring identifying the lines that carry the value.
        token_index: Index of the value in the whitespace-split marker line.
        convert: Optional callable applied to the parsed integer before storing.

    Raises:
        ValueError: If a value has to be stored before any line containing
            ``'Dataset'`` was seen, or if the value token is not an integer.
    """
    dataset_name = None
    # Stays the integer 0 until a "Starting seed" line is seen; afterwards it
    # is the seed token exactly as written in the log (a string).
    current_seed = 0
    with open(path_to_log) as fp:
        for line in fp:
            # Only the first line mentioning 'Dataset' names the dataset,
            # e.g. "Nguyen-1" becomes "nguyen_1".
            if dataset_name is None and 'Dataset' in line:
                dataset_name = line.split()[2].lower().replace('-', '_')
                dso_dict[dataset_name] = {}
            if 'Starting seed        : ' in line:
                # we search for the 1 in "Starting seed        : 1"
                current_seed = line.split()[3]
            if marker in line:
                value = int(line.split()[token_index])
                if int(current_seed) <= _DSO_MAX_SEED:
                    if dataset_name is None:
                        raise ValueError(
                            f"{path_to_log}: found {marker!r} before any 'Dataset' line"
                        )
                    if convert is not None:
                        value = convert(value)
                    dso_dict[dataset_name][current_seed] = value


def _cap_to_nan(num_simulations):
    """Map the simulation cap to NaN and leave every other count unchanged."""
    return np.nan if num_simulations == _DSO_SIMULATION_CAP else num_simulations


def fill_dso_dict_simulations(path_to_log, dso_dict):
    """Record the number of simulations per seed from a DSO log.

    The count is the 89000 in
    "Invalid expressions: 42183 of 89000 (47.4%).".  A count of 2_000_000 is
    stored as NaN.  ``dso_dict`` is updated in place as
    ``dso_dict[dataset][seed] = count``; nothing is returned.
    """
    _collect_dso_values(
        path_to_log,
        dso_dict,
        marker='Invalid expressions: ',
        token_index=4,
        convert=_cap_to_nan,
    )


def fill_dso_time(path_to_log, dso_dict):
    """Record the run time per seed from a DSO log.

    The time is the 8th whitespace-separated token of lines containing
    "INFO: Completed run 1 of 1 in" and must be an integer.  ``dso_dict`` is
    updated in place as ``dso_dict[dataset][seed] = time``; nothing is returned.
    """
    _collect_dso_values(
        path_to_log,
        dso_dict,
        marker='INFO: Completed run 1 of 1 in',
        token_index=7,
    )
# One DSO log file per benchmark, output_nguyen_1.txt ... output_nguyen_12.txt.
# NOTE(review): the directory is an absolute, machine-specific path.
paths_to_log = [
    f'/home/jbrugger/PycharmProjects/deep-symbolic-optimization/logs/output_nguyen_{benchmark}.txt'
    for benchmark in range(1, 13)
]

# Number of simulations per dataset and seed, stored as the 'DSO' approach.
dso_dict_sim = {}
for path in paths_to_log:
    fill_dso_dict_simulations(path, dso_dict_sim)
results_obj.result_dict['num simulation to 0_999']['DSO'] = pd.DataFrame(dso_dict_sim)

# Run time per dataset and seed, stored as the 'DSO' approach.
dso_dict_time = {}
for path in paths_to_log:
    fill_dso_time(path, dso_dict_time)
results_obj.result_dict['_runtime']['DSO'] = pd.DataFrame(dso_dict_time)

In [None]:
# Not read by any code in this cell; kept so the name stays defined.
t = []


def fill_final_result(target, approach, final_result):
    """Write the aggregated value of one approach/target into ``final_result``.

    Args:
        target: Metric name, a key of ``results_obj.result_dict``.
        approach: Table name within that metric; also the row label in
            ``final_result``.
        final_result: DataFrame that is updated in place.

    If ``approach`` has no table for ``target`` nothing is written.  Otherwise
    ``final_result.loc[approach, target]`` is set to the NaN-ignoring mean over
    all cells of the table, rounded to zero decimals, and
    ``final_result.loc[approach, 'unsuccessful_fits']`` to the number of NaN
    cells in that table.
    """
    tables = results_obj.result_dict[target]
    if approach not in tables:
        return

    df = tables[approach]
    all_values = np.reshape(df.to_numpy(), -1)
    # Single .loc[row, column] assignment: the chained form
    # ``.loc[row][column] = ...`` writes into a temporary Series and is lost
    # under pandas Copy-on-Write.
    final_result.loc[approach, target] = round(np.nanmean(all_values), 0)
    final_result.loc[approach, 'unsuccessful_fits'] = df.isna().sum().sum()


# Summary table: one row per approach, one column per aggregated quantity.
final_result = pd.DataFrame(
    index=[
        'results_uniform_1_Normal',
        'results_uniform_1_AmEx',
        'results_uniform_2_Normal',
        'results_uniform_2_AmEx',
        'DSO',
    ],
    columns=[
        'num simulation to 0_999',
        'num_states_to_0_999',
        '_runtime',
        'unsuccessful_fits',
    ],
)

# Fill every (approach, target) cell; combinations without data stay NaN.
for approach in final_result.index.tolist():
    for target in ('num_states_to_0_999', '_runtime', 'num simulation to 0_999'):
        fill_final_result(target=target, approach=approach, final_result=final_result)

final_result

In [None]:
# Render first, then open the file: opening with 'w' truncates it, so a
# failure inside to_latex() would otherwise leave an empty .tex file behind.
latex_table = final_result.to_latex()
with open('table_Amex_vs_Classic.tex', 'w') as tex_file:
    tex_file.write(latex_table)

### Average over seeds

In [None]:
from IPython.display import display, HTML

# Show up to 500 rows and 500 columns when a DataFrame is displayed.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
)


def highlight_min(s):
    """Return one CSS string per element of ``s``, marking the minimum yellow.

    Elements equal to ``s.min()`` get ``'background-color: yellow'``; all
    other elements get an empty string.
    """
    smallest = s.min()
    styles = []
    for matches_min in (s == smallest):
        styles.append('background-color: yellow' if matches_min else '')
    return styles


# For each metric, show a table with one row per approach and one column per
# dataset, holding the column-wise mean of the approach's per-seed table.
for target in ('num simulation to 0_999', 'num_states_to_0_999', '_runtime'):
    result_table = pd.DataFrame(columns=[f"nguyen_{i}" for i in range(1, 13)])
    for approach, frame in results_obj.result_dict[target].items():
        result_table.loc[approach] = frame.mean(axis=0)

    print(target)
    # NOTE(review): styled_df is built but not displayed; the table shown
    # below uses the built-in Styler.highlight_min instead.
    styled_df = result_table.style.apply(highlight_min)
    display(result_table.style.highlight_min(color='lightgreen', axis=0))

print('end')
