In [1]:
import pandas as pd

In [21]:
def print_aggregated_result(aggregated_results, highlighter=None):
    """Print one LaTeX table row per row of an aggregated results frame.

    The four error columns are rendered as percentages with two decimals
    (followed by an escaped percent sign) and ``runtime`` as whole seconds
    with an ``s`` suffix. Each printed row starts with ``& ``, lists the
    index value(s) followed by the formatted columns separated by `` & ``,
    and ends with a LaTeX line break.

    Parameters
    ----------
    aggregated_results : pandas.DataFrame
        Must contain the columns ``error_nq``, ``error_topk``,
        ``error_infci``, ``error_rnkci`` and ``runtime``. Its index is turned
        into the leading column(s) of every printed row. The frame itself is
        left unmodified.
    highlighter : dict, optional
        Mapping with keys ``'attr'`` (a column name after the index has been
        reset) and ``'val'``. Rows whose ``attr`` cell equals ``val`` are
        printed with every cell wrapped in a LaTeX bold command. Note that
        the error and runtime cells are already formatted strings when the
        comparison is made.

    Returns
    -------
    None
        The rows are written to stdout.
    """
    error_cols = ['error_nq', 'error_topk', 'error_infci', 'error_rnkci']

    # Work on a private copy: writing the formatted strings back into the
    # caller's frame would replace its numeric columns and make a second
    # call on the same frame fail.
    table = aggregated_results[error_cols + ['runtime']].copy()
    for col in error_cols:
        table[col] = table[col].apply(lambda value: f'{value * 100:.2f}\\%')
    table['runtime'] = table['runtime'].apply(lambda value: f'{value:.0f}s')

    # Move the group key(s) out of the index so they lead each row.
    table = table.reset_index()

    for _, row in table.iterrows():
        cells = row.tolist()
        if highlighter and row[highlighter['attr']] == highlighter['val']:
            rendered = [f'\\textbf{{{x}}}' for x in cells]
        else:
            rendered = [f'{x}' for x in cells]
        print('& ' + ' & '.join(rendered) + ' \\\\')
            
def quick_check(results, param):
    """Print a LaTeX table fragment of per-group medians for one parameter.

    Emits a midrule line, a multirow label built from ``param``, one row per
    group of ``results.groupby(param)`` (via ``print_aggregated_result``) and
    a trailing blank line.

    Parameters
    ----------
    results : pandas.DataFrame
        Raw results; must contain the column(s) named by ``param`` as well as
        the metric columns ``print_aggregated_result`` expects.
    param : str or list of str
        Column name(s) to group by. The value is interpolated as-is into the
        multirow label, so a list appears as its Python repr.
    """
    # Backslashes are doubled: '\m' is not a valid escape sequence and
    # triggers a warning on recent Python versions. The printed bytes are
    # unchanged.
    print('\t\\midrule')
    # NOTE(review): the row span is fixed at 3 regardless of how many groups
    # are printed below -- confirm this is intended for parameters that do
    # not have exactly three values.
    print(f'\t\\multirow{{3}}{{*}}{{{param}}}')
    # numeric_only=True keeps non-numeric columns out of the median; without
    # it, pandas >= 2.0 raises TypeError on string columns.
    print_aggregated_result(results.groupby(param).median(numeric_only=True))
    print()
    
def quick_check_full(results):
    """Run ``quick_check`` on ``results`` once per control parameter.

    The parameters are visited in a fixed order, so the printed table
    fragments always appear in the same sequence.
    """
    control_params = (
        'agg',
        'rho_query',
        'rho_topk',
        'rho_influ',
        'rho_rank',
        'gamma',
        'k',
        'split_factor',
        'predicate_strategy',
        'scale',
    )
    for name in control_params:
        quick_check(results, name)

In [20]:
# Load 'results_default.csv' and print the per-group median metrics,
# grouped by the 'predicate_strategy' column, as LaTeX table rows.
results = pd.read_csv('results_default.csv')
quick_check(results, 'predicate_strategy')

	\midrule
	\multirow{3}{*}{predicate_strategy}
& 1-way marginal & 0.11\% & 0.00\% & 0.38\% & 1.71\% & 35s \\



In [22]:
# Print one table fragment per control parameter from 'results_controls.csv'.
results = pd.read_csv('results_controls.csv')
quick_check_full(results)
# Then print the 'predicate_strategy' fragment from 'results_predicate.csv'.
# `results` is rebound here, so after this cell it refers to the second file.
results = pd.read_csv('results_predicate.csv')
quick_check(results, 'predicate_strategy')

	\midrule
	\multirow{3}{*}{agg}
& AVG & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& CNT & 10.72\% & 0.00\% & 0.09\% & 3.54\% & 33s \\
& SUM & 13.91\% & 0.00\% & 0.13\% & 1.71\% & 33s \\

	\midrule
	\multirow{3}{*}{rho_query}
& 0.01 & 0.35\% & 0.00\% & 0.38\% & 1.71\% & 34s \\
& 0.1 & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& 1.0 & 0.04\% & 0.00\% & 0.39\% & 1.71\% & 34s \\

	\midrule
	\multirow{3}{*}{rho_topk}
& 0.1 & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& 0.5 & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& 2.0 & 0.11\% & 0.00\% & 0.38\% & 1.71\% & 34s \\

	\midrule
	\multirow{3}{*}{rho_influ}
& 0.1 & 0.11\% & 0.00\% & 0.85\% & 1.71\% & 34s \\
& 0.5 & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& 2.0 & 0.11\% & 0.00\% & 0.20\% & 1.71\% & 34s \\

	\midrule
	\multirow{3}{*}{rho_rank}
& 0.1 & 0.11\% & 0.00\% & 0.39\% & 3.66\% & 34s \\
& 1.0 & 0.11\% & 0.00\% & 0.39\% & 1.71\% & 34s \\
& 10.0 & 0.11\% & 0.00\% & 0.38\% & 0.85\% & 34s \\

	\midrule
	\multirow{3}{*}{gamma}
& 0.9 & 0.

In [19]:
# Load 'results_questions.csv' and group by the three columns 'Agb', 'g1'
# and 'g2' together, so each printed row starts with those three key values.
# The list is interpolated as-is into the \multirow label by quick_check.
results = pd.read_csv('results_questions.csv')
quick_check(results, ['Agb', 'g1', 'g2'])

	\midrule
	\multirow{3}{*}{['Agb', 'g1', 'g2']}
& age & (40, 50] & (50, 60] & 1.50\% & 0.00\% & 3.11\% & 36.92\% & 15s \\
& age & (60, 70] & (50, 60] & 1.48\% & 0.00\% & 8.73\% & 54.36\% & 8s \\
& credit-amount & (2500, 5000] & (500, 2500] & 0.08\% & 0.00\% & 0.68\% & 2.22\% & 51s \\
& credit-amount & (2500, 5000] & (5000, 10000] & 0.09\% & 0.00\% & 0.80\% & 2.59\% & 26s \\
& credit-history & all credits at this bank paid back duly & no credits taken/all credits paid back duly & 0.11\% & 0.00\% & 0.46\% & 2.10\% & 53s \\
& credit-history & no credits taken/all credits paid back duly & delay in paying off in the past & 0.24\% & 0.22\% & 1.93\% & 21.60\% & 38s \\
& duration & 1 <= ... < 4 yrs & 4 <= ... < 7 yrs & 0.58\% & 2.76\% & 8.54\% & 81.10\% & 42s \\
& duration & < 1 yr & 1 <= ... < 4 yrs & 0.08\% & 0.00\% & 0.57\% & 1.83\% & 65s \\
& employment & 4 <= ... < 7 yrs & 1 <= ... < 4 yrs & 0.09\% & 0.00\% & 1.83\% & 4.69\% & 32s \\
& employment & 4 <= ... < 7 yrs & >= 7 yrs & 0.34\% & 0