In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import collections

import warnings
# Blanket filter: every warning raised after this point is suppressed.
warnings.filterwarnings('ignore')

In [2]:
# Dataset categories, in the order they are processed and reported.
data_cats = ['idea', 'design']

# Dataset keys belonging to each category.
data_keys = {
    'idea': ['bike', 'cheat', 'meeting', 'night', 'visitor'],
    'design': ['ai_character', 'olympic'],
}

# Every dataset key as one flat list: idea datasets first, then design datasets.
data_keys_arr = [*data_keys['idea'], *data_keys['design']]

In [3]:
# Build a per-dataset summary table (`df`) from each dataset's label.tsv:
#   num_ideas      - distinct indices appearing as winner_index or loser_index
#   num_evaluators - distinct evaluator_index values
#   num_labels     - number of rows in label.tsv
data_dir = Path('../data/')
stat_columns = ['num_ideas', 'num_evaluators', 'num_labels']

# Collect one record per dataset and build the frame in a single step. Filling a
# pre-allocated empty DataFrame cell by cell leaves every column as object dtype;
# building from records gives proper integer columns.
stats = {}
for data_cat in data_cats:
    for data_key in data_keys[data_cat]:
        data_df = pd.read_csv(data_dir / data_cat / data_key / 'label.tsv', index_col=0, sep='\t')
        # An index counts once whether it appears as a winner, a loser, or both.
        compared = set(data_df.winner_index.unique()) | set(data_df.loser_index.unique())
        stats[data_key] = {
            'num_ideas': len(compared),
            'num_evaluators': len(data_df.evaluator_index.unique()),
            'num_labels': data_df.shape[0],
        }

# Rows follow data_keys_arr; a key that was never loaded yields an all-NaN row.
df = pd.DataFrame(
    [stats.get(data_key, {}) for data_key in data_keys_arr],
    index=data_keys_arr,
    columns=stat_columns,
)

In [4]:
# Display the per-dataset summary table.
df

Unnamed: 0,num_ideas,num_evaluators,num_labels
bike,81,217,64800
cheat,80,257,63200
meeting,80,177,63200
night,80,171,63200
visitor,81,158,64800
ai_character,66,183,42928
olympic,38,64,14100


In [6]:
# Column title used in the LaTeX table for each dataset: the capitalized key,
# except that 'ai_character' is shown as 'Character'.
data_key_tex = {
    key: ('Character' if key == 'ai_character' else key.capitalize())
    for key in data_keys_arr
}

In [7]:
# Row labels used in the LaTeX table, keyed by summary-column name.
# '#' is backslash-escaped in the emitted LaTeX text.
labels_tex = {
    'num_ideas': '\\#objects',
    'num_evaluators': '\\#evaluators',
    'num_labels': '\\#labels',
}

In [10]:
# Build and print the LaTeX summary table: one column per dataset, one row per
# statistic in `df`. The group header (\multicolumn spans and \cline ranges) is
# derived from data_cats / data_keys, so it always agrees with the number of
# dataset columns instead of relying on hard-coded spans.

# Group titles for the first header row; a category without an entry falls back
# to its capitalized name.
data_cat_tex = {'idea': 'Ideas', 'design': 'Designs'}

# Dataset columns in category order, so each group title sits above its own columns.
column_keys = [data_key for data_cat in data_cats for data_key in data_keys[data_cat]]

lines = []
lines.append('\\begin{tabular}{|l||%s}' % ('c|' * len(column_keys)))
lines.append('\\hline')

# header 1: one \multicolumn per category, plus a \cline under each group
elems = ['\\multirow{2}{*}{Dataset}']
clines = []
first_col = 2  # LaTeX columns are 1-based and column 1 holds the row labels
for data_cat in data_cats:
    span = len(data_keys[data_cat])
    if span == 0:
        # An empty category has no columns to span.
        continue
    title = data_cat_tex.get(data_cat, data_cat.capitalize())
    elems.append('\\multicolumn{%d}{c|}{%s}' % (span, title))
    clines.append('\\cline{%d-%d}' % (first_col, first_col + span - 1))
    first_col += span
lines.append('&'.join(elems) + '\\\\')
lines.extend(clines)

# header 2: dataset names; the leading empty cell sits under the \multirow label
elems = [''] + [data_key_tex[data_key] for data_key in column_keys]
lines.append('&'.join(elems) + '\\\\')
lines.append('\\hline')
lines.append('\\hline')

# content: one row per statistic, values wrapped in math mode
for c in df.columns:
    elems = [labels_tex[c]]
    for data_key in column_keys:
        elems.append('$%d$' % df.loc[data_key, c])

    lines.append('&'.join(elems) + '\\\\')

# NOTE(review): no closing \hline is emitted before \end{tabular} — confirm this is intended.
lines.append('\\end{tabular}')

print('\n'.join(lines))

\begin{tabular}{|l||c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Dataset}&\multicolumn{5}{c|}{Ideas}&\multicolumn{2}{c|}{Designs}\\
\cline{2-6}
\cline{7-8}
&Bike&Cheat&Meeting&Night&Visitor&Character&Olympic\\
\hline
\hline
\#objects&$81$&$80$&$80$&$80$&$81$&$66$&$38$\\
\#evaluators&$217$&$257$&$177$&$171$&$158$&$183$&$64$\\
\#labels&$64800$&$63200$&$63200$&$63200$&$64800$&$42928$&$14100$\\
\end{tabular}
