In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import html
import os
from operator import itemgetter
from pprint import pprint

import numpy as np
from tqdm import trange

import matplotlib.pyplot as plt
from matplotlib import rc
# Global matplotlib look shared by every figure in this notebook:
# white grid lines, a pale blue axes border and 12pt tick labels.
gridc = (1., 1., 1)
plt.rcParams.update({
    'grid.color': gridc,
    'axes.edgecolor': (0.898, 0.925, 0.965, 1),
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

from database import DataBase

In [None]:
# Open the NeurIPS 2021 submissions database; every later cell queries it
# through `db.cursor`.
db = DataBase('assets/neurips2021.db')
# NOTE(review): `initialize()` lives in the project's `database` module, which
# is not visible here — presumably it prepares the connection/cursor; confirm.
db.initialize()

### key words

In [None]:
# Count how often each keyword occurs over all submissions.
_cmd = "SELECT keywords FROM submissions;"
db.cursor.execute(_cmd)
data = db.cursor.fetchall()
keywords = {}
for i in trange(len(data)):
    # Column 0 of each row is the comma-separated keyword string.
    # A NULL column is treated as "no keywords" instead of crashing on .split.
    _raw = data[i][0] or ''
    # Split on the bare comma (not ', ') so that "a,b" is not counted as one
    # keyword; whitespace around each keyword is stripped afterwards.
    for _k in _raw.split(','):
        _k = _k.lower().strip()
        if not _k:
            # Empty field, trailing comma or doubled comma: not a keyword.
            continue
        keywords[_k] = keywords.get(_k, 0) + 1
# Order by descending frequency. The ascending sort is reversed (rather than
# using reverse=True) so that the ordering among equal counts stays exactly
# as it was before.
keywords = dict(sorted(keywords.items(), key=lambda item: item[1])[::-1])

In [None]:
# Bar chart of the most frequent keywords. Relies on `keywords` already being
# ordered by descending frequency, so the first `ttl` entries are the top ones.
ttl = min(40, len(keywords))  # never request more bars than keywords exist
kws = list(keywords.keys())[:ttl]
freqs = list(keywords.values())[:ttl]


width = 0.5
fig = plt.figure(figsize=[18, 6])

ax = fig.add_subplot(1, 1, 1)
ax.set_facecolor((0.898, 0.925, 0.965, 0.5))
for _side in ('left', 'bottom', 'right', 'top'):
    ax.spines[_side].set_color('w')

# Alternate the two colours bar by bar. Built per index so the list always has
# exactly `ttl` entries, including when `ttl` is odd.
_palette = ('#789BFF', '#FE8671')
colors = [_palette[i % 2] for i in range(ttl)]

# all keywords
xs = np.arange(ttl)
ax.bar(xs, freqs, width=width, alpha=0.95,
       color=colors, capsize=4)
for i, v in zip(xs, freqs):
    # Shift the count label further left for three-digit values so it stays
    # roughly centred over its bar.
    ax.text(i - 0.5 if v >= 100 else i - 0.3, v + 4.0, str(v), color=colors[i], fontsize=12)

# Keep the established 0..215 range, but grow it when a bar (plus its count
# label) would otherwise be cut off.
plt.ylim(0, max(215, max(freqs, default=0) + 15))
plt.xticks(ticks=xs, rotation=45,
           labels=kws, ha='right')
# Colour each tick label like the bar it belongs to.
for ticklabel, tickcolor in zip(plt.gca().get_xticklabels(), colors):
    ticklabel.set_color(tickcolor)
ax.set_ylabel(r"#", fontsize=14)
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.grid()
plt.savefig('assets/keywords_bar.png', bbox_inches='tight', dpi=150)

### statistics bar

In [None]:
def _fetch_rating_avgs(decision=None):
    """Fetch `rating_avg` for all submissions, or only those of one decision.

    Args:
        decision: Text matched against `now_decision` with SQL
            `LIKE '%<decision>%'` (e.g. 'Oral'). `None` selects every
            submission.

    Returns:
        `np.array` built from the fetched rows, one row per submission.
    """
    # `decision` is only ever one of the literals used below (never external
    # input), so it is interpolated directly into the statement.
    _cmd = "SELECT rating_avg FROM submissions"
    if decision is not None:
        _cmd += f" WHERE now_decision LIKE '%{decision}%'"
    _cmd += ";"
    db.cursor.execute(_cmd)
    return np.array(db.cursor.fetchall())


def _report_ratings(label, avgs):
    """Print the number of submissions in `avgs` and their mean/max/min rating.

    Args:
        label: Category name used in the headline (e.g. 'Oral').
        avgs: Array of average ratings as returned by `_fetch_rating_avgs`.
    """
    print("> {} submissions (including CE): {}".format(label, len(avgs)))
    if len(avgs) == 0:
        # max()/min() of an empty array raise; report the empty category
        # instead of aborting the whole cell.
        print("    (no submissions)")
        return
    print(f"    Average ratings: {avgs.mean():.2f}")
    print(f"    Max ratings: {avgs.max():.2f}")
    print(f"    Min ratings: {avgs.min():.2f}")


# Average ratings overall and per current decision. These arrays are reused by
# the histogram cell below.
rating_avgs = _fetch_rating_avgs()
oral_avgs = _fetch_rating_avgs('Oral')
spotlight_avgs = _fetch_rating_avgs('Spotlight')
poster_avgs = _fetch_rating_avgs('Poster')
reject_avgs = _fetch_rating_avgs('Reject')

for _label, _avgs in (
    ("Total", rating_avgs),
    ("Oral", oral_avgs),
    ("Spotlight", spotlight_avgs),
    ("Poster", poster_avgs),
    ("Reject", reject_avgs),
):
    _report_ratings(_label, _avgs)

In [None]:
# Stacked histogram of average ratings, one colour per decision, with the
# overall count of each bin printed above the stack.
_min, _max = rating_avgs.min(), rating_avgs.max()

n_bins = 20
width = 0.16
fig = plt.figure(figsize=[16, 6])

ax = fig.add_subplot(1, 1, 1)
ax.set_facecolor((0.898, 0.925, 0.965, 0.5))
for _side in ('left', 'bottom', 'right', 'top'):
    ax.spines[_side].set_color('w')

# x position of every bar, tick and label, computed once.
# NOTE(review): these are `n_bins` evenly spaced points from _min to _max, not
# the centres of the histogram bins (np.histogram uses n_bins + 1 edges), so a
# tick value is only approximately the rating range of its bar. Kept as-is to
# leave the published figure unchanged — confirm this is intended.
xs = np.linspace(_min, _max, n_bins)

# all submissions: only the per-bin totals are drawn, as text above the bars
hist, bin_edges = np.histogram(rating_avgs, bins=n_bins, range=(_min, _max))
for i, v in zip(xs, hist):
    ax.text(i - 0.1 if v >= 100 else i - 0.05, v + 6.0, str(v), color='#1f59ff', fontsize=16)

# Stack the per-decision histograms bottom-up: reject, poster, spotlight, oral.
# `bottom` accumulates the height already drawn in each bin.
bottom = np.zeros(n_bins, dtype=hist.dtype)
for _label, _avgs, _color in (
    ('Reject', reject_avgs, '#E3E7FF'),
    ('Poster', poster_avgs, '#A0B5FF'),
    ('Spotlight', spotlight_avgs, '#7879FF'),
    ('Oral', oral_avgs, '#FF435B'),
):
    _hist, _ = np.histogram(_avgs, bins=n_bins, range=(_min, _max))
    ax.bar(xs, _hist, bottom=bottom, width=width, alpha=0.95,
           color=_color, capsize=4, label=_label)
    bottom = bottom + _hist

# Keep the established 0..550 range, but grow it when the tallest bin (plus
# its count label) would otherwise be cut off.
plt.ylim(0, max(550, int(hist.max()) + 40))
plt.xticks(ticks=xs,
           rotation=40,
           labels=[f"{d:.2f}" for d in xs])
ax.set_ylabel(r"# submissions", fontsize=14)
ax.set_xlabel("Rating", fontsize=14)
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.grid()
# Reverse the legend so its order matches the stack from top to bottom.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc=2, fontsize=14)
plt.savefig('assets/stats_bar.png')

In [None]:
# Consistency experiment (CE) statistics: submissions with two decisions show
# up as two rows; ordering by title puts the two rows of a paper side by side.
_cmd = "SELECT * FROM submissions WHERE num_decision == 2 ORDER BY title;"
db.cursor.execute(_cmd)
data = db.cursor.fetchall()
if len(data) % 2:
    # The loop below consumes rows in pairs; an odd count would otherwise only
    # surface as an IndexError on the very last row.
    raise ValueError(f"expected an even number of CE rows, got {len(data)}")
print("> Total submissions with CE: {}".format(len(data) // 2))
num_comflict = 0
num_consist = 0
ces = {}
for i in range(0, len(data), 2):
    _first, _second = data[i], data[i + 1]
    # Column 2 is the title: verify the two rows really belong to the same
    # paper before counting them.
    assert _first[2] == _second[2], f'not match for {i}'
    # Columns 6 / 7 hold the final / current decision, e.g. 'Accept (Poster)';
    # keep only the last word without parentheses ('Poster', 'Reject', ...).
    dcs = _first[6].split(' ')[-1].strip('()')
    dcs_0 = _first[7].split(' ')[-1].strip('()')
    dcs_1 = _second[7].split(' ')[-1].strip('()')
    # Key: final decision followed by the two per-row decisions in sorted
    # order, so (A, B) and (B, A) are tallied together.
    _key = dcs + ', ' + ", ".join(sorted([dcs_0, dcs_1]))
    ces[_key] = ces.get(_key, 0) + 1
    if dcs_0 != dcs_1:
        num_comflict += 1
    else:
        num_consist += 1
print(f"> Decision conflict: {num_comflict}")
print(f"> Decision consist: {num_consist}")
pprint(ces)

### write consistency experiment

In [None]:
# HTML entity for a check mark, written into the table's last column.
check = '&#10004;'
# Sort key per decision: 'a' (Oral) sorts first, 'd' (Reject) last. Used as the
# `data-sort` attribute of the decision cells in the generated tables.
maps = dict(zip(('Oral', 'Spotlight', 'Poster', 'Reject'), 'abcd'))

In [None]:
# Rows of the consistency experiment, grouped by final decision (oral,
# spotlight, poster, reject) and ordered by descending title inside a group.
# The decision is compared in lower case so that a capitalisation difference
# ('Accept (Oral)' vs 'Accept (oral)') cannot make a row miss its CASE arm.
_cmd = "SELECT * FROM submissions WHERE num_decision == 2 " \
       "ORDER BY CASE lower(final_decision) WHEN 'accept (oral)' " \
       "THEN 'a' WHEN 'accept (spotlight)' THEN 'b' WHEN 'accept (poster)' " \
       "THEN 'c' WHEN 'reject' THEN 'd' END, title DESC;"
db.cursor.execute(_cmd)
data = db.cursor.fetchall()

# read template (explicit UTF-8 so the result does not depend on the
# platform's default encoding)
with open('neurips2021_consistency_experiments_template.html', 'r', encoding='utf-8') as f:
    html_temp = f.readlines()
# the table rows go directly after the marker line
idx = html_temp.index('    <!-- start here -->\n') + 1

# build one <tr> per row
rows = []
for i in trange(len(data)):
    _data = data[i]
    # 'Accept (Poster)' -> 'Poster', 'Reject' -> 'Reject'
    final_d = _data[6].split(' ')[-1].strip('()')
    now_d = _data[7].split(' ')[-1].strip('()')
    # URL and title are free text: escape them so characters such as & < > '
    # cannot break the single-quoted href or the surrounding markup.
    # Assumes the database stores them as raw (not already HTML-escaped)
    # text — TODO confirm. Columns 8-11 are written as stored.
    _url = html.escape(str(_data[1]))
    _title = html.escape(str(_data[2]))
    rows.append(
        f"<tr><td>{i + 1}</td><td class='td-left'><a href='{_url}'> {_title}</a></td>"
        f"<td>{_data[9]:.2f}</td><td>{_data[10]:.2f}</td><td data-sort='{_data[8]}'>{_data[11]}</td>"
        f"<td class='{final_d.lower()}' data-sort='{maps[final_d]}'>{final_d}</td>"
        f"<td class='{now_d.lower()}' data-sort='{maps[now_d]}'>{now_d}</td>"
        f"<td class='th-rank'>{check}</td></tr>\n"
    )
# splice all rows in at once instead of one list.insert per row
html_temp[idx:idx] = rows

with open('neurips2021_consistency_experiments.html', 'w', encoding='utf-8') as f:
    f.write("".join(html_temp))

### write all submissions

In [None]:
# All submissions, best average rating first.
_cmd = "SELECT * FROM submissions ORDER BY rating_avg DESC;"
db.cursor.execute(_cmd)
data = db.cursor.fetchall()

# read template (explicit UTF-8 so the result does not depend on the
# platform's default encoding)
with open('neurips2021_submissions_template.html', 'r', encoding='utf-8') as f:
    html_temp = f.readlines()
# the table rows go directly after the marker line
idx = html_temp.index('    <!-- start here -->\n') + 1

# build one <tr> per row
rows = []
for i in trange(len(data)):
    _data = data[i]
    # 'Accept (Poster)' -> 'Poster', 'Reject' -> 'Reject'
    final_d = _data[6].split(' ')[-1].strip('()')
    now_d = _data[7].split(' ')[-1].strip('()')
    # URL and title are free text: escape them so characters such as & < > '
    # cannot break the single-quoted href or the surrounding markup.
    # Assumes the database stores them as raw (not already HTML-escaped)
    # text — TODO confirm. Columns 8-11 are written as stored.
    _url = html.escape(str(_data[1]))
    _title = html.escape(str(_data[2]))
    # Column 5 is the number of decisions; 2 marks a paper that took part in
    # the consistency experiment and gets the check mark.
    _mark = check if _data[5] == 2 else ''
    rows.append(
        f"<tr><td>{i + 1}</td><td class='td-left'><a href='{_url}'> {_title}</a></td>"
        f"<td>{_data[9]:.2f}</td><td>{_data[10]:.2f}</td><td data-sort='{_data[8]}'>{_data[11]}</td>"
        f"<td class='{final_d.lower()}' data-sort='{maps[final_d]}'>{final_d}</td>"
        f"<td class='{now_d.lower()}' data-sort='{maps[now_d]}'>{now_d}</td>"
        f"<td class='th-rank'>{_mark}</td></tr>\n"
    )
# splice all rows in at once instead of one list.insert per row
html_temp[idx:idx] = rows

with open('neurips2021_submissions.html', 'w', encoding='utf-8') as f:
    f.write("".join(html_temp))

In [None]:
# Close the database opened at the top of the notebook. Run this last: every
# query cell above goes through `db.cursor`.
db.close()