In [48]:
# Select matplotlib's interactive "notebook" backend for the figures in this session.
%matplotlib notebook

In [109]:
import matplotlib
# Request the non-interactive 'Agg' backend.
# NOTE(review): the output recorded below this cell reports that the backend had
# already been chosen when this ran, so the request appears to have had no
# effect in that session -- confirm which backend is actually intended.
matplotlib.use('Agg')

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [154]:
import os
import pickle as pkl
import numpy as np
from matplotlib import pyplot as plt

In [155]:
# Report proportions to evaluate: start at 0.001 and double five times
# (scaling by a power of two is exact, so these equal the decimal literals).
qs = [0.001 * 2 ** k for k in range(6)]
# Ids of the cascades whose results are aggregated.
cascade_ids = [*range(5)]
# Method names; each is also used as a sub-directory of a cascade's result directory.
methods = ['no-order', 'tbfs', 'closure']

In [156]:
# Path template for one cascade's result directory; the '{}' placeholder is
# filled with a cascade id via str.format().
result_dir = 'outputs/real_cascade_experiment/cascade_{}/'

In [168]:
# Build one entry in `rows` per report proportion q. Each entry maps a method
# name to a dict of count-weighted sums over all cascades plus four derived
# metrics ('tree-size', 'precision', 'recall', 'order-accuracy').
#
# Each pickle is indexed as r[column][statistic]; only the 'count' and 'mean'
# statistics are read.
# NOTE(review): this looks like a pandas describe()-style table -- confirm.
# NOTE: pickle can execute arbitrary code on load; only read trusted result files.
rows = []
for q in qs:
    row = {}
    for method in methods:
        totals = {}
        for i in cascade_ids:
            # gather count statistics across all cascades
            path = os.path.join(result_dir.format(i), method, "{}.pkl".format(q))
            try:
                with open(path, 'rb') as f:
                    r = pkl.load(f)
            except FileNotFoundError:
                print(path, ": not found")
                # Skip this cascade; falling through would reuse the previous
                # cascade's table (or hit an undefined name on the first one).
                continue

            # initialize the accumulators from the first table that loads
            if not totals:
                for key in r.columns:
                    totals[key] = 0
                totals['count'] = 0

            count = r['n.correct_nodes']['count']
            # mean * count recovers the per-cascade sum, so the totals can be
            # pooled across cascades with different counts.
            for key in r.columns:
                totals[key] += r[key]['mean'] * count
            totals['count'] += count

        if totals:
            totals['tree-size'] = totals['n.pred_edges'] / totals['count']
            totals['precision'] = totals['n.correct_nodes'] / totals['n.pred_nodes']
            totals['recall'] = totals['n.correct_nodes'] / totals['n.true_nodes']
            totals['order-accuracy'] = totals['n.correct_edges'] / totals['n.pred_edges']
        else:
            # No result file was found for this (q, method): record NaN metrics
            # instead of failing on the missing accumulator keys.
            print("no results for method={} q={}; metrics set to NaN".format(method, q))
            nan = float('nan')
            totals = {'count': 0, 'tree-size': nan, 'precision': nan,
                      'recall': nan, 'order-accuracy': nan}
        row[method] = totals
    rows.append(row)


In [169]:
# Names of the derived metrics to plot; each must be a key of the per-method
# dicts stored in `rows`.
measures = ['tree-size', 'precision', 'recall', 'order-accuracy']

In [170]:
# Pack the derived metrics into a dense array indexed as
# data[measure, q, method] (measure x qs x methods).
# The first dimension follows len(measures) so the array cannot drift out of
# sync with the list of measures.
data = np.zeros((len(measures), len(qs), len(methods)))
for i, (_, row) in enumerate(zip(qs, rows)):
    for j, method in enumerate(methods):
        for k, measure in enumerate(measures):
            data[k, i, j] = row[method][measure]


In [171]:
# Convert to an array so the proportions can be divided element-wise.
# Note: this rebinds `qs` from a list to an ndarray for all later cells.
qs = np.array(qs)
# x position of each proportion: log2 of its ratio to the first (smallest) one,
# so the first value sits at 0.
x = np.log2(qs / qs[0])

In [172]:
from plot_utils import make_line_cycle
# Iterator of line styles; its items are consumed with next() and passed as the
# format argument to ax.plot in the plotting cell.
# NOTE(review): make_line_cycle is project-local -- presumably it cycles
# indefinitely over matplotlib format strings; confirm in plot_utils.
line_cycle = make_line_cycle()

In [190]:
# Draw one small figure per measure (metric value against the log2-scaled
# proportion of reports) and save each one as figs/digg/<measure>.pdf.
# `lines` keeps one line handle per method, taken from the first figure.
os.makedirs('figs/digg', exist_ok=True)

# Fix one style per method up front so that a method is drawn the same way in
# every figure and the handles kept in `lines` are representative of all of them.
method_styles = [next(line_cycle) for _ in methods]


def _pow2_label(v, pos=None):
    """Format tick position `v` as the LaTeX power of two 2^{v}."""
    # Doubled braces emit literal LaTeX braces so that multi-character
    # exponents (e.g. 10 or -1) are superscripted as a whole.
    return "$2^{{{}}}$".format(int(round(v)))


lines = []
for k, measure in enumerate(measures):
    fig = plt.figure(figsize=(3, 2.25))
    ax = fig.add_subplot(111)
    for j, style in enumerate(method_styles):
        l, = ax.plot(x, data[k, :, j], style, markersize=7.5, alpha=0.75)
        if len(lines) < len(methods):
            lines.append(l)
    ax.set_title(measure, fontsize=14)
    ax.set_xlabel('prop. of reports', fontsize=14)
    max_ticks = 3
    ax.yaxis.set_major_locator(plt.MaxNLocator(max_ticks))
    # Integer-only x ticks: each tick is an exponent of two, and a formatter
    # (rather than fixed label strings) keeps labels in sync with the locator.
    ax.xaxis.set_major_locator(plt.MaxNLocator(max_ticks, integer=True))
    ax.xaxis.set_major_formatter(plt.FuncFormatter(_pow2_label))
    if measure in ['precision', 'order-accuracy']:
        ax.set_ylim(0.5, 1.0)
    # Annotate the base proportion that the 2^k tick labels multiply.
    ax.text(0, 0.1, r"${}\times$ ".format(float(qs[0])), transform=fig.transFigure)
    fig.tight_layout()
    fig.savefig('figs/digg/{}.pdf'.format(measure))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [186]:
# Render the legend on its own in a wide, short figure and save it as a PDF.
# No axes are needed: the legend is attached to the figure itself.
figlegend = plt.figure(figsize=(2 * len(methods), 0.5))

# Display names for the method ids; ids without an entry are shown unchanged.
label_mapping = {'closure': 'closure', 'tbfs': 'delay-bfs', 'no-order': 'baseline'}
legend_labels = [label_mapping.get(m, m) for m in methods]
# `lines` holds one handle per method, in the same order as `methods`.
figlegend.legend(lines, legend_labels, loc='center', ncol=len(methods))
figlegend.show()
os.makedirs('figs/digg', exist_ok=True)
figlegend.savefig('figs/digg/legend.pdf')


<IPython.core.display.Javascript object>