In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append('../../src/')
sys.path.append('../../src/')
import axiomatic.explanations.pairsampling as ps

In [None]:
import itertools

def mkpairs(pair_criterion, indexes=np.arange(1000)):
    """Enumerate the index pairs accepted by ``pair_criterion``.

    Every 2-combination ``(i1, i2)`` of ``indexes`` is visited in the order
    produced by ``itertools.combinations``. A pair is kept when ``i1 < i2``
    and ``pair_criterion(i1, i2)`` returns a truthy value.

    Parameters
    ----------
    pair_criterion : callable
        Called as ``pair_criterion(i1, i2)``; a truthy result keeps the pair.
    indexes : iterable, optional
        Values to pair up. Defaults to ``np.arange(1000)``; the default array
        is created once at definition time and is only read here.

    Returns
    -------
    pandas.DataFrame
        One row per accepted pair with columns ``lo`` (first element),
        ``hi`` (second element) and ``dif`` (``hi - lo``). If no pair is
        accepted, an empty frame with these three columns is returned.
    """
    records = [
        {'lo': i1, 'hi': i2}
        for i1, i2 in itertools.combinations(indexes, 2)
        if i1 < i2 and pair_criterion(i1, i2)
    ]
    # Naming the columns explicitly keeps ``lo``/``hi`` present even when
    # ``records`` is empty; without it the subtraction below would fail on
    # a column-less frame.
    df = pd.DataFrame(records, columns=['lo', 'hi'])
    df['dif'] = df['hi'] - df['lo']
    return df
    

def viz_pairs(pair_criterion, indexes=np.arange(1000), nb=100):
    """Draw three histograms describing the pairs accepted by ``pair_criterion``.

    The pairs are built with ``mkpairs(pair_criterion, indexes)``. The number
    of pairs is printed, then the ``lo``, ``hi`` and ``dif`` columns are each
    plotted as a histogram with ``nb`` bins, side by side in one figure.

    Parameters
    ----------
    pair_criterion : callable
        Forwarded to ``mkpairs``.
    indexes : iterable, optional
        Forwarded to ``mkpairs``; defaults to ``np.arange(1000)``.
    nb : int, optional
        Number of histogram bins used for all three panels.

    Returns
    -------
    None
    """
    pairs_df = mkpairs(pair_criterion, indexes)
    print(len(pairs_df), 'samples.')
    fig, axes = plt.subplots(ncols=3)
    # One (column, title) entry per panel, left to right.
    panels = (
        ('lo', 'Lower Ranks'),
        ('hi', 'Upper Ranks'),
        ('dif', 'Rank distances'),
    )
    for axis, (column, heading) in zip(axes, panels):
        pairs_df[[column]].plot.hist(ax=axis, title=heading, bins=nb, legend=False)
    fig.set_size_inches((16, 4))
    fig.tight_layout()
    fig.set_facecolor('white')


In [None]:
# Visualise the pairs selected by the `fixed_ranges` criterion from the
# pairsampling module, using viz_pairs' defaults (ranks 0..999, 100 bins).
viz_pairs(ps.fixed_ranges)

## Examples

In [None]:
def mk_random_sampler(k, n, seed=1):
    """Build a pair criterion that keeps about ``n`` random pairs of ``k`` items.

    Parameters
    ----------
    k : int
        Number of items the pairs are drawn from.
    n : int or float
        Target number of pairs to keep.
    seed : int, optional
        Seed for the private ``numpy.random.RandomState`` used by the sampler.

    Returns
    -------
    callable
        A function ``criterion(a, b)`` that ignores its arguments. If ``n``
        is at least the number of possible pairs (or there are no pairs), it
        always returns ``True``. Otherwise each call draws one uniform number
        from the seeded generator and accepts with probability ``n / npairs``.
    """
    # Number of unordered pairs of k distinct items: C(k, 2) = k * (k - 1) / 2.
    npairs = k * (k - 1) / 2
    # ``npairs <= 0`` (k < 2) is handled here to avoid dividing by zero below.
    if npairs <= 0 or n >= npairs:
        return lambda a, b: True
    prob = n / npairs
    rnd = np.random.RandomState(seed=seed)
    return lambda a, b: rnd.rand() <= prob

# Keep roughly 50000 randomly chosen pairs out of the pairs among ranks
# 0..999 and plot their distributions with 1113 histogram bins.
viz_pairs(mk_random_sampler(1000, 50000), np.arange(1000), nb=1113)

In [None]:
def mk_topk_random_sampler(k, maxk, nrand, seed=1):
    """Build a pair criterion combining a top-``k`` rule with random sampling.

    Parameters
    ----------
    k : int
        Threshold; a pair whose two indices are both ``<= k`` is always kept.
    maxk : int
        Passed as the item count to ``mk_random_sampler``.
    nrand : int or float
        Passed as the target number of random pairs to ``mk_random_sampler``.
    seed : int, optional
        Seed forwarded to ``mk_random_sampler``.

    Returns
    -------
    callable
        A function ``criterion(i1, i2)``. The random sampler is consulted
        only when the pair is not already accepted by the top-``k`` rule.
    """
    random_criterion = mk_random_sampler(maxk, nrand, seed=seed)

    def criterion(i1, i2):
        both_in_top = i1 <= k and i2 <= k
        # ``or`` short-circuits, so no random number is drawn for top-k pairs.
        return both_in_top or random_criterion(i1, i2)

    return criterion

# Number of ranks used by this and the following cells.
rlen=1000
# 550000 is more than the number of distinct pairs among 1000 ranks, so the
# random part of the sampler accepts every pair; plotted with 1037 bins.
viz_pairs(mk_topk_random_sampler(20, rlen, 550000, seed=1), np.arange(rlen), nb=1037)

In [None]:
# All pairs whose two ranks are both <= 10, plus a random sample of roughly
# 200 pairs over all `rlen` ranks; plotted with 137 bins.
viz_pairs(mk_topk_random_sampler(10, rlen, 200, seed=1), np.arange(rlen), nb=137)

In [None]:
# Deterministic criterion: keep pairs whose lower rank is below 10 and whose
# upper rank is below 40; plotted with 137 bins.
viz_pairs(lambda a, b: a < b and a < 10 and b < 40, np.arange(rlen), nb=137)

In [None]:
# Scratch arithmetic (evaluates to 210).
# NOTE(review): the meaning of the three factors is not documented — confirm.
21 * 5 * 2

In [None]:
# Back-of-the-envelope estimate rendered as a string in hours.
# NOTE(review): the factors 163781, 266 and 2, and the scaling
# (2 / 60 / 60) / 1000, are unexplained in this notebook — confirm.
f'{(163781 * 266 * 2) * (2 / 60 / 60) / 1000:.2f} hours'