In [7]:
import os
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
from scipy.special import comb as binom

from lib import enm_proba_apprx
from lib import enm_proba_exact
from lib import svm_proba

In [2]:
# Seed NumPy's global RNG up front so the randomly drawn datasets are
# repeatable from a fresh kernel.
np.random.seed(8098)

# Passed unchanged as the `errnum` / `eps` arguments of every estimator call.
# NOTE(review): their exact meaning is defined inside `lib` - confirm there.
ERRNUM, EPS = 0, 1e-2

In [3]:
def frac(a, b):
    """Return ``a / b``, or ``0`` when the denominator ``b`` is falsy (e.g. zero)."""
    return a / b if b else 0

def plot(X, y, path):
    """Scatter both classes, tag every point with its index and save the figure.

    Rows of ``X`` whose label in ``y`` is 0 are drawn in red and rows whose
    label is 1 in green, using columns 0 and 1 of ``X`` as coordinates. The
    figure is written to ``path``.
    """
    figure, axes = plt.subplots()

    for label, colour in ((0, "r"), (1, "g")):
        members = y == label
        axes.scatter(X[members, 0], X[members, 1], c=colour)

    # Annotate each sample with its row index so individual points can be
    # identified in the saved image.
    for idx in range(len(y)):
        axes.annotate(str(idx), (X[idx, 0], X[idx, 1]))

    figure.savefig(path)
        
def upper(X, y, errnum, eps):
    """Return ``2 * C(n + m, 2) / C(n + m, n)`` for ``n`` ones and ``m`` zeros in ``y``.

    Only ``y`` is read; ``X``, ``errnum`` and ``eps`` are accepted so the
    function shares a call signature with the other estimators.
    NOTE(review): presumably an upper bound for the estimated probability -
    confirm against the underlying derivation.
    """
    positives = y.sum()               # number of ones
    negatives = len(y) - positives    # number of zeros
    total = positives + negatives
    pair_count = binom(total, 2)
    labelings = binom(total, positives)
    return 2 * pair_count / labelings

def lower(X, y, errnum, eps):
    """Return ``(n + m) / C(n + m, n)`` for ``n`` ones and ``m`` zeros in ``y``.

    Only ``y`` is read; ``X``, ``errnum`` and ``eps`` are accepted so the
    function shares a call signature with the other estimators.
    NOTE(review): presumably a lower bound for the estimated probability -
    confirm against the underlying derivation.
    """
    positives = y.sum()               # number of ones
    negatives = len(y) - positives    # number of zeros
    total = positives + negatives
    labelings = binom(total, positives)
    return total / labelings

def svm(X, y, errnum, eps):
    """Turn the pair returned by ``svm_proba`` into a single ratio via ``frac``.

    All four arguments are forwarded to ``svm_proba`` unchanged; ``frac``
    yields 0 when the second element of the pair is falsy.
    """
    counts = svm_proba(X, y, errnum, eps)
    return frac(*counts)

def approx(X, y, errnum, eps):
    """Turn the pair returned by ``enm_proba_apprx`` into a single ratio via ``frac``.

    All four arguments are forwarded to ``enm_proba_apprx`` unchanged; ``frac``
    yields 0 when the second element of the pair is falsy.
    """
    counts = enm_proba_apprx(X, y, errnum, eps)
    return frac(*counts)

def exact(X, y, errnum, eps):
    """Turn the pair returned by ``enm_proba_exact`` into a single ratio via ``frac``.

    Only ``X`` and ``y`` are forwarded; ``errnum`` and ``eps`` are accepted
    for signature parity with the other estimators and are ignored here.
    """
    counts = enm_proba_exact(X, y)
    return frac(*counts)

In [4]:
def timer(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and measure how long the call took.

    Returns:
        A ``(result, seconds)`` tuple: the value returned by ``func`` and the
        elapsed time of the call in fractional seconds.

    Any exception raised by ``func`` propagates to the caller.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump (NTP sync, DST), which corrupts short timings.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    return result, elapsed

In [5]:
# Inclusive bounds of the per-class sample sizes swept by the experiment loop.
N_MIN = 2
N_MAX = 15

# 2x2 identity covariance used when sampling the points of both classes.
COV = np.eye(2, dtype=int)

In [None]:
# Checkpoint file rewritten after every (N0, N1) pair, so an interrupted run
# still leaves the results gathered so far on disk.
RESULTS_PATH = 'data/data.pickle'

# bank[key] accumulates one (probability, seconds) tuple per (N0, N1) pair,
# appended in loop order (N0 outer, N1 inner).
bank = {
    "uprs": [],
    "lwrs": [],
    "svms": [],
    "aprx": [],
    "exct": []
}

# Estimator dispatch table; its keys mirror those of `bank`.
# NOTE(review): this name shadows the builtin `map` for the rest of the
# notebook. It is kept because other cells may already refer to it; consider
# renaming it (e.g. `estimators`) once all usages are known.
map = {
    "uprs": upper,
    "lwrs": lower,
    "svms": svm,
    "aprx": approx,
    "exct": exact
}


def _sample_class(size, label):
    """Draw `size` 2-D points from N([0, 0], COV) and a matching label vector."""
    points = np.random.multivariate_normal([0, 0], COV, size=size)
    labels = np.full(size, label, dtype=int)
    return points, labels


# Create the output directory up front; otherwise the first checkpoint write
# raises FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)

for N0 in range(N_MIN, N_MAX + 1):
    for N1 in range(N_MIN, N_MAX + 1):
        start = time.perf_counter()

        # Both classes are sampled from the same distribution; class 0 is
        # drawn first so the seeded random stream is consumed in a fixed order.
        X0, y0 = _sample_class(N0, 0)
        X1, y1 = _sample_class(N1, 1)

        # Cast with .astype() rather than the `dtype=` keyword of
        # vstack/concatenate, which only exists in newer NumPy releases.
        X = np.vstack([X0, X1]).astype(np.float32)
        y = np.concatenate([y0, y1]).astype(np.int32)

        for key, func in map.items():
            proba, duration = timer(func, X, y, ERRNUM, EPS)
            bank[key].append((proba, duration))

        # Elapsed time covers sampling and all estimators, not the checkpoint.
        print(N0, N1, time.perf_counter() - start)

        with open(RESULTS_PATH, 'wb') as handle:
            pickle.dump(bank, handle)

2 2 3.9422199726104736
2 3 3.9756009578704834
2 4 4.909276008605957
2 5 6.575198173522949
2 6 4.592321872711182
2 7 4.2834861278533936
2 8 4.247262001037598
2 9 5.050250053405762
2 10 5.627826929092407
2 11 4.798563003540039
2 12 4.377273797988892
2 13 4.489012956619263
2 14 4.334591865539551
2 15 4.479417085647583
3 2 3.968886137008667
3 3 4.50193190574646
3 4 5.15831184387207
3 5 5.702532052993774
3 6 6.17874002456665
3 7 7.0396857261657715
3 8 14.83764910697937
3 9 10.250500202178955
3 10 9.96498990058899
3 11 10.272759914398193
3 12 9.759577751159668
3 13 13.513146162033081
3 14 14.128578901290894
3 15 13.498486757278442
4 2 6.30771803855896
4 3 5.533865928649902
4 4 7.188174247741699
