In [6]:
from utilities import *
from plot_utilities import *

In [7]:
def compute_prior_cdf(r, eta, n_samples = 1000, tail_bound = 0.05, n_tail = 5, scale = 1):
    
    if os.path.isfile(f'CDFs/optimize_cdfs_{n_samples}.pickle'):
        with open(f'CDFs/optimize_cdfs_{n_samples}.pickle', 'rb') as handle:
            cdfs = pickle.load(handle)
        if (r, eta) in cdfs:
            return cdfs[(r, eta)]
    else:
        cdfs = dict()

    beta = (eta + 1.5)/r 
    var_prior = scale * scipy.special.gamma((eta + 1.5 + 2)/r)/scipy.special.gamma(beta)
    x_max = min(100, np.round(var_prior/tail_bound)) 
    xs = np.linspace(-x_max, x_max, n_samples-2*n_tail)
    xs = np.append(np.linspace(-(x_max+100), -(x_max+20), n_tail), xs)
    xs = np.append(xs, np.linspace(x_max + 20, x_max + 100, n_tail))
    prior_pdf = np.full(xs.shape, np.nan)

    # Loop over xs
    for j, x in enumerate(xs):

        # Define integrands
        def gauss_density(theta):
            return (1./(np.sqrt(2*np.pi)*theta)) * np.exp(-0.5*(x/theta)**2)

        def gen_gamma_density(theta):
            return (r/scipy.special.gamma(beta)) * (1/scale) * (theta/scale)**(r*beta - 1) * np.exp(-(theta/scale)**r)

        def integrand(theta):
            return gauss_density(theta) * gen_gamma_density(theta)

        # Integrate 
        prior_pdf[j] = integrate.quad(integrand, 0, np.inf)[0]

    prior_cdf = np.zeros_like(prior_pdf)
    for i in range(len(xs) - 1):
        prior_cdf[i] = np.trapz(prior_pdf[:i+1], xs[:i+1]) 
    prior_cdf = np.append(prior_cdf[:-1], 1)

    poly = interpolate.CubicSpline(x = xs, y = prior_cdf)
    
    cdfs[(r, eta)] = poly
    with open(f'CDFs/optimize_cdfs_{n_samples}.pickle', 'wb') as handle:
        pickle.dump(cdfs, handle)
    return poly


In [8]:
os.listdir('CDFs')

['cdfs_100000_0.1-2.9-0.1_0-4-0.2.pickle',
 'cdfs_100000_3.0-7-0.4_0.1-4-0.2.pickle',
 'cdfs_10000_0.1-5-0.1_0-4-0.2.pickle',
 'cdfs_1000_0.1-5-0.1_0-4-0.2.pickle',
 'cdfs_1000_5.1-15-0.1_0-4-0.2.pickle',
 'cdfs_lite.pickle',
 'optimize_cdfs_1000.pickle',
 'optimize_cdfs_10000.pickle',
 'updated_100000.zip',
 'updated_100000_backup.pickle']

In [9]:
cdfs_name = 'cdfs_100000_0.1-2.9-0.1_0-4-0.2'

with open(f'CDFs/{cdfs_name}.pickle', 'rb') as handle:
    all_cdfs = pickle.load(handle)
with open(f'CDFs/cdfs_100000_3.0-7-0.4_0.1-4-0.2.pickle', 'rb') as handle:
    other_cdfs = pickle.load(handle)
all_cdfs = all_cdfs | other_cdfs


with open(f'panoptic/obs_x_dict.pickle', 'rb') as handle:
    obs_x_dict = pickle.load(handle)
with open(f'panoptic/df_dict_{cdfs_name}.pickle', 'rb') as handle:
    df_dict = pickle.load(handle)


In [10]:
best_params_df = pd.read_csv('panoptic/CSVs/best_params_df_updated_100000.csv').set_index(['layer'])
best_params_df

Unnamed: 0_level_0,num_samples,r,eta,kstest_stat,kstest_pval
layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2.0,3180.0,0.6,3.2,0.037981,0.0002016542
3.0,12720.0,0.7,3.6,0.021389,1.73759e-05
4.0,50880.0,0.8,3.0,0.011776,1.474263e-06
5.0,203520.0,0.9,1.6,0.003275,0.0253263
6.0,814080.0,1.0,0.2,0.008551,3.880884e-52


In [17]:
from scipy import optimize

def minimize_kstest_stat(layer = None, best_params_df = None, x = [0], x0 = [0, 0]):
    if layer:
        x = np.sort(obs_x_dict[layer])
        x0 = [best_params_df.loc[layer]['r'], best_params_df.loc[layer]['eta']]
    print(x, x0)
    n = len(x)
    history = []

    def kstest_stat(params):
        r = params[0]
        eta = params[1]
        cdf = compute_prior_cdf(r, eta, 10000)
        history.append((r, eta))
        print(r, eta)
        cdfvals = cdf(x)
        dplus, dminus = np.max(np.arange(1.0, n + 1) / n - cdfvals), np.max(cdfvals - np.arange(0.0, n)/n)
        return max(dplus, dminus)
    
    print(x0, bounds=[(x0[0]-0.1, x0[0]+0.1), (x[1]-0.2, x[1]+0.2)])
    optimized = optimize.minimize(kstest_stat, x0, bounds=[(x0[0]-0.1, x0[0]+0.1), (x[1]-0.2, x[1]+0.2)], tol=1e-3)
    x_prime = optimized['x']
    msg = optimized['message']
    return x_prime, msg, history

In [18]:
params, msg, hist = minimize_kstest_stat(layer = 3, best_params_df=best_params_df)
params, msg

[-76.13432886 -63.02074639 -60.52416619 ...  65.19608846  65.26965934
  69.44498554] [0.7, 3.6]


ValueError: `x` must be strictly increasing sequence.

In [18]:


for layer in np.arange(2,9):
    
    fixed_x = np.sort(obs_x_dict[layer])
    n = len(fixed_x)
    history_dict = dict()

    def kstest_stat(params):
        r = params[0]
        eta = params[1]
        cdf = compute_prior_cdf(r, eta, 10000)
        history_dict[layer].append((r, eta))
        print(r, eta)
        cdfvals = cdf(fixed_x)
        dplus, dminus = np.max(np.arange(1.0, n + 1) / n - cdfvals), np.max(cdfvals - np.arange(0.0, n)/n)
        return max(dplus, dminus)

optimize.minimize(kstest_stat, [0.7, 3.6], bounds=[(0.5, 0.9), (3.5, 3.6)], tol=1e-5)
    


In [39]:
from scipy import optimize
optimize.minimize(kstest_stat, [0.7, 3.6], bounds=[(0.5, 0.9), (3.4, 3.8)], tol=1)['x']

0.7 3.6
0.70000001 3.6
0.7 3.60000001


array([0.7, 3.6])

In [14]:
pd.read_csv('panoptic/CSVs/best_params_df_updated_100000.csv')

Unnamed: 0,layer,num_samples,r,eta,kstest_stat,kstest_pval
0,2.0,3180.0,0.6,3.2,0.037981,0.0002016542
1,3.0,12720.0,0.7,3.6,0.021389,1.73759e-05
2,4.0,50880.0,0.8,3.0,0.011776,1.474263e-06
3,5.0,203520.0,0.9,1.6,0.003275,0.0253263
4,6.0,814080.0,1.0,0.2,0.008551,3.880884e-52
