In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Notebook-wide display / plotting / logging setup. Everything below is a
# side-effecting configuration call, so this cell should run before any plot.
from IPython.display import display
import plotly.offline as py
# Switch plotly's offline mode on for notebook output.
py.init_notebook_mode(connected=True)
import plotly.io as pio
# Use the 'plotly_white' template as the default for every plotly figure.
pio.templates.default = 'plotly_white'
import logging
import logzero
# Set logzero's log level to INFO.
logzero.loglevel(logging.INFO)

In [2]:
from scipy.stats import norm
import matplotlib.pyplot as plt
import numpy as np
from BITS.plot.plotly import make_hist, make_scatter, make_layout, show_plot

In [3]:
def normal_h0(L, p_error, x_min, x_max, n_points=1000):
    """Sample the H0 normal density on a regular grid.

    The H0 model is N(mean=p_error, variance=p_error * (1 - p_error) / L).

    Parameters
    ----------
    L : number
        Divisor of the variance (presumably the overlap length; the plots in
        this notebook label it "L").
    p_error : float
        Mean of the H0 distribution; also enters the variance as
        p_error * (1 - p_error).
    x_min, x_max : float
        Inclusive bounds of the evaluation grid.
    n_points : int, optional
        Number of grid points. Defaults to 1000, the value that used to be
        hard-coded. Callers that plot the result against their own
        ``np.linspace(x_min, x_max, ...)`` must use the same count.

    Returns
    -------
    list
        ``n_points`` density values evaluated at
        ``np.linspace(x_min, x_max, n_points)``.
    """
    variance = p_error * (1 - p_error) / L
    grid = np.linspace(x_min, x_max, n_points)
    # One vectorised library call replaces the per-point hand-written Gaussian.
    density = norm.pdf(grid, loc=p_error, scale=np.sqrt(variance))
    # Keep returning a plain list, as before.
    return list(density)

def normal_h1(L, p_error, s_hat, x_min, x_max, n_points=1000):
    """Sample the H1 normal density on a regular grid.

    The H1 model is
    N(mean=p_error + s_hat * (1 - p_error),
      variance=(1 - s_hat) * p_error * (1 - p_error) / L).

    Parameters
    ----------
    L : number
        Divisor of the variance (presumably the overlap length; the plots in
        this notebook label it "L").
    p_error : float
        H0 mean; the H1 mean is shifted from it by s_hat * (1 - p_error).
    s_hat : float
        Shift parameter; callers in this notebook pass d / L. The variance is
        scaled by (1 - s_hat), so s_hat must be below 1 for a valid density.
    x_min, x_max : float
        Inclusive bounds of the evaluation grid.
    n_points : int, optional
        Number of grid points. Defaults to 1000, the value that used to be
        hard-coded. Callers that plot the result against their own
        ``np.linspace(x_min, x_max, ...)`` must use the same count.

    Returns
    -------
    list
        ``n_points`` density values evaluated at
        ``np.linspace(x_min, x_max, n_points)``.
    """
    mean = p_error + s_hat * (1 - p_error)
    variance = (1 - s_hat) * p_error * (1 - p_error) / L
    grid = np.linspace(x_min, x_max, n_points)
    # One vectorised library call replaces the per-point hand-written Gaussian.
    density = norm.pdf(grid, loc=mean, scale=np.sqrt(variance))
    # Keep returning a plain list, as before.
    return list(density)

In [4]:
def calc_beta(s, L, p, z_alpha=None):
    """Type II error (beta) of the one-sided test of H0 against H1.

    Model (normal approximation):
      H0: N(p, p * (1 - p) / L)
      H1: N(p + s * (1 - p), (1 - s) * p * (1 - p) / L)

    H0 is rejected when the observation exceeds mu0 + z_alpha * sd0, so
    beta = Phi(sd0 / sd1 * z_alpha - (mu1 - mu0) / sd1).

    Parameters
    ----------
    s : float or numpy array
        Shift parameter of H1 (array input is evaluated element-wise).
        Must be below 1, otherwise the H1 variance is zero.
    L : number
        Divisor of both variances.
    p : float
        H0 mean.
    z_alpha : float, optional
        Critical value of the standard normal for the chosen significance
        level. When omitted, a module/notebook-level ``z_alpha`` is used if
        one exists (the previous behaviour of this function); otherwise it
        defaults to 1.64, the value used throughout this notebook.

    Returns
    -------
    float or numpy array
        beta for each value of ``s``.
    """
    if z_alpha is None:
        # Backward compatibility: older cells set a global z_alpha before
        # calling; honour it, but no longer fail when it is missing.
        z_alpha = globals().get("z_alpha", 1.64)
    mu0 = p
    var0 = p * (1 - p) / L          # H0 variance (not a standard deviation)
    mu1 = mu0 + s * (1 - p)
    var1 = (1 - s) * var0           # H1 variance
    return norm.cdf(np.sqrt(var0 / var1) * z_alpha - (mu1 - mu0) / np.sqrt(var1))

In [44]:
# Scenario: L = 5000, p_error = 0.25, with H1 alternatives at d = 10 and d = 100.
L, p_error = 5000, 0.25
x_min, x_max = 0.22, 0.3
d1 = 10
d2 = 100
s1 = d1 / L
s2 = d2 / L

# Density curves on [x_min, x_max]: one for H0 and one per H1 alternative.
y0 = normal_h0(L, p_error, x_min, x_max)
y1_1, y1_2 = (normal_h1(L, p_error, s_value, x_min, x_max) for s_value in (s1, s2))

In [48]:
# Shared x grid: 1000 points, matching the sampling used by normal_h0 / normal_h1.
x_grid = np.linspace(x_min, x_max, 1000)
# (legend label, density values) for each curve, in drawing order.
curves = [("H0", y0), (f"H1 (d={d1})", y1_1), (f"H1 (d={d2})", y1_2)]
show_plot([make_scatter(x_grid, values, mode="lines", line_width=2, name=label)
           for label, values in curves],
          make_layout(title=f"L={L}, p_error={p_error}", x_title="Overlap sequence dissimilarity"))

In [10]:
# beta as a function of s for L = 5000, p = 0.25; the figure is also saved as SVG.
z_alpha = 1.64   # one-sided test with alpha = 5%
L, p = 5000, 0.25
s = np.arange(0, 0.04 + 0.001, 0.001)
beta = calc_beta(s=s, L=L, p=p)

# beta goes on the x axis and s on the y axis.
beta_trace = make_scatter(beta, s, mode="lines", show_legend=False)
beta_layout = make_layout(width=500, height=400, x_range=(-0.1,1), x_title="beta", y_title="s")
show_plot([beta_trace], beta_layout, out_fname="beta_s_plot_l5000_p0.25.svg")

In [11]:
# beta as a function of s for L = 360, p = 0.25; the figure is also saved as SVG.
z_alpha = 1.64   # one-sided test with alpha = 5%
L, p = 360, 0.25
s = np.arange(0, 0.15 + 0.001, 0.001)
beta = calc_beta(s=s, L=L, p=p)

# beta goes on the x axis and s on the y axis.
beta_trace = make_scatter(beta, s, mode="lines", show_legend=False)
beta_layout = make_layout(width=500, height=400, x_range=(-0.1,1), x_title="beta", y_title="s")
show_plot([beta_trace], beta_layout, out_fname="beta_s_plot_l360_p0.25.svg")

In [18]:
# beta as a function of s for L = 5000, p = 0.01; the figure is also saved as SVG.
z_alpha = 1.64   # one-sided test with alpha = 5%
# NOTE(review): the stop is 0.008 + 0.001 while the step is 0.0001, so the grid
# runs to roughly 0.009 - confirm whether 0.008 + 0.0001 was intended.
s = np.arange(0, 0.008 + 0.001, 0.0001)
L = 5000
p = 0.01
beta = calc_beta(s=s, L=L, p=p)

# The file name is derived from L and p. It was previously the literal
# "beta_s_plot_l360_p0.25.svg", copied from the L = 360 cell, so running this
# cell overwrote that cell's saved figure.
show_plot([make_scatter(beta, s, mode="lines", show_legend=False)],
          make_layout(width=500, height=400, x_range=(-0.1,1), x_title="beta", y_title="s"),
          out_fname=f"beta_s_plot_l{L}_p{p}.svg")

In [19]:
# L * s for L = 5000 and s = 0.0047: since s = d / L elsewhere in this notebook,
# the product is the corresponding d.
# NOTE(review): 0.0047 is presumably an s value read off the beta curve for
# L = 5000, p = 0.01 - confirm where it comes from.
5000 * 0.0047

23.5

In [22]:
# Scenario: L = 5000, p_error = 0.01, with H1 alternatives at d = 10 and d = 100.
L, p_error = 5000, 0.01
x_min, x_max = 0., 0.04
d1 = 10
d2 = 100
s1 = d1 / L
s2 = d2 / L

# Density curves on [x_min, x_max]: one for H0 and one per H1 alternative.
y0 = normal_h0(L, p_error, x_min, x_max)
y1_1, y1_2 = (normal_h1(L, p_error, s_value, x_min, x_max) for s_value in (s1, s2))

In [23]:
# Shared x grid: 1000 points, matching the sampling used by normal_h0 / normal_h1.
x_grid = np.linspace(x_min, x_max, 1000)
# (legend label, density values) for each curve, in drawing order.
curves = [("H0", y0), (f"H1 (d={d1})", y1_1), (f"H1 (d={d2})", y1_2)]
show_plot([make_scatter(x_grid, values, mode="lines", line_width=2, name=label)
           for label, values in curves],
          make_layout(title=f"L={L}, p_error={p_error}", x_title="Overlap sequence dissimilarity"))