In [None]:
import numpy as np
from scipy.stats import kstest
import matplotlib.pyplot as plt
import scipy.stats

In [None]:
def mc_summary(sample, alpha):
    """Print a Monte Carlo estimate and its normal-approximation uncertainty.

    Parameters
    ----------
    sample : array-like of float
        One-dimensional collection of Monte Carlo terms; their mean is the
        estimate.
    alpha : float
        Significance level. The printed uncertainty is the half-width
        ``z * s / sqrt(n)`` of a two-sided ``1 - alpha`` interval, where
        ``z`` is the standard normal quantile at ``1 - alpha / 2`` and ``s``
        is the sample standard deviation (``ddof=1``).

    Returns
    -------
    None
        The two-line summary is written to stdout.
    """
    sample = np.asarray(sample, dtype=float)  # also accept plain lists/tuples
    mu = sample.mean()
    std = sample.std(ddof=1)

    z = scipy.stats.norm.ppf(1.0 - alpha / 2.0)
    uncertainty = z * std / np.sqrt(len(sample))
    print("Estimate\tUncertainty\n" + "-" * 27)
    # "\u00b1" is the plus-minus sign; the escape keeps the source ASCII-only
    # so the character cannot be mangled by a wrong file encoding again.
    print("%.4f\t\t\u00b1 %.4f" % (mu, uncertainty))

def importance_sampling(N, scale=2):
    """Return N importance-sampling terms ``psi(x) * f(x) / q(x)``.

    The points ``x`` are drawn from a Cauchy proposal ``q`` with the given
    scale, and the weights use the density of that same proposal. Taking the
    density from the notebook-level ``g`` instead would silently produce
    wrong weights whenever ``g`` describes a different distribution than the
    one the points were drawn from.

    Parameters
    ----------
    N : int
        Number of proposal draws.
    scale : float, optional
        Scale of the Cauchy proposal (default 2).

    Returns
    -------
    numpy.ndarray
        Array of length N; its mean estimates the integral of ``psi * f``.

    Notes
    -----
    ``psi`` and ``f`` are looked up in the notebook namespace at call time
    and are evaluated on the whole array of draws at once, so they must
    accept NumPy arrays.
    """
    x_tilde = scipy.stats.cauchy.rvs(scale=scale, size=N)
    proposal_pdf = scipy.stats.cauchy.pdf(x_tilde, scale=scale)
    return np.asarray(psi(x_tilde) * f(x_tilde) / proposal_pdf)

# TODO: a)

# b)

# TODO: Study the efficiency of the acceptance-rejection (A-R) method and consider alternative approaches.

In [None]:
def g(x):
    """Cauchy PDF: 1 / (pi * (1 + x^2))."""
    return 1 / (np.pi * (1 + x**2))


def f(x):
    """Target PDF: 2 / (pi * (1 + x^2)^2)."""
    return 2 / (np.pi * ((1 + x**2)**2))


def F(x):
    """Closed-form expression used as the CDF of the target ``f``."""
    return 0.5 + (1/np.pi) * (np.arctan(x) + x/(1 + x**2))


# Envelope constant for acceptance-rejection: f(x) / g(x) = 2 / (1 + x^2) <= 2.
C = 2

In [None]:
def sample_f(N):
    """Draw N values by acceptance-rejection against a standard Cauchy proposal.

    Each iteration draws one standard Cauchy candidate and one uniform
    number, and keeps the candidate when the uniform number does not exceed
    ``f(candidate) / (C * g(candidate))``. ``f``, ``g`` and ``C`` are read
    from the notebook namespace.

    Parameters
    ----------
    N : int
        Number of accepted values to collect.

    Returns
    -------
    numpy.ndarray
        The accepted candidates, in the order they were accepted.
    """
    accepted = []
    while len(accepted) < N:
        candidate = np.random.standard_cauchy(1)[0]
        uniform_draw = np.random.rand()
        acceptance_ratio = f(candidate) / (C * g(candidate))
        if uniform_draw <= acceptance_ratio:
            accepted.append(candidate)

    return np.array(accepted)

In [None]:
# Seed the global NumPy generator before the first random draw so that
# `sample`, which later cells reuse, is the same on every Restart & Run All.
np.random.seed(0)

N = 1000
sample = sample_f(N)
x = np.linspace(-25, 25, N)  # evaluation grid for the analytic curves
true_cdf = F(x)
true_pdf = f(x)
# One-sample Kolmogorov-Smirnov test of the draws against the analytic CDF.
kstest(sample, F).pvalue # High pvalue is expected

In [None]:
# Normalised histogram of the draws overlaid with the analytic target density.
plt.hist(sample, density = True, bins = 100, label = 'Empirical PDF')
plt.plot(x, true_pdf, color='red', label='True PDF')
plt.xlim(-5, 5)  # zoom in on the central region of the plotted range
plt.legend()
None  # last expression is None, so the cell does not echo the Legend object

In [None]:
# Cumulative normalised histogram of the draws overlaid with the analytic CDF.
plt.hist(sample, cumulative=True, density=True, bins=100, label='Empirical CDF')
plt.plot(x, F(x), color='red', label='True CDF')
plt.xlim(-5, 5)  # zoom in on the central region of the plotted range
plt.legend()
None  # last expression is None, so the cell does not echo the Legend object

In [None]:
# Importance Sampling

# Integrand handed to importance_sampling through the notebook namespace.
psi = lambda x: x

is_sample = importance_sampling(N)

# NOTE(review): importance_sampling returns weighted terms of the form
# psi(x) * f(x) / (proposal density), not draws distributed according to f,
# so this histogram is not expected to follow the true PDF drawn on top of
# it - confirm what this cell is meant to show.
plt.hist(is_sample, density = True, bins = 100, label = 'Empirical PDF')
plt.plot(x, true_pdf, color='red', label='True PDF')
plt.xlim(-5, 5)
plt.legend()
None  # last expression is None, so the cell does not echo the Legend object

# TODO: fix this


# c)

In [None]:
N = 1000
alpha = 0.05  # significance level passed to mc_summary

np.random.seed(42)  # For reproducibility
# NOTE(review): `sample` was drawn in an earlier cell, so this seed call only
# affects random draws made after it; it has no influence on samples_mc.

# Crude Monte Carlo terms: the square of every draw in `sample`.
# NOTE(review): for f(x) = 2 / (pi * (1 + x^2)^2) the product x^4 * f(x) does
# not decay, so Var(X^2) appears to be infinite and the normal-approximation
# uncertainty printed below may be unreliable - confirm.
samples_mc = sample**2
mc_summary(samples_mc, alpha)

# d)

In [None]:
# Normalised histogram of the crude Monte Carlo terms.
plt.hist(samples_mc, density=True, bins=100, label='Empirical PDF')
None  # last expression is None, so the cell does not echo hist's return value

In [None]:
# Integrand for the second-moment estimate and a candidate proposal density
# (Cauchy PDF with scale 2).
psi = lambda x : x*x
candidate = lambda x : scipy.stats.cauchy.pdf(x, scale=2) # manually tested some functions

y = np.linspace(-100, 100, N)  # evaluation grid with N points

# Plot psi(x) * f(x) next to the candidate density to compare their shapes.
plt.plot(y, psi(y)*f(y))
plt.plot(y, candidate(y))
plt.xlim(-25,25)
None  # last expression is None, so the cell does not echo xlim's return value

In [None]:
def g(x):
    """Candidate density: Cauchy PDF with scale 2, evaluated at ``x``."""
    return scipy.stats.cauchy.pdf(x, scale=2)

In [None]:
def importance_sampling(N, scale=2):
    """Return N importance-sampling terms ``psi(x) * f(x) / q(x)``.

    The points ``x`` are drawn from a Cauchy proposal ``q`` with the given
    scale, and the weights use the density of that same proposal. Taking the
    density from the notebook-level ``g`` instead would silently produce
    wrong weights whenever ``g`` describes a different distribution than the
    one the points were drawn from.

    Parameters
    ----------
    N : int
        Number of proposal draws.
    scale : float, optional
        Scale of the Cauchy proposal (default 2).

    Returns
    -------
    numpy.ndarray
        Array of length N; its mean estimates the integral of ``psi * f``.

    Notes
    -----
    ``psi`` and ``f`` are looked up in the notebook namespace at call time
    and are evaluated on the whole array of draws at once, so they must
    accept NumPy arrays.
    """
    x_tilde = scipy.stats.cauchy.rvs(scale=scale, size=N)
    proposal_pdf = scipy.stats.cauchy.pdf(x_tilde, scale=scale)
    return np.asarray(psi(x_tilde) * f(x_tilde) / proposal_pdf)

In [None]:
# Importance-sampling terms for the current psi, and their normalised histogram.
samples_is = importance_sampling(N)
plt.hist(samples_is, density=True, bins=100)
None  # last expression is None, so the cell does not echo hist's return value

In [None]:
# Estimate and uncertainty computed from the importance-sampling terms.
mc_summary(samples_is, alpha)

# e)

# TODO: Prove that the minimum is monotone and that the distribution is symmetric.

In [None]:
N = 3000
# Two separately drawn batches of N values each.
S1 = sample_f(N)
S2 = sample_f(N)
# From here on psi takes two arguments and returns their elementwise minimum.
psi = lambda x, y: np.minimum(x, y)

In [None]:
# Plain Monte Carlo: evaluate psi on the paired batches and summarise the mean.
samples = psi(S1, S2)
mc_summary(samples, alpha)

In [None]:
N = 3000
# Fresh batches for the antithetic-variates run.
S1 = sample_f(N)
S2 = sample_f(N)
X = np.array([S1, S2]).T  # one row per pair (S1[i], S2[i])
X_anti = -X  # sign-flipped copy of every pair

In [None]:
# psi wraps np.minimum, which already works elementwise on whole arrays, so
# pass the two columns directly instead of calling a Python lambda once per
# row through np.apply_along_axis. The resulting values are the same.
psi_x = psi(X[:, 0], X[:, 1])
psi_x_anti = psi(X_anti[:, 0], X_anti[:, 1])
# Average each evaluation with the one at the sign-flipped pair.
Z = 0.5*(psi_x + psi_x_anti)

In [None]:
# Estimate and uncertainty computed from the averaged pairs Z.
mc_summary(Z, alpha)