In [28]:
import numpy as np
from scipy.stats import rv_continuous
from tqdm import tqdm
import itertools
import matplotlib.pyplot as plt
import scipy.stats as st

In [29]:
true_alpha = 5
n = 200

In [30]:
def pdf(x, alpha=5):
    return (alpha + 1)*np.power(x, alpha)

def get_samples(n, alpha):
    x = np.linspace(0,1,1000000)
    fit_pdf = pdf(x)
    return np.random.choice(x, size=n, p=fit_pdf/np.sum(fit_pdf)) 

## d)

In [31]:
def get_mle(ns):
    return -1-len(ns)/(np.sum(np.log(ns)))

def get_mom(ns):
    avg = np.average(ns)
    return (2*avg - 1)/(1-avg)

In [32]:
ns = get_samples(n, true_alpha)
mle = get_mle(ns)
mom = get_mom(ns)
print(5-mle, 5-mom, (mle-5)**2, (mom-5)**2)

-0.17141304893342024 -0.1970551348232883 0.029382433344651123 0.03883072616022432


## e)

In [33]:
num_exp = 1000
sample_size = 200

In [34]:
mles = np.zeros(num_exp)
moms = np.zeros(num_exp)

for i in tqdm(range(num_exp)):
    ns = get_samples(sample_size, true_alpha)
    mles[i] = get_mle(ns)
    moms[i] = get_mom(ns)

 21%|█████████████▊                                                   | 213/1000 [00:07<00:28, 27.62it/s]


KeyboardInterrupt: 

PLOTS

In [None]:
fig, axs = plt.subplots(2)
fig.set_figheight(10)
fig.set_figwidth(10)
axs[0].hist(moms, color='C1', bins=300)
axs[0].set_title('Moment estimator')
axs[1].hist(mles, color='C2', bins=300)
axs[1].set_title('MLE')

In [None]:
fig = plt.figure(figsize =(5, 5))
ax = fig.add_subplot()
ax.boxplot([mles, moms],widths=0.8)
plt.xticks([1, 2], ['MLE', 'MOM'])
ax.set_aspect(0.2)

In [None]:
import numpy as np
import statsmodels.api as sm
import pylab as py
sm.qqplot(moms, line ='45', fit=False)
py.show()

In [None]:
sm.qqplot(mles, line ='45')
py.show()

In [None]:
print(mles.var())
print(moms.var())
print((5-mles).mean())
print((5-moms).mean())
print(np.sqrt((5-mles)**2).mean())
print(np.sqrt((5-moms)**2).mean())

In [None]:
36/20

In [None]:
st.t.interval(alpha=0.95, df=len(mles)-1,
              loc=np.mean(mles),
              scale=st.sem(mles))

In [None]:
st.t.interval(alpha=0.95, df=len(moms)-1,
              loc=np.mean(moms),
              scale=st.sem(moms))