In [27]:
import numpy as np
import pandas as pd
import math

In [28]:
def normal_distribution(n):
  return np.random.normal(0, 1, n)

def cauchy_distribution(n):
  return np.random.standard_cauchy(n)

def student_distribution(n):
  return np.random.standard_t(3, n)

def poisson_distribution(n):
  return np.random.poisson(10, n)

def uniform_distribution(n):
  return np.random.uniform(-math.sqrt(3), math.sqrt(3), n)

distributions = [
  ('normal', normal_distribution, -10, 10),
  ('cauchy', cauchy_distribution, -10, 10),
  ('student', student_distribution, -10, 10),
  ('poisson', poisson_distribution, -50, 50),
  ('uniform', uniform_distribution, -10, 10),
]

In [29]:
def mean(x):
  return x.mean()

def median(x):
  return np.median(x)

def half_sum_of_extremal_elements(x):
  return (x.min() + x.max()) / 2

def half_sum_of_quartiles(x):
  return (np.quantile(x, 0.25) + np.quantile(x, 0.75)) / 2

def trimmed_mean(x):
  r = round(len(x) / 4)
  trimmed_x = x[r:-r]
  return trimmed_x.mean()

metrics = [
  ('mean', mean),
  ('median', median),
  ('half_sum_of_extremal_elements', half_sum_of_extremal_elements),
  ('half_sum_of_quartiles', half_sum_of_quartiles),
  ('trimmed_mean', trimmed_mean),
]

In [30]:
def E(z):
  return z.mean()

def D(z):
  return (z ** 2).mean() - z.mean() ** 2

In [31]:
ns = np.array([10, 100, 1000])

In [32]:

for distribution_name, distribution_f, min_value, max_value in distributions:
  result_df = pd.DataFrame(columns=[
    'n',
    *list(map(lambda metric: f'E(z) {metric[0]}', metrics)),
    *list(map(lambda metric: f'D(z) {metric[0]}', metrics)),
  ])

  for n in ns:
    metric_values = {}

    for i in range(1000):
      x = distribution_f(n)
      x = x[(x >= min_value) & (x <= max_value)]

      for metric_name, metric_f in metrics:
        if not metric_name in metric_values:
          metric_values[metric_name] = np.array([])

        metric_values[metric_name] = np.append(metric_values[metric_name], metric_f(x))

    data_row = {'n': [n]}

    for metric_name, values in metric_values.items():
      data_row[f'E(z) {metric_name}'] = [E(values)]
      data_row[f'D(z) {metric_name}'] = [D(values)]

    result_df = pd.concat([result_df, pd.DataFrame(data_row)])

  result_df.to_csv(f'results/{distribution_name}.csv', index=False)

  result_df = pd.concat([result_df, pd.DataFrame(data_row)])
  result_df = pd.concat([result_df, pd.DataFrame(data_row)])
  result_df = pd.concat([result_df, pd.DataFrame(data_row)])
  result_df = pd.concat([result_df, pd.DataFrame(data_row)])
  result_df = pd.concat([result_df, pd.DataFrame(data_row)])


In [33]:
pd.read_csv(f'results/normal.csv')

Unnamed: 0,n,E(z) mean,E(z) median,E(z) half_sum_of_extremal_elements,E(z) half_sum_of_quartiles,E(z) trimmed_mean,D(z) mean,D(z) median,D(z) half_sum_of_extremal_elements,D(z) half_sum_of_quartiles,D(z) trimmed_mean
0,10,-0.007983,-0.004386,-0.020997,-0.005517,-0.006625,0.098372,0.143343,0.163284,0.113911,0.163168
1,100,0.003888,0.006077,-0.00053,0.0021,0.005262,0.010964,0.016229,0.092033,0.013306,0.019508
2,1000,0.000841,0.001757,0.002664,0.000549,0.00112,0.001003,0.001594,0.056454,0.001218,0.002025
