In [2]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

In [3]:
ns = np.array([20, 100])
distributions = {
  'Normal': [np.random.normal(loc=0, scale=1, size=n) for n in ns],
  'Student\'s t': [np.random.standard_t(df=10, size=n) for n in ns],
  'Uniform': [np.random.uniform(low=-1, high=1, size=n) for n in ns],
}

In [4]:
def chi2_test(data, bins, alpha=0.05):
  observed_values, _ = np.histogram(data, bins=bins)
  expected_values = [len(data) / bins] * bins
  chi2, p_val = sp.stats.chisquare(f_obs=observed_values, f_exp=expected_values)

  chi2_critical = sp.stats.chi2.ppf(q=1 - alpha, df=bins - 1)

  return chi2, p_val, chi2_critical

In [5]:
alpha = 0.05
bins = 10

for dist_name, datasets in distributions.items():
  for i, data in enumerate(datasets):
    chi2, p_val, chi2_critical = chi2_test(data, bins)
    print(f'\nDistribution: {dist_name}, n={ns[i]}')
    print(f'Chi-squared: {chi2}, chi2-critical: {chi2_critical}')
    if chi2 < chi2_critical:
      print(f'Do not reject null hypothesis, data follows the expected distribution')
    else:
      print(f'Reject null hypothesis, data does not follow the expected distribution')


Distribution: Normal, n=20
Chi-squared: 5.0, chi2-critical: 16.918977604620448
Do not reject null hypothesis, data follows the expected distribution

Distribution: Normal, n=100
Chi-squared: 51.599999999999994, chi2-critical: 16.918977604620448
Reject null hypothesis, data does not follow the expected distribution

Distribution: Student's t, n=20
Chi-squared: 13.0, chi2-critical: 16.918977604620448
Do not reject null hypothesis, data follows the expected distribution

Distribution: Student's t, n=100
Chi-squared: 45.60000000000001, chi2-critical: 16.918977604620448
Reject null hypothesis, data does not follow the expected distribution

Distribution: Uniform, n=20
Chi-squared: 7.0, chi2-critical: 16.918977604620448
Do not reject null hypothesis, data follows the expected distribution

Distribution: Uniform, n=100
Chi-squared: 5.000000000000001, chi2-critical: 16.918977604620448
Do not reject null hypothesis, data follows the expected distribution


In [9]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from tabulate import tabulate

def chi_square_test(alpha, k, distribution, size, name):
  sample = distribution.rvs(size)
  bins = np.linspace(min(sample), max(sample), k+1)

  frequency, _ = np.histogram(sample, bins=bins)

  p = [distribution.cdf(bins[i+1])-distribution.cdf(bins[i]) for i in range(k)]
  chi2_statistic = np.sum((frequency - np.array(p)*size)**2 / (np.array(p)*size))

  chi2_critical = stats.chi2.ppf(q=1-alpha, df=k-1)

  result = chi2_statistic < chi2_critical
  n = frequency.sum()

  print(f'\n{name} distribution, Sample Size: {size}\n')
  print('chi2_statistic', chi2_statistic)
  print('chi2_critical', chi2_critical)
  print('H0 accepted', result)

  for i in range(k):
    print('Limits', bins[i], bins[i+1])
    print('n_i', frequency[i])
    print('p_i', p[i])
    print('np_i', n * p[i])
    print('n_i - np_i', frequency[i] - n * p[i])
    print('(n_i - np_i)^2 / np_i', (frequency[i] - n * p[i]) ** 2 / (n * p[i]))

alpha = 0.05
k = 10
batches = [
  ('Нормальное', stats.norm()),
  ('Стьюдента', stats.t(10)),
  ('Равномерное', stats.uniform()),
]
sizes = [20, 100]

for name, batch in batches:
  for size in sizes:
    chi_square_test(alpha, 5 if size == 20 else 8, batch, size, name)



Нормальное distribution, Sample Size: 20

chi2_statistic 7.07564291940808
chi2_critical 9.487729036781154
H0 accepted True
Limits -2.5687475522572005 -1.5801961956547927
n_i 3
p_i 0.05192763227266356
np_i 1.0385526454532712
n_i - np_i 1.9614473545467288
(n_i - np_i)^2 / np_i 3.7044590291128054
Limits -1.5801961956547927 -0.5916448390523852
n_i 7
p_i 0.22001324931946198
np_i 4.40026498638924
n_i - np_i 2.5997350136107604
(n_i - np_i)^2 / np_i 1.5359579847803024
Limits -0.5916448390523852 0.3969065175500224
n_i 4
p_i 0.3772375803855214
np_i 7.544751607710428
n_i - np_i -3.5447516077104284
(n_i - np_i)^2 / np_i 1.6654310988216605
Limits 0.3969065175500224 1.3854578741524302
n_i 5
p_i 0.26276194640971495
np_i 5.255238928194299
n_i - np_i -0.2552389281942986
(n_i - np_i)^2 / np_i 0.012396564905215226
Limits 1.3854578741524302 2.3740092307548375
n_i 1
p_i 0.07415819721776817
np_i 1.4831639443553635
n_i - np_i -0.4831639443553635
(n_i - np_i)^2 / np_i 0.15739824178809675

Нормальное distribu