In [36]:
import numpy as np
import pandas as pd


# Population sizes N for which the sampling experiment is repeated.
population_sizes = [10_000, 100_000, 1_000_000]


def simple_random(population, M):
    """Draw a simple random sample of ``M`` elements from ``population``.

    The draw is made without replacement through ``np.random.choice``, so no
    position of ``population`` is picked more than once.
    """
    drawn = np.random.choice(population, size=M, replace=False)
    return drawn

def bernoulli(population, M):
    """Draw a Bernoulli sample from ``population``, adjusted to exactly ``M`` items.

    Each element is first included independently with probability ``M / N``
    (``N = len(population)``), which yields a sample whose size is only ``M``
    on average. The result is then adjusted to exactly ``M`` elements:

    * too many selected  -> a random subset of ``M`` of them is kept;
    * too few selected   -> the shortfall is drawn at random from the elements
      that were *not* selected, so no element can appear twice.

    Selection is done on positions rather than values, so duplicate values in
    ``population`` are treated as distinct elements.

    Parameters:
        population: one-dimensional array-like of values.
        M: required sample size.

    Returns:
        A NumPy array with ``M`` elements of ``population``.
    """
    population = np.asarray(population)
    N = len(population)

    # One independent inclusion trial per element.
    included = np.random.binomial(1, M / N, N).astype(bool)
    chosen = np.flatnonzero(included)

    if len(chosen) > M:
        # Overshoot: thin the selection down to M positions.
        chosen = np.random.choice(chosen, M, replace=False)
    elif len(chosen) < M:
        # Undershoot: top up only from positions not already chosen, otherwise
        # the same element could be sampled twice.
        remaining = np.flatnonzero(~included)
        extra = np.random.choice(remaining, M - len(chosen), replace=False)
        chosen = np.concatenate((chosen, extra))

    return population[chosen]

def systematic(population, M):
    """Draw a systematic sample of ``M`` elements from ``population``.

    The sampling interval is ``len(population) // M``. A random offset is
    drawn from ``[0, interval)`` and every ``interval``-th element is taken
    from that offset onward, stopping after ``M`` elements.
    """
    interval = len(population) // M
    offset = np.random.randint(interval)
    # An explicit stop bound caps the slice at M elements.
    stop = offset + interval * M
    return population[offset:stop:interval]

def stratified(population, M):
    """Draw a proportionally allocated stratified sample of ``M`` elements.

    The population is sorted and split into three contiguous strata of
    (nearly) equal size. Each stratum contributes a share of the ``M`` draws
    proportional to its size, and elements are drawn without replacement
    inside each stratum.

    Shares are computed with integer arithmetic using the largest-remainder
    rule: every stratum gets ``floor(M * size / N)`` draws, and the draws still
    missing are handed out one at a time to the strata with the largest
    remainders (ties go to the earlier stratum). This never asks a stratum for
    more elements than it holds when ``M <= len(population)``; giving the whole
    shortfall to the first stratum could do so (e.g. ``N = 4``, ``M = 3``).

    Parameters:
        population: one-dimensional array-like of sortable values.
        M: required sample size.

    Returns:
        A NumPy array with ``M`` elements, ordered stratum by stratum.
    """
    N = len(population)
    strata = np.array_split(np.sort(population), 3)

    # (guaranteed draws, remainder) for each stratum; exact, no float rounding.
    quotas = [divmod(M * len(stratum), N) for stratum in strata]
    sample_sizes = [base for base, _ in quotas]

    # Hand out the draws lost to flooring, largest remainder first. sorted()
    # is stable, so equal remainders favour the earlier stratum.
    shortfall = M - sum(sample_sizes)
    by_remainder = sorted(
        range(len(strata)), key=lambda i: quotas[i][1], reverse=True
    )
    for i in by_remainder[:shortfall]:
        sample_sizes[i] += 1

    return np.concatenate(
        [
            np.random.choice(stratum, sample_size, replace=False)
            for stratum, sample_size in zip(strata, sample_sizes)
        ]
    )

# Display name -> sampling routine; iterated in this order for every population.
sample_methods = {
    "simple_random": simple_random,
    "bernoulli": bernoulli,
    "systematic": systematic,
    "stratified": stratified,
}

for N in population_sizes:
    # Step 1: simulate a realization of a uniform random variable on [-1, 1)
    population = np.random.uniform(-1, 1, N)

    # Step 2: mean and total of the full realization (the reference values)
    population_mean = population.mean()
    population_sum = population.sum()

    print(
        f"Population size: {N}",
        f"Population mean: {population_mean}",
        f"Population sum: {population_sum}",
        sep="\n",
    )

    # Step 3: form a sample of length M with each method in turn
    M = 1000
    for method_name, method_func in sample_methods.items():
        sample = method_func(population, M)

        # Step 4: total and mean of the sample itself
        sample_mean = sample.mean()
        sample_sum = sample.sum()

        # Step 5: estimate the population mean and total from the sample;
        # the total is the sample total scaled by N / M
        estimated_population_mean = sample_mean
        estimated_population_sum = sample_sum * (N / M)

        print(
            f"\nSample method: {method_name}",
            f"Sample mean: {sample_mean}",
            f"Sample sum: {sample_sum}",
            f"Estimated population mean: {estimated_population_mean}",
            f"Estimated population sum: {estimated_population_sum}",
            sep="\n",
        )

    print("\n" + "=" * 50 + "\n")


Population size: 10000
Population mean: -0.005039073825976743
Population sum: -50.39073825976743

Sample method: simple_random
Sample mean: -0.0033306565612900227
Sample sum: -3.3306565612900227
Estimated population mean: -0.0033306565612900227
Estimated population sum: -33.30656561290023

Sample method: bernoulli
Sample mean: -0.010701077606541595
Sample sum: -10.701077606541595
Estimated population mean: -0.010701077606541595
Estimated population sum: -107.01077606541595

Sample method: systematic
Sample mean: -0.005290226658497031
Sample sum: -5.290226658497031
Estimated population mean: -0.005290226658497031
Estimated population sum: -52.902266584970306

Sample method: stratified
Sample mean: 0.00038337343366930555
Sample sum: 0.38337343366930554
Estimated population mean: 0.00038337343366930555
Estimated population sum: 3.8337343366930554


Population size: 100000
Population mean: -0.0005735980380574555
Population sum: -57.35980380574555

Sample method: simple_random
Sample mean: 