# Random Sampling Techniques and Estimation

## (a) Simple Random Sampling (With and Without Replacement)


Population:  
Y = {12, 15, 18, 20, 22, 25, 27, 30, 32, 35, 14, 16, 19, 21, 23, 26, 28, 31, 33, 36, 
13, 17, 24, 29, 34, 37, 38, 39, 40, 42}


In [None]:

import numpy as np
import pandas as pd

# Population Y for part (a): 30 values, in the order listed in the problem statement
population = np.array([
    12, 15, 18, 20, 22, 25, 27, 30, 32, 35,
    14, 16, 19, 21, 23, 26, 28, 31, 33, 36,
    13, 17, 24, 29, 34, 37, 38, 39, 40, 42,
])

# Population size
N = population.shape[0]
population


In [None]:

# Population parameters: mean, and variance with divisor N (NumPy's default ddof=0)
pop_mean = population.mean()
pop_var = population.var()

pop_mean, pop_var


In [None]:

# Draw 8 units with replacement (choice() samples with replacement by default);
# the seed is fixed so the draw is reproducible
np.random.seed(1)
sample_wr = np.random.choice(population, 8)
sample_wr


In [None]:

# With-replacement sample: mean and sample variance (divisor n - 1)
sample_wr.mean(), sample_wr.var(ddof=1)


In [None]:

# Draw 8 distinct units (no replacement); continues from the current random state
sample_wor = np.random.choice(population, 8, replace=False)
sample_wor


In [None]:

# Without-replacement sample: mean and sample variance (divisor n - 1)
sample_wor.mean(), sample_wor.var(ddof=1)


In [None]:

# Repeated sampling comparison
def repeat_sampling(reps=1000, replace=True, size=8, data=None):
    """Estimate the sampling variance of the sample mean by simulation.

    Draws ``reps`` simple random samples and returns the variance of their
    means.

    Parameters
    ----------
    reps : int
        Number of samples to draw.
    replace : bool
        Sample with replacement when True, without replacement when False.
    size : int
        Units per sample; defaults to 8, the sample size used in part (a).
    data : array-like, optional
        Values to sample from; defaults to the notebook-level ``population``.

    Returns
    -------
    numpy.float64
        ``np.var`` (divisor ``reps``) of the simulated sample means.

    Notes
    -----
    Draws come from NumPy's global random state, so call ``np.random.seed``
    beforehand for reproducible output.
    """
    # Fall back to the notebook's population so existing calls are unchanged.
    pool = population if data is None else np.asarray(data)
    means = [
        np.mean(np.random.choice(pool, size=size, replace=replace))
        for _ in range(reps)
    ]
    return np.var(means)

# Simulated variance of the sample mean: with replacement first, then without
var_wr, var_wor = (repeat_sampling(replace=flag) for flag in (True, False))

var_wr, var_wor


## (b) Stratified Random Sampling


Stratum 1: {10,12,14,15,16,18,20,22}  
Stratum 2: {25,27,28,30,32,34,35,36}  
Stratum 3: {40,42,45,47,48,50,52,55}


In [None]:

# The three strata from part (b), one array each
S1, S2, S3 = map(np.array, (
    [10, 12, 14, 15, 16, 18, 20, 22],
    [25, 27, 28, 30, 32, 34, 35, 36],
    [40, 42, 45, 47, 48, 50, 52, 55],
))

# Whole population = the strata laid end to end
population_s = np.concatenate((S1, S2, S3))
population_s


In [None]:

# Mean of the full stratified population (the target of the estimates below)
population_s.mean()


In [None]:

# Proportional allocation of n = 6 draws: the first two strata get
# n * N_h / N rounded down, and the last stratum takes whatever remains
n = 6
N1, N2, N3 = (stratum.size for stratum in (S1, S2, S3))
N = sum((N1, N2, N3))

n1, n2 = (n * N1) // N, (n * N2) // N
n3 = n - (n1 + n2)

n1, n2, n3


In [None]:

# Within each stratum, draw its allocated number of units without replacement
# (strata are sampled in the order S1, S2, S3)
s1, s2, s3 = (
    np.random.choice(stratum, k, replace=False)
    for stratum, k in zip((S1, S2, S3), (n1, n2, n3))
)

strat_sample_p = np.concatenate((s1, s2, s3))
strat_sample_p


In [None]:

# Estimate under proportional allocation: plain mean of the pooled draws
strat_sample_p.mean()


In [None]:

# Equal allocation: 2 units from every stratum, drawn without replacement
# (strata are sampled in the order S1, S2, S3)
s1, s2, s3 = (
    np.random.choice(stratum, 2, replace=False)
    for stratum in (S1, S2, S3)
)

strat_sample_e = np.concatenate((s1, s2, s3))
strat_sample_e


In [None]:

# Stratified mean (equal allocation): weight each stratum's sample mean by its
# population share N_h / N. Pooling the six draws into one unweighted mean
# gives the same number only while every stratum has the same size.
np.average([s1.mean(), s2.mean(), s3.mean()], weights=[N1, N2, N3])


## (c) Cluster Random Sampling


Clusters:  
C1: {10,11,12,13,14}  
C2: {15,16,17,18,19}  
C3: {20,21,22,23,24}  
C4: {30,31,32,33,34}  
C5: {35,36,37,38,39}  
C6: {40,41,42,43,44}


In [None]:

# Clusters from part (c): each holds five consecutive integers beginning at
# the listed starting value
clusters = {
    label: list(range(first, first + 5))
    for label, first in zip(
        ('C1', 'C2', 'C3', 'C4', 'C5', 'C6'),
        (10, 15, 20, 30, 35, 40),
    )
}

clusters


In [None]:

# All units from every cluster, in cluster order, and their overall mean
pop_c = np.concatenate([*clusters.values()])
pop_c.mean()


In [None]:

# Pick m = 2 distinct cluster labels at random; seed fixed for reproducibility
np.random.seed(2)
selected = np.random.choice(list(clusters), 2, replace=False)
selected


In [None]:

# One-stage cluster sample: every unit of each selected cluster is observed
sample_clusters = np.concatenate(tuple(clusters[label] for label in selected))
sample_clusters


In [None]:

# Estimate of the population mean from the cluster sample
sample_clusters.mean()


In [None]:

# Benchmark: a simple random sample (no replacement) with as many units as
# the cluster sample, drawn from the same population
srs_sample = np.random.choice(pop_c, sample_clusters.size, replace=False)
srs_sample.mean()
