In [1]:
import torch

In [7]:
# 1. ACTUAL POPULATION (10,000 people)
# Heights are drawn from a normal distribution with mean 160 cm and
# standard deviation 10 cm.
# Seed torch's global RNG first so that Restart Kernel -> Run All reproduces
# the same population, and the same results from later cells that draw from
# the global RNG. Numbers recorded from earlier, unseeded runs will differ.
SEED = 42
torch.manual_seed(SEED)
population_heights = torch.normal(mean=160.0, std=10.0, size=(10000,))

In [8]:
# 2. BIASED SAMPLING (how the error gets introduced)
# Keep only the 500 largest heights in the population. Collecting data from
# one specific group like this yields a sample that does not represent the
# population as a whole.
tallest = population_heights.topk(k=500)  # named tuple: (values, indices)
biased_sample = tallest.values

In [9]:
# 3. UNBIASED RANDOM SAMPLING (The correct way)
# Randomly pick 500 people from the whole population, without replacement:
# shuffle every index of the population and keep the first 500.
# The permutation length comes from the tensor itself rather than a
# hard-coded 10000, so every member stays eligible (and indexing stays in
# bounds) if the population size is ever changed.
shuffled_indices = torch.randperm(population_heights.shape[0])
unbiased_sample = population_heights[shuffled_indices[:500]]

In [10]:
# COMPARISON
# Reduce each tensor to a plain Python float first, then report all three
# means side by side, formatted to two decimal places.
population_mean = population_heights.mean().item()
biased_mean = biased_sample.mean().item()
unbiased_mean = unbiased_sample.mean().item()

print(f"True Population Mean: {population_mean:.2f}")
print(f"Biased Sample Mean (Error): {biased_mean:.2f}")
print(f"Unbiased Sample Mean (Correct): {unbiased_mean:.2f}")

True Population Mean: 159.85
Biased Sample Mean (Error): 180.34
Unbiased Sample Mean (Correct): 159.97
