# Probability Distributions

Understand the shape of data.

## Common Distributions
- **Normal (Gaussian):** Bell curve
- **Binomial:** Success/Failure counts
- **Poisson:** Event counts over time
- **Uniform:** Equal probability

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Apply seaborn's 'whitegrid' style globally so every plot in this notebook
# shares the same look.
sns.set_style('whitegrid')
# Seed NumPy's global random state so the random draws in the cells below are
# reproducible from run to run. scipy.stats `.rvs()` draws from this same
# global state when no `random_state` argument is passed.
np.random.seed(42)

## 1. Normal Distribution
The most commonly encountered distribution in nature.

In [None]:
# Parameters of the normal distribution
mu = 0      # Mean (centre of the bell curve)
sigma = 1   # Standard deviation (spread of the bell curve)

# 100 evenly spaced evaluation points covering [-4, 4]
x = np.linspace(-4, 4, num=100)

# Probability Density Function (PDF) evaluated at every point of x
pdf = stats.norm.pdf(x, loc=mu, scale=sigma)

# Draw the curve and lightly shade the area underneath it
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, pdf, 'b-', linewidth=3)
ax.fill_between(x, pdf, color='b', alpha=0.2)
ax.set_title('Standard Normal Distribution')
ax.set_xlabel('Z-score')
ax.set_ylabel('Probability Density')
plt.show()

## 2. Binomial Distribution
Number of successes in n independent trials, each with the same success probability p.

In [None]:
# Parameters
n = 10      # Trials per experiment (coin flips)
p = 0.5     # Probability of success on each trial (heads)

# Simulate 1000 experiments; each value is the number of successes out of n
data = stats.binom.rvs(n, p, size=1000)

# Integer bin edges 0, 1, ..., n+1 give one bin per possible outcome 0..n
bin_edges = range(n + 2)

fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data, bins=bin_edges, kde=False, ax=ax)
ax.set_title(f'Binomial Distribution (n={n}, p={p})')
ax.set_xlabel('Number of Successes')
ax.set_ylabel('Frequency')
plt.show()

## 3. Poisson Distribution
Number of events occurring in a fixed interval, given a constant average rate.

In [None]:
# Parameter
lam = 3     # Rate (lambda): average number of events per interval, e.g., emails per hour

# Generate 1000 Poisson-distributed event counts
data = stats.poisson.rvs(lam, size=1000)

plt.figure(figsize=(8, 5))
# The counts are integers, so use discrete=True: one unit-width bar centred on
# each value. A fixed bins=15 would spread equal-width bins over the observed
# range, which for integer data leaves empty gaps or merges neighbouring counts.
sns.histplot(data, discrete=True, kde=False, color='g')
# \u03bb is the Greek letter lambda, written as an escape so the title stays
# correct even if the file is re-encoded.
plt.title(f'Poisson Distribution (\u03bb={lam})')
plt.xlabel('Number of Events')
plt.ylabel('Frequency')
plt.show()

## 4. Central Limit Theorem (CLT)
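The sampling distribution of the mean approaches a normal distribution as the sample size grows, even when the population itself is not normal (provided it has finite variance).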

In [None]:
# Population: 10,000 draws from Uniform(0, 10) -- flat, clearly non-normal
population = np.random.uniform(low=0, high=10, size=10000)

# Draw 1000 samples of sample_size values each (np.random.choice samples
# with replacement by default) and keep the mean of every sample
sample_size = 30
sample_means = [
    np.random.choice(population, sample_size).mean()
    for _ in range(1000)
]

# Visualize: raw population on the left, distribution of sample means on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
population_ax, means_ax = axes

population_ax.hist(population, bins=30)
population_ax.set_title('Population (Uniform)')

means_ax.hist(sample_means, bins=30, color='orange')
means_ax.set_title('Sampling Distribution (Normal!)')

plt.show()

## Practice Exercise
Simulate dice rolls and check distribution.

In [None]:
# Roll a die 1000 times
# Plot the distribution (Uniform)
# Average 5 dice rolls 1000 times
# Plot the distribution (CLT in action)
# Your code here