In [None]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy.stats import expon

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

# 1. Basics of Probability: Coin Toss and Dice Roll
# a. Simulate tossing a coin 10,000 times
def simulate_coin_toss(n=10000):
    """Estimate the probabilities of heads and tails from simulated fair tosses.

    Args:
        n: Number of tosses to simulate. Must be a positive integer.

    Returns:
        A ``(p_heads, p_tails)`` tuple: the fraction of the ``n`` tosses that
        came up heads and the fraction that came up tails.

    Raises:
        ValueError: If ``n`` is not positive, because a proportion over zero
            (or a negative number of) tosses is undefined.
    """
    if n <= 0:
        raise ValueError(f"n must be a positive number of tosses, got {n}")
    # Exactly one random.choice call per toss, so results under a fixed seed
    # stay reproducible.
    heads_count = sum(
        random.choice(['Heads', 'Tails']) == 'Heads' for _ in range(n)
    )
    # Every toss is either heads or tails, so tails needs no second count.
    tails_count = n - heads_count
    return heads_count / n, tails_count / n

# Run coin toss simulation
# The toss count is spelled out so it visibly matches the figure in the header line.
p_heads, p_tails = simulate_coin_toss(n=10000)
print(
    "1a. Coin Toss Results (10,000 tosses):",
    f"Probability of Heads: {p_heads:.4f}",
    f"Probability of Tails: {p_tails:.4f}",
    sep="\n",
)

# b. Simulate rolling two dice and compute probability of sum = 7
def simulate_dice_roll(n=10000):
    """Estimate the probability that two fair six-sided dice sum to 7.

    Args:
        n: Number of two-dice rolls to simulate. Must be a positive integer.

    Returns:
        The fraction of the ``n`` rolls whose two faces summed to 7.

    Raises:
        ValueError: If ``n`` is not positive, because a proportion over zero
            (or a negative number of) rolls is undefined.
    """
    if n <= 0:
        raise ValueError(f"n must be a positive number of rolls, got {n}")
    # Two randint calls per roll, evaluated left to right, so results under a
    # fixed seed stay reproducible.
    sum_seven = sum(
        random.randint(1, 6) + random.randint(1, 6) == 7 for _ in range(n)
    )
    return sum_seven / n

# Run dice roll simulation
# The roll count is spelled out so it visibly matches the figure in the header line.
p_sum_seven = simulate_dice_roll(n=10000)
print(
    "\n1b. Dice Roll Results (10,000 rolls):",
    f"Probability of Sum = 7: {p_sum_seven:.4f}",
    sep="\n",
)

# 2. Probability of at least one "6" in 10 rolls of a die
def simulate_at_least_one_six(n_trials=10000, rolls_per_trial=10):
    """Estimate the probability of seeing at least one 6 in a series of die rolls.

    Args:
        n_trials: Number of independent trials to simulate. Must be positive.
        rolls_per_trial: Number of fair six-sided die rolls in each trial.

    Returns:
        The fraction of trials in which at least one roll showed a 6.

    Raises:
        ValueError: If ``n_trials`` is not positive, because a proportion
            over zero (or a negative number of) trials is undefined.
    """
    if n_trials <= 0:
        raise ValueError(
            f"n_trials must be a positive number of trials, got {n_trials}"
        )
    success_count = 0
    for _ in range(n_trials):
        # Always roll the full trial, even after a 6 shows up: stopping early
        # would consume fewer random numbers and change seeded results.
        rolls = [random.randint(1, 6) for _ in range(rolls_per_trial)]
        success_count += 6 in rolls
    return success_count / n_trials

# Run simulation for at least one "6"
# Trial and roll counts are spelled out so the "10 rolls" in the message
# visibly matches the call.
p_at_least_one_six = simulate_at_least_one_six(n_trials=10000, rolls_per_trial=10)
print(
    f"\n2. Probability of at least one '6' in 10 rolls: {p_at_least_one_six:.4f}"
)

# 3. Conditional Probability and Bayes' Theorem
def simulate_bag_draws(n=1000):
    """Draw balls with replacement and estimate probabilities over consecutive draws.

    The bag holds 5 red, 7 green and 8 blue balls. All four estimates are
    computed over the same sample space: the ``n - 1`` pairs of consecutive
    draws ``(previous, current)``. Sharing one sample space makes the identity
    ``P(Red) * P(Blue | Red) == P(Blue) * P(Red | Blue)`` hold exactly (up to
    floating-point rounding), rather than being skewed by the first and last
    draws, which belong to only one pair.

    Args:
        n: Number of draws to simulate.

    Returns:
        A tuple ``(p_red_given_blue, p_red, p_blue, p_blue_given_red)`` where

        * ``p_red_given_blue`` is P(current is Red | previous is Blue),
        * ``p_red`` is P(current is Red),
        * ``p_blue`` is P(previous is Blue),
        * ``p_blue_given_red`` is P(previous is Blue | current is Red).

        A conditional probability whose conditioning event never occurred is
        reported as 0, and all four values are 0 when there are fewer than
        two draws (no consecutive pair exists).
    """
    balls = ['Red'] * 5 + ['Green'] * 7 + ['Blue'] * 8
    draws = [random.choice(balls) for _ in range(n)]

    # Tally every event over the consecutive (previous, current) pairs.
    n_pairs = 0
    blue_count = 0      # pairs whose previous draw is Blue
    red_count = 0       # pairs whose current draw is Red
    blue_then_red = 0   # pairs that are Blue followed by Red
    for previous, current in zip(draws, draws[1:]):
        n_pairs += 1
        previous_is_blue = previous == 'Blue'
        current_is_red = current == 'Red'
        blue_count += previous_is_blue
        red_count += current_is_red
        blue_then_red += previous_is_blue and current_is_red

    if n_pairs == 0:
        # Fewer than two draws: nothing to estimate.
        return 0.0, 0.0, 0.0, 0.0

    # Conditional probabilities; 0 when the conditioning event never occurred.
    p_red_given_blue = blue_then_red / blue_count if blue_count > 0 else 0.0
    p_blue_given_red = blue_then_red / red_count if red_count > 0 else 0.0

    # Marginal probabilities over the same pairs.
    p_red = red_count / n_pairs
    p_blue = blue_count / n_pairs

    return p_red_given_blue, p_red, p_blue, p_blue_given_red

# Run bag draw simulation
# The draw count is spelled out so it visibly matches the figure in the header line.
p_red_given_blue, p_red, p_blue, p_blue_given_red = simulate_bag_draws(n=1000)
print(
    "\n3. Bag Draw Simulation (1000 draws):",
    f"a. P(Red | Previous Blue): {p_red_given_blue:.4f}",
    "b. Verifying Bayes' Theorem:",
    f"P(Red) * P(Blue | Red) = {p_red * p_blue_given_red:.4f}",
    f"P(Blue) * P(Red | Blue) = {p_blue * p_red_given_blue:.4f}",
    sep="\n",
)

# 4. Discrete Random Variable
def simulate_discrete_rv(n=1000):
    """Sample a three-valued discrete random variable and summarise the draws.

    The variable takes the values 1, 2 and 3 with probabilities 0.25, 0.35
    and 0.4 respectively.

    Args:
        n: Number of samples to draw.

    Returns:
        A tuple ``(sample, mean, variance, std_dev)``: the NumPy array of
        draws followed by its mean, variance and standard deviation, each
        computed with NumPy's default settings.
    """
    support = [1, 2, 3]
    weights = [0.25, 0.35, 0.4]
    draws = np.random.choice(support, size=n, p=weights)
    return draws, draws.mean(), draws.var(), draws.std()

# Run discrete random variable simulation
# The sample count is spelled out so it visibly matches the figure in the header line.
sample, mean, variance, std_dev = simulate_discrete_rv(n=1000)
print(
    "\n4. Discrete Random Variable (1000 samples):",
    f"Empirical Mean: {mean:.4f}",
    f"Empirical Variance: {variance:.4f}",
    f"Empirical Standard Deviation: {std_dev:.4f}",
    sep="\n",
)

# 5. Continuous Random Variable: Exponential Distribution
def simulate_exponential(n=2000, mean=5):
    """Sample an exponential distribution and plot it against its theoretical PDF.

    Draws ``n`` samples with the given mean, shows a density-normalised
    histogram of them and overlays the exponential PDF evaluated between 0
    and the largest sample.

    Args:
        n: Number of samples to draw.
        mean: Mean of the exponential distribution, passed as the ``scale``
            of both the sampler and the PDF.

    Returns:
        The NumPy array of drawn samples.
    """
    samples = np.random.exponential(scale=mean, size=n)

    # Density-normalised histogram so it is comparable with the PDF.
    plt.figure(figsize=(10, 6))
    plt.hist(samples, bins=50, density=True, alpha=0.7, label='Histogram')

    # Overlay the theoretical PDF across the observed range of the samples.
    x = np.linspace(0, samples.max(), 100)
    pdf = expon.pdf(x, scale=mean)
    plt.plot(x, pdf, 'r-', label='PDF')

    # Show the mean actually used instead of a fixed "5".
    plt.title(f'Exponential Distribution (Mean = {mean})')
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.show()

    return samples

# Run exponential distribution simulation
# Sample size and mean are spelled out so the call documents what is plotted.
samples_exp = simulate_exponential(n=2000, mean=5)
print(
    "\n5. Exponential Distribution Plot Generated"
)

# 6. Central Limit Theorem
def simulate_clt(n_samples=1000, sample_size=30, n_total=10000):
    """Illustrate the Central Limit Theorem with a uniform population.

    Generates a population of ``n_total`` uniform values on [0, 1), draws
    ``n_samples`` random samples of ``sample_size`` values each from it with
    ``np.random.choice``, and shows two side-by-side histograms: the
    population and the means of the samples.

    Args:
        n_samples: Number of samples (and therefore sample means) to draw.
        sample_size: Number of population values in each sample.
        n_total: Size of the generated population.

    Returns:
        None. The function only displays the figure.
    """
    # Generate population from uniform distribution
    population = np.random.uniform(low=0, high=1, size=n_total)

    # One np.random.choice call per sample, in order, so results under a
    # fixed seed stay reproducible.
    sample_means = [
        np.mean(np.random.choice(population, size=sample_size))
        for _ in range(n_samples)
    ]

    plt.figure(figsize=(12, 6))

    # Left panel: population distribution
    plt.subplot(1, 2, 1)
    plt.hist(population, bins=50, density=True, alpha=0.7)
    plt.title('Uniform Population Distribution')
    plt.xlabel('Value')
    plt.ylabel('Density')

    # Right panel: distribution of the sample means
    plt.subplot(1, 2, 2)
    plt.hist(sample_means, bins=50, density=True, alpha=0.7)
    # Show the sample size actually used instead of a fixed "30".
    plt.title(f'Distribution of Sample Means (n={sample_size})')
    plt.xlabel('Sample Mean')
    plt.ylabel('Density')

    plt.tight_layout()
    plt.show()

# Run CLT simulation
# Sample count, sample size and population size are spelled out so the call
# documents what is plotted.
simulate_clt(n_samples=1000, sample_size=30, n_total=10000)
print(
    "\n6. Central Limit Theorem Plots Generated"
)


1. Basics of Probability: Coin Toss and Dice Roll
Coin Toss: The program simulates tossing a fair coin 10,000 times using Python’s random.choice to select 'Heads' or 'Tails'. It counts the occurrences of each outcome and computes their experimental probabilities by dividing by the total number of tosses. The expected probabilities are approximately 0.5 for both heads and tails.
Dice Roll: The program simulates rolling two fair six-sided dice 10,000 times. For each roll, it checks if the sum is 7 and calculates the probability as the proportion of successful rolls. The theoretical probability of a sum of 7 is 6/36 = 1/6 ≈ 0.1667.
2. Probability of At Least One "6"
This function simulates rolling a fair die 10 times per trial for 10,000 trials. It checks whether at least one roll in each trial results in a 6 and computes the proportion of trials with at least one 6. The theoretical probability is \(1 - (5/6)^{10} \approx 0.8385\), and the simulation approximates this value.
3. Conditional Probability and Bayes' Theorem
Conditional Probability: The program simulates drawing a ball (with replacement) from a bag with 5 red, 7 green, and 8 blue balls 1,000 times. It tracks sequences where a blue ball is followed by a red ball to estimate P(Red|Previous Blue).
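Bayes' Theorem: The program calculates P(Red), P(Blue), P(Red|Blue), and P(Blue|Red) from the simulation and verifies Bayes' theorem by checking that \(P(\text{Red}) \cdot P(\text{Blue} \mid \text{Red}) \approx P(\text{Blue}) \cdot P(\text{Red} \mid \text{Blue})\). The theoretical probability of drawing a red ball is 5/20 = 0.25. The estimated conditional probability depends on the simulation data, but because the draws are made with replacement (and are therefore independent) it should also be close to 0.25.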
4. Discrete Random Variable
The program generates 1,000 samples from a discrete random variable with values 1, 2, and 3 and probabilities 0.25, 0.35, and 0.4, respectively, using numpy.random.choice. It computes the empirical mean, variance, and standard deviation using NumPy’s built-in functions. These values approximate the theoretical mean (\(0.25 \cdot 1 + 0.35 \cdot 2 + 0.4 \cdot 3 = 2.15\)) and the theoretical variance (\(0.25 \cdot 1^2 + 0.35 \cdot 2^2 + 0.4 \cdot 3^2 - 2.15^2 = 0.6275\)).
5. Continuous Random Variable: Exponential Distribution
The program generates 2,000 samples from an exponential distribution with a mean of 5 using numpy.random.exponential. It visualizes the samples with a histogram and overlays the theoretical PDF using scipy.stats.expon. The histogram approximates the shape of the exponential distribution, with the PDF showing the expected density curve.
6. Central Limit Theorem
The program generates 10,000 random numbers from a uniform distribution (0, 1) and draws 1,000 samples of size 30. It calculates the mean of each sample and plots the distribution of these sample means alongside the population’s uniform distribution. The sample means form a bell-shaped curve, illustrating the Central Limit Theorem, which states that the distribution of sample means approaches normality as the sample size increases, regardless of the population distribution.