In [1]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
# test. The function should return the F-value and the corresponding p-value for the test.
from scipy.stats import f
import numpy as np

def variance_ratio_test(data1, data2):
    # Calculate sample variances
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)

    # Determine the larger and smaller variance
    if var1 > var2:
        F = var1 / var2
        dfn = len(data1) - 1  # degrees of freedom numerator
        dfd = len(data2) - 1  # degrees of freedom denominator
    else:
        F = var2 / var1
        dfn = len(data2) - 1
        dfd = len(data1) - 1

    # Calculate the p-value
    p_value = 2 * min(f.cdf(F, dfn, dfd), 1 - f.cdf(F, dfn, dfd))  # two-tailed

    return F, p_value
sample1 = [12, 15, 14, 10, 13, 14]
sample2 = [9, 8, 11, 9, 10, 7]

f_val, p_val = variance_ratio_test(sample1, sample2)
print(f"F-value: {f_val:.4f}, p-value: {p_val:.4f}")


F-value: 1.6000, p-value: 0.6186


In [6]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.
def critical_f_value(alpha, dfn, dfd):
    f_ciritcal_right = f.ppf(1 - alpha /2, dfn, dfd)
    f_ciritcal_left = f.ppf(alpha/2, dfn, dfd)
    return (f_ciritcal_left,f_ciritcal_right)

alpha = 0.05
dfn = 3
dfd = 10

critical_left, critical_right = critical_f_value(alpha, dfn, dfd)
print(f"Critcal F-value left: {critical_right}, F-value right: {critical_left}")

Critcal F-value left: 4.825621493405406, F-value right: 0.06935321586551411


In [7]:
# Q3. Write a Python program that generates random samples from two normal distributions with known
# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.
"""
Generates random samples from two normal distributions (with known variances).

Performs an F-test to determine if their variances are significantly different.

Outputs the F-value, degrees of freedom, and p-value.
"""
import numpy as np
from scipy.stats import f

def f_test(sample1, sample2):
    # Calculate sample variances
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Determine which variance is larger (F = larger / smaller)
    if var1 > var2:
        F = var1 / var2
        dfn = len(sample1) - 1
        dfd = len(sample2) - 1
    else:
        F = var2 / var1
        dfn = len(sample2) - 1
        dfd = len(sample1) - 1

    # Compute the p-value (two-tailed)
    p_value = 2 * min(f.cdf(F, dfn, dfd), 1 - f.cdf(F, dfn, dfd))

    return F, dfn, dfd, p_value

# Generate random samples
np.random.seed(42)  # for reproducibility
sample1 = np.random.normal(loc=10, scale=2, size=30)  # variance = 4
sample2 = np.random.normal(loc=10, scale=3, size=30)  # variance = 9

# Run F-test
F, dfn, dfd, p_value = f_test(sample1, sample2)

# Display results
print(f"F-value: {F:.4f}")
print(f"Degrees of freedom: ({dfn}, {dfd})")
print(f"P-value: {p_value:.4f}")


F-value: 2.4082
Degrees of freedom: (29, 29)
P-value: 0.0209


### Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.
Ans: \
To conduct an **F-test** for comparing two known population variances, we use:

### **F-statistic formula**:
$$
[
F = \frac{s_1^2}{s_2^2}
]
$$
Where:
- $( s_1^2 ) and ( s_2^2 )$ are the sample variances (we usually place the **larger** variance in the numerator),
- Degrees of freedom are $( df_1 = n_1 - 1 ), ( df_2 = n_2 - 1 )$

---

###  Given:
- Population variances: $( \sigma_1^2 = 10 ), ( \sigma_2^2 = 15 )$
- Sample sizes: $( n_1 = n_2 = 12 )$
- Significance level: $( \alpha = 0.05 )$


---

### Interpretation:

- If **F** lies outside the critical region, we reject the null hypothesis and conclude the variances are significantly different.
- Otherwise, we fail to reject the null.

In [8]:
# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
# each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.
from scipy.stats import f

# Given variances and sample sizes
var1 = 10
var2 = 15
n1 = n2 = 12

# Always place the larger variance in the numerator
F = max(var1, var2) / min(var1, var2)
dfn = dfd = n1 - 1  # degrees of freedom

# Two-tailed test critical values
alpha = 0.05
f_critical_low = f.ppf(alpha / 2, dfn, dfd)
f_critical_high = f.ppf(1 - alpha / 2, dfn, dfd)

# Check if F is outside the critical region
result = "Reject null hypothesis (variances are significantly different)" if F < f_critical_low or F > f_critical_high else "Fail to reject null hypothesis (variances are not significantly different)"

# Display results
print(f"F-value: {F:.4f}")
print(f"Degrees of freedom: ({dfn}, {dfd})")
print(f"Critical F-values: [{f_critical_low:.4f}, {f_critical_high:.4f}]")
print(f"Conclusion: {result}")


F-value: 1.5000
Degrees of freedom: (11, 11)
Critical F-values: [0.2879, 3.4737]
Conclusion: Fail to reject null hypothesis (variances are not significantly different)


### Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.
Ans: \

To test the manufacturer’s claim about the **variance**, we can use a **Chi-Square test** (not F-test) because we are comparing a **sample variance to a known population variance**.

---

### **Chi-Square Test for Variance**

We use the formula:
$$
[
\chi^2 = \frac{(n - 1) \cdot s^2}{\sigma^2}
]
$$

Where:
- $( n )$ = sample size = 25  
- $( s^2 )$ = sample variance = 0.006  
- $( \sigma^2 )$ = claimed population variance = 0.005  
- Degrees of freedom = $( n - 1 = 24 )$

###  Interpretation:
- If the chi-square value falls **outside** the critical region, we **reject** the null hypothesis.
- Otherwise, we **fail to reject** the claim, meaning the sample supports the manufacturer’s claim.

In [9]:
from scipy.stats import chi2

# Given values
n = 25
sample_variance = 0.006
claimed_variance = 0.005
df = n - 1
alpha = 0.01

# Chi-square statistic
chi2_stat = (df * sample_variance) / claimed_variance

# Critical values for two-tailed test
chi2_critical_low = chi2.ppf(alpha / 2, df)
chi2_critical_high = chi2.ppf(1 - alpha / 2, df)

# Conclusion
if chi2_stat < chi2_critical_low or chi2_stat > chi2_critical_high:
    result = "Reject the null hypothesis — variance is significantly different."
else:
    result = "Fail to reject the null hypothesis — no significant difference in variance."

# Output
print(f"Chi-square Statistic: {chi2_stat:.4f}")
print(f"Critical Values: [{chi2_critical_low:.4f}, {chi2_critical_high:.4f}]")
print(f"Conclusion: {result}")


Chi-square Statistic: 28.8000
Critical Values: [9.8862, 45.5585]
Conclusion: Fail to reject the null hypothesis — no significant difference in variance.


### Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return themean and variance as a tuple.
Ans: \

a simple Python function that calculates the **mean** and **variance** of an F-distribution, given the degrees of freedom for the numerator $((d_1))$ and denominator $((d_2))$:

---

### **F-distribution Mean & Variance Formula**

- **Mean**:  
  $$
  [
  \mu = \frac{d_2}{d_2 - 2}, \quad \text{for } d_2 > 2
  ]
  $$

- **Variance**:  
  $$
  [
  \sigma^2 = \frac{2 \cdot d_2^2 \cdot (d_1 + d_2 - 2)}{d_1 \cdot (d_2 - 2)^2 \cdot (d_2 - 4)}, \quad \text{for } d_2 > 4
  ]
  $$

In [12]:
def f_distribution_stats(d1, d2):
    if d2 <= 2:
        mean = float('inf')  # Mean is undefined
    else:
        mean = d2 / (d2 - 2)

    if d2 <= 4:
        variance = float('inf')  # Variance is undefined
    else:
        numerator = 2 * d2**2 * (d1 + d2 - 2)
        denominator = d1 * (d2 - 2)**2 * (d2 - 4)
        variance = numerator / denominator

    return mean, variance

mean, var = f_distribution_stats(5, 10)
print(f"Mean: {mean:.4f}, Variance: {var:.4f}")

"""
Returns ∞ if the mean or variance is undefined based on degrees of freedom.

Works well for typical F-test scenarios.
"""

Mean: 1.2500, Variance: 1.3542


'\nReturns ∞ if the mean or variance is undefined based on degrees of freedom.\n\nWorks well for typical F-test scenarios.\n'

### Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.
Ans: \
To conduct an **F-test** for comparing two variances, we follow these steps:

---

###  **Given:**

- Sample 1:
  - $( n_1 = 10 ), ( s_1^2 = 25 )$
- Sample 2:
  - $( n_2 = 15 ), ( s_2^2 = 20 )$
- Significance level: $( \alpha = 0.10 )$

---

### **Step-by-step Calculation:**

#### 1. **Calculate the F-statistic**:
We always place the larger variance in the numerator:
$$
[
F = \frac{25}{20} = 1.25
]
$$
#### 2. **Degrees of Freedom**:
- $( df_1 = n_1 - 1 = 9 )$
- $( df_2 = n_2 - 1 = 14 )$


In [13]:
from scipy.stats import f

# Sample data
s1_sq = 25
s2_sq = 20
n1 = 10
n2 = 15

# F-statistic
F = s1_sq / s2_sq
df1 = n1 - 1
df2 = n2 - 1

# Two-tailed p-value
p_value = 2 * min(f.cdf(F, df1, df2), 1 - f.cdf(F, df1, df2))

# Critical values for 10% significance level (two-tailed)
alpha = 0.10
f_critical_low = f.ppf(alpha / 2, df1, df2)
f_critical_high = f.ppf(1 - alpha / 2, df1, df2)

# Print results
print(f"F-statistic: {F:.4f}")
print(f"Degrees of freedom: df1 = {df1}, df2 = {df2}")
print(f"Critical F-values: lower = {f_critical_low:.4f}, upper = {f_critical_high:.4f}")
print(f"p-value: {p_value:.4f}")

# Decision
if F < f_critical_low or F > f_critical_high:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: No significant difference in variances.")


F-statistic: 1.2500
Degrees of freedom: df1 = 9, df2 = 14
Critical F-values: lower = 0.3305, upper = 2.6458
p-value: 0.6832
Fail to reject the null hypothesis: No significant difference in variances.


In [14]:
# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.
import numpy as np
from scipy.stats import f

# Data
restaurant_a = np.array([24, 25, 28, 23, 22, 20, 27])
restaurant_b = np.array([31, 33, 35, 30, 32, 36])

# Sample sizes
n1 = len(restaurant_a)
n2 = len(restaurant_b)

# Sample variances
s1_sq = np.var(restaurant_a, ddof=1)
s2_sq = np.var(restaurant_b, ddof=1)

# Choose larger variance for numerator
if s1_sq > s2_sq:
    F = s1_sq / s2_sq
    df1, df2 = n1 - 1, n2 - 1
else:
    F = s2_sq / s1_sq
    df1, df2 = n2 - 1, n1 - 1

# Significance level
alpha = 0.05

# Critical F-values for two-tailed test
f_critical_low = f.ppf(alpha / 2, df1, df2)
f_critical_high = f.ppf(1 - alpha / 2, df1, df2)

# p-value (two-tailed)
p_value = 2 * min(f.cdf(F, df1, df2), 1 - f.cdf(F, df1, df2))

# Output
print(f"Sample Variance A: {s1_sq:.4f}")
print(f"Sample Variance B: {s2_sq:.4f}")
print(f"F-statistic: {F:.4f}")
print(f"Degrees of Freedom: df1 = {df1}, df2 = {df2}")
print(f"Critical F-values: Lower = {f_critical_low:.4f}, Upper = {f_critical_high:.4f}")
print(f"p-value: {p_value:.4f}")

# Decision
if F < f_critical_low or F > f_critical_high:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: No significant difference in variances.")
"""
If p<0.05: reject 𝐻0 (variances differ)
else : fail to reject

Sample Variance A: 7.8095
Sample Variance B: 5.3667
F-statistic: 1.4552
Degrees of Freedom: df1 = 6, df2 = 5
Critical F-values: Lower = 0.1670, Upper = 6.9777
p-value: 0.6975
Fail to reject the null hypothesis: No significant difference in variances.


In [16]:
# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
# Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
# are significantly different.
import numpy as np
from scipy.stats import f

# Test scores
group_a = np.array([80, 85, 90, 92, 87, 83])
group_b = np.array([75, 78, 82, 79, 81, 84])

# Sample sizes
n1 = len(group_a)
n2 = len(group_b)

# Sample variances (unbiased)
s1_sq = np.var(group_a, ddof=1)
s2_sq = np.var(group_b, ddof=1)

# Assign larger variance to numerator
if s1_sq > s2_sq:
    F = s1_sq / s2_sq
    df1, df2 = n1 - 1, n2 - 1
else:
    F = s2_sq / s1_sq
    df1, df2 = n2 - 1, n1 - 1

# Significance level
alpha = 0.01

# Critical F-values for two-tailed test
f_crit_low = f.ppf(alpha / 2, df1, df2)
f_crit_high = f.ppf(1 - alpha / 2, df1, df2)

# p-value (two-tailed)
p_value = 2 * min(f.cdf(F, df1, df2), 1 - f.cdf(F, df1, df2))

# Output results
print(f"Variance Group A: {s1_sq:.4f}")
print(f"Variance Group B: {s2_sq:.4f}")
print(f"F-statistic: {F:.4f}")
print(f"Degrees of Freedom: df1 = {df1}, df2 = {df2}")
print(f"Critical F-values: Lower = {f_crit_low:.4f}, Upper = {f_crit_high:.4f}")
print(f"p-value: {p_value:.4f}")

# Decision
if F < f_crit_low or F > f_crit_high:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: No significant difference in variances.")
"""
Null hypo(h0): The variance are equal
Alternate hypo(h1): The variance are not equal
if p<0.01: reject 𝐻0 (variances differ)
else : fail to reject
"""


Variance Group A: 19.7667
Variance Group B: 10.1667
F-statistic: 1.9443
Degrees of Freedom: df1 = 5, df2 = 5
Critical F-values: Lower = 0.0669, Upper = 14.9396
p-value: 0.4831
Fail to reject the null hypothesis: No significant difference in variances.


'\nNull hypo(h0): The variance are equal\nAlternate hypo(h1): The variance are not equal\nif p<0.01: reject 𝐻0 (variances differ)\nelse : fail to reject\n'