## Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
from scipy.stats import f_oneway

def variance_ratio_test(array1, array2):
    """
    Perform a variance ratio test between two arrays of data.

    Parameters:
    - array1: First array of data.
    - array2: Second array of data.

    Returns:
    - f_value: The calculated F-value for the variance ratio test.
    - p_value: The corresponding p-value for the test.
    """
    # Perform variance ratio test (F-test)
    f_value, p_value = f_oneway(array1, array2)

    return f_value, p_value

# Example usage:
data1 = [10, 12, 15, 8, 11]
data2 = [14, 16, 18, 13, 15]

f_value, p_value = variance_ratio_test(data1, data2)

print("F-value:", f_value)
print("p-value:", p_value)


F-value: 7.692307692307691
p-value: 0.02416573061824416


## Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
from scipy.stats import f

def critical_f_value(alpha, df_num, df_denom):
    """
    Calculate the critical F-value for a two-tailed test.

    Parameters:
    - alpha: Significance level (e.g., 0.05 for a 5% significance level).
    - df_num: Degrees of freedom for the numerator.
    - df_denom: Degrees of freedom for the denominator.

    Returns:
    - critical_f: The critical F-value for the two-tailed test.
    """
    # Calculate critical F-value
    critical_f = f.ppf(1 - alpha/2, df_num, df_denom)

    return critical_f

# Example usage:
alpha = 0.05
df_num = 3
df_denom = 20

critical_f = critical_f_value(alpha, df_num, df_denom)
print(f"Critical F-value for a two-tailed test: {critical_f:.4f}")


Critical F-value for a two-tailed test: 3.8587


## Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F- value, degrees of freedom, and p-value for the test.

In [3]:
import numpy as np
from scipy.stats import f

def compare_variances(data1, data2):
    """
    Perform an F-test to compare the variances of two datasets.

    Parameters:
    - data1: First dataset (array-like).
    - data2: Second dataset (array-like).

    Returns:
    - f_value: The calculated F-value for the variance ratio test.
    - df_num: Degrees of freedom for the numerator.
    - df_denom: Degrees of freedom for the denominator.
    - p_value: The corresponding p-value for the test.
    """
    # Calculate F-test statistics
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    df_num = len(data1) - 1
    df_denom = len(data2) - 1

    # Perform F-test
    f_value = var1 / var2 if var1 > var2 else var2 / var1
    p_value = 2 * min(f.cdf(f_value, df_num, df_denom), 1 - f.cdf(f_value, df_num, df_denom))

    return f_value, df_num, df_denom, p_value

# Example usage:
np.random.seed(0)  # for reproducibility

# Generating random samples from two normal distributions
data1 = np.random.normal(loc=10, scale=5, size=30)
data2 = np.random.normal(loc=12, scale=5, size=30)

# Performing F-test
f_value, df_num, df_denom, p_value = compare_variances(data1, data2)

# Output results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)


F-value: 1.4485238079262464
Degrees of freedom (numerator): 29
Degrees of freedom (denominator): 29
p-value: 0.32394803632962454


## Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [4]:
from scipy.stats import f

# Given data
variance1 = 10
variance2 = 15
sample_size = 12
alpha = 0.05

# Calculate the F-statistic
f_statistic = variance1 / variance2

# Degrees of freedom
df_num = sample_size - 1
df_denom = sample_size - 1

# Calculate critical F-value
critical_f_value = f.ppf(1 - alpha/2, df_num, df_denom)

# Conduct the F-test
p_value = 2 * min(f.cdf(f_statistic, df_num, df_denom), 1 - f.cdf(f_statistic, df_num, df_denom))

# Output results
print("F-statistic:", f_statistic)
print("Critical F-value:", critical_f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)

# Compare with the critical region
if f_statistic > critical_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in variances.")


F-statistic: 0.6666666666666666
Critical F-value: 3.473699051085809
Degrees of freedom (numerator): 11
Degrees of freedom (denominator): 11
p-value: 0.5123897987357996
Fail to reject the null hypothesis. There is no significant difference in variances.


## Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [5]:
from scipy.stats import f

# Given data
claimed_variance = 0.005
sample_variance = 0.006
sample_size = 25
alpha = 0.01  # 1% significance level

# Degrees of freedom
df_num = sample_size - 1
df_denom = 1  # Since we are comparing with a specific value (claimed variance)

# Calculate the F-statistic
f_statistic = (sample_variance / claimed_variance) * df_num

# Calculate critical F-value
critical_f_value = f.ppf(1 - alpha, df_num, df_denom)

# Conduct the F-test
p_value = 1 - f.cdf(f_statistic, df_num, df_denom)

# Output results
print("F-statistic:", f_statistic)
print("Critical F-value:", critical_f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)

# Compare with the critical region
if f_statistic > critical_f_value:
    print("Reject the null hypothesis. The claimed variance is not justified.")
else:
    print("Fail to reject the null hypothesis. The claimed variance is justified.")


F-statistic: 28.799999999999997
Critical F-value: 6234.6308935330835
Degrees of freedom (numerator): 24
Degrees of freedom (denominator): 1
p-value: 0.14625497838405432
Fail to reject the null hypothesis. The claimed variance is justified.


## Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [6]:
def f_distribution_mean_variance(df_num, df_denom):
    """
    Calculate the mean and variance of an F-distribution.

    Parameters:
    - df_num: Degrees of freedom for the numerator.
    - df_denom: Degrees of freedom for the denominator.

    Returns:
    - mean: Mean of the F-distribution.
    - variance: Variance of the F-distribution.
    """
    mean = df_denom / (df_denom - 2)
    variance = (2 * df_denom**2 * (df_num + df_denom - 2)) / (df_num * (df_denom - 2)**2 * (df_denom - 4))

    return mean, variance

# Example usage:
df_num = 3
df_denom = 20

mean, variance = f_distribution_mean_variance(df_num, df_denom)

print(f"Mean of F-distribution: {mean:.4f}")
print(f"Variance of F-distribution: {variance:.4f}")


Mean of F-distribution: 1.1111
Variance of F-distribution: 1.0802


## Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [7]:
from scipy.stats import f

# Given data
sample_variance1 = 25
sample_size1 = 10
sample_variance2 = 20
sample_size2 = 15
alpha = 0.10  # 10% significance level

# Degrees of freedom
df_num1 = sample_size1 - 1
df_num2 = sample_size2 - 1

# Calculate the F-statistic
f_statistic = sample_variance1 / sample_variance2 if sample_variance1 > sample_variance2 else sample_variance2 / sample_variance1

# Degrees of freedom for the numerator and denominator
df_num = max(df_num1, df_num2)
df_denom = min(df_num1, df_num2)

# Calculate critical F-value
critical_f_value = f.ppf(1 - alpha/2, df_num, df_denom)

# Conduct the F-test
p_value = 2 * min(f.cdf(f_statistic, df_num, df_denom), 1 - f.cdf(f_statistic, df_num, df_denom))

# Output results
print("F-statistic:", f_statistic)
print("Critical F-value:", critical_f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)

# Compare with the critical region
if f_statistic > critical_f_value or f_statistic < 1/critical_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in variances.")


F-statistic: 1.25
Critical F-value: 3.0254727242822095
Degrees of freedom (numerator): 14
Degrees of freedom (denominator): 9
p-value: 0.7537416997292505
Fail to reject the null hypothesis. There is no significant difference in variances.


## Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [9]:
from scipy.stats import f

# Given data
data_a = [24, 25, 28, 23, 22, 20, 27]
data_b = [31, 33, 35, 30, 32, 36]
alpha = 0.05  # 5% significance level

# Calculate sample variances
variance_a = sum((x - sum(data_a) / len(data_a))**2 for x in data_a) / (len(data_a) - 1)
variance_b = sum((x - sum(data_b) / len(data_b))**2 for x in data_b) / (len(data_b) - 1)

# Calculate the F-statistic
f_statistic = variance_a / variance_b if variance_a > variance_b else variance_b / variance_a

# Degrees of freedom
df_num_a = len(data_a) - 1
df_num_b = len(data_b) - 1
df_num = max(df_num_a, df_num_b)
df_denom = min(df_num_a, df_num_b)

# Calculate critical F-value
critical_f_value = f.ppf(1 - alpha/2, df_num, df_denom)

# Conduct the F-test
p_value = 2 * min(f.cdf(f_statistic, df_num, df_denom), 1 - f.cdf(f_statistic, df_num, df_denom))

# Output results
print("F-statistic:", f_statistic)
print("Critical F-value:", critical_f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)

# Compare with the critical region
if f_statistic > critical_f_value or f_statistic < 1/critical_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in variances.")


F-statistic: 1.4551907719609583
Critical F-value: 6.977701858535566
Degrees of freedom (numerator): 6
Degrees of freedom (denominator): 5
p-value: 0.6974815747937484
Fail to reject the null hypothesis. There is no significant difference in variances.


## Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [10]:
from scipy.stats import f

# Given data
group_a = [80, 85, 90, 92, 87, 83]
group_b = [75, 78, 82, 79, 81, 84]
alpha = 0.01  # 1% significance level

# Calculate sample variances
variance_a = sum((x - sum(group_a) / len(group_a))**2 for x in group_a) / (len(group_a) - 1)
variance_b = sum((x - sum(group_b) / len(group_b))**2 for x in group_b) / (len(group_b) - 1)

# Calculate the F-statistic
f_statistic = variance_a / variance_b if variance_a > variance_b else variance_b / variance_a

# Degrees of freedom
df_num_a = len(group_a) - 1
df_num_b = len(group_b) - 1
df_num = max(df_num_a, df_num_b)
df_denom = min(df_num_a, df_num_b)

# Calculate critical F-value
critical_f_value = f.ppf(1 - alpha/2, df_num, df_denom)

# Conduct the F-test
p_value = 2 * min(f.cdf(f_statistic, df_num, df_denom), 1 - f.cdf(f_statistic, df_num, df_denom))

# Output results
print("F-statistic:", f_statistic)
print("Critical F-value:", critical_f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_denom)
print("p-value:", p_value)

# Compare with the critical region
if f_statistic > critical_f_value or f_statistic < 1/critical_f_value:
    print("Reject the null hypothesis. The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in variances.")


F-statistic: 1.9442622950819677
Critical F-value: 14.939605459912224
Degrees of freedom (numerator): 5
Degrees of freedom (denominator): 5
p-value: 0.4831043549070688
Fail to reject the null hypothesis. There is no significant difference in variances.
