Q1. Calculate the 95% confidence interval for a sample of data with a mean of 50 and a standard deviation
of 5 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Summary statistics given in the question
mean = 50
std_dev = 5
sample_size = 100  # not stated in the question; 100 is assumed for illustration

# Two-sided z-interval: mean +/- z * (std_dev / sqrt(n))
confidence_level = 0.95
# Upper critical value of the standard normal for a two-sided interval
z_score = stats.norm.ppf((1 + confidence_level) / 2)
standard_error = std_dev / (sample_size ** 0.5)
margin_of_error = z_score * standard_error

lower_bound = mean - margin_of_error
upper_bound = mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)

print("95% Confidence Interval:", confidence_interval)

Q2. Conduct a chi-square goodness of fit test to determine if the distribution of colors of M&Ms in a bag
matches the expected distribution of 20% blue, 20% orange, 20% green, 10% yellow, 10% red, and 20%
brown. Use Python to perform the test with a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Observed counts per color, in the order Blue, Orange, Green, Yellow, Red, Brown
observed = np.array([18, 22, 16, 12, 10, 22])

# Hypothesised color proportions (20/20/20/10/10/20 %), same order as `observed`
expected_proportions = np.array([0.20, 0.20, 0.20, 0.10, 0.10, 0.20])

# Expected counts are the proportions scaled by the number of candies actually
# observed. chisquare expects observed and expected totals to agree, so scaling
# by observed.sum() keeps the test valid for a bag of any size instead of
# relying on the bag holding exactly 100 candies.
expected = expected_proportions * observed.sum()

# Chi-square goodness-of-fit test (degrees of freedom = number of colors - 1)
chi2_stat, p_val = chisquare(observed, expected)

print("Chi-square statistic:", chi2_stat)
print("p-value:", p_val)

# Compare the p-value against the 5% significance level
alpha = 0.05
if p_val < alpha:
    print("Reject the null hypothesis: The distribution of colors in the bag does not match the expected distribution.")
else:
    print("Fail to reject the null hypothesis: The distribution of colors in the bag matches the expected distribution.")

Q3. Use Python to calculate the chi-square statistic and p-value for a contingency table with the following
data (note: the contingency table itself is missing from this copy of the notebook):

Interpret the results of the test.

Q4. A study of the prevalence of smoking in a population of 500 individuals found that 60 individuals
smoked. Use Python to calculate the 95% confidence interval for the true proportion of individuals in the
population who smoke.

In [None]:
import scipy.stats as stats

# Survey counts from the question
n = 500  # number of individuals in the study
smokers = 60  # how many of them smoke

# Point estimate of the smoking proportion
p_hat = smokers / n

# Normal-approximation interval: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)
confidence_level = 0.95
# Upper critical value of the standard normal for a two-sided interval
z_score = stats.norm.ppf((1 + confidence_level) / 2)
standard_error = (p_hat * (1 - p_hat) / n) ** 0.5
margin_of_error = z_score * standard_error

lower_bound = p_hat - margin_of_error
upper_bound = p_hat + margin_of_error
confidence_interval = (lower_bound, upper_bound)

print("95% Confidence Interval for the proportion of smokers:", confidence_interval)

Q5. Calculate the 90% confidence interval for a sample of data with a mean of 75 and a standard deviation
of 12 using Python. Interpret the results.

In [None]:
import scipy.stats as stats

# Summary statistics given in the question
mean = 75
std_dev = 12
sample_size = 100  # not stated in the question; 100 is assumed for illustration

# Two-sided z-interval: mean +/- z * (std_dev / sqrt(n))
confidence_level = 0.90
# Upper critical value of the standard normal for a two-sided interval
z_score = stats.norm.ppf((1 + confidence_level) / 2)
standard_error = std_dev / (sample_size ** 0.5)
margin_of_error = z_score * standard_error

lower_bound = mean - margin_of_error
upper_bound = mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)

print("90% Confidence Interval:", confidence_interval)

Q6. Use Python to plot the chi-square distribution with 10 degrees of freedom. Label the axes and shade the
area corresponding to a chi-square statistic of 15.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Chi-square distribution to draw
df = 10

# Density curve over the plotted range [0, 30]
x = np.linspace(0, 30, 1000)
y = stats.chi2.pdf(x, df)

# Portion of the curve to the right of the chi-square statistic of 15
x_fill = np.linspace(15, 30, 100)
y_fill = stats.chi2.pdf(x_fill, df)

# Draw the full density, then shade the region beyond the statistic
plt.plot(x, y, 'b-', lw=2)
plt.fill_between(x_fill, y_fill, color='gray', alpha=0.5)

# Title and axis labels
plt.title('Chi-square Distribution (df=10)')
plt.xlabel('Chi-square Value')
plt.ylabel('Probability Density Function')

plt.show()

Q7. A random sample of 1000 people was asked if they preferred Coke or Pepsi. Of the sample, 520
preferred Coke. Calculate a 99% confidence interval for the true proportion of people in the population who
prefer Coke.

In [None]:
import scipy.stats as stats

# Survey counts from the question
n = 1000  # number of people sampled
coke_preferred = 520  # respondents who prefer Coke

# Point estimate of the proportion preferring Coke
p_hat = coke_preferred / n

# Normal-approximation interval: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)
confidence_level = 0.99
# Upper critical value of the standard normal for a two-sided interval
z_score = stats.norm.ppf((1 + confidence_level) / 2)
standard_error = (p_hat * (1 - p_hat) / n) ** 0.5
margin_of_error = z_score * standard_error

lower_bound = p_hat - margin_of_error
upper_bound = p_hat + margin_of_error
confidence_interval = (lower_bound, upper_bound)

print("99% Confidence Interval for the proportion of people who prefer Coke:", confidence_interval)

Q8. A researcher hypothesizes that a coin is biased towards tails. They flip the coin 100 times and observe
45 tails. Conduct a chi-square goodness of fit test to determine if the observed frequencies match the
expected frequencies of a fair coin. Use a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Flip outcomes, in the order Tails, Heads
observed = np.array([45, 55])

# A fair coin splits the 100 flips evenly
expected = np.array([50, 50])

# Chi-square goodness-of-fit test of the observed counts against the fair-coin counts
test_result = chisquare(f_obs=observed, f_exp=expected)
chi2_stat = test_result.statistic
p_val = test_result.pvalue

print("Chi-square statistic:", chi2_stat)
print("p-value:", p_val)

# Decide at the 5% significance level
alpha = 0.05
conclusion = (
    "Reject the null hypothesis: The observed frequencies do not match the expected frequencies of a fair coin."
    if p_val < alpha
    else "Fail to reject the null hypothesis: The observed frequencies match the expected frequencies of a fair coin."
)
print(conclusion)

Q11. A random sample of 30 people was selected from a population with an unknown mean and standard
deviation. The sample mean was found to be 72 and the sample standard deviation was found to be 10.
Conduct a hypothesis test to determine if the population mean is significantly different from 70. Use a
significance level of 0.05.

In [None]:
import scipy.stats as stats

# Values stated in the question
sample_size = 30
sample_mean = 72
sample_std_dev = 10
population_mean = 70  # hypothesised mean under the null hypothesis
alpha = 0.05

# One-sample t-statistic: (sample mean - hypothesised mean) / standard error
standard_error = sample_std_dev / (sample_size ** 0.5)
t_statistic = (sample_mean - population_mean) / standard_error

# Two-sided critical value from the t distribution with n - 1 degrees of freedom
degrees_of_freedom = sample_size - 1
critical_t_value = stats.t.ppf(1 - alpha/2, df=degrees_of_freedom)

# Reject only when the statistic falls beyond the critical value in either tail
if abs(t_statistic) <= critical_t_value:
    print("Fail to reject the null hypothesis: There is not enough evidence to conclude that the population mean is significantly different from 70.")
else:
    print("Reject the null hypothesis: The population mean is significantly different from 70.")