In [None]:
  #Answer: 1
   
A point estimate is a single value estimate of a parameter. For instance, a sample mean is a point estimate of 
a population mean. An interval estimate gives you a range of values where the parameter is expected to lie. 
A confidence interval is the most common type of interval estimate.

In [None]:
  #Answer: 2
   
import math
import scipy.stats as stats

def estimate_population_mean(sample_mean, sample_std, sample_size, confidence_level=0.95):
    """
    Estimate the population mean with a t-based confidence interval.

    The point estimate is the sample mean itself. The interval is
    sample_mean +/- t * (sample_std / sqrt(sample_size)), where t is the
    two-sided critical value of Student's t-distribution with
    sample_size - 1 degrees of freedom.

    Parameters:
    sample_mean (float): Mean of the sample
    sample_std (float): Standard deviation of the sample (must be >= 0)
    sample_size (int): Size of the sample (must be >= 2)
    confidence_level (float): Confidence level for the interval, strictly
        between 0 and 1 (default is 0.95)

    Returns:
    tuple: (estimated_mean, confidence_interval), where confidence_interval
        is a (lower_bound, upper_bound) tuple

    Raises:
    ValueError: If sample_size < 2, sample_std < 0, or confidence_level is
        not strictly between 0 and 1
    """
    # With fewer than two observations there are no degrees of freedom left,
    # so the t critical value (and therefore the interval) is undefined.
    if sample_size < 2:
        raise ValueError("sample_size must be at least 2")
    if sample_std < 0:
        raise ValueError("sample_std must be non-negative")
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    # Calculate the standard error of the mean
    standard_error = sample_std / math.sqrt(sample_size)

    # Determine the two-sided critical value from the t-distribution:
    # (1 + confidence_level) / 2 is the upper-tail quantile, e.g. 0.975 for 95%.
    degrees_of_freedom = sample_size - 1
    critical_value = stats.t.ppf((1 + confidence_level) / 2, degrees_of_freedom)

    # Calculate the margin of error
    margin_of_error = critical_value * standard_error

    # Calculate the confidence interval
    confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

    return sample_mean, confidence_interval

# Example usage: a sample of 30 observations with mean 100 and std 15
sample_mean, sample_std, sample_size = 100, 15, 30
confidence_level = 0.95

estimated_mean, confidence_interval = estimate_population_mean(
    sample_mean,
    sample_std,
    sample_size,
    confidence_level=confidence_level,
)

# Report the point estimate followed by the interval estimate
print(f"Estimated Population Mean: {estimated_mean}")
print(f"95% Confidence Interval: {confidence_interval}")


In [None]:
  #Answer: 3
    
The purpose of hypothesis testing is to determine whether the sample data provide enough evidence to reject the
null hypothesis (there is no difference, no effect). If the null hypothesis is rejected, the data support the
research (alternative) hypothesis. If the null hypothesis is not rejected, there is insufficient evidence to
support the research hypothesis; this does not prove that the null hypothesis is true.

In [None]:
  #Answer: 4
   
To create a hypothesis regarding the average weight of male and female college students, you would typically use a two-sample hypothesis test. Here's how you can state the hypotheses:

### Null Hypothesis (H₀)
The null hypothesis states that there is no difference in the average weight between male and female college students.

\[ H_0: \mu_m = \mu_f \]

Where \( \mu_m \) is the average weight of male college students, and \( \mu_f \) is the average weight of female college students.

### Alternative Hypothesis (H₁)
The alternative hypothesis states that the average weight of male college students is greater than the average weight of female college students.

\[ H_1: \mu_m > \mu_f \]

This is a one-tailed test since we are only interested in whether the average weight of male college students is greater than that of female college students.

### Steps to Test the Hypothesis
1. **Collect Sample Data**: Gather a sample of weights from male and female college students.
2. **Calculate Sample Means and Standard Deviations**: Compute the mean and standard deviation for the weights of both groups.
3. **Conduct a t-test**: Use an independent t-test to compare the means of the two groups.
4. **Determine the p-value**: Check the p-value against your significance level (commonly \( \alpha = 0.05 \)).
5. **Make a Decision**: If the p-value is less than the significance level, reject the null hypothesis in favor of the alternative hypothesis.

### Example in Python
Here's an example of how you might perform this hypothesis test in Python using sample data:

```python
import numpy as np
import scipy.stats as stats

# Sample data
weights_male = np.array([70, 75, 80, 85, 90, 95, 100])
weights_female = np.array([60, 65, 70, 75, 80, 85, 90])

# Summary statistics per group (ddof=1 gives the sample standard deviation)
mean_male, mean_female = weights_male.mean(), weights_female.mean()
std_male, std_female = weights_male.std(ddof=1), weights_female.std(ddof=1)

# One-tailed independent t-test: the alternative is that the male mean is greater
test_outcome = stats.ttest_ind(weights_male, weights_female, alternative='greater')
t_statistic, p_value = test_outcome.statistic, test_outcome.pvalue

print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")

# Significance level
alpha = 0.05

# Make a decision by comparing the p-value with the significance level
decision = (
    "Reject the null hypothesis: The average weight of male college students is greater than that of female college students."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is not enough evidence to say that the average weight of male college students is greater than that of female college students."
)
print(decision)
```

In this example, the `alternative='greater'` argument specifies that we are conducting a one-tailed test to determine if the average weight of male college students is greater than that of female college students.

In [None]:
  #Answer: 5
   
import numpy as np
import scipy.stats as stats

def conduct_hypothesis_test(sample1, sample2, alpha=0.05, alternative='two-sided'):
    """
    Conducts a hypothesis test on the difference between two population means.

    Runs an independent two-sample t-test (scipy.stats.ttest_ind with its
    default settings) and compares the resulting p-value with alpha.

    Parameters:
    sample1 (array-like): Sample data from the first population
    sample2 (array-like): Sample data from the second population
    alpha (float): Significance level (default is 0.05)
    alternative (str): Alternative hypothesis ('two-sided', 'greater', 'less')

    Returns:
    dict: A dictionary with the t-statistic, p-value, rejection decision,
        per-sample means and sample standard deviations, alpha, and the
        alternative that was tested

    Raises:
    ValueError: If alternative is not 'two-sided', 'greater', or 'less'
    """
    # Validate before doing any work: previously this check ran only after
    # stats.ttest_ind, which rejects bad values itself, so this function's own
    # error message could never be raised.
    if alternative not in ('two-sided', 'greater', 'less'):
        raise ValueError("Alternative hypothesis must be 'two-sided', 'greater', or 'less'")

    # Calculate the sample means and standard deviations (ddof=1: sample std)
    mean1 = np.mean(sample1)
    mean2 = np.mean(sample2)
    std1 = np.std(sample1, ddof=1)
    std2 = np.std(sample2, ddof=1)

    # Perform the independent two-sample t-test
    t_statistic, p_value = stats.ttest_ind(sample1, sample2, alternative=alternative)

    # Determine the result of the hypothesis test. For the one-sided
    # alternatives the observed difference must also point in the
    # hypothesized direction.
    reject_null = p_value < alpha
    if alternative == 'greater':
        reject_null = reject_null and mean1 > mean2
    elif alternative == 'less':
        reject_null = reject_null and mean1 < mean2

    # Create the result dictionary
    result = {
        't_statistic': t_statistic,
        'p_value': p_value,
        'reject_null': reject_null,
        'sample1_mean': mean1,
        'sample2_mean': mean2,
        'sample1_std': std1,
        'sample2_std': std2,
        'alpha': alpha,
        'alternative': alternative
    }

    return result

# Example usage: one-sided comparison of two small samples
sample1 = np.array([70, 75, 80, 85, 90, 95, 100])
sample2 = np.array([60, 65, 70, 75, 80, 85, 90])

# Conduct the hypothesis test
test_result = conduct_hypothesis_test(sample1, sample2, alpha=0.05, alternative='greater')

# Print each field of the result under its display label, in a fixed order
report_fields = [
    ("T-statistic", 't_statistic'),
    ("P-value", 'p_value'),
    ("Reject null hypothesis", 'reject_null'),
    ("Sample 1 Mean", 'sample1_mean'),
    ("Sample 2 Mean", 'sample2_mean'),
    ("Sample 1 Std", 'sample1_std'),
    ("Sample 2 Std", 'sample2_std'),
    ("Alpha", 'alpha'),
    ("Alternative Hypothesis", 'alternative'),
]
for label, key in report_fields:
    print(f"{label}: {test_result[key]}")


In [None]:
  #Answer: 6
   
Examples: Null Hypothesis: H0: There is no difference in the salary of factory workers based on gender. 
Alternative Hypothesis: Ha: Male factory workers have a higher salary than female factory workers.

In [None]:
  #Answer: 7
   
Step 1: State the Null Hypothesis. 
Step 2: State the Alternative Hypothesis.
Step 3: Set the significance level α.
Step 4: Collect Data.
Step 5: Calculate a test statistic.
Step 6: Construct Acceptance / Rejection regions.
Step 7: Based on steps 5 and 6, draw a conclusion about H0.

In [None]:
  #Answer: 8
   
A p-value is a statistical measurement used to evaluate a hypothesis against observed data. A p-value measures
the probability of obtaining results at least as extreme as those observed, assuming that the null hypothesis
is true. The lower the p-value, the stronger the evidence against the null hypothesis and the greater the
statistical significance of the observed difference.

In [None]:
  #Answer: 9
   
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Degrees of freedom of the t-distribution to draw
df = 10

# Evaluate the density on 1000 evenly spaced points over [-4, 4]
x = np.linspace(-4, 4, 1000)
y = stats.t.pdf(x, df)

# Draw the density curve on a single 10x6 inch figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, label=f't-distribution (df={df})', color='blue')

# Title, axis labels, legend and grid
ax.set_title("Student's t-Distribution")
ax.set_xlabel('x')
ax.set_ylabel('Probability Density')
ax.legend()
ax.grid(True)

# Show the plot
plt.show()


In [None]:
  #Answer: 10
   
import numpy as np
import scipy.stats as stats

def two_sample_t_test(sample1, sample2, alpha=0.05):
    """
    Run a two-sided t-test on two independent samples.

    Parameters:
    sample1 (array-like): Observations drawn from the first population
    sample2 (array-like): Observations drawn from the second population
    alpha (float): Significance level used for the decision (default is 0.05)

    Returns:
    dict: The t-statistic, the p-value, whether the null hypothesis is
        rejected (p-value below alpha), each sample's mean and sample
        standard deviation (ddof=1), and the alpha that was used
    """
    # Independent two-sample t-test with scipy's default settings
    outcome = stats.ttest_ind(sample1, sample2)
    t_statistic, p_value = outcome.statistic, outcome.pvalue

    # Assemble the summary; the null is rejected when the p-value is below alpha
    return {
        't_statistic': t_statistic,
        'p_value': p_value,
        'reject_null': p_value < alpha,
        'sample1_mean': np.mean(sample1),
        'sample2_mean': np.mean(sample2),
        'sample1_std': np.std(sample1, ddof=1),
        'sample2_std': np.std(sample2, ddof=1),
        'alpha': alpha,
    }

# Example usage: two random samples of 30 draws each from normal populations
# with standard deviation 10 and means 50 and 55
sample1 = np.random.normal(loc=50, scale=10, size=30)  # Random sample from population 1
sample2 = np.random.normal(loc=55, scale=10, size=30)  # Random sample from population 2

# Conduct the hypothesis test
test_result = two_sample_t_test(sample1, sample2, alpha=0.05)

# Print each field of the result under its display label, in a fixed order
report_fields = [
    ("T-statistic", 't_statistic'),
    ("P-value", 'p_value'),
    ("Reject null hypothesis", 'reject_null'),
    ("Sample 1 Mean", 'sample1_mean'),
    ("Sample 2 Mean", 'sample2_mean'),
    ("Sample 1 Std", 'sample1_std'),
    ("Sample 2 Std", 'sample2_std'),
    ("Alpha", 'alpha'),
]
for label, key in report_fields:
    print(f"{label}: {test_result[key]}")


In [None]:
  #Answer: 11
   
Student's t-distribution, also known as the t-distribution, is a probability distribution that is used in
statistics for making inferences about the population mean when the sample size is small or when the population
standard deviation is unknown.

In [None]:
  #Answer: 12
   
In statistics, the t-statistic is the ratio of the difference between a parameter's estimated value and its
hypothesized value to the standard error of the estimate. It is used in hypothesis testing via Student's t-test,
where it helps determine whether to reject or fail to reject the null hypothesis.

In [None]:
  #Answer: 13
   
import math
import scipy.stats as stats

# Given data
sample_mean = 500
sample_std = 50
sample_size = 50
confidence_level = 0.95

# Two-sided critical z-value derived from the confidence level rather than a
# hard-coded 1.96, so changing confidence_level changes the interval
# consistently (0.95 -> about 1.96).
z_alpha_2 = stats.norm.ppf(1 - (1 - confidence_level) / 2)

# Standard error of the mean
standard_error = sample_std / math.sqrt(sample_size)

# Margin of error
margin_of_error = z_alpha_2 * standard_error

# Confidence interval
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

# Print the result; the label follows confidence_level ("95%" for 0.95)
print(f"Estimated Population Mean: {sample_mean}")
print(f"{confidence_level:.0%} Confidence Interval: {confidence_interval}")


In [None]:
  #Answer: 14
   
import scipy.stats as stats

# Given data
mu_0 = 10
sample_mean = 8
sample_std = 3
sample_size = 100
alpha = 0.05

# Degrees of freedom for a one-sample t-test
degrees_of_freedom = sample_size - 1

# Calculate the test statistic: distance of the sample mean from mu_0 in
# units of the standard error
standard_error = sample_std / (sample_size ** 0.5)
t_statistic = (sample_mean - mu_0) / standard_error

# Calculate the critical value for a two-tailed test
critical_value = stats.t.ppf(1 - alpha / 2, df=degrees_of_freedom)

# Calculate the two-tailed p-value. The survival function (sf = 1 - cdf) is
# used directly because computing 1 - cdf loses precision far out in the tail,
# which is where this statistic lies.
p_value = 2 * stats.t.sf(abs(t_statistic), df=degrees_of_freedom)

# Determine whether to reject the null hypothesis
reject_null = p_value < alpha

# Print the results
print(f"T-statistic: {t_statistic:.2f}")
print(f"Critical value: ±{critical_value:.2f}")
print(f"P-value: {p_value:.4f}")
print(f"Reject null hypothesis: {reject_null}")


In [None]:
  #Answer: 15
   
import scipy.stats as stats

# Given data
mu_0 = 5
sample_mean = 4.8
sample_std = 0.5
sample_size = 25
alpha = 0.01

# Student's t-distribution with n - 1 degrees of freedom, frozen for reuse
t_dist = stats.t(df=sample_size - 1)

# Test statistic: distance of the sample mean from mu_0 in standard-error units
t_statistic = (sample_mean - mu_0) / (sample_std / (sample_size ** 0.5))

# Lower-tail critical value and p-value for the one-tailed test
critical_value = t_dist.ppf(alpha)
p_value = t_dist.cdf(t_statistic)

# Reject the null hypothesis when the p-value falls below alpha
reject_null = p_value < alpha

# Print the results
for report_line in (
    f"T-statistic: {t_statistic:.2f}",
    f"Critical value: {critical_value:.2f}",
    f"P-value: {p_value:.4f}",
    f"Reject null hypothesis: {reject_null}",
):
    print(report_line)


In [None]:
  #Answer: 16
   
import math
import scipy.stats as stats

# Given data: size, mean and standard deviation of each sample
n1, mean1, std1 = 30, 80, 10
n2, mean2, std2 = 40, 75, 8

alpha = 0.01

# Degrees of freedom for the pooled-variance test
df = n1 + n2 - 2

# Pooled variance: each sample variance weighted by its own degrees of freedom
pooled_variance = (((n1 - 1) * std1**2) + ((n2 - 1) * std2**2)) / df

# Test statistic: difference in means over its pooled standard error
pooled_standard_error = math.sqrt(pooled_variance * (1/n1 + 1/n2))
t_statistic = (mean1 - mean2) / pooled_standard_error

# Critical value and p-value for a two-tailed test
t_dist = stats.t(df)
critical_value = t_dist.ppf(1 - alpha/2)
p_value = 2 * (1 - t_dist.cdf(abs(t_statistic)))

# Reject the null hypothesis when the p-value falls below alpha
reject_null = p_value < alpha

# Print the results
for report_line in (
    f"T-statistic: {t_statistic:.2f}",
    f"Critical value: ±{critical_value:.2f}",
    f"P-value: {p_value:.4f}",
    f"Reject null hypothesis: {reject_null}",
):
    print(report_line)


In [None]:
  #Answer: 17
   
import math
import scipy.stats as stats

# Given data
sample_mean = 4
sample_std = 1.5
sample_size = 50
confidence_level = 0.99

# Two-sided critical z-value: the probability outside the interval is split
# evenly between the two tails
tail_probability = (1 - confidence_level) / 2
z_alpha_2 = stats.norm.ppf(1 - tail_probability)

# Margin of error = critical value times the standard error of the mean
standard_error = sample_std / math.sqrt(sample_size)
margin_of_error = z_alpha_2 * standard_error

# Confidence interval centred on the sample mean
lower_bound = sample_mean - margin_of_error
upper_bound = sample_mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Print the result
print(f"Sample Mean: {sample_mean}")
print(f"99% Confidence Interval: {confidence_interval}")
