In [14]:
import numpy as np
import pandas as pd
import scipy.stats as stats

# Your data
data = {'Male': [12, 7, 7, 10, 8, 10, 11, 9, 9, 13, 4, 9, 12, 11, 9, 9, 7, 12, 10, 13, 11, 10, 6, 12, 11, 9, 10, 12, 8, 9, 13, 10, 9, 7, 10, 15, 8, 9, 11, 13, 10, 13],
        'Female': [11, 10, 11, 10, 11, 12, 12, 10, 9, 9, 9, 10, 8, 7, 12, 9, 7, 8, 9, 8, 7, 7, 9, 9, 10, 9, 13, 9, 10, 8, 9, 9, 10, 6, 12, 8, 11, 11, 12, 9, 10, 11]}

df = pd.DataFrame(data)

# Hypothesis Test
t_statistic, p_value = stats.ttest_ind(df['Male'], df['Female'])
print(f'T-statistic: {t_statistic}')
print(f'P-value: {p_value}')

# Descriptive Statistics
print(df.describe())



T-statistic: 0.9474088433196485
P-value: 0.34621415334112615
            Male     Female
count  42.000000  42.000000
mean    9.952381   9.547619
std     2.230087   1.640975
min     4.000000   6.000000
25%     9.000000   9.000000
50%    10.000000   9.000000
75%    11.750000  11.000000
max    15.000000  13.000000


In [15]:
# Confidence Intervals
ci_male = stats.t.interval(0.95, len(df['Male']) - 1, loc=df['Male'].mean(), scale=stats.sem(df['Male']))
ci_female = stats.t.interval(0.95, len(df['Female']) - 1, loc=df['Female'].mean(), scale=stats.sem(df['Female']))
mean_difference = np.mean(ci_male) - np.mean(ci_female)
ci_difference = stats.t.interval(0.95, len(ci_male) + len(ci_female) - 2, loc=mean_difference, scale=stats.sem(ci_male + ci_female))

print(f'95% CI for Male: {ci_male}')
print(f'95% CI for Female: {ci_female}')
print(f'95% CI of their difference:{ci_difference}')

95% CI for Male: (9.25743700418314, 10.647324900578765)
95% CI for Female: (9.036255229565832, 10.058982865672263)
95% CI of their difference:(-1.1920152138533542, 2.0015390233771644)


In [7]:
# Assumptions Testing
# You can use Shapiro-Wilk test for normality and Levene's test for equal variances
shapiro_stat_male, shapiro_p_male = stats.shapiro(df['Male'])
print(f'Shapiro-Wilk test for Male: p-value = {shapiro_p_male}')

Shapiro-Wilk test for Male: p-value = 0.42676302790641785


In [8]:
shapiro_stat_female, shapiro_p_female = stats.shapiro(df['Female'])
print(f'Shapiro-Wilk test for Female: p-value = {shapiro_p_female}')

Shapiro-Wilk test for Female: p-value = 0.14825962483882904


In [11]:
levene_stat, levene_p = stats.levene(df['Male'], df['Female'])
print(f'Levene\'s test for equal variances: p-value = {levene_p}')

Levene's test for equal variances: p-value = 0.1468943320200691
