In [3]:
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import numpy as np

# Load dataset
df = pd.read_csv('placement.csv')  # Upload placement.csv in your Colab session

# Example: inferential stats on CGPA.
# Drop missing values up front so the mean, standard deviation and sample
# size below all describe the same observations: pandas reductions skip
# NaN by default, whereas len() would still count those rows.
cgpa = df['cgpa'].dropna()

# 1. Descriptive stats
mean_cgpa = cgpa.mean()
std_cgpa = cgpa.std(ddof=1)  # ddof=1 -> sample (n - 1) standard deviation
n = len(cgpa)

print(f"Mean CGPA: {mean_cgpa:.2f}")
print(f"Standard Deviation: {std_cgpa:.2f}")
print(f"Sample Size: {n}")

# 2. Confidence interval for the mean, using the t distribution with n - 1
#    degrees of freedom centred on the sample mean and scaled by the
#    standard error of the mean.
#    NOTE: the `confidence` keyword is the newer SciPy spelling; older
#    releases named this parameter `alpha`.
confidence_level = 0.95
conf_int = stats.t.interval(
    confidence=confidence_level,
    df=n - 1,
    loc=mean_cgpa,
    scale=stats.sem(cgpa),
)

# Format the bounds explicitly: printing the raw tuple shows noisy
# `np.float64(...)` reprs under NumPy 2.
ci_low, ci_high = conf_int
print(
    f"{confidence_level:.0%} Confidence Interval for mean CGPA: "
    f"({ci_low:.3f}, {ci_high:.3f})"
)


# 3. One-sample t-test: is the mean CGPA significantly different from the
#    hypothesised population mean? (SciPy's default is a two-sided test.)
#    The null value is named once so the test and the printed hypothesis
#    cannot drift apart.
hypothesized_mean = 7.0
t_stat, p_value = stats.ttest_1samp(cgpa, popmean=hypothesized_mean)
print(f"\nOne-sample t-test (H0: mean = {hypothesized_mean}):")
print(f"t-statistic = {t_stat:.3f}, p-value = {p_value:.3f}")

# 4. Two-sample t-test (compare CGPA of placed vs not placed students)
# Select each group with a single .loc lookup (row mask + column) instead
# of chained indexing, and drop missing CGPA values so one NaN does not
# turn the whole test result into NaN.
is_placed = df['placed'] == 1
is_not_placed = df['placed'] == 0
placed = df.loc[is_placed, 'cgpa'].dropna()
not_placed = df.loc[is_not_placed, 'cgpa'].dropna()

# Welch's t-test: equal_var=False does not assume equal group variances.
t_stat2, p_value2 = stats.ttest_ind(placed, not_placed, equal_var=False)
print("\nTwo-sample t-test (Placed vs Not Placed):")
print(f"t-statistic = {t_stat2:.3f}, p-value = {p_value2:.3f}")


Mean CGPA: 6.96
Standard Deviation: 0.62
Sample Size: 1000
95% Confidence Interval for mean CGPA: (np.float64(6.923020650971303), np.float64(6.999459349028697))

One-sample t-test (H0: mean = 7.0):
t-statistic = -1.990, p-value = 0.047

Two-sample t-test (Placed vs Not Placed):
t-statistic = 0.860, p-value = 0.390
