# Linear Regression on Cleaned Admissions Data
This notebook loads the cleaned admissions dataset, fits a Linear Regression model on an 80/20 train/test split, reports the mean squared error (MSE) on both splits, and saves the test-set predictions to a CSV file.

In [None]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# 1. Read the cleaned admissions CSV into a DataFrame
data_path = '../data/admission_data_standardized.csv'
# Rename the lower-case target column (when present) so later cells can
# refer to it as 'Chance_of_Admit'.
df = pd.read_csv(data_path).rename(columns={'chance_of_admit': 'Chance_of_Admit'})
df.head()  # preview the first rows as the cell output

In [None]:
# 2. Separate the predictors (X) from the target column (y)
X = df.drop('Chance_of_Admit', axis=1)  # every column except the target
y = df['Chance_of_Admit']

In [None]:
# 3. Hold out 20% of the rows for testing; the fixed random_state keeps the
#    split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# 4. Fit a linear regression on the training split only
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# 5. Score the fitted model on both splits using mean squared error

# Training split: error on the rows the model was fitted on.
y_train_pred = model.predict(X_train)
mse_train = mean_squared_error(y_train, y_train_pred)

# Test split: error on the held-out rows (y_test_pred is reused when saving).
y_test_pred = model.predict(X_test)
mse_test = mean_squared_error(y_test, y_test_pred)

print(
    f'Training Mean Squared Error: {mse_train:.5f}',
    f'Test Mean Squared Error:     {mse_test:.5f}',
    sep='\n',
)

In [None]:
# 6. Save test predictions to CSV
# 'Actual' holds the held-out target values and 'Predicted' the model's
# outputs for the same rows, in the same order.
pred_df = pd.DataFrame({'Actual': y_test.values, 'Predicted': y_test_pred})
output_csv = '../results/regression_predictions.csv'
# to_csv does not create missing folders, so make sure the results directory
# exists before writing; exist_ok=True keeps re-runs from failing.
Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
pred_df.to_csv(output_csv, index=False)
print(f'Saved predictions to {output_csv}')

## Summary of Results
- **Training MSE**: 0.00387  
- **Test MSE**: 0.00462