# Student Performance Predictor

This notebook contains EDA, preprocessing, model building, and evaluation for predicting student performance.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error


## 📥 Load Data

In [None]:
# Location of the input CSV -- replace with your own file path
DATA_PATH = '../data/student_performance.csv'

# Read the dataset into a DataFrame and preview its first rows
df = pd.read_csv(DATA_PATH)
df.head()

## 🔍 Exploratory Data Analysis (EDA)

In [None]:
# Summary statistics. Jupyter only renders a cell's LAST expression
# automatically, so describe() has to be handed to display() explicitly --
# otherwise its table is computed and silently thrown away.
# (display is provided as a builtin inside IPython/Jupyter kernels.)
display(df.describe())

# Column dtypes and non-null counts; info() writes its report to stdout itself.
df.info()

# Pairwise plots of the columns to eyeball relationships and distributions.
sns.pairplot(df)
plt.show()

## 🛠️ Data Preprocessing

In [None]:
# Name of the column to predict -- replace with your dataset's target column.
TARGET_COLUMN = 'target_column'

# Fail early with a readable message (same KeyError type that drop() would
# raise) so a wrong placeholder name is obvious to whoever runs the notebook.
if TARGET_COLUMN not in df.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found; "
        f"available columns: {list(df.columns)}"
    )

# Handle missing values: drop incomplete rows into a NEW frame so the raw
# `df` shown by earlier cells stays intact and this cell is safe to re-run.
df_clean = df.dropna()

# Split features and target.
y = df_clean[TARGET_COLUMN]

# One-hot encode any object/category feature columns so the regressors receive
# purely numeric input; numeric columns pass through unchanged.
# drop_first=True drops one level per feature to avoid redundant, perfectly
# collinear dummy columns in the linear model.
X = pd.get_dummies(df_clean.drop(columns=TARGET_COLUMN), drop_first=True)

# Train-test split: hold out 20% of the rows, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## 🤖 Model Building

In [None]:
# Baseline model: linear regression.
# fit() returns the fitted estimator, so construction and training are chained.
lr = LinearRegression().fit(X_train, y_train)

# Second model: random forest regressor, seeded so repeated runs match.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict on the held-out test split with both fitted models.
lr_preds = lr.predict(X_test)
rf_preds = rf.predict(X_test)


## 📊 Evaluation

In [None]:
# Compute the test-set metrics for each model up front.
lr_r2 = r2_score(y_test, lr_preds)
lr_mae = mean_absolute_error(y_test, lr_preds)
rf_r2 = r2_score(y_test, rf_preds)
rf_mae = mean_absolute_error(y_test, rf_preds)

# Report: R^2 and mean absolute error, linear regression first.
print("Linear Regression R^2:", lr_r2)
print("Linear Regression MAE:", lr_mae)
print("Random Forest R^2:", rf_r2)
print("Random Forest MAE:", rf_mae)

# Actual-vs-predicted scatter for both models on a shared set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, lr_preds, label='Linear Regression', alpha=0.7)
ax.scatter(y_test, rf_preds, label='Random Forest', alpha=0.7)

# Dashed diagonal = perfect prediction, spanning the range of actual values.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], 'k--')

ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.legend()
ax.set_title('Actual vs Predicted')
plt.show()
