# Student Performance Prediction

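This notebook demonstrates an end-to-end machine-learning pipeline for predicting student performance: loading the data, visualizing it, preprocessing it, training a logistic regression model, and evaluating that model on a held-out test split.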

In [None]:
import pandas as pd
import os
import sys

# Make the parent of the current working directory importable so that the
# `src` modules imported below can be found. The relative ".." is resolved
# against the working directory, so this assumes the notebook is run from its
# own folder.
project_root = os.path.abspath("..")
# Notebook cells are often re-executed; only append when the entry is missing
# so repeated runs do not pile up duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data_preprocessing import load_data, preprocess_data
from src.visualization import plot_study_hours_vs_marks, plot_attendance_vs_result
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## 1. Load Data

In [None]:
# Build the CSV location relative to the working directory, one component at
# a time so the separator is chosen by os.path.join.
path_parts = ("..", "data", "students_data.csv")
data_path = os.path.join(*path_parts)
df = load_data(data_path)
# Trailing expression of the cell: the notebook displays its value
# (presumably the first rows of a pandas DataFrame — confirm load_data's
# return type in src/data_preprocessing.py).
df.head()

## 2. Data Visualization

In [None]:
# Visual inspection of the loaded data before preprocessing. The helper is
# imported from src.visualization; judging by its name it presumably charts
# study hours against marks — confirm in that module.
plot_study_hours_vs_marks(df)

In [None]:
# Second exploratory plot, also imported from src.visualization; judging by
# its name it presumably relates attendance to the result column — confirm in
# that module.
plot_attendance_vs_result(df)

## 3. Preprocessing

In [None]:
# Split the raw frame into the model inputs (X) and the prediction target (y)
# using the project's preprocessing helper.
X, y = preprocess_data(df)
# Report the dimensions of both pieces as a quick sanity check.
for label, part in (("Features:", X), ("Target:", y)):
    print(label, part.shape)

## 4. Model Training

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# fit() returns the estimator itself, so construction and training can be
# chained while `model` still refers to the fitted LogisticRegression.
model = LogisticRegression().fit(X_train, y_train)
print("Model Trained!")

## 5. Evaluation

In [None]:
# Predict labels for the held-out split and score them against the true
# labels.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Print the heading and the report in separate calls: passing both to a single
# print() inserts the default " " separator in front of the report's first
# line, which pushes the column headers one character out of alignment with
# the rows beneath them.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))