In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("k12.csv")

# Drop irrelevant columns
df = df.drop(columns=['Name', 'Phone Number'])

# Create a target column 'Performance' based on Score
def categorize_performance(score):
    if score < 60:
        return "Low"
    elif 60 <= score <= 80:
        return "Medium"
    else:
        return "High"

df['Performance'] = df['Score'].apply(categorize_performance)

# Drop original Score column (optional)
df = df.drop(columns=['Score'])

# Encode categorical variables
label_encoders = {}
for col in ['Gender', 'Region', 'Performance']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split features and target
X = df.drop(columns=['Performance'])
y = df['Performance']

# Scale numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Save model and encoders (optional for deployment)
import joblib
joblib.dump(model, 'ai_tutor_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')


Accuracy: 0.53

Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.39      0.45        41
           1       0.50      0.18      0.27        11
           2       0.53      0.73      0.61        48

    accuracy                           0.53       100
   macro avg       0.52      0.43      0.44       100
weighted avg       0.53      0.53      0.51       100



['label_encoders.pkl']