In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

In [None]:
# Location of the StudentsPerformance workbook that feeds the whole notebook.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact file exists — consider a configurable data directory.
data_path = r"c:\Users\phunk\Desktop\MyProject\StudentsPerformance.xlsx"
df = pd.read_excel(data_path)

# Display the first rows as a quick sanity check of the loaded columns.
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [None]:
# The three exam scores are predicted jointly (multi-output regression);
# every other column of `df` is used as an input feature.
target_cols = ["math score", "reading score", "writing score"]
y = df[target_cols]
X = df.drop(columns=target_cols)

In [None]:
# One seed for the split and for every stochastic estimator, so that
# Restart Kernel -> Run All reproduces the same scores.
SEED = 42

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Columns that are one-hot encoded; any other column in X is passed through unchanged.
cat_cols = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
preprocess = ColumnTransformer(
    transformers=[
        # drop='first' removes one dummy per feature to avoid perfectly collinear columns.
        ("cat", OneHotEncoder(drop='first'), cat_cols)
    ],
    remainder="passthrough"
)

# Candidate regressors keyed by a short name. The tree-based models are seeded
# explicitly; without random_state their scores differ from run to run.
# GradientBoostingRegressor is wrapped in MultiOutputRegressor so that one
# booster is fitted per target column.
models = {
    'lr': LinearRegression(),
    'dt': DecisionTreeRegressor(random_state=SEED),
    'rf': RandomForestRegressor(random_state=SEED),
    'gb': MultiOutputRegressor(GradientBoostingRegressor(random_state=SEED)),
}

# Every pipeline has the same shape: encode -> scale -> model.
# with_mean=False keeps the scaler usable on the encoder's sparse output.
# NOTE(review): the single `preprocess` object is shared by all pipelines and is
# re-fitted by each one; that is harmless here because every pipeline is fitted
# on the same X_train, but fitting them on different data would need one
# transformer per pipeline.
pipeline = {
    name: Pipeline([
        ('pre', preprocess),
        ('scal', StandardScaler(with_mean=False)),
        ('model', model),
    ])
    for name, model in models.items()
}

for name, pipe in pipeline.items():
    pipe.fit(X_train, y_train)
    # For regressors .score() is the coefficient of determination (R^2), not an
    # accuracy: it can be negative when the model is worse than predicting the mean.
    print(f"{name} R^2 : {pipe.score(X_test, y_test):.3f}")

lr accuracy : 0.251
dt accuracy : -0.028
rf accuracy : 0.022
gb accuracy : 0.196
