In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [3]:
df=pd.read_csv('stud.csv')

In [4]:
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [5]:
df.nunique()

gender                          2
race_ethnicity                  5
parental_level_of_education     6
lunch                           2
test_preparation_course         2
math_score                     81
reading_score                  72
writing_score                  77
dtype: int64

In [6]:
df.isnull().sum()

gender                         0
race_ethnicity                 0
parental_level_of_education    0
lunch                          0
test_preparation_course        0
math_score                     0
reading_score                  0
writing_score                  0
dtype: int64

In [7]:
df.isna().sum()

gender                         0
race_ethnicity                 0
parental_level_of_education    0
lunch                          0
test_preparation_course        0
math_score                     0
reading_score                  0
writing_score                  0
dtype: int64

In [8]:
df.describe()

Unnamed: 0,math_score,reading_score,writing_score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [10]:
X=df.drop(['math_score'],axis=1)
Y=df['math_score']

In [11]:
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=42)

In [12]:
from sklearn.compose import ColumnTransformer

In [17]:
num_features=X_train.select_dtypes(exclude='object').columns
cat_features=X_train.select_dtypes(include='object').columns

In [18]:
cat_features

Index(['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch',
       'test_preparation_course'],
      dtype='object')

In [19]:
stTransformer=StandardScaler()
ohEncoder=OneHotEncoder()
pipeline=ColumnTransformer([
    ('OneHotEncoder',ohEncoder,cat_features),
    ('StandardScaler',stTransformer,num_features)
]
)

In [20]:
X_train=pipeline.fit_transform(X_train)

In [22]:
X_train

array([[ 0.        ,  1.        ,  1.        , ...,  0.        ,
        -1.59453623, -0.98844821],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.8575186 ,  0.62886547],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
        -0.33347946, -0.51673172],
       ...,
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
        -0.54365559, -1.05583628],
       [ 0.        ,  1.        ,  0.        , ...,  0.        ,
        -1.52447752, -1.05583628],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
         1.48804699,  1.37013424]])

In [23]:
X_test=pipeline.transform(X_test)

In [24]:
X_test.shape

(250, 19)

In [None]:

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor


from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import warnings



In [26]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-win_amd64.whl (101.8 MB)
     -------------------------------------- 101.8/101.8 MB 6.2 MB/s eta 0:00:00
Collecting graphviz
  Downloading graphviz-0.20.3-py3-none-any.whl (47 kB)
     ---------------------------------------- 47.1/47.1 kB 2.5 MB/s eta 0:00:00
Collecting plotly
  Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
     --------------------------------------- 14.8/14.8 MB 10.1 MB/s eta 0:00:00
Collecting narwhals>=1.15.1
  Downloading narwhals-1.34.0-py3-none-any.whl (325 kB)
     ------------------------------------- 325.3/325.3 kB 10.2 MB/s eta 0:00:00
Installing collected packages: narwhals, graphviz, plotly, catboost
Successfully installed catboost-1.2.7 graphviz-0.20.3 narwhals-1.34.0 plotly-6.0.1


In [27]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-win_amd64.whl (150.0 MB)
     -------------------------------------- 150.0/150.0 MB 4.8 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-3.0.0


In [None]:
models={
    "LinearRegression":LinearRegression(),
    "Ridge":Ridge(),
    "DecisionTreeRegressor":DecisionTreeRegressor(),
    "RandomForestRegressor":RandomForestRegressor(),
    "SVR":SVR(),
    "KNeighborsRegressor":KNeighborsRegressor(),
    "AdaBoostRegressor":AdaBoostRegressor(),
    "Lasso":Lasso(),
    "CatBoostRegressor":CatBoostRegressor(),
    "XGBRegressor":XGBRegressor()
}

In [46]:
for i in range(len(list(models))):
    models = list(models.values())[i]
    
    models.fit(X_train,y_train)

    y_train_pred=models.predict(X_train)
    y_test_pred=models.predict(X_test)

    print("----------------------------------------------------")
    print(f"r2_score for train:{r2_score(y_train_pred,y_train)}")
    print(f"r2_score for test:{r2_score(y_test_pred,y_test)}")
    print("----------------------------------------------------")
    print(f"mae_score for train:{mean_absolute_error(y_train_pred,y_train)}")
    print(f"mae_score for test:{mean_absolute_error(y_test_pred,y_test)}")
    print("----------------------------------------------------")
    print(f"mse_score for train:{mean_squared_error(y_train_pred,y_train)}")
    print(f"mse_score for test:{mean_squared_error(y_test_pred,y_test)}")
    print("----------------------------------------------------")


----------------------------------------------------
r2_score for train:0.8561928279517288
r2_score for test:0.8643028953760341
----------------------------------------------------
mae_score for train:4.238267465725307
mae_score for test:4.337930600167408
----------------------------------------------------
mse_score for train:28.060804235684667
mse_score for test:30.0581146267204
----------------------------------------------------


AttributeError: 'LinearRegression' object has no attribute 'values'