# Automation of ML Algorithm Tasks

In [76]:
!pip install xgboost
!pip install lightgbm
!pip install catboost



In [59]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection

from sklearn.linear_model import LinearRegression

from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [60]:
# Load the Hitters data set into `df` and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it points at the reader's own copy of Hitters.csv.
df = pd.read_csv("/Users/User/Hitters.csv")
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
0,293,66,1,30,29,14,1,293,66,1,30,29,14,A,E,446,33,20,,A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N


In [61]:
# Show each column's dtype to see which columns are non-numeric.
df.dtypes

AtBat          int64
Hits           int64
HmRun          int64
Runs           int64
RBI            int64
Walks          int64
Years          int64
CAtBat         int64
CHits          int64
CHmRun         int64
CRuns          int64
CRBI           int64
CWalks         int64
League        object
Division      object
PutOuts        int64
Assists        int64
Errors         int64
Salary       float64
NewLeague     object
dtype: object

In [62]:
# Discard every row that contains at least one missing value. Rebinding the
# name (instead of `inplace=True`) leaves `df` in the same final state and
# keeps the cell safe to re-run.
df = df.dropna()

# One-hot encode the three categorical columns into `dms` and preview it.
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])
dms.head()

Unnamed: 0,League_A,League_N,Division_E,Division_W,NewLeague_A,NewLeague_N
1,0,1,0,1,0,1
2,1,0,0,1,1,0
3,0,1,1,0,0,1
4,0,1,1,0,0,1
5,1,0,0,1,1,0


In [70]:

def autoML(df, y, algorithm, **model_params):
    """Fit one regressor on a 75/25 train/test split and return its test RMSE.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the target column and every feature column. Rows are
        used as given, so missing values must already have been removed.
    y : str
        Name of the target column in ``df``.
    algorithm : callable
        Estimator class (or factory). The object it builds must offer
        ``fit(X, y)`` returning the fitted estimator, and ``predict(X)``.
    **model_params
        Optional keyword arguments forwarded to ``algorithm`` (for example a
        seed). With none given the estimator is built with its defaults, so
        stochastic estimators are left unseeded.

    Returns
    -------
    float
        Root mean squared error on the held-out 25 % of the rows.
    """
    target = df[y]

    # Remove the column named by `y` (not a hard-coded 'Salary'), so a
    # different target is never leaked into the feature matrix.
    features = df.drop(columns=[y])

    # Encode the categorical columns here instead of reading a global frame.
    # drop_first=True keeps one indicator per two-level column, which for
    # League / Division / NewLeague is League_N, Division_W and NewLeague_N.
    encoded = pd.get_dummies(features, drop_first=True)

    # Columns that get_dummies passed through untouched are cast to float64;
    # the indicator columns keep the dtype pandas gave them.
    passthrough = [col for col in features.columns if col in encoded.columns]
    X = encoded.astype({col: "float64" for col in passthrough})

    X_train, X_test, y_train, y_test = train_test_split(
        X, target, test_size=0.25, random_state=42
    )

    model = algorithm(**model_params).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
    return RMSE


In [71]:
# Smoke-test the helper with a single model: test RMSE of a random forest.
autoML(df, "Salary", RandomForestRegressor)

341.68450680020015

In [68]:
# Regressor classes (not instances) to benchmark; each one is instantiated
# inside the helper that receives it.
models = [LGBMRegressor,
          XGBRegressor,
          GradientBoostingRegressor,
          RandomForestRegressor,
          DecisionTreeRegressor,
          MLPRegressor,
          KNeighborsRegressor,
          SVR]

In [73]:
# Print every candidate class next to the test RMSE that autoML reports.
for model_cls in models:
    print(model_cls, autoML(df, "Salary", model_cls))

<class 'lightgbm.sklearn.LGBMRegressor'> 363.8712087611089
<class 'xgboost.sklearn.XGBRegressor'> 355.4651481224188
<class 'sklearn.ensemble._gb.GradientBoostingRegressor'> 356.2470442988647
<class 'sklearn.ensemble._forest.RandomForestRegressor'> 341.0294357310888
<class 'sklearn.tree._classes.DecisionTreeRegressor'> 489.2315872957243
<class 'sklearn.neural_network._multilayer_perceptron.MLPRegressor'> 461.1323307694991
<class 'sklearn.neighbors._regression.KNeighborsRegressor'> 426.6570764525201
<class 'sklearn.svm._classes.SVR'> 460.0032657244849


The function below is a tidier variant: it labels each result with the model's class name instead of printing the raw class object.

In [74]:
def compML(df, y, alg, **model_params):
    """Fit one regressor, print its test RMSE with the model name, and return it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the target column and every feature column. Rows are
        used as given, so missing values must already have been removed.
    y : str
        Name of the target column in ``df``.
    alg : callable
        Estimator class (or factory). The object it builds must offer
        ``fit(X, y)`` returning the fitted estimator, and ``predict(X)``.
    **model_params
        Optional keyword arguments forwarded to ``alg`` (for example a seed).
        With none given the estimator is built with its defaults.

    Returns
    -------
    float
        Root mean squared error on the held-out 25 % of the rows. The same
        value is printed, so callers that ignore the return are unaffected.
    """
    # train/test split
    target = df[y]

    # Remove the column named by `y` (not a hard-coded 'Salary'), so a
    # different target is never leaked into the feature matrix.
    features = df.drop(columns=[y])

    # Encode the categorical columns here instead of reading a global frame.
    # drop_first=True keeps one indicator per two-level column, which for
    # League / Division / NewLeague is League_N, Division_W and NewLeague_N.
    encoded = pd.get_dummies(features, drop_first=True)

    # Columns that get_dummies passed through untouched are cast to float64;
    # the indicator columns keep the dtype pandas gave them.
    passthrough = [col for col in features.columns if col in encoded.columns]
    X = encoded.astype({col: "float64" for col in passthrough})

    X_train, X_test, y_train, y_test = train_test_split(
        X, target, test_size=.25, random_state=42
    )

    # modeling
    model = alg(**model_params).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    RMSE = np.sqrt(mean_squared_error(y_test, y_pred))

    # Factories such as functools.partial have no __name__; fall back to repr.
    model_name = getattr(alg, "__name__", repr(alg))
    print(model_name, "model test error:", RMSE)
    return RMSE

In [75]:
# Candidate regressor classes; this repeats the list defined in an earlier
# cell so that this cell can be run without it.
models = [
    LGBMRegressor,
    XGBRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
    DecisionTreeRegressor,
    MLPRegressor,
    KNeighborsRegressor,
    SVR,
]

# Report the test error of every candidate, one line per model.
for model_cls in models:
    compML(df, "Salary", model_cls)

LGBMRegressor model test error: 363.8712087611089
XGBRegressor model test error: 355.4651481224188
GradientBoostingRegressor model test error: 362.77252988338705
RandomForestRegressor model test error: 341.87560029670453
DecisionTreeRegressor model test error: 472.4589235658322
MLPRegressor model test error: 360.20741368178534
KNeighborsRegressor model test error: 426.6570764525201
SVR model test error: 460.0032657244849


