In [14]:
import pandas as pd
import numpy as np

In [8]:
# Import PCA DATA
def readData(df,dtype):
    """Load a stored frame from ``<df>.pickle`` or ``<df>.parquet``.

    Parameters
    ----------
    df : str or os.PathLike
        Path of the file *without* its extension. Despite the name, this is
        a path stem, not a DataFrame.
    dtype : str
        ``'pickle'`` reads ``<df>.pickle`` with ``pd.read_pickle``. Any other
        value reads ``<df>.parquet`` with ``pd.read_parquet``.

    Returns
    -------
    Whatever the selected pandas reader returns for that file.
    """
    # Format the stem rather than concatenating with ``+`` so that
    # pathlib.Path stems work as well as plain strings.
    if dtype == 'pickle':
        # SECURITY: unpickling can execute arbitrary code; only load files
        # that come from a trusted source.
        return pd.read_pickle(f"{df}.pickle")
    return pd.read_parquet(f"{df}.parquet")

In [15]:
# Load the PCA feature splits (train / validation / test) from their pickle files.
x_train, x_valid, x_test = (
    readData(stem, 'pickle')
    for stem in ('x_train_pca', 'x_valid_pca', 'x_test_pca')
)

In [16]:
# Load the scaled target splits (train / validation / test) from their parquet files.
y_train, y_valid, y_test = (
    readData(stem, 'parquet')
    for stem in ('scaled_y_train', 'scaled_y_valid', 'scaled_y_test')
)

In [19]:
from time import time
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.cross_decomposition import PLSRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error, explained_variance_score
from sklearn.svm import SVR
import warnings
# Silence every warning from this point on to keep the cell output readable.
# NOTE(review): this blanket filter also hides any warnings the estimators
# emit while fitting; re-enable warnings when debugging a model.
warnings.filterwarnings("ignore")
import catboost
from catboost import CatBoostRegressor

In [28]:
# Candidate regressors to benchmark, all with library-default hyper-parameters
# apart from the SVR kernel. Estimators whose training is randomized get a
# fixed seed so repeated runs of the notebook produce the same scores.
SEED = 42
regressors = [
    SVR(kernel="poly"),
    LinearRegression(),
    Lasso(),
    Ridge(),
    ElasticNet(),
    PLSRegression(),
    KNeighborsRegressor(),
    GradientBoostingRegressor(random_state=SEED),
    ExtraTreesRegressor(random_state=SEED),
    RandomForestRegressor(random_state=SEED),
    DecisionTreeRegressor(random_state=SEED),
    LGBMRegressor(random_state=SEED),
    XGBRegressor(random_state=SEED),
    # Same class as catboost.CatBoostRegressor; the direct import is used so
    # every entry is spelled the same way.
    CatBoostRegressor(random_seed=SEED),
]

In [29]:
# Fit each candidate on the training split, score it on the validation split,
# and print a per-model report. The metrics are also stored in `results` as
# soon as a model finishes, so an interrupted run still leaves the completed
# models available for comparison.
results = []  # one dict per model that completed fit + predict + scoring
for model in regressors:
    start = time()
    model.fit(x_train,y_train)
    train_time = time() - start

    start = time()
    y_pred = model.predict(x_valid)
    predict_time = time()-start

    explained_variance = explained_variance_score(y_valid, y_pred)
    rmse = np.sqrt(mean_squared_error(y_valid, y_pred))
    mae = mean_absolute_error(y_valid, y_pred)
    r2 = r2_score(y_valid, y_pred)

    # Record before printing so the numbers survive even if output is cut off.
    results.append({
        "model": type(model).__name__,
        "train_time_s": train_time,
        "predict_time_s": predict_time,
        "explained_variance": explained_variance,
        "rmse": rmse,
        "mae": mae,
        "r2": r2,
    })

    print(model)
    print("\tTraining time: %0.3fs" % train_time)
    print("\tPrediction time: %0.3fs" % predict_time)
    print("\tExplained variance:", explained_variance)
    print('\tRMSE: %f' % rmse)
    print("\tMean absolute error:", mae)
    print("\tR2 score:", r2)
    print()

# Tabular view of the benchmark, one row per model, for sorting and plotting.
results_df = pd.DataFrame(results)

SVR(kernel='poly')
	Training time: 100.934s
	Prediction time: 5.860s
	Explained variance: 0.3107341712868271
	RMSE: 0.830442
	Mean absolute error: 0.5183154880662719
	R2 score: 0.3103665925642327

LinearRegression()
	Training time: 0.020s
	Prediction time: 0.003s
	Explained variance: 0.2528871466578597
	RMSE: 0.864368
	Mean absolute error: 0.5639412237938757
	R2 score: 0.2528679279997681

Lasso()
	Training time: 0.022s
	Prediction time: 0.002s
	Explained variance: 0.0
	RMSE: 1.000000
	Mean absolute error: 0.6970455718461427
	R2 score: 0.0

Ridge()
	Training time: 0.009s
	Prediction time: 0.002s
	Explained variance: 0.25288729329277837
	RMSE: 0.864368
	Mean absolute error: 0.5639405444130942
	R2 score: 0.2528680756277355

ElasticNet()
	Training time: 0.015s
	Prediction time: 0.001s
	Explained variance: 0.031353247697016196
	RMSE: 0.984199
	Mean absolute error: 0.682760939832268
	R2 score: 0.031353029607490646

PLSRegression()
	Training time: 0.032s
	Prediction time: 0.002s
	Explained va

KeyboardInterrupt: 