# Regression Model Evaluation

## Import the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Import the dataset

In [2]:
# Read the raw table; every column except the last one is a feature and the
# last column is the regression target.
dataset = pd.read_csv("data/Data.csv")
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [3]:
# Preview the feature matrix (all columns of the dataset except the last).
print(X)

         AT      V       AP     RH
0     14.96  41.76  1024.07  73.17
1     25.18  62.96  1020.04  59.08
2      5.11  39.40  1012.16  92.14
3     20.86  57.32  1010.24  76.64
4     10.82  37.50  1009.23  96.62
...     ...    ...      ...    ...
9563  16.65  49.69  1014.01  91.00
9564  13.19  39.18  1023.67  66.78
9565  31.32  74.33  1012.92  36.48
9566  24.48  69.45  1013.86  62.39
9567  21.60  62.52  1017.23  67.87

[9568 rows x 4 columns]


In [4]:
# Preview the target vector (the last column of the dataset).
print(y)

0       463.26
1       444.37
2       488.56
3       446.48
4       473.90
         ...  
9563    460.03
9564    469.62
9565    429.57
9566    435.74
9567    453.28
Name: PE, Length: 9568, dtype: float64


## Splitting the dataset into the Training Set and Test Set

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [6]:
# Sanity check: row counts of the four pieces produced by the split.
print(*map(len, (X_train, X_test, y_train, y_test)))

7654 1914 7654 1914


## Feature Scaling

In [7]:
# Feature scaling is needed before fitting the SVR model to the training data

from sklearn.preprocessing import StandardScaler

# Use one scaler for X and a separate one for y, so that predictions made in
# standardized units can be mapped back to the original target scale via sc_y.
sc_x, sc_y = StandardScaler(), StandardScaler()

# Fit each scaler on the training split only and reuse it on the test split.
X_train_std = sc_x.fit_transform(X_train)
X_test_std = sc_x.transform(X_test)

# The target is passed to the scaler as a single column and flattened back
# to 1-D afterwards.
y_train_col = y_train.to_numpy().reshape(-1, 1)
y_test_col = y_test.to_numpy().reshape(-1, 1)
y_train_std = sc_y.fit_transform(y_train_col).ravel()
y_test_std = sc_y.transform(y_test_col).ravel()

## Training the various Regression Models on the Training Set

### Multiple Linear Regression

In [8]:
from sklearn.linear_model import LinearRegression

# Baseline: a linear model fitted on the raw (unscaled) training features.
lin_reg = LinearRegression()
lin_reg.fit(X=X_train, y=y_train)

LinearRegression()

### Polynomial Regression

In [9]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand X_train and X_test into polynomial features of degree 4; the
# expansion is fitted on the training split and reused on the test split.
poly_feat = PolynomialFeatures(degree=4)
X_train_poly = poly_feat.fit_transform(X_train)
X_test_poly = poly_feat.transform(X_test)

# A plain linear model fitted on the expanded training features.
poly_reg = LinearRegression()
poly_reg.fit(X=X_train_poly, y=y_train)

LinearRegression()

### Support Vector Regression

In [10]:
from sklearn.svm import SVR

# The SVR is the only model trained on the standardized features and target,
# so its predictions come out in standardized units.
svr_reg = SVR(kernel="rbf")
svr_reg.fit(X=X_train_std, y=y_train_std)

SVR()

### Decision Tree Regression

In [11]:
from sklearn.tree import DecisionTreeRegressor

# Single decision tree on the raw features; random_state pins the result.
tree_reg = DecisionTreeRegressor(random_state=0)
tree_reg.fit(X=X_train, y=y_train)

DecisionTreeRegressor(random_state=0)

### Random Forest Regression

In [12]:
from sklearn.ensemble import RandomForestRegressor

# Random forest on the raw features; random_state pins the result.
forest_reg = RandomForestRegressor(random_state=0)
forest_reg.fit(X=X_train, y=y_train)

RandomForestRegressor(random_state=0)

## Predicting the Test Result

In [13]:
# Predict the test split with every fitted model. Each model receives the
# representation of X_test that it was trained on.
y_pred_lin_reg = lin_reg.predict(X_test)
y_pred_poly_reg = poly_reg.predict(X_test_poly)

# The SVR was trained on the standardized target, so its predictions must be
# mapped back to the original units with sc_y. SVR.predict returns a 1-D
# array, while StandardScaler.inverse_transform expects 2-D input (recent
# scikit-learn versions raise "Expected 2D array, got 1D array instead"),
# so reshape to a single column first and flatten the result afterwards.
y_pred_svr_std = svr_reg.predict(X_test_std)
y_pred_svr_reg = sc_y.inverse_transform(y_pred_svr_std.reshape(-1, 1)).ravel()

y_pred_dtree_reg = tree_reg.predict(X_test)
y_pred_rforest_reg = forest_reg.predict(X_test)

# Side-by-side table of the predictions and the true target values.
# The prediction columns are NumPy arrays in the row order of X_test, and
# y_test comes from the same split, so the rows line up.
pd.set_option("display.precision", 2)
result_dataset = pd.DataFrame({"y pred <Lin Reg>" : y_pred_lin_reg,
                               "y pred <Poly Reg>" : y_pred_poly_reg,
                               "y pred <SVR Reg>" : y_pred_svr_reg,
                               "y pred <DTree Reg>" : y_pred_dtree_reg,
                               "y pred <RForest Reg>" : y_pred_rforest_reg,
                               "y test" : y_test})
print(result_dataset.head(10).to_string(index=False))

 y pred <Lin Reg>  y pred <Poly Reg>  y pred <SVR Reg>  y pred <DTree Reg>  y pred <RForest Reg>  y test
           431.43             433.94            434.05              431.28                433.64  431.23
           458.56             457.90            457.94              459.59                457.71  460.01
           462.75             460.52            461.03              460.06                463.41  461.14
           448.60             447.57            448.63              452.71                446.45  445.90
           457.87             457.22            457.83              459.61                459.34  451.29
           429.69             433.74            433.65              434.30                435.20  432.68
           473.04             473.59            473.91              467.83                473.53  477.50
           456.51             456.55            457.15              458.64                457.86  459.68
           474.34             474.75            474.65 

## Evaluating the Model Performance

In [14]:
from sklearn.metrics import r2_score

pd.set_option("display.precision", 5)

# Test-set predictions keyed by the model label shown in the score table.
predictions_by_model = {
    "Multiple Linear Regression": y_pred_lin_reg,
    "Polynomial Regression": y_pred_poly_reg,
    "Support Vector Regression": y_pred_svr_reg,
    "Decision Tree Regression": y_pred_dtree_reg,
    "Random Forest Regression": y_pred_rforest_reg,
}

# One [label, R2] row per model, scored against the held-out targets.
score_list = [
    [model_name, r2_score(y_test, y_pred)]
    for model_name, y_pred in predictions_by_model.items()
]

# Rank the models from best to worst R2 and renumber the rows from 0.
df_model_score = pd.DataFrame(
    score_list, columns=["Model Name", "R2 Score"]
).sort_values(by=["R2 Score"], ascending=False, ignore_index=True)

df_model_score

Unnamed: 0,Model Name,R2 Score
0,Random Forest Regression,0.96511
1,Support Vector Regression,0.94808
2,Polynomial Regression,0.94582
3,Multiple Linear Regression,0.93253
4,Decision Tree Regression,0.92291
