# Model evaluation: polynomial linear regression

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt

## Load the dataset

In [2]:
# Directory holding the raw CSV files. The RAW_DATA_DIR environment variable
# overrides the default so the notebook is not tied to one machine's layout;
# when it is unset the original location is used unchanged.
raw_data_file_path = os.environ.get(
    'RAW_DATA_DIR',
    '/Users/vishalsaxena/Documents/DS_Workspace/udemy_ml_algo_python/data/raw',
)
# Read model-evaluation.csv from that directory into a DataFrame.
dataset = pd.read_csv(os.path.join(raw_data_file_path, 'model-evaluation.csv'))
dataset

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [3]:
# Feature matrix: every column of the frame except the last one.
feature_frame = dataset.iloc[:, :-1]
X = feature_frame.values
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]], shape=(9568, 4))

In [5]:
# Target vector: the last column of the frame.
target_series = dataset.iloc[:, -1]
y = target_series.values
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28], shape=(9568,))

## Splitting the dataset into train and test sets

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state=0 pins the shuffle so the
# same split is produced on every run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
X_train, X_test, y_train, y_test = split_parts
X_train, X_test

(array([[  11.22,   43.13, 1017.24,   80.9 ],
        [  13.67,   54.3 , 1015.92,   75.42],
        [  32.84,   77.95, 1014.68,   45.8 ],
        ...,
        [  16.81,   38.52, 1018.26,   75.21],
        [  12.8 ,   41.16, 1022.43,   86.19],
        [  32.32,   67.9 , 1006.08,   37.93]], shape=(7654, 4)),
 array([[  28.66,   77.95, 1009.56,   69.07],
        [  17.48,   49.39, 1021.51,   84.53],
        [  14.86,   43.14, 1019.21,   99.14],
        ...,
        [  12.24,   44.92, 1023.74,   88.21],
        [  27.28,   47.93, 1003.46,   59.22],
        [  17.28,   39.99, 1007.09,   74.25]], shape=(1914, 4)))

In [7]:
# Show the training and test targets side by side as a tuple.
(y_train, y_test)

(array([473.93, 467.87, 431.97, ..., 459.01, 462.72, 428.12], shape=(7654,)),
 array([431.23, 460.01, 461.14, ..., 473.26, 438.  , 463.28], shape=(1914,)))

## Transform X for the polynomial linear model

In [9]:
from sklearn.preprocessing import PolynomialFeatures

# Degree-4 polynomial expansion of the features.
poly = PolynomialFeatures(degree=4)
# Fit on the training split only, then expand that same split
# (two explicit steps, equivalent to a single fit_transform call).
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_train_poly

array([[1.00000000e+00, 1.12200000e+01, 4.31300000e+01, ...,
        6.77242028e+09, 5.38603280e+08, 4.28345379e+07],
       [1.00000000e+00, 1.36700000e+01, 5.43000000e+01, ...,
        5.87072958e+09, 4.35831980e+08, 3.23553508e+07],
       [1.00000000e+00, 3.28400000e+01, 7.79500000e+01, ...,
        2.15967876e+09, 9.74822477e+07, 4.40009357e+06],
       ...,
       [1.00000000e+00, 1.68100000e+01, 3.85200000e+01, ...,
        5.86500714e+09, 4.33197009e+08, 3.19964912e+07],
       [1.00000000e+00, 1.28000000e+01, 4.11600000e+01, ...,
        7.76570573e+09, 6.54642544e+08, 5.51858229e+07],
       [1.00000000e+00, 3.23200000e+01, 6.79000000e+01, ...,
        1.45623249e+09, 5.49010997e+07, 2.06981424e+06]], shape=(7654, 70))

## Training the polynomial linear regression

In [11]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on the polynomial-expanded training features.
# fit() returns the estimator itself, so construction and fitting can be chained.
regressor = LinearRegression().fit(X_train_poly, y_train)
regressor

## Predict on the test split

In [19]:
# Expand the test features with the transformer that was fitted on the training
# split. Use transform (not fit_transform): a preprocessing step must never be
# re-fitted on test data.
X_test_poly = poly.transform(X_test)
# Predictions are rounded to 2 decimals, so every later cell that uses y_pred
# sees the rounded values.
y_pred = np.round(regressor.predict(X_test_poly), 2)
y_pred

array([434.16, 458.26, 460.72, ..., 469.49, 438.53, 461.62], shape=(1914,))

In [20]:
# Show predictions next to the true test targets as a tuple.
(y_pred, y_test)

(array([434.16, 458.26, 460.72, ..., 469.49, 438.53, 461.62], shape=(1914,)),
 array([431.23, 460.01, 461.14, ..., 473.26, 438.  , 463.28], shape=(1914,)))

In [21]:
# Two-column array: predictions in column 0, true test targets in column 1.
# column_stack turns each 1-D array into a column, avoiding np.reshape's
# `shape=` keyword, which is only accepted by NumPy >= 2.1.
np.column_stack((y_pred, y_test))

array([[434.16, 431.23],
       [458.26, 460.01],
       [460.72, 461.14],
       ...,
       [469.49, 473.26],
       [438.53, 438.  ],
       [461.62, 463.28]], shape=(1914, 2))

## Evaluate the model

In [22]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions against the held-out targets.
test_r2 = r2_score(y_true=y_test, y_pred=y_pred)
test_r2

0.9455269065745328