# Section 26: Model Deployment

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import joblib

## Model Persistence

In [2]:
# Load the advertising dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Advertising.csv')

In [3]:
# Preview the first five rows to confirm the file parsed into the expected columns.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [5]:
# 'sales' is the regression target; every other column is used as a predictor.
y = df['sales']
X = df.drop(columns=['sales'])

In [6]:
# Keep 70% of the rows for training. The remaining 30% (X_test / y_test) is an
# intermediate pool that is divided again afterwards, not a final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [7]:
# Halve the 30% pool: one half for validation while iterating on the model,
# the other half kept back as a hold-out set for a final check.
X_val, X_hold, y_val, y_hold = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    random_state=101,
)

In [8]:
# Total number of rows available before any splitting.
len(X)

200

In [9]:
# Rows in the training split (70% of the data).
len(X_train)

140

In [10]:
# Rows in the validation split (half of the 30% remainder).
len(X_val)

30

In [11]:
# Rows in the hold-out split (the other half of the 30% remainder).
len(X_hold)

30

In [12]:
# Random forest with 30 trees; the fixed seed makes the fit repeatable.
model = RandomForestRegressor(
    n_estimators=30,
    random_state=101,
)

In [13]:
# Fit on the training split only; validation and hold-out rows stay unseen.
model.fit(X_train, y_train)

In [14]:
# Predict sales for the validation rows.
pred_val = model.predict(X_val)

In [15]:
# Eyeball the raw validation predictions (one value per validation row).
pred_val

array([14.40333333,  5.47333333,  4.14      , 15.72666667, 11.66666667,
        9.93      , 10.83333333, 11.48      , 18.02      ,  7.60333333,
       10.9       , 21.44333333, 14.08333333,  7.53333333, 11.81333333,
        6.83      , 13.51      , 13.62      , 11.01333333,  7.99666667,
       12.53333333, 21.63      , 19.49      , 15.73      , 16.05666667,
       24.21666667, 20.17666667,  9.50666667, 14.50333333, 19.36333333])

In [16]:
# Mean absolute error on the validation split, in the same units as 'sales'.
mean_absolute_error(y_val, pred_val)

0.6575555555555552

In [17]:
# Root mean squared error on the validation split; penalises large misses more than MAE.
root_mean_squared_error(y_val, pred_val)

0.8542009478215644

In [18]:
# Summarise the target's spread to put the error metrics in context.
df['sales'].describe()

count    200.000000
mean      14.022500
std        5.217457
min        1.600000
25%       10.375000
50%       12.900000
75%       17.400000
max       27.000000
Name: sales, dtype: float64

In [19]:
# Predict on the hold-out rows, which played no part in training or validation.
pred_hold = model.predict(X_hold)

In [20]:
# Mean absolute error on the hold-out split.
mean_absolute_error(y_hold, pred_hold)

0.5937777777777775

In [21]:
# Root mean squared error on the hold-out split.
root_mean_squared_error(y_hold, pred_hold)

0.745323693040418

In [22]:
# Fresh estimator for deployment, configured identically to the evaluated model.
final_model = RandomForestRegressor(
    n_estimators=30,
    random_state=101,
)

In [23]:
# Train the deployment model on every available row (train + validation + hold-out).
final_model.fit(X, y)

In [24]:
# Persist the fitted model to the working directory; dump returns the list of files written.
joblib.dump(final_model, 'final_model.pkl')

['final_model.pkl']

In [25]:
# Capture the feature names, in training order, so callers can build matching inputs.
column_names = X.columns.tolist()

In [26]:
# Save the feature-name list alongside the model file.
joblib.dump(column_names, 'column_names.pkl')

['column_names.pkl']

In [27]:
# Reload the feature names from disk to check the round trip.
loaded_columns = joblib.load('column_names.pkl')

In [28]:
# Confirm the reloaded list holds the expected feature names.
loaded_columns

['TV', 'radio', 'newspaper']

In [29]:
# Reload the persisted model. joblib.load unpickles the file, which can execute
# arbitrary code, so only load files from a trusted source.
loaded_model = joblib.load('final_model.pkl')

In [30]:
# Display the reloaded estimator to inspect its type and parameters.
loaded_model

In [31]:
# Build a single-row frame with the saved column names. These values match
# row 0 of the dataset preview (actual sales 22.1), so this is a sanity check
# on a row the final model was trained on, not an unseen sample.
new_input = pd.DataFrame(
    data=[[230.1, 37.8, 69.2]],
    columns=loaded_columns,
)

In [32]:
# Predict with the reloaded model to check it works end to end after persistence.
loaded_model.predict(new_input)

array([21.99])