This notebook accompanies the *Deep Learning using Keras* tutorial published on Medium.

### Importing libraries

In [14]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from keras.utils.vis_utils import plot_model

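In this first model, we are going to predict the hourly wages of employees. This is a regression problem: the target is the `wage_per_hour` column, and the remaining columns (education, experience, age, and so on) are the input features.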

### Loading the data

In [4]:
#location of the hourly-wages CSV, relative to the current working directory
wages_csv_path = 'Desktop/codeasylums_material/data/hourly_wages_data.csv'

#load the training data: the first row supplies the column names and
#no column is used as the index
train_df = pd.read_csv(wages_csv_path, header=0, index_col=None)

#preview the first rows to confirm the data loaded as expected
train_df.head()

Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,5.1,0,8,21,35,1,1,0,1,0
1,4.95,0,9,42,57,1,1,0,1,0
2,6.67,0,12,1,19,0,0,0,1,0
3,4.0,0,12,4,22,0,0,0,0,0
4,7.5,0,12,17,35,0,1,0,0,0


### Preprocessing

In [5]:
#feature matrix: every column of the training data apart from the target
train_X = train_df.drop('wage_per_hour', axis=1)

#preview the result to confirm 'wage_per_hour' is no longer present
train_X.head()


Unnamed: 0,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,0,8,21,35,1,1,0,1,0
1,0,9,42,57,1,1,0,1,0
2,0,12,1,19,0,0,0,1,0
3,0,12,4,22,0,0,0,0,0
4,0,12,17,35,0,1,0,0,0


In [6]:
#target: a single-column dataframe holding just the wage values
#(the list selector keeps the result a DataFrame rather than a Series)
train_y = train_df.loc[:, ['wage_per_hour']]

#preview the target values
train_y.head()

Unnamed: 0,wage_per_hour
0,5.1
1,4.95
2,6.67
3,4.0
4,7.5


### Model building

In [11]:
#the input layer must be as wide as the number of feature columns
n_cols = train_X.shape[1]

#assemble the network in one step: two hidden ReLU layers of 10 units each,
#followed by a single output unit with no activation specified
model = Sequential([
    Dense(10, activation='relu', input_shape=(n_cols,)),
    Dense(10, activation='relu'),
    Dense(1),
])

#optimise with Adam against mean squared error, and also report mse as a metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])

#callback that halts training once the monitored quantity (the Keras default)
#has gone 3 epochs without improving
early_stopping_monitor = EarlyStopping(patience=3)

In [12]:
#print a layer-by-layer overview of the network (output shapes and parameter counts)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 10)                100       
_________________________________________________________________
dense_5 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 11        
Total params: 221
Trainable params: 221
Non-trainable params: 0
_________________________________________________________________


### Model training

In [13]:
#fit the network for up to 30 epochs, holding out 20% of the rows for validation;
#the early-stopping callback may end training sooner.
#the trailing semicolon keeps the returned History object out of the cell output
model.fit(
    x=train_X,
    y=train_y,
    validation_split=0.2,
    epochs=30,
    callbacks=[early_stopping_monitor],
);

Train on 427 samples, validate on 107 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


### Increasing model capacity

In [133]:
#a second, larger network trained on the same data, to show what happens
#when model capacity is increased

#three hidden ReLU layers of 200 units each, then a single output unit;
#the input width reuses n_cols from the first model's cell
model_mc = Sequential([
    Dense(200, activation='relu', input_shape=(n_cols,)),
    Dense(200, activation='relu'),
    Dense(200, activation='relu'),
    Dense(1),
])

#optimise with Adam against mean squared error
model_mc.compile(optimizer='adam', loss='mean_squared_error')

In [134]:
#train the larger model with the same 20% validation split, 30-epoch cap and
#early-stopping callback as the first model.
#the trailing semicolon stops the returned History object's repr from being
#echoed as the cell output, matching the first model's training cell
model_mc.fit(train_X, train_y, validation_split=0.2, epochs=30,
             callbacks=[early_stopping_monitor]);

Train on 427 samples, validate on 107 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30


<keras.callbacks.History at 0x1878785e48>