### Regression Model with Keras

### Download and Clean Dataset

In [1]:
#Download and clean dataset
import pandas as pd
import numpy as np
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20191225133339-0000
KERNEL_ID = 7aee22d4-4927-47a5-9d7f-dcec5221d9a9


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
concrete_data.shape

(1030, 9)

In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [13]:
#Split data into predicators X and target y

concrete_data_columns = concrete_data.columns

X = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
y = concrete_data['Strength'] # Strength column

In [15]:
#The last step is to normalize the data by substracting the mean and dividing by the standard deviation.
X_norm = (X - X.mean()) / X.std()
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [16]:

#1 Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_norm, y, train_size=0.70,test_size=0.30)
print ("X_train: ", X_train)
print ("y_train: ", y_train)

X_train:          Cement  Blast Furnace Slag   Fly Ash     Water  Superplasticizer  \
177   0.779207            1.334087 -0.846733 -0.780515          0.903161   
104   0.360094            1.606458 -0.846733 -1.211343          1.355131   
703  -0.776679            0.685033 -0.846733  0.488555         -1.038638   
393   0.505540           -0.652483  1.622133  0.348068          1.522528   
169   1.376300            0.375573 -0.846733 -1.314367          1.723404   
698  -0.743188            2.682035 -0.846733  1.027091         -1.038638   
513   1.366731           -0.601486  1.215864 -0.635344          0.451190   
202  -0.865668           -0.856472  1.112734 -0.911636          0.267054   
142   1.376300            0.375573 -0.846733 -1.412708          2.074936   
488   1.012686           -0.624667  0.622086 -1.150465          1.355131   
782   0.141926           -0.856472 -0.846733  0.488555         -1.038638   
279  -0.284843           -0.856472  1.001791  0.324653          0.032699   
52

### Build Neural Network

In [21]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error

#One hidden layer of 10 nodes, and a ReLU activation function
#Use the adam optimizer and the mean squared error as the loss function.
n_cols = X_train.shape[1] # number of predictors
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(loss= "mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
    return model

In [22]:
#2 Train the model on the training data using 50 epochs.
# build the model
model = regression_model()
# fit the model
model.fit(X_train, y_train, epochs=50, verbose=2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


Epoch 1/50
 - 1s - loss: 1558.4523 - mean_squared_error: 1558.4523
Epoch 2/50
 - 0s - loss: 1540.6674 - mean_squared_error: 1540.6674
Epoch 3/50
 - 0s - loss: 1522.7935 - mean_squared_error: 1522.7935
Epoch 4/50
 - 0s - loss: 1504.8489 - mean_squared_error: 1504.8489
Epoch 5/50
 - 0s - loss: 1486.9597 - mean_squared_error: 1486.9597
Epoch 6/50
 - 0s - loss: 1468.6905 - mean_squared_error: 1468.6905
Epoch 7/50
 - 0s - loss: 1449.9323 - mean_squared_error: 1449.9323
Epoch 8/50
 - 0s - loss: 1431.0448 - mean_squared_error: 1431.0448
Epoch 9/50
 - 0s - loss: 1411.1807 - mean_squared_error: 1411.1807
Epoch 10/50
 - 0s - loss: 1390.7859 - mean_squared_error: 1390.7859
Epoch 11/50
 - 0s - loss: 1369.8486 - mean_squared_error: 1369.8486
Epoch 12/50
 - 0s - loss: 1347.9189 - mean_squared_error: 1347.9189
Epoch 13/50
 - 0s - loss: 1325.9137 - mean_squared_error: 1325.9137
Epoch 14/50
 - 0s - loss: 1302.4493 - mean_squared_error: 1302.4493
Epoch 15/50
 - 0s - loss: 1278.8460 - mean_squared_error:

<keras.callbacks.History at 0x7f74c2b11eb8>

In [23]:
#3 Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. 
#You can use the mean_squared_error function from Scikit-learn.

#Report the mean and the standard deviation of the mean squared errors.
pred_train= model.predict(X_train)
print(mean_squared_error(y_train,pred_train))

pred= model.predict(X_test)
print(mean_squared_error(y_test,pred)) 

425.54482591013254
413.04549426208195


In [26]:
#4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.
listmse=[]; 
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, train_size=0.70,test_size=0.30)
    model.fit(X_train, y_train, epochs=50, verbose=2)
    pred= model.predict(X_test)
    listmse.append(mean_squared_error(y_test,pred)) 
    


Epoch 1/50
 - 0s - loss: 30.1148 - mean_squared_error: 30.1148
Epoch 2/50
 - 0s - loss: 29.9751 - mean_squared_error: 29.9751
Epoch 3/50
 - 0s - loss: 29.8923 - mean_squared_error: 29.8923
Epoch 4/50
 - 0s - loss: 29.9045 - mean_squared_error: 29.9045
Epoch 5/50
 - 0s - loss: 29.8979 - mean_squared_error: 29.8979
Epoch 6/50
 - 0s - loss: 29.9101 - mean_squared_error: 29.9101
Epoch 7/50
 - 0s - loss: 29.8522 - mean_squared_error: 29.8522
Epoch 8/50
 - 0s - loss: 29.9179 - mean_squared_error: 29.9179
Epoch 9/50
 - 0s - loss: 29.7360 - mean_squared_error: 29.7360
Epoch 10/50
 - 0s - loss: 29.8576 - mean_squared_error: 29.8576
Epoch 11/50
 - 0s - loss: 29.7435 - mean_squared_error: 29.7435
Epoch 12/50
 - 0s - loss: 29.7287 - mean_squared_error: 29.7287
Epoch 13/50
 - 0s - loss: 29.7195 - mean_squared_error: 29.7195
Epoch 14/50
 - 0s - loss: 29.7040 - mean_squared_error: 29.7040
Epoch 15/50
 - 0s - loss: 29.6558 - mean_squared_error: 29.6558
Epoch 16/50
 - 0s - loss: 29.7227 - mean_squared_

In [28]:
#5. Report the mean and the standard deviation of the mean squared errors.
print("Mean of the mean_squared_error as:", np.mean(listmse)) 
print("Std of the mean_squared_error as:", np.std(listmse)) 

Mean of the mean_squared_error as: 30.843071624137675
Std of the mean_squared_error as: 2.659319200394782
