# Peer-graded Assignment: Build a Regression Model in Keras

# Download and Clean Dataset

Let's start by importing the pandas and the Numpy libraries.

In [4]:
import pandas as pd
import numpy as np

We will be using the dataset provided in the assignment

The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include:

1. Cement

2. Blast Furnace Slag

3. Fly Ash

4. Water

5. Superplasticizer

6. Coarse Aggregate

7. Fine Aggregate

Let's read the dataset into a pandas dataframe.

In [5]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [6]:
concrete_data.shape

(1030, 9)

In [7]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [8]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Split data into predictors and target

In [9]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != "Strength"]] # all columns except Strength
target = concrete_data['Strength']

In [10]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [11]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

normalize the data by subtracting the mean and dividing by the standard deviation. 

In [12]:
predictors_norm = (predictors - predictors.mean() / predictors.std())
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,537.309562,-0.856472,-0.846733,153.497358,1.461362,1027.48721,666.351468,27.277154
1,537.309562,-0.856472,-0.846733,153.497358,1.461362,1042.48721,666.351468,27.277154
2,329.809562,141.643528,-0.846733,219.497358,-1.038638,919.48721,584.351468,269.277154
3,329.809562,141.643528,-0.846733,219.497358,-1.038638,919.48721,584.351468,364.277154
4,195.909562,131.543528,-0.846733,183.497358,-1.038638,965.88721,815.851468,359.277154


In [13]:
n_cols = predictors_norm.shape[1] # number of predictors
n_cols

8

# Import Keras

In [14]:
import keras

import packages

In [15]:
from keras.models import Sequential
from keras.layers import Dense

In [16]:
# define regression model
def regression_model():
#     create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
#     compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

The above function creates a model that has one hidden layer with 10 neurons and a ReLU activation function. It uses the adam optimizer and the mean squared error as the loss function.

Split the data into a training and test sets by holding 30% of the data for testing

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)

# Train and Test the Network

In [19]:
# build the model 
model = regression_model()

train the model for 100 epochs

In [20]:
# fit the model
epochs = 100
model.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 100/100


<keras.callbacks.History at 0x212855c4040>

evaluate the model on the test data

In [21]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



100.0109634399414

compute the mean squared error between the predicted concrete strength and the actual concrete strength.

import the mean_squared_error function from Scikit-learn.

In [22]:
from sklearn.metrics import mean_squared_error

In [23]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

100.01096985076101 0.0


create a list of 50 mean squared errors and report mean and the standard deviation of the mean squared errors.

In [24]:
total_mean_squared_errors = 50
epochs = 100
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("MSE " + str(i+1) + ": " + str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)
    
mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print('\n')
print("Below is the mean and standard deviation of " +str(total_mean_squared_errors) + " mean squared errors with normalized data. Total number of epochs for each training is: " +str(epochs) + "\n")
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))
    

MSE 1: 63.74424743652344
MSE 2: 72.38067626953125
MSE 3: 51.79737854003906
MSE 4: 56.04531478881836
MSE 5: 48.40529251098633
MSE 6: 52.60075759887695
MSE 7: 52.49094009399414
MSE 8: 44.19750213623047
MSE 9: 46.262962341308594
MSE 10: 52.855499267578125
MSE 11: 46.6246223449707
MSE 12: 46.600929260253906
MSE 13: 50.86151885986328
MSE 14: 51.10682678222656
MSE 15: 46.64724349975586
MSE 16: 37.449520111083984
MSE 17: 47.3490104675293
MSE 18: 47.42112731933594
MSE 19: 46.19331359863281
MSE 20: 44.390567779541016
MSE 21: 40.773155212402344
MSE 22: 44.05078887939453
MSE 23: 40.299560546875
MSE 24: 44.743038177490234
MSE 25: 51.95021057128906
MSE 26: 45.45945739746094
MSE 27: 47.197288513183594
MSE 28: 42.387168884277344
MSE 29: 48.38642883300781
MSE 30: 46.693241119384766
MSE 31: 46.02763366699219
MSE 32: 39.13624954223633
MSE 33: 44.51093673706055
MSE 34: 51.99589538574219
MSE 35: 51.06875228881836
MSE 36: 51.192047119140625
MSE 37: 50.62652587890625
MSE 38: 48.79283142089844
MSE 39: 50.058