# Part A: Build a baseline model


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import keras
from keras.models import Sequential
from keras.layers import Dense

In [2]:
data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

In [3]:
X = data.drop('Strength', axis=1)
y = data['Strength']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error  as the loss function.


In [4]:
def regression_model():
  model = Sequential()
  model.add(Dense(10, activation='relu', input_shape=(X.shape[1], )))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mean_squared_error')

  return model

2. Train the model on the training data using 50 epochs

In [5]:
model = regression_model()
model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

Epoch 1/50
16/16 - 1s - loss: 442889.2812 - val_loss: 377071.0938
Epoch 2/50
16/16 - 0s - loss: 321326.6875 - val_loss: 267433.0938
Epoch 3/50
16/16 - 0s - loss: 223992.2500 - val_loss: 182907.2812
Epoch 4/50
16/16 - 0s - loss: 149407.5625 - val_loss: 118599.7500
Epoch 5/50
16/16 - 0s - loss: 93660.5469 - val_loss: 71115.8594
Epoch 6/50
16/16 - 0s - loss: 53386.1484 - val_loss: 38083.8086
Epoch 7/50
16/16 - 0s - loss: 27253.6738 - val_loss: 18506.1113
Epoch 8/50
16/16 - 0s - loss: 12850.9473 - val_loss: 8631.7871
Epoch 9/50
16/16 - 0s - loss: 6069.8931 - val_loss: 4384.0186
Epoch 10/50
16/16 - 0s - loss: 3440.6272 - val_loss: 2833.8914
Epoch 11/50
16/16 - 0s - loss: 2572.0408 - val_loss: 2356.3401
Epoch 12/50
16/16 - 0s - loss: 2338.8354 - val_loss: 2189.4355
Epoch 13/50
16/16 - 0s - loss: 2234.7632 - val_loss: 2110.8633
Epoch 14/50
16/16 - 0s - loss: 2169.1782 - val_loss: 2045.6344
Epoch 15/50
16/16 - 0s - loss: 2104.7605 - val_loss: 1985.5708
Epoch 16/50
16/16 - 0s - loss: 2040.5990 

<tensorflow.python.keras.callbacks.History at 0x7fce33dae490>

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [6]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('The mean squared error between the predicted concrete strength and the actual concrete strength is: {}'.format(mse))

The mean squared error between the predicted concrete strength and the actual concrete strength is: 713.7385897431449


4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.



In [7]:
def build_baseline_model():
  X = data.drop('Strength', axis=1)
  y = data['Strength']
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

  model = Sequential()
  model.add(Dense(10, activation='relu', input_shape=(X.shape[1], )))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mean_squared_error')
  model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

  y_pred = model.predict(X_test)
  mse = mean_squared_error(y_test, y_pred)
  
  return mse

In [8]:
mse_list = []
for i in range(50):
  mse_list.append(build_baseline_model())
mse_list

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 1/50
16/16 - 1s - loss: 699318.3750 - val_loss: 546970.6250
Epoch 2/50
16/16 - 0s - loss: 436155.0312 - val_loss: 314380.0312
Epoch 3/50
16/16 - 0s - loss: 237305.3594 - val_loss: 154114.9531
Epoch 4/50
16/16 - 0s - loss: 109224.0703 - val_loss: 63371.5312
Epoch 5/50
16/16 - 0s - loss: 43242.0742 - val_loss: 24346.7168
Epoch 6/50
16/16 - 0s - loss: 18343.2480 - val_loss: 13048.1113
Epoch 7/50
16/16 - 0s - loss: 11751.0566 - val_loss: 11526.5283
Epoch 8/50
16/16 - 0s - loss: 10910.4072 - val_loss: 11430.2314
Epoch 9/50
16/16 - 0s - loss: 10677.9561 - val_loss: 11181.3252
Epoch 10/50
16/16 - 0s - loss: 10503.7705 - val_loss: 10920.0811
Epoch 11/50
16/16 - 0s - loss: 10299.0420 - val_loss: 10731.6260
Epoch 12/50
16/16 - 0s - loss: 10107.0977 - val_loss: 10497.3574
Epoch 13/50
16/16 - 0s - loss: 9937.7656 - val_loss: 10274.7695
Epoch 14/50
16/16 - 0s - loss: 9745.6699 - val_loss: 10077.5371
Epoch 15/50
16/16 - 0s - loss

[4456.276572862634,
 154.8551268837429,
 111.27941778570768,
 111.49415577463394,
 204.71331387715486,
 156.01294847672554,
 324.7636458271681,
 1225.4914366034254,
 284.3067719174856,
 113.30262180529911,
 1172.1061982467302,
 1303.2079659965602,
 367.5786346737995,
 102.82408036821379,
 250.93905884034365,
 188.76071010071192,
 563.0976067616247,
 1792.8920599543674,
 99.98516492864141,
 200.2632604672474,
 255.5824582797783,
 90.11877128846741,
 223.05400429179198,
 166.78486605854593,
 424.43391979204534,
 681.0792316802075,
 821.4148769853863,
 146.35704740995644,
 2120.063584991416,
 1102.3686912507847,
 180.77937774408394,
 122.59440821321307,
 1793.0231752916516,
 117.08335112249138,
 10642.4193342072,
 1620.2745764300548,
 177.97184633885354,
 1631.2005052349912,
 581.2980828740241,
 179.52028736333133,
 277.7442406048167,
 634.9019369890362,
 78.60012685896055,
 567.3158543207555,
 180.65242305720923,
 360.3702826180154,
 1169.6692121869814,
 301.46710464052643,
 239.46642881

5. Report the mean and the standard deviation of the mean squared errors.

In [9]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)
print("The mean of mean squred erros is {}: ".format(mean_mse))
print("The standard deviation of mean squred erros is {}: ".format(std_mse))

The mean of mean squred erros is 855.7181259391645: 
The standard deviation of mean squred erros is 1620.1232634876837: 
