In [3]:
import pandas as pd
import numpy as np

In [4]:
# Location of the concrete data set CSV used throughout this notebook.
CONCRETE_DATA_URL = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'

# Read the CSV into a DataFrame and preview its first rows.
concrete_data = pd.read_csv(CONCRETE_DATA_URL)
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
# Count the missing values in each column.
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [9]:
# Separate the predictors (X) from the regression target (y).
concrete_data_columns = concrete_data.columns

# Every column except 'Strength' is a predictor.
predictor_names = [name for name in concrete_data_columns if name != 'Strength']
X = concrete_data[predictor_names]

# 'Strength' is the value the models learn to predict.
y = concrete_data['Strength']

In [20]:
# import train_test_split from scikit learn library
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as a test set; random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# (rows, columns) of the training predictors.
X_train.shape

(721, 8)

In [7]:
# (rows, columns) of the held-out test predictors.
X_test.shape

(309, 8)

## Part A (without normalizing)

In [21]:
# Number of predictor columns; the model builders read this as the input width.
n_cols = len(X_train.columns)

In [6]:
# import Keras library, models and layers 

import keras
from keras.models import Sequential
from keras.layers import Dense

In [22]:
# Baseline regression network: one hidden layer with ten nodes.
def regression_model():
    """Build and compile the baseline regression network.

    The network is a ``Sequential`` stack of one hidden ``Dense`` layer
    (10 units, ReLU) followed by a single-unit ``Dense`` output layer with
    no activation specified. The input width is taken from the
    notebook-level ``n_cols`` at call time.

    Returns:
        The compiled model (Adam optimizer, mean-squared-error loss).
    """
    layers = [
        Dense(10, activation='relu', input_shape=(n_cols,)),  # hidden layer
        Dense(1),  # output layer
    ]
    network = Sequential(layers)

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [24]:
# Part A: train the baseline model 50 times on the raw (un-normalized)
# predictors and summarize the test-set error across the runs.
from sklearn.metrics import mean_squared_error

# Scores are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
rmse_scores = []
for i in range(50):
    # build a fresh, untrained model for this run
    model_a = regression_model()
    # fit on the training set for 50 epochs; the test set is passed only as
    # validation data so that its loss is logged after each epoch
    model_a.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=2)
    # predict on the held-out test set
    y_pred = model_a.predict(X_test)
    # np.sqrt of the mean squared error is the root-mean-squared error (RMSE);
    # the variable and column keep their existing name 'mse'
    mse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmse_scores.append(mse)

df = pd.DataFrame({'mse': rmse_scores})

# mean and sample standard deviation of the 50 per-run scores
mean = df['mse'].mean()
std = df['mse'].std()
print(mean)
print(std)

Epoch 1/50
23/23 - 1s - loss: 545193.6875 - val_loss: 423488.8125
Epoch 2/50
23/23 - 0s - loss: 359987.6250 - val_loss: 270092.0312
Epoch 3/50
23/23 - 0s - loss: 226885.2812 - val_loss: 165320.3281
Epoch 4/50
23/23 - 0s - loss: 137385.7031 - val_loss: 96821.5156
Epoch 5/50
23/23 - 0s - loss: 79920.1562 - val_loss: 53766.6055
Epoch 6/50
23/23 - 0s - loss: 44174.8359 - val_loss: 28455.7148
Epoch 7/50
23/23 - 0s - loss: 23646.5273 - val_loss: 14438.9502
Epoch 8/50
23/23 - 0s - loss: 12610.8818 - val_loss: 7624.2275
Epoch 9/50
23/23 - 0s - loss: 7290.3330 - val_loss: 4780.2402
Epoch 10/50
23/23 - 0s - loss: 5028.3818 - val_loss: 3726.1221
Epoch 11/50
23/23 - 0s - loss: 4155.0073 - val_loss: 3394.9878
Epoch 12/50
23/23 - 0s - loss: 3801.3394 - val_loss: 3295.8042
Epoch 13/50
23/23 - 0s - loss: 3650.5017 - val_loss: 3243.9097
Epoch 14/50
23/23 - 0s - loss: 3561.4941 - val_loss: 3193.5764
Epoch 15/50
23/23 - 0s - loss: 3478.1821 - val_loss: 3135.3240
Epoch 16/50
23/23 - 0s - loss: 3403.2812 -

## PART B (normalizing)

In [25]:
# Standardize each predictor column: subtract its mean, divide by its standard deviation.
# NOTE(review): the statistics are computed over all rows of X, i.e. before the
# train/test split, so test rows influence the scaling — confirm this is intended.
feature_means = X.mean()
feature_stds = X.std()
X_norm = (X - feature_means) / feature_stds

In [26]:
# Display the normalized predictors (the rendered table elides the middle rows).
X_norm

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.551340
3,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
...,...,...,...,...,...,...,...,...
1025,-0.045623,0.487998,0.564271,-0.092126,0.451190,-1.322363,-0.065861,-0.279597
1026,0.392628,-0.856472,0.959602,0.675872,0.702285,-1.993711,0.496651,-0.279597
1027,-1.269472,0.759210,0.850222,0.521336,-0.017520,-1.035561,0.080068,-0.279597
1028,-1.168042,1.307430,-0.846733,-0.279443,0.852942,0.214537,0.191074,-0.279597


In [27]:
# import train_test_split from scikit learn library
from sklearn.model_selection import train_test_split

# Hold out 30% of the normalized rows for testing; random_state=42 keeps the split repeatable.
X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(
    X_norm,
    y,
    test_size=0.3,
    random_state=42,
)

In [16]:
# Number of predictor columns in the normalized data (read by the model builders).
n_cols = len(X_norm.columns)

In [17]:
# Part B: repeat the 50-run experiment with the baseline model on the
# normalized predictors (50 epochs per run).
from sklearn.metrics import mean_squared_error

# Scores are collected in a list and the DataFrame is built once afterwards:
# DataFrame.append was removed in pandas 2.0 and copied the frame every iteration.
rmse_scores_b = []
for i in range(50):
    # fresh model for every run so that runs are independent
    model_b = regression_model()
    # train on the training split; the test split is only used as validation data
    model_b.fit(X_train_norm, y_train_norm, validation_data=(X_test_norm, y_test_norm), epochs=50, verbose=2)
    y_pred = model_b.predict(X_test_norm)
    # square root of the MSE, i.e. the RMSE; the existing name 'mse' is kept
    mse = np.sqrt(mean_squared_error(y_test_norm, y_pred))
    rmse_scores_b.append(mse)

df_b = pd.DataFrame({'mse': rmse_scores_b})

# mean and sample standard deviation of the 50 per-run scores
mean = df_b['mse'].mean()
std = df_b['mse'].std()
print(mean)
print(std)

Epoch 1/50
23/23 - 9s - loss: 1655.2032 - val_loss: 1550.0781
Epoch 2/50
23/23 - 0s - loss: 1638.9806 - val_loss: 1534.7527
Epoch 3/50
23/23 - 0s - loss: 1623.0950 - val_loss: 1519.9290
Epoch 4/50
23/23 - 0s - loss: 1607.5754 - val_loss: 1505.2317
Epoch 5/50
23/23 - 0s - loss: 1592.1688 - val_loss: 1490.0475
Epoch 6/50
23/23 - 0s - loss: 1576.3540 - val_loss: 1474.8022
Epoch 7/50
23/23 - 0s - loss: 1560.4149 - val_loss: 1459.3048
Epoch 8/50
23/23 - 0s - loss: 1544.0189 - val_loss: 1443.5178
Epoch 9/50
23/23 - 0s - loss: 1527.2524 - val_loss: 1427.0927
Epoch 10/50
23/23 - 0s - loss: 1509.8425 - val_loss: 1410.1725
Epoch 11/50
23/23 - 0s - loss: 1491.8185 - val_loss: 1392.6101
Epoch 12/50
23/23 - 0s - loss: 1473.0079 - val_loss: 1374.3055
Epoch 13/50
23/23 - 0s - loss: 1453.3635 - val_loss: 1355.2124
Epoch 14/50
23/23 - 0s - loss: 1432.9214 - val_loss: 1335.1611
Epoch 15/50
23/23 - 0s - loss: 1411.2325 - val_loss: 1314.2612
Epoch 16/50
23/23 - 0s - loss: 1388.5321 - val_loss: 1292.1045
E

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) of the per-run scores in df_b.
df_b.describe()

Unnamed: 0,mse
count,50.0
mean,19.015944
std,2.38825
min,15.184602
25%,17.347942
50%,18.324296
75%,20.561358
max,25.386336


In [None]:
#y_pred = model_b.predict(X_test)
#mean_squared_error(y_test, y_pred, squared=False)

## PART C (100 epochs)

In [28]:
# Part C: same experiment as Part B (baseline model, normalized predictors),
# but each run trains for 100 epochs.

mse_c = []
for i in range(50):
    # fresh model for every run so that runs are independent
    model_c = regression_model()
    # train on the training split; the test split is only used as validation data
    model_c.fit(X_train_norm, y_train_norm, validation_data=(X_test_norm, y_test_norm), epochs=100, verbose=2)
    y_pred = model_c.predict(X_test_norm)
    # square root of the MSE, i.e. the RMSE; the existing name 'mse' is kept
    mse = np.sqrt(mean_squared_error(y_test_norm, y_pred))
    mse_c.append(mse)


mean = np.mean(mse_c)
# ddof=1 yields the sample standard deviation, the same estimator pandas'
# Series.std() applies in Parts A and B; np.std defaults to ddof=0, which
# would make the reported spreads not directly comparable.
std = np.std(mse_c, ddof=1)
print(mean)
print(std)

Epoch 1/100
23/23 - 0s - loss: 1607.8800 - val_loss: 1502.1530
Epoch 2/100
23/23 - 0s - loss: 1591.8374 - val_loss: 1486.9197
Epoch 3/100
23/23 - 0s - loss: 1576.3031 - val_loss: 1472.1790
Epoch 4/100
23/23 - 0s - loss: 1561.1753 - val_loss: 1457.5812
Epoch 5/100
23/23 - 0s - loss: 1546.2871 - val_loss: 1442.8923
Epoch 6/100
23/23 - 0s - loss: 1531.2761 - val_loss: 1428.2578
Epoch 7/100
23/23 - 0s - loss: 1516.1317 - val_loss: 1413.2885
Epoch 8/100
23/23 - 0s - loss: 1500.8450 - val_loss: 1397.8341
Epoch 9/100
23/23 - 0s - loss: 1485.1414 - val_loss: 1381.9098
Epoch 10/100
23/23 - 0s - loss: 1468.7628 - val_loss: 1365.8303
Epoch 11/100
23/23 - 0s - loss: 1451.8668 - val_loss: 1349.5068
Epoch 12/100
23/23 - 0s - loss: 1434.7216 - val_loss: 1332.2021
Epoch 13/100
23/23 - 0s - loss: 1416.8579 - val_loss: 1314.2533
Epoch 14/100
23/23 - 0s - loss: 1398.1171 - val_loss: 1295.7125
Epoch 15/100
23/23 - 0s - loss: 1378.6793 - val_loss: 1276.6969
Epoch 16/100
23/23 - 0s - loss: 1358.7928 - val_l

## PART D (new model: three hidden layers)

In [29]:
# Deeper regression network: three hidden layers with ten nodes each.
def regression_model_new():
    """Build and compile the three-hidden-layer regression network.

    The network is a ``Sequential`` stack of three hidden ``Dense`` layers
    (10 units each, ReLU) followed by a single-unit ``Dense`` output layer
    with no activation specified. The input width is taken from the
    notebook-level ``n_cols`` at call time.

    Returns:
        The compiled model (Adam optimizer, mean-squared-error loss).
    """
    network = Sequential(
        [
            Dense(10, activation='relu', input_shape=(n_cols,)),  # hidden layer 1
            Dense(10, activation='relu'),  # hidden layer 2
            Dense(10, activation='relu'),  # hidden layer 3
            Dense(1),  # output layer
        ]
    )

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [30]:


# Part D: 50 runs of the three-hidden-layer model on the normalized
# predictors, 100 epochs per run.
mse_d = []
for i in range(50):
    # fresh model for every run so that runs are independent
    model_d = regression_model_new()
    # train on the training split; the test split is only used as validation data
    model_d.fit(X_train_norm, y_train_norm, validation_data=(X_test_norm, y_test_norm), epochs=100, verbose=2)
    # Predictions must come from model_d, the model trained in this iteration;
    # scoring a model left over from an earlier part would report that model's
    # error instead of this architecture's.
    y_pred = model_d.predict(X_test_norm)
    # square root of the MSE, i.e. the RMSE; the existing name 'mse' is kept
    mse = np.sqrt(mean_squared_error(y_test_norm, y_pred))
    mse_d.append(mse)


mean = np.mean(mse_d)
# ddof=1 yields the sample standard deviation, the same estimator pandas'
# Series.std() applies in Parts A and B (np.std defaults to ddof=0).
std = np.std(mse_d, ddof=1)
print(mean)
print(std)

Epoch 1/100
23/23 - 1s - loss: 1592.0779 - val_loss: 1493.7350
Epoch 2/100
23/23 - 0s - loss: 1579.3646 - val_loss: 1478.4890
Epoch 3/100
23/23 - 0s - loss: 1561.0042 - val_loss: 1454.8352
Epoch 4/100
23/23 - 0s - loss: 1531.6555 - val_loss: 1416.8896
Epoch 5/100
23/23 - 0s - loss: 1485.5496 - val_loss: 1359.0017
Epoch 6/100
23/23 - 0s - loss: 1414.2941 - val_loss: 1271.4851
Epoch 7/100
23/23 - 0s - loss: 1309.4646 - val_loss: 1148.7721
Epoch 8/100
23/23 - 0s - loss: 1162.5997 - val_loss: 984.8154
Epoch 9/100
23/23 - 0s - loss: 972.7263 - val_loss: 786.6211
Epoch 10/100
23/23 - 0s - loss: 757.0618 - val_loss: 580.1639
Epoch 11/100
23/23 - 0s - loss: 543.5704 - val_loss: 405.4917
Epoch 12/100
23/23 - 0s - loss: 380.0921 - val_loss: 294.0445
Epoch 13/100
23/23 - 0s - loss: 281.3748 - val_loss: 245.4207
Epoch 14/100
23/23 - 0s - loss: 238.2183 - val_loss: 227.6821
Epoch 15/100
23/23 - 0s - loss: 218.6203 - val_loss: 216.1747
Epoch 16/100
23/23 - 0s - loss: 206.1926 - val_loss: 207.0308
Ep