# Prelims

Let's get the data downloaded and converted into a dataframe first.

In [5]:
import urllib.request

url = 'https://cocl.us/concrete_data'

urllib.request.urlretrieve(url, 'concrete_data.csv')

('concrete_data.csv', <http.client.HTTPMessage at 0x1d9a4f02b48>)

In [25]:
import pandas as pd
import numpy as np

df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# Part A

Baseline model

In [33]:
from sklearn.model_selection import train_test_split

pred_cols = [col for col in df.columns if col != 'Strength']
X = df[pred_cols].to_numpy()
y = df['Strength'].to_numpy()

In [36]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

resultsA = []

for i in range(50):
    print("starting run {}...".format(i))
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3)
    modelA = Sequential()
    modelA.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))
    modelA.add(Dense(1))
    modelA.compile(optimizer='adam', loss='mse')

    modelA.fit(X_train, y_train, epochs=50, verbose=0)
    resultsA.append(modelA.evaluate(X_test, y_test, verbose=0))
resultsA = np.array(resultsA)

starting run 0...
starting run 1...
starting run 2...
starting run 3...
starting run 4...
starting run 5...
starting run 6...
starting run 7...
starting run 8...
starting run 9...
starting run 10...
starting run 11...
starting run 12...
starting run 13...
starting run 14...
starting run 15...
starting run 16...
starting run 17...
starting run 18...
starting run 19...
starting run 20...
starting run 21...
starting run 22...
starting run 23...
starting run 24...
starting run 25...
starting run 26...
starting run 27...
starting run 28...
starting run 29...
starting run 30...
starting run 31...
starting run 32...
starting run 33...
starting run 34...
starting run 35...
starting run 36...
starting run 37...
starting run 38...
starting run 39...
starting run 40...
starting run 41...
starting run 42...
starting run 43...
starting run 44...
starting run 45...
starting run 46...
starting run 47...
starting run 48...
starting run 49...


In [38]:

print('Mean of MSEs:', resultsA.mean())
print('Stddev of MSEs:', resultsA.std())

Mean of MSEs: 400.47796578317786
Stddev of MSEs: 423.0974670369794


# Part B

Normalized data

In [39]:
X_normalized = (X - X.mean(axis=0))/X.std(axis=0)

resultsB = []

for i in range(50):
    print("starting run {}...".format(i))
    X_train, X_test, y_train, y_test = train_test_split(X_normalized,y, test_size = 0.3)
    modelA = Sequential()
    modelA.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))
    modelA.add(Dense(1))
    modelA.compile(optimizer='adam', loss='mse')

    modelA.fit(X_train, y_train, epochs=50, verbose=0)
    resultsB.append(modelA.evaluate(X_test, y_test, verbose=0))
resultsB = np.array(resultsB)

starting run 0...
starting run 1...
starting run 2...
starting run 3...
starting run 4...
starting run 5...
starting run 6...
starting run 7...
starting run 8...
starting run 9...
starting run 10...
starting run 11...
starting run 12...
starting run 13...
starting run 14...
starting run 15...
starting run 16...
starting run 17...
starting run 18...
starting run 19...
starting run 20...
starting run 21...
starting run 22...
starting run 23...
starting run 24...
starting run 25...
starting run 26...
starting run 27...
starting run 28...
starting run 29...
starting run 30...
starting run 31...
starting run 32...
starting run 33...
starting run 34...
starting run 35...
starting run 36...
starting run 37...
starting run 38...
starting run 39...
starting run 40...
starting run 41...
starting run 42...
starting run 43...
starting run 44...
starting run 45...
starting run 46...
starting run 47...
starting run 48...
starting run 49...


In [45]:

print('Mean of MSEs:', resultsB.mean())
print('Stddev of MSEs:', resultsB.std())

print('Compare to...')
print('Part A MSE mean:', resultsA.mean())
print('Part A MSE std:', resultsA.std())

Mean of MSEs: 327.9287613053306
Stddev of MSEs: 75.50634621907868
Compare to...
Part A MSE mean: 400.47796578317786
Part A MSE std: 423.0974670369794


Normalizing helped decrease the MSE on average and enormously reduced the standard deviation of the prediction error.

# Part C

In [41]:
resultsC = []

for i in range(50):
    print("starting run {}...".format(i))
    X_train, X_test, y_train, y_test = train_test_split(X_normalized,y, test_size = 0.3)
    modelA = Sequential()
    modelA.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))
    modelA.add(Dense(1))
    modelA.compile(optimizer='adam', loss='mse')

    modelA.fit(X_train, y_train, epochs=100, verbose=0)
    resultsC.append(modelA.evaluate(X_test, y_test, verbose=0))
resultsC = np.array(resultsC)

starting run 0...
starting run 1...
starting run 2...
starting run 3...
starting run 4...
starting run 5...
starting run 6...
starting run 7...
starting run 8...
starting run 9...
starting run 10...
starting run 11...
starting run 12...
starting run 13...
starting run 14...
starting run 15...
starting run 16...
starting run 17...
starting run 18...
starting run 19...
starting run 20...
starting run 21...
starting run 22...
starting run 23...
starting run 24...
starting run 25...
starting run 26...
starting run 27...
starting run 28...
starting run 29...
starting run 30...
starting run 31...
starting run 32...
starting run 33...
starting run 34...
starting run 35...
starting run 36...
starting run 37...
starting run 38...
starting run 39...
starting run 40...
starting run 41...
starting run 42...
starting run 43...
starting run 44...
starting run 45...
starting run 46...
starting run 47...
starting run 48...
starting run 49...


In [46]:
print('Mean of MSEs:', resultsC.mean())
print('Stddev of MSEs:', resultsC.std())
print('Compare to...')
print('Part B MSE mean:', resultsB.mean())
print('Part B MSE std:', resultsB.std())

Mean of MSEs: 163.45172728948992
Stddev of MSEs: 9.99178536813674
Compare to...
Part B MSE mean: 327.9287613053306
Part B MSE std: 75.50634621907868


Additional runtime halved the MSE and dramatically reduced the standard deviation of the errors.

# Part D

In [43]:
resultsD = []

for i in range(50):
    print("starting run {}...".format(i))
    X_train, X_test, y_train, y_test = train_test_split(X_normalized,y, test_size = 0.3)
    modelD = Sequential()
    modelD.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))
    modelD.add(Dense(10, activation='relu'))
    modelD.add(Dense(10, activation='relu'))
    modelD.add(Dense(1))
    modelD.compile(optimizer='adam', loss='mse')

    modelD.fit(X_train, y_train, epochs=50, verbose=0)
    resultsD.append(modelA.evaluate(X_test, y_test, verbose=0))
resultsD = np.array(resultsD)

starting run 0...
starting run 1...
starting run 2...
starting run 3...
starting run 4...
starting run 5...
starting run 6...
starting run 7...
starting run 8...
starting run 9...
starting run 10...
starting run 11...
starting run 12...
starting run 13...
starting run 14...
starting run 15...
starting run 16...
starting run 17...
starting run 18...
starting run 19...
starting run 20...
starting run 21...
starting run 22...
starting run 23...
starting run 24...
starting run 25...
starting run 26...
starting run 27...
starting run 28...
starting run 29...
starting run 30...
starting run 31...
starting run 32...
starting run 33...
starting run 34...
starting run 35...
starting run 36...
starting run 37...
starting run 38...
starting run 39...
starting run 40...
starting run 41...
starting run 42...
starting run 43...
starting run 44...
starting run 45...
starting run 46...
starting run 47...
starting run 48...
starting run 49...


In [47]:
print('Mean of MSEs:', resultsD.mean())
print('Stddev of MSEs:', resultsD.std())
print('Compare to...')
print('Part B MSE mean:', resultsB.mean())
print('Part B MSE std:', resultsB.std())

Mean of MSEs: 156.10441887321596
Stddev of MSEs: 9.878436398598264
Compare to...
Part B MSE mean: 327.9287613053306
Part B MSE std: 75.50634621907868


Adding depth to the neural net has a similar effect as adding training time: the mean MSE is halved and the standard deviation of the error is much smaller.