In [34]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import statistics 

In [3]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [7]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

In [12]:
n_cols = predictors.shape[1]

pandas.core.frame.DataFrame

In [14]:
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu',input_shape=(n_cols,)))
    model.add(Dense(1))
    
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

**A. Build a baseline model**

In [32]:
model = regression_model()
error_scores = []

for iteration in range(1,50):
    x_train, x_test, y_train, y_test = train_test_split(predictors,target,test_size=0.3)
    model.fit(X_train,y_train,validation_split=0.3,epochs=50, verbose=0)
    predictions = model.predict(x_test)
    error_scores.append(mean_squared_error(predictions,y_test))

In [46]:
print('The mean and standard deviation of the mean squared errors are {:2f} and {:2f}, respectively.'.format(statistics.mean(error_scores),statistics.pstdev(error_scores)))

The mean and standard deviation of the mean squared errors are 278.959100 and 25.015408, respectively.


**B. Normalize the Data**

In [51]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
target_norm = (target - statistics.mean(target)) / statistics.pstdev(target)

In [54]:
model_norm = regression_model()
error_scores_norm = []

for iteration in range(1,50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm,target_norm,test_size=0.3)
    model_norm.fit(X_train,y_train,validation_split=0.3,epochs=50, verbose=0)
    predictions_norm = model.predict(x_test)
    error_scores_norm.append(mean_squared_error(predictions_norm,y_test))

In [55]:
print('The mean and standard deviation of the mean squared errors are {:2f} and {:2f}, respectively.'.format(statistics.mean(error_scores_norm),statistics.pstdev(error_scores_norm)))

The mean and standard deviation of the mean squared errors are 1.691108 and 0.105143, respectively.


**C. Increase the number of epochs**

In [56]:
model_norm = regression_model()
error_scores_norm = []

for iteration in range(1,50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm,target_norm,test_size=0.3)
    model_norm.fit(X_train,y_train,validation_split=0.3,epochs=100, verbose=0)
    predictions_norm = model.predict(x_test)
    error_scores_norm.append(mean_squared_error(predictions_norm,y_test))

In [57]:
print('The mean and standard deviation of the mean squared errors are {:2f} and {:2f}, respectively.'.format(statistics.mean(error_scores_norm),statistics.pstdev(error_scores_norm)))

The mean and standard deviation of the mean squared errors are 1.653146 and 0.098345, respectively.


**D. Increase the number of hidden layers**

In [58]:
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu',input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu',input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu',input_shape=(n_cols,)))
    model.add(Dense(1))
    
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

In [59]:
model_norm = regression_model()
error_scores_norm = []

for iteration in range(1,50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm,target_norm,test_size=0.3)
    model_norm.fit(X_train,y_train,validation_split=0.3,epochs=50, verbose=0)
    predictions_norm = model.predict(x_test)
    error_scores_norm.append(mean_squared_error(predictions_norm,y_test))

In [60]:
print('The mean and standard deviation of the mean squared errors are {:2f} and {:2f}, respectively.'.format(statistics.mean(error_scores_norm),statistics.pstdev(error_scores_norm)))

The mean and standard deviation of the mean squared errors are 1.655310 and 0.097246, respectively.
