## Download and Clean Dataset

Let's start by importing the <em>pandas</em> and the Numpy libraries.

In [4]:
import pandas as pd
import numpy as np

Let's download the data and read it into a <em>pandas</em> dataframe.

In [5]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


## Problem A

#### Split data into predictors and target

The target variable in this problem is the concrete sample strength. Therefore, our predictors will be all the other columns.

In [6]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

### Import Karas 

In [9]:
import keras

Using TensorFlow backend.


In [10]:
from keras.models import Sequential
from keras.layers import Dense

In [27]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [28]:
n_cols = X_train.shape[1]

In [29]:
n_cols

8

In [30]:
model = regression_model()

In [31]:
model.fit(X_train, y_train,validation_data=(X_test, y_test),epochs=50,verbose=2)
scores = model.evaluate(X_test, y_test, verbose=0)

Train on 721 samples, validate on 309 samples
Epoch 1/50
 - 0s - loss: 65618.4432 - val_loss: 49524.3869
Epoch 2/50
 - 0s - loss: 39677.4023 - val_loss: 27688.3178
Epoch 3/50
 - 0s - loss: 20203.6887 - val_loss: 11980.6822
Epoch 4/50
 - 0s - loss: 7720.1759 - val_loss: 3649.4771
Epoch 5/50
 - 0s - loss: 2098.9773 - val_loss: 815.5679
Epoch 6/50
 - 0s - loss: 576.7658 - val_loss: 409.6789
Epoch 7/50
 - 0s - loss: 408.0838 - val_loss: 400.2332
Epoch 8/50
 - 0s - loss: 402.3851 - val_loss: 395.5539
Epoch 9/50
 - 0s - loss: 398.1500 - val_loss: 390.8469
Epoch 10/50
 - 0s - loss: 393.4453 - val_loss: 386.2074
Epoch 11/50
 - 0s - loss: 388.6418 - val_loss: 382.4837
Epoch 12/50
 - 0s - loss: 384.7418 - val_loss: 378.0051
Epoch 13/50
 - 0s - loss: 379.2015 - val_loss: 372.8403
Epoch 14/50
 - 0s - loss: 374.8247 - val_loss: 368.2506
Epoch 15/50
 - 0s - loss: 371.1124 - val_loss: 364.2951
Epoch 16/50
 - 0s - loss: 366.0217 - val_loss: 359.1058
Epoch 17/50
 - 0s - loss: 360.5713 - val_loss: 354.4

In [41]:
from sklearn.metrics import mean_squared_error
y_pred = model.predict(X_test)
mean_squared_error(y_test, y_pred)

690034.0473301428

# Problem B Normalize the data 

In [32]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [33]:
X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)

In [34]:
model.fit(X_train_norm, y_train_norm,validation_data=(X_test_norm, y_test_norm),epochs=50,verbose=2)
scores = model.evaluate(X_test_norm, y_test_norm, verbose=0)

Train on 721 samples, validate on 309 samples
Epoch 1/50
 - 0s - loss: 1617.2603 - val_loss: 1506.3004
Epoch 2/50
 - 0s - loss: 1581.5634 - val_loss: 1477.3604
Epoch 3/50
 - 0s - loss: 1558.1595 - val_loss: 1457.2654
Epoch 4/50
 - 0s - loss: 1540.8604 - val_loss: 1439.7062
Epoch 5/50
 - 0s - loss: 1524.9142 - val_loss: 1423.0531
Epoch 6/50
 - 0s - loss: 1509.2631 - val_loss: 1406.5546
Epoch 7/50
 - 0s - loss: 1493.4781 - val_loss: 1389.9360
Epoch 8/50
 - 0s - loss: 1477.3497 - val_loss: 1373.4669
Epoch 9/50
 - 0s - loss: 1461.0926 - val_loss: 1356.7770
Epoch 10/50
 - 0s - loss: 1444.9514 - val_loss: 1340.0785
Epoch 11/50
 - 0s - loss: 1428.5564 - val_loss: 1323.2554
Epoch 12/50
 - 0s - loss: 1412.0104 - val_loss: 1306.2165
Epoch 13/50
 - 0s - loss: 1395.2854 - val_loss: 1289.3819
Epoch 14/50
 - 0s - loss: 1378.5983 - val_loss: 1272.9324
Epoch 15/50
 - 0s - loss: 1362.2993 - val_loss: 1255.9041
Epoch 16/50
 - 0s - loss: 1345.4372 - val_loss: 1239.2791
Epoch 17/50
 - 0s - loss: 1328.7570

In [42]:
y_pred_norm = model.predict(X_test_norm)
mean_squared_error(y_test_norm, y_pred_norm)

96.73186725804555

### Mean square error decreased compared to A 

# Problem C 

In [43]:
model.fit(X_train_norm, y_train_norm,validation_data=(X_test_norm, y_test_norm),epochs=100,verbose=2)
scores = model.evaluate(X_test_norm, y_test_norm, verbose=0)

Train on 721 samples, validate on 309 samples
Epoch 1/100
 - 0s - loss: 90.2095 - val_loss: 96.5532
Epoch 2/100
 - 0s - loss: 89.9590 - val_loss: 96.3199
Epoch 3/100
 - 0s - loss: 89.6868 - val_loss: 96.1621
Epoch 4/100
 - 0s - loss: 89.4233 - val_loss: 95.8896
Epoch 5/100
 - 0s - loss: 89.1521 - val_loss: 95.7205
Epoch 6/100
 - 0s - loss: 88.8700 - val_loss: 95.5258
Epoch 7/100
 - 0s - loss: 88.6379 - val_loss: 95.3579
Epoch 8/100
 - 0s - loss: 88.3271 - val_loss: 95.1272
Epoch 9/100
 - 0s - loss: 88.1389 - val_loss: 95.0690
Epoch 10/100
 - 0s - loss: 87.9175 - val_loss: 94.6419
Epoch 11/100
 - 0s - loss: 87.6791 - val_loss: 94.5947
Epoch 12/100
 - 0s - loss: 87.3558 - val_loss: 94.3040
Epoch 13/100
 - 0s - loss: 87.1890 - val_loss: 93.9984
Epoch 14/100
 - 0s - loss: 86.8646 - val_loss: 94.0594
Epoch 15/100
 - 0s - loss: 86.6351 - val_loss: 93.8163
Epoch 16/100
 - 0s - loss: 86.4463 - val_loss: 93.6897
Epoch 17/100
 - 0s - loss: 86.2171 - val_loss: 93.5432
Epoch 18/100
 - 0s - loss: 8

In [44]:
y_pred_norm = model.predict(X_test_norm)
mean_squared_error(y_test_norm, y_pred_norm)

79.67727318646112

### Mean square erroe decreased compared to B 

# Problem D 

In [46]:
# define regression model
def regression_model_new():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [47]:
model_new = regression_model_new()

In [48]:
model_new.fit(X_train_norm, y_train_norm,validation_data=(X_test_norm, y_test_norm),epochs=100,verbose=2)
scores = model.evaluate(X_test_norm, y_test_norm, verbose=0)

Train on 721 samples, validate on 309 samples
Epoch 1/100
 - 0s - loss: 1552.8862 - val_loss: 1443.9380
Epoch 2/100
 - 0s - loss: 1517.2082 - val_loss: 1399.9967
Epoch 3/100
 - 0s - loss: 1462.2405 - val_loss: 1331.6108
Epoch 4/100
 - 0s - loss: 1378.4965 - val_loss: 1226.2649
Epoch 5/100
 - 0s - loss: 1252.1612 - val_loss: 1080.9394
Epoch 6/100
 - 0s - loss: 1081.8602 - val_loss: 891.1842
Epoch 7/100
 - 0s - loss: 869.0626 - val_loss: 684.4579
Epoch 8/100
 - 0s - loss: 652.7277 - val_loss: 492.5281
Epoch 9/100
 - 0s - loss: 470.9462 - val_loss: 362.8299
Epoch 10/100
 - 0s - loss: 353.6359 - val_loss: 284.5052
Epoch 11/100
 - 0s - loss: 281.5572 - val_loss: 237.4742
Epoch 12/100
 - 0s - loss: 239.0847 - val_loss: 212.5453
Epoch 13/100
 - 0s - loss: 213.6581 - val_loss: 197.0247
Epoch 14/100
 - 0s - loss: 200.5852 - val_loss: 187.0808
Epoch 15/100
 - 0s - loss: 190.0141 - val_loss: 181.5073
Epoch 16/100
 - 0s - loss: 183.9461 - val_loss: 177.3468
Epoch 17/100
 - 0s - loss: 179.0691 - va

In [49]:
y_pred_norm = model_new.predict(X_test_norm)
mean_squared_error(y_test_norm, y_pred_norm)

106.60239503266381

### Mean square erroe increased compared to B 

<a id="item2"></a>