# Regression Models with Keras

In [2]:
# Suppress the warning messages TensorFlow emits about the CPU architecture.
# These are set here, before Keras is imported in the next cell, so they are
# already in place when the backend loads.
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'  # turn off oneDNN custom operations (and the notice about them)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide INFO and WARNING messages from TensorFlow's native logging

In [7]:
import pandas as pd
import numpy as np
import keras
import ssl
import certifi
import urllib.request

import warnings
warnings.simplefilter('ignore', FutureWarning)

## Dataset

The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include:

* Cement
* Blast furnace slag
* Fly ash
* Water
* Superplasticizer
* Coarse aggregate
* Fine aggregate

In [10]:
# Remote CSV holding the concrete compressive-strength samples.
filepath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
# Verify the server certificate against the CA bundle shipped with certifi.
ssl_context = ssl.create_default_context(cafile=certifi.where())

# Open the URL with the secure context
with urllib.request.urlopen(filepath, context=ssl_context) as response:
    # Parse the HTTP response body directly as CSV into a DataFrame.
    concrete_data = pd.read_csv(response)
# Preview the first rows as the cell output.
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Check how many data points we have

In [11]:
concrete_data.shape

(1030, 9)

Inspect the summary statistics, then check how many missing values we have

In [12]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [13]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Now we split the data into predictors and target

In [18]:
# Column labels of the loaded frame.
concrete_data_columns = concrete_data.columns

# Every column except the 'Strength' label is a predictor; 'Strength' is the target.
predictors = concrete_data.loc[:, concrete_data_columns != 'Strength']
target = concrete_data['Strength']

# Show both previews, separated by a divider line.
print(predictors.head(), '-------', target.head(), sep='\n')

   Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0   540.0                 0.0      0.0  162.0               2.5   
1   540.0                 0.0      0.0  162.0               2.5   
2   332.5               142.5      0.0  228.0               0.0   
3   332.5               142.5      0.0  228.0               0.0   
4   198.6               132.4      0.0  192.0               0.0   

   Coarse Aggregate  Fine Aggregate  Age  
0            1040.0           676.0   28  
1            1055.0           676.0   28  
2             932.0           594.0  270  
3             932.0           594.0  365  
4             978.4           825.5  360  
-------
0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64


Finally, the last step is to normalize the data by subtracting the mean and dividing by the standard deviation.

In [19]:
# Standardize each predictor column: subtract its mean, then divide by its standard deviation.
predictors_norm = predictors.sub(predictors.mean()).div(predictors.std())
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [23]:
# Let's save the number of predictors to n_cols since we 
# will need this number when building our network.
n_cols = predictors_norm.shape[1]

## Build a Neural Network

In [24]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input


Let's define a function that defines our regression model for us so that we can conveniently call it to create our model.

In [30]:
def regression_model(hidden_layers=2, units=50):
    """Build and compile a fully connected regression network.

    The network takes ``n_cols`` input features (a notebook-level variable),
    passes them through ``hidden_layers`` ReLU-activated dense layers of
    ``units`` nodes each, and ends in a single output node with no activation.

    Calling ``regression_model()`` with no arguments reproduces the baseline
    architecture (two hidden layers of 50 nodes); deeper or wider variants can
    be created by passing arguments instead of copying the function.

    Args:
        hidden_layers: Number of hidden Dense layers. Defaults to 2.
        units: Number of nodes in each hidden layer. Defaults to 50.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer and the
        mean squared error loss.

    Raises:
        ValueError: If ``hidden_layers`` is negative or ``units`` is less than 1.
    """
    if hidden_layers < 0:
        raise ValueError(f"hidden_layers must be >= 0, got {hidden_layers}")
    if units < 1:
        raise ValueError(f"units must be >= 1, got {units}")

    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    for _ in range(hidden_layers):
        model.add(Dense(units, activation='relu'))
    # One output node without activation: the target is a continuous value.
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

## Train and test the network

In [31]:
# Build a fresh baseline model and train it for 100 epochs, holding out 30% of
# the rows for validation; verbose=2 prints one summary line per epoch.
model = regression_model()
model.fit(predictors_norm, target, validation_split=0.3, epochs=100, verbose=2)

Epoch 1/100
23/23 - 0s - 21ms/step - loss: 1697.9752 - val_loss: 1186.8013
Epoch 2/100
23/23 - 0s - 2ms/step - loss: 1610.6658 - val_loss: 1095.2500
Epoch 3/100
23/23 - 0s - 2ms/step - loss: 1463.1030 - val_loss: 939.0676
Epoch 4/100
23/23 - 0s - 2ms/step - loss: 1213.6877 - val_loss: 716.4503
Epoch 5/100
23/23 - 0s - 2ms/step - loss: 874.0939 - val_loss: 467.4026
Epoch 6/100
23/23 - 0s - 2ms/step - loss: 530.2600 - val_loss: 268.3879
Epoch 7/100
23/23 - 0s - 2ms/step - loss: 311.0789 - val_loss: 182.7302
Epoch 8/100
23/23 - 0s - 2ms/step - loss: 239.1408 - val_loss: 168.0819
Epoch 9/100
23/23 - 0s - 2ms/step - loss: 217.4831 - val_loss: 169.5829
Epoch 10/100
23/23 - 0s - 2ms/step - loss: 204.8132 - val_loss: 167.7389
Epoch 11/100
23/23 - 0s - 2ms/step - loss: 194.8447 - val_loss: 169.2755
Epoch 12/100
23/23 - 0s - 2ms/step - loss: 187.6478 - val_loss: 169.3872
Epoch 13/100
23/23 - 0s - 2ms/step - loss: 182.0750 - val_loss: 168.7930
Epoch 14/100
23/23 - 0s - 2ms/step - loss: 176.7248 -

<keras.src.callbacks.history.History at 0x15f25b230>

## Exercise 1


Now, using the same dataset, try to recreate the regression model with five hidden layers, each with 50 nodes and ReLU activation functions, and a single output layer, optimized using the Adam optimizer.

In [33]:
def regression_model_ex():
    """Build and compile the deeper exercise network.

    The architecture is ``n_cols`` inputs, five hidden Dense layers of 50
    ReLU nodes each, and one output node with no activation.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer and the
        mean squared error loss.
    """
    hidden_stack = [Dense(50, activation='relu') for _ in range(5)]
    network = Sequential([Input(shape=(n_cols,)), *hidden_stack, Dense(1)])

    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

## Exercise 2


Train and evaluate the model simultaneously using the fit() method by reserving 10% of the data for validation and training the model for 100 epochs

In [34]:
# Build the five-hidden-layer model and train it for 100 epochs, this time
# holding out only 10% of the rows for validation; verbose=2 prints one summary line per epoch.
model_ex = regression_model_ex()
model_ex.fit(predictors_norm, target, validation_split=0.1, epochs=100, verbose=2)

Epoch 1/100
29/29 - 1s - 26ms/step - loss: 1376.9922 - val_loss: 626.6790
Epoch 2/100
29/29 - 0s - 2ms/step - loss: 428.2206 - val_loss: 232.9767
Epoch 3/100
29/29 - 0s - 2ms/step - loss: 222.8099 - val_loss: 191.9066
Epoch 4/100
29/29 - 0s - 2ms/step - loss: 196.0170 - val_loss: 186.6780
Epoch 5/100
29/29 - 0s - 2ms/step - loss: 174.8233 - val_loss: 180.7628
Epoch 6/100
29/29 - 0s - 4ms/step - loss: 169.8015 - val_loss: 173.8017
Epoch 7/100
29/29 - 0s - 2ms/step - loss: 144.9415 - val_loss: 157.3925
Epoch 8/100
29/29 - 0s - 2ms/step - loss: 130.7881 - val_loss: 140.7136
Epoch 9/100
29/29 - 0s - 2ms/step - loss: 116.1257 - val_loss: 121.1679
Epoch 10/100
29/29 - 0s - 2ms/step - loss: 103.9326 - val_loss: 113.8750
Epoch 11/100
29/29 - 0s - 2ms/step - loss: 94.2289 - val_loss: 88.6700
Epoch 12/100
29/29 - 0s - 2ms/step - loss: 84.6326 - val_loss: 89.7174
Epoch 13/100
29/29 - 0s - 2ms/step - loss: 77.4291 - val_loss: 69.5840
Epoch 14/100
29/29 - 0s - 2ms/step - loss: 69.2429 - val_loss: 6

<keras.src.callbacks.history.History at 0x15e8cdbd0>

Based on the results, we notice that:

- Adding more hidden layers to the model increases its capacity to learn and represent complex relationships within the data. This allows the model to better identify patterns in the data; as a result, it becomes more effective at fitting the training data, potentially improving its predictions.
- By reducing the proportion of data set aside for validation and using a larger portion for training, the model has access to more examples to learn from. This additional training data helps the model improve its understanding of the underlying trends, which can lead to better overall performance.  
