In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

In [7]:
# Part A -- baseline: one hidden layer, predictors used as loaded (no scaling)

# Load the concrete dataset from the course's object storage
df = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')

# 'Strength' is the regression target; every other column is a predictor
y = df['Strength']
X = df.drop(columns=['Strength'])

# Test-set MSE collected from each repetition
mse_list = []

for _ in range(50):
    # Fresh random split each pass: 30% held out, no fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=None
    )

    # Network: 10 ReLU units in a single hidden layer, then a 1-unit output layer
    model = Sequential([
        Dense(10, input_dim=X_train.shape[1], activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit silently for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Score the fitted network on the held-out rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)


# Summarise the spread of the 50 test-set errors
mean_mse = np.asarray(mse_list).mean()
std_mse = np.asarray(mse_list).std()

print(f"Mean MSE: {mean_mse}")
print(f"Standard deviation of MSE: {std_mse}")





2024-07-17 09:00:19.006625: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
2024-07-17 09:00:19.011416: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2394285000 Hz
2024-07-17 09:00:19.012002: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x563dc2cb9070 executing computations on platform Host. Devices:
2024-07-17 09:00:19.012063: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>


Mean MSE: 475.5199922967593
Standard deviation of MSE: 1053.2755765559464


In [None]:
# Part B
#
# Repeat the Part A experiment (same network, 50 epochs, 50 random splits)
# on standardized predictors and report the mean / standard deviation of the
# test-set MSE.

# Normalize the data by subtracting the mean and dividing by standard deviation
# NOTE(review): the statistics are taken over the full dataset before the
# split, so held-out rows influence the scaling. Kept as-is because the later
# parts of this notebook reuse X_normalized.
X_normalized = (X - X.mean()) / X.std()

# Create a list of mean squared errors
mse_list_normalized = []

for _ in range(50):
    # Split the data using train_test_split function.
    # The training predictors must be bound to X_train: the previous spelling
    # (X_tran) left X_train pointing at the stale, un-normalized split from
    # Part A, so the model was fitted on rows that did not match y_train and
    # then evaluated on normalized test data.
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=None)

    # Build the model: one hidden layer of 10 ReLU units, single output unit
    model = Sequential()
    model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Evaluate the model on the held-out normalized rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_normalized.append(mse)

# Report mean and standard deviation
# NOTE(review): results produced before the X_train fix are not comparable;
# re-run this cell and re-check any written conclusions against the new numbers.
mean_mse_normalized = np.mean(mse_list_normalized)
std_mse_normalized = np.std(mse_list_normalized)

print(f"Mean MSE (Normalized): {mean_mse_normalized}")
print(f"Standard Deviation of MSE (Normalized): {std_mse_normalized}")


"""
Normalization in this case resulted in a higher mean MSE compared to the baseline. 
This suggests that the model did not benefit from normalization, which might be due to the nature of the data.
The standard deviation of the MSE significantly decreased, indicating that normalization made
the training process more stable and consistent across different runs.
"""

In [None]:
# Part C -- same network as before on normalized predictors, trained for 100 epochs

# Test-set MSE collected from each repetition
mse_list_100_epochs = []

for _ in range(50):
    # Fresh random split each pass: 30% held out, no fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X_normalized, y, test_size=0.3, random_state=None
    )

    # Network: 10 ReLU units in a single hidden layer, then a 1-unit output layer
    model = Sequential([
        Dense(10, input_dim=X_train.shape[1], activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit silently, with the epoch count raised to 100
    model.fit(X_train, y_train, epochs=100, verbose=0)

    # Score the fitted network on the held-out rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_100_epochs.append(mse)

# Summarise the spread of the 50 test-set errors
mean_mse_100_epochs = np.asarray(mse_list_100_epochs).mean()
std_mse_100_epochs = np.asarray(mse_list_100_epochs).std()

print(f"Mean MSE (Normalized, 100 Epochs): {mean_mse_100_epochs}")
print(f"Standard Deviation of MSE (Normalized, 100 Epochs): {std_mse_100_epochs}")

"""
Training the model for 100 epochs led to a dramatic reduction in the mean MSE. 
The model was able to learn much better from the data given more epochs, which allowed for a more thorough training process.
The standard deviation of the MSE further decreased, indicating very consistent performance across different runs. 
This demonstrates that with more epochs, the model converges better and produces more reliable results.
"""

In [15]:
# Part D -- normalized predictors, 50 epochs, three hidden layers instead of one

# Test-set MSE collected from each repetition
mse_list_3_hidden_layers = []

for _ in range(50):
    # Fresh random split each pass: 30% held out, no fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X_normalized, y, test_size=0.3, random_state=None
    )

    # Network: three stacked hidden layers of 10 ReLU units, then a 1-unit output layer
    model = Sequential([
        Dense(10, input_dim=X_train.shape[1], activation='relu'),
        Dense(10, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit silently for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Score the fitted network on the held-out rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_3_hidden_layers.append(mse)

# Summarise the spread of the 50 test-set errors
mean_mse_3_hidden_layers = np.asarray(mse_list_3_hidden_layers).mean()
std_mse_3_hidden_layers = np.asarray(mse_list_3_hidden_layers).std()

print(f"Mean MSE (Normalized, 3 Hidden Layers): {mean_mse_3_hidden_layers}")
print(f"Standard Deviation of MSE (Normalized, 3 Hidden Layers): {std_mse_3_hidden_layers}")
        
"""

"""

Mean MSE (Normalized, 3 Hidden Layers): 125.77561950166174
Standard Deviation of MSE (Normalized, 3 Hidden Layers): 20.09355128353075
