In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the concrete dataset from a CSV file; the relative path is resolved
# against the notebook's working directory.
url = "concrete_data.csv"
data = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Preview the first five rows to check the column layout
data.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
# Split data into predictors (X) and target (y).
# The preview above shows an "Unnamed: 0" column that just counts rows
# (0, 1, 2, ...): it appears to be an index that was written into the CSV,
# not a property of the concrete mix, so it must not be used as a feature.
# errors="ignore" keeps this cell working if the file is later re-exported
# without that column; a missing "Strength" column still raises KeyError.
X = data.drop(columns="Strength").drop(columns="Unnamed: 0", errors="ignore")
y = data["Strength"]

In [6]:
# Parts A-D share one evaluation loop; each public function below only
# selects the settings that distinguish its part.
def _evaluate_network(X, y, *, normalize, epochs, hidden_layers, n_repeats, seed):
    """Train and score a small dense regression network on repeated random splits.

    For each repetition the data is split 70/30 into train/test, a fresh
    network (``hidden_layers`` ReLU layers of 10 units followed by a single
    linear output unit) is trained with Adam on mean squared error, and the
    test-set MSE is recorded.

    Parameters
    ----------
    X : 2-D array-like or DataFrame of predictors (``X.shape[1]`` features).
    y : 1-D array-like or Series holding the regression target.
    normalize : bool
        If True, standardize the predictors. The scaler is fitted on the
        training split only and then applied to both splits, so no test-set
        statistics leak into training.
    epochs : int
        Number of training epochs per repetition.
    hidden_layers : int
        Number of hidden layers (>= 1), each with 10 ReLU units.
    n_repeats : int
        Number of independent train/test splits (>= 1).
    seed : int or None
        Seed for the generator that produces the per-repetition split seeds.
        ``None`` gives different splits on every call. Only the splits are
        controlled by this value; network weight initialisation is still
        random unless the caller seeds TensorFlow separately.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` of the test MSE over all repetitions.

    Raises
    ------
    ValueError
        If ``hidden_layers`` or ``n_repeats`` is smaller than 1.
    """
    if hidden_layers < 1:
        raise ValueError("hidden_layers must be at least 1")
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    # Draw the split seeds from the full 32-bit range accepted by
    # scikit-learn instead of from only 100 values, so that the 50
    # repetitions are effectively guaranteed to be distinct splits.
    split_seeds = np.random.default_rng(seed).integers(0, 2**32 - 1, size=n_repeats)

    mse_list = []
    for split_seed in split_seeds:
        # Split into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=int(split_seed)
        )

        if normalize:
            # Fit on the training split only; fitting on all rows before
            # splitting would let test-set statistics influence training.
            scaler = StandardScaler().fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)

        # Build the model: first hidden layer declares the input width,
        # any further hidden layers are identical, then one output unit.
        layers = [Dense(10, activation='relu', input_shape=(X_train.shape[1],))]
        layers.extend(Dense(10, activation='relu') for _ in range(hidden_layers - 1))
        layers.append(Dense(1))  # Output layer
        model = Sequential(layers)
        model.compile(optimizer=Adam(), loss='mean_squared_error')

        # Train the model
        model.fit(X_train, y_train, epochs=epochs, verbose=0)

        # Evaluate the model on the held-out split
        y_pred = model.predict(X_test, verbose=0)
        mse_list.append(mean_squared_error(y_test, y_pred))

    return np.mean(mse_list), np.std(mse_list)


# Part A: Build Baseline Model
def baseline_model(X, y, epochs=50, hidden_layers=1, n_repeats=50, seed=42):
    """Part A: one hidden layer, 50 epochs, predictors used exactly as given.

    The optional arguments generalize the baseline (more epochs, more hidden
    layers, a different number of repetitions or split seed); called as
    ``baseline_model(X, y)`` it runs the Part A configuration.

    Returns the mean and standard deviation of the test MSE over
    ``n_repeats`` random 70/30 splits.
    """
    return _evaluate_network(
        X, y,
        normalize=False,
        epochs=epochs,
        hidden_layers=hidden_layers,
        n_repeats=n_repeats,
        seed=seed,
    )


# Part B: Normalize the data
def normalized_model(X, y, n_repeats=50, seed=42):
    """Part B: the baseline network trained on standardized predictors.

    Standardization statistics are computed from each training split only.
    With the same ``seed`` as the other parts, the same sequence of
    train/test splits is used, so the parts are compared on equal footing.

    Returns the mean and standard deviation of the test MSE.
    """
    return _evaluate_network(
        X, y,
        normalize=True,
        epochs=50,
        hidden_layers=1,
        n_repeats=n_repeats,
        seed=seed,
    )


# Part C: Increase the number of epochs
def increased_epochs_model(X, y, n_repeats=50, seed=42):
    """Part C: as Part B, but trained for 100 epochs instead of 50.

    Returns the mean and standard deviation of the test MSE.
    """
    return _evaluate_network(
        X, y,
        normalize=True,
        epochs=100,
        hidden_layers=1,
        n_repeats=n_repeats,
        seed=seed,
    )


# Part D: Increase the number of hidden layers
def increased_layers_model(X, y, n_repeats=50, seed=42):
    """Part D: as Part B, but with three hidden layers of 10 units each.

    Returns the mean and standard deviation of the test MSE.
    """
    return _evaluate_network(
        X, y,
        normalize=True,
        epochs=50,
        hidden_layers=3,
        n_repeats=n_repeats,
        seed=seed,
    )

In [7]:
# Evaluate the four experiment variants in order (A -> D); each call returns
# the mean and standard deviation of the test MSE over its repetitions.
mean_a, std_a = baseline_model(X=X, y=y)
mean_b, std_b = normalized_model(X=X, y=y)
mean_c, std_c = increased_epochs_model(X=X, y=y)
mean_d, std_d = increased_layers_model(X=X, y=y)

In [8]:
# Report mean and standard deviation of the test MSE for every part,
# one line per experiment, in the order the parts were run.
for label, mean_mse, std_mse in (
    ("Part A - Baseline Model: Mean MSE =", mean_a, std_a),
    ("Part B - Normalized Model: Mean MSE =", mean_b, std_b),
    ("Part C - Increased Epochs (100): Mean MSE =", mean_c, std_c),
    ("Part D - Increased Hidden Layers: Mean MSE =", mean_d, std_d),
):
    print(label, mean_mse, "Std MSE =", std_mse)

Part A - Baseline Model: Mean MSE = 376.38626342281765 Std MSE = 622.2187937717881
Part B - Normalized Model: Mean MSE = 362.9000539631059 Std MSE = 104.61969134938488
Part C - Increased Epochs (100): Mean MSE = 164.7102415766684 Std MSE = 15.227033912906442
Part D - Increased Hidden Layers: Mean MSE = 130.05372472334145 Std MSE = 16.934747796192468
