<a href="https://colab.research.google.com/github/widchy95/deep-learning-challenge/blob/main/AlphabetSoupCharity_Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# Load data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Drop irrelevant columns
application_df = application_df.drop(columns=["EIN", "NAME"])

In [16]:
# Bin APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_type_counts[application_type_counts < 1000].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, "Other")

# Bin CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = classification_counts[classification_counts < 1000].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")

In [17]:
# One-hot encode categorical variables
application_df = pd.get_dummies(application_df)

In [18]:
# Split features and target
X = application_df.drop(columns=["IS_SUCCESSFUL"])
y = application_df["IS_SUCCESSFUL"]

In [20]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Attempt 1: Increase Neurons and Layers

In [21]:
# Define model
nn1 = tf.keras.models.Sequential()
nn1.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=X_train_scaled.shape[1]))
nn1.add(tf.keras.layers.Dense(units=80, activation="relu"))
nn1.add(tf.keras.layers.Dense(units=50, activation="relu"))
nn1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile and train model
nn1.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history1 = nn1.fit(X_train_scaled, y_train, epochs=50, batch_size=32, verbose=2)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


858/858 - 3s - 3ms/step - accuracy: 0.7201 - loss: 0.5693
Epoch 2/50
858/858 - 3s - 4ms/step - accuracy: 0.7289 - loss: 0.5576
Epoch 3/50
858/858 - 2s - 2ms/step - accuracy: 0.7303 - loss: 0.5548
Epoch 4/50
858/858 - 3s - 3ms/step - accuracy: 0.7325 - loss: 0.5527
Epoch 5/50
858/858 - 3s - 3ms/step - accuracy: 0.7332 - loss: 0.5510
Epoch 6/50
858/858 - 1s - 2ms/step - accuracy: 0.7341 - loss: 0.5498
Epoch 7/50
858/858 - 3s - 3ms/step - accuracy: 0.7337 - loss: 0.5501
Epoch 8/50
858/858 - 2s - 2ms/step - accuracy: 0.7339 - loss: 0.5489
Epoch 9/50
858/858 - 2s - 2ms/step - accuracy: 0.7348 - loss: 0.5484
Epoch 10/50
858/858 - 2s - 2ms/step - accuracy: 0.7340 - loss: 0.5480
Epoch 11/50
858/858 - 3s - 3ms/step - accuracy: 0.7352 - loss: 0.5473
Epoch 12/50
858/858 - 3s - 3ms/step - accuracy: 0.7354 - loss: 0.5466
Epoch 13/50
858/858 - 3s - 3ms/step - accuracy: 0.7355 - loss: 0.5469
Epoch 14/50
858/858 - 3s - 3ms/step - accuracy: 0.7354 - loss: 0.5460
Epoch 15/50
858/858 - 2s - 2ms/step - ac

In [22]:
# Evaluate model
loss1, accuracy1 = nn1.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 1 - Loss: {loss1}, Accuracy: {accuracy1}")

215/215 - 0s - 2ms/step - accuracy: 0.7262 - loss: 0.5744
Attempt 1 - Loss: 0.5744023323059082, Accuracy: 0.7262390851974487


# Attempt 2: Adjust Activation Functions and Add Dropout

In [23]:
# Define model
nn2 = tf.keras.models.Sequential()
nn2.add(tf.keras.layers.Dense(units=128, activation="tanh", input_dim=X_train_scaled.shape[1]))
nn2.add(tf.keras.layers.Dropout(0.2))
nn2.add(tf.keras.layers.Dense(units=64, activation="relu"))
nn2.add(tf.keras.layers.Dense(units=32, activation="relu"))
nn2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile and train model
nn2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history2 = nn2.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=2)

Epoch 1/100
858/858 - 3s - 3ms/step - accuracy: 0.7199 - loss: 0.5711
Epoch 2/100
858/858 - 3s - 3ms/step - accuracy: 0.7258 - loss: 0.5609
Epoch 3/100
858/858 - 1s - 2ms/step - accuracy: 0.7287 - loss: 0.5579
Epoch 4/100
858/858 - 1s - 2ms/step - accuracy: 0.7293 - loss: 0.5565
Epoch 5/100
858/858 - 2s - 2ms/step - accuracy: 0.7295 - loss: 0.5553
Epoch 6/100
858/858 - 2s - 2ms/step - accuracy: 0.7301 - loss: 0.5534
Epoch 7/100
858/858 - 2s - 2ms/step - accuracy: 0.7315 - loss: 0.5528
Epoch 8/100
858/858 - 3s - 3ms/step - accuracy: 0.7312 - loss: 0.5526
Epoch 9/100
858/858 - 3s - 3ms/step - accuracy: 0.7324 - loss: 0.5512
Epoch 10/100
858/858 - 3s - 3ms/step - accuracy: 0.7320 - loss: 0.5512
Epoch 11/100
858/858 - 2s - 2ms/step - accuracy: 0.7327 - loss: 0.5513
Epoch 12/100
858/858 - 3s - 3ms/step - accuracy: 0.7338 - loss: 0.5509
Epoch 13/100
858/858 - 2s - 2ms/step - accuracy: 0.7323 - loss: 0.5500
Epoch 14/100
858/858 - 3s - 3ms/step - accuracy: 0.7349 - loss: 0.5497
Epoch 15/100
85

In [24]:
# Evaluate model
loss2, accuracy2 = nn2.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 2 - Loss: {loss2}, Accuracy: {accuracy2}")

215/215 - 0s - 2ms/step - accuracy: 0.7271 - loss: 0.5675
Attempt 2 - Loss: 0.5674978494644165, Accuracy: 0.7271137237548828


# Attempt 3: Longer Training with Early Stopping

In [25]:
# Define model
nn3 = tf.keras.models.Sequential()
nn3.add(tf.keras.layers.Dense(units=150, activation="relu", input_dim=X_train_scaled.shape[1]))
nn3.add(tf.keras.layers.Dense(units=100, activation="relu"))
nn3.add(tf.keras.layers.Dense(units=50, activation="relu"))
nn3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile model
nn3.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Add EarlyStopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5)

# Train model
history3 = nn3.fit(X_train_scaled, y_train, epochs=200, batch_size=32, callbacks=[early_stopping], verbose=2)

Epoch 1/200
858/858 - 3s - 4ms/step - accuracy: 0.7215 - loss: 0.5714
Epoch 2/200
858/858 - 1s - 2ms/step - accuracy: 0.7302 - loss: 0.5569
Epoch 3/200
858/858 - 4s - 5ms/step - accuracy: 0.7303 - loss: 0.5543
Epoch 4/200
858/858 - 4s - 4ms/step - accuracy: 0.7325 - loss: 0.5526
Epoch 5/200
858/858 - 1s - 2ms/step - accuracy: 0.7319 - loss: 0.5520
Epoch 6/200
858/858 - 3s - 3ms/step - accuracy: 0.7342 - loss: 0.5508
Epoch 7/200
858/858 - 1s - 2ms/step - accuracy: 0.7328 - loss: 0.5505
Epoch 8/200
858/858 - 2s - 2ms/step - accuracy: 0.7330 - loss: 0.5493
Epoch 9/200
858/858 - 2s - 3ms/step - accuracy: 0.7334 - loss: 0.5493
Epoch 10/200
858/858 - 2s - 2ms/step - accuracy: 0.7348 - loss: 0.5477
Epoch 11/200
858/858 - 1s - 2ms/step - accuracy: 0.7340 - loss: 0.5475
Epoch 12/200
858/858 - 3s - 3ms/step - accuracy: 0.7357 - loss: 0.5468
Epoch 13/200
858/858 - 3s - 3ms/step - accuracy: 0.7350 - loss: 0.5465
Epoch 14/200
858/858 - 1s - 2ms/step - accuracy: 0.7354 - loss: 0.5462
Epoch 15/200
85

In [26]:
# Evaluate model
loss3, accuracy3 = nn3.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 3 - Loss: {loss3}, Accuracy: {accuracy3}")

215/215 - 0s - 2ms/step - accuracy: 0.7290 - loss: 0.6031
Attempt 3 - Loss: 0.6031057834625244, Accuracy: 0.7290087342262268


In [27]:
# Save the best model (nn3)
nn3.save("AlphabetSoupCharity_Optimization.h5")

