<a href="https://colab.research.google.com/github/widchy95/deep-learning-challenge/blob/main/AlphabetSoupCharity_Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# Load data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Drop irrelevant columns
application_df = application_df.drop(columns=["EIN", "NAME"])

In [2]:
# Bin APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_type_counts[application_type_counts < 1000].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, "Other")

# Bin CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = classification_counts[classification_counts < 1000].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")

In [3]:
# One-hot encode categorical variables
application_df = pd.get_dummies(application_df)

In [4]:
# Split features and target
X = application_df.drop(columns=["IS_SUCCESSFUL"])
y = application_df["IS_SUCCESSFUL"]

In [5]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Attempt 1: Increase Neurons and Layers

In [6]:
# Define model
nn1 = tf.keras.models.Sequential()
nn1.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=X_train_scaled.shape[1]))
nn1.add(tf.keras.layers.Dense(units=80, activation="relu"))
nn1.add(tf.keras.layers.Dense(units=50, activation="relu"))
nn1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile and train model
nn1.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history1 = nn1.fit(X_train_scaled, y_train, epochs=50, batch_size=32, verbose=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
858/858 - 5s - 5ms/step - accuracy: 0.7199 - loss: 0.5727
Epoch 2/50
858/858 - 2s - 2ms/step - accuracy: 0.7302 - loss: 0.5580
Epoch 3/50
858/858 - 1s - 2ms/step - accuracy: 0.7304 - loss: 0.5545
Epoch 4/50
858/858 - 2s - 2ms/step - accuracy: 0.7304 - loss: 0.5532
Epoch 5/50
858/858 - 2s - 2ms/step - accuracy: 0.7321 - loss: 0.5523
Epoch 6/50
858/858 - 2s - 2ms/step - accuracy: 0.7325 - loss: 0.5505
Epoch 7/50
858/858 - 2s - 2ms/step - accuracy: 0.7346 - loss: 0.5501
Epoch 8/50
858/858 - 1s - 2ms/step - accuracy: 0.7340 - loss: 0.5493
Epoch 9/50
858/858 - 1s - 2ms/step - accuracy: 0.7344 - loss: 0.5487
Epoch 10/50
858/858 - 1s - 2ms/step - accuracy: 0.7348 - loss: 0.5477
Epoch 11/50
858/858 - 1s - 2ms/step - accuracy: 0.7355 - loss: 0.5474
Epoch 12/50
858/858 - 1s - 2ms/step - accuracy: 0.7349 - loss: 0.5474
Epoch 13/50
858/858 - 1s - 2ms/step - accuracy: 0.7345 - loss: 0.5469
Epoch 14/50
858/858 - 1s - 2ms/step - accuracy: 0.7356 - loss: 0.5464
Epoch 15/50
858/858 - 1s - 2m

In [7]:
# Evaluate model
loss1, accuracy1 = nn1.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 1 - Loss: {loss1}, Accuracy: {accuracy1}")

215/215 - 1s - 3ms/step - accuracy: 0.7265 - loss: 0.5698
Attempt 1 - Loss: 0.5697861313819885, Accuracy: 0.7265306115150452


# Attempt 2: Adjust Activation Functions and Add Dropout

In [8]:
# Define model
nn2 = tf.keras.models.Sequential()
nn2.add(tf.keras.layers.Dense(units=128, activation="tanh", input_dim=X_train_scaled.shape[1]))
nn2.add(tf.keras.layers.Dropout(0.2))
nn2.add(tf.keras.layers.Dense(units=64, activation="relu"))
nn2.add(tf.keras.layers.Dense(units=32, activation="relu"))
nn2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile and train model
nn2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history2 = nn2.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=2)

Epoch 1/100
858/858 - 3s - 4ms/step - accuracy: 0.7188 - loss: 0.5711
Epoch 2/100
858/858 - 2s - 2ms/step - accuracy: 0.7275 - loss: 0.5601
Epoch 3/100
858/858 - 2s - 2ms/step - accuracy: 0.7294 - loss: 0.5571
Epoch 4/100
858/858 - 2s - 2ms/step - accuracy: 0.7315 - loss: 0.5563
Epoch 5/100
858/858 - 2s - 2ms/step - accuracy: 0.7305 - loss: 0.5545
Epoch 6/100
858/858 - 2s - 2ms/step - accuracy: 0.7312 - loss: 0.5537
Epoch 7/100
858/858 - 2s - 2ms/step - accuracy: 0.7309 - loss: 0.5525
Epoch 8/100
858/858 - 2s - 2ms/step - accuracy: 0.7326 - loss: 0.5523
Epoch 9/100
858/858 - 2s - 2ms/step - accuracy: 0.7324 - loss: 0.5516
Epoch 10/100
858/858 - 2s - 2ms/step - accuracy: 0.7329 - loss: 0.5505
Epoch 11/100
858/858 - 2s - 2ms/step - accuracy: 0.7329 - loss: 0.5505
Epoch 12/100
858/858 - 2s - 2ms/step - accuracy: 0.7339 - loss: 0.5501
Epoch 13/100
858/858 - 2s - 2ms/step - accuracy: 0.7332 - loss: 0.5501
Epoch 14/100
858/858 - 2s - 2ms/step - accuracy: 0.7345 - loss: 0.5492
Epoch 15/100
85

In [9]:
# Evaluate model
loss2, accuracy2 = nn2.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 2 - Loss: {loss2}, Accuracy: {accuracy2}")

215/215 - 1s - 3ms/step - accuracy: 0.7284 - loss: 0.5703
Attempt 2 - Loss: 0.570280909538269, Accuracy: 0.7284256815910339


# Attempt 3: Longer Training with Early Stopping

In [10]:
# Define model
nn3 = tf.keras.models.Sequential()
nn3.add(tf.keras.layers.Dense(units=150, activation="relu", input_dim=X_train_scaled.shape[1]))
nn3.add(tf.keras.layers.Dense(units=100, activation="relu"))
nn3.add(tf.keras.layers.Dense(units=50, activation="relu"))
nn3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile model
nn3.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Add EarlyStopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5)

# Train model
history3 = nn3.fit(X_train_scaled, y_train, epochs=200, batch_size=32, callbacks=[early_stopping], verbose=2)

Epoch 1/200
858/858 - 4s - 5ms/step - accuracy: 0.7228 - loss: 0.5691
Epoch 2/200
858/858 - 2s - 2ms/step - accuracy: 0.7298 - loss: 0.5580
Epoch 3/200
858/858 - 2s - 2ms/step - accuracy: 0.7315 - loss: 0.5545
Epoch 4/200
858/858 - 2s - 2ms/step - accuracy: 0.7323 - loss: 0.5529
Epoch 5/200
858/858 - 2s - 2ms/step - accuracy: 0.7317 - loss: 0.5519
Epoch 6/200
858/858 - 2s - 2ms/step - accuracy: 0.7329 - loss: 0.5506
Epoch 7/200
858/858 - 2s - 2ms/step - accuracy: 0.7340 - loss: 0.5505
Epoch 8/200
858/858 - 2s - 2ms/step - accuracy: 0.7338 - loss: 0.5491
Epoch 9/200
858/858 - 2s - 2ms/step - accuracy: 0.7342 - loss: 0.5486
Epoch 10/200
858/858 - 2s - 2ms/step - accuracy: 0.7342 - loss: 0.5485
Epoch 11/200
858/858 - 2s - 2ms/step - accuracy: 0.7344 - loss: 0.5475
Epoch 12/200
858/858 - 2s - 2ms/step - accuracy: 0.7356 - loss: 0.5479
Epoch 13/200
858/858 - 2s - 2ms/step - accuracy: 0.7347 - loss: 0.5467
Epoch 14/200
858/858 - 2s - 2ms/step - accuracy: 0.7351 - loss: 0.5467
Epoch 15/200
85

In [11]:
# Evaluate model
loss3, accuracy3 = nn3.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Attempt 3 - Loss: {loss3}, Accuracy: {accuracy3}")

215/215 - 1s - 3ms/step - accuracy: 0.7286 - loss: 0.5747
Attempt 3 - Loss: 0.5747330784797668, Accuracy: 0.7285714149475098


In [12]:
# Save the best model (nn3)
nn3.save("AlphabetSoupCharity_Optimization.h5")

