In [2]:
# Import dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Fetch the raw charity applications
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Remove the EIN and NAME columns, which this run does not use as features
application_df = application_df.drop(['EIN', 'NAME'], axis=1)

# For every text column with more than 10 distinct values, fold the categories
# that occur fewer than 10 times into a single 'Other' bucket
for column in application_df.columns:
    current = application_df[column]
    # Skip anything that is not a high-cardinality categorical column
    if current.dtype != 'object' or current.nunique() <= 10:
        continue
    value_counts = current.value_counts()
    rare_values = value_counts[value_counts < 10].index
    application_df[column] = current.replace(rare_values, 'Other')

# Expand the categorical columns into indicator columns
application_df_encoded = pd.get_dummies(application_df)

# Target (y) is IS_SUCCESSFUL; every other encoded column is a feature (X)
y = application_df_encoded['IS_SUCCESSFUL']
X = application_df_encoded.drop('IS_SUCCESSFUL', axis=1)

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)




In [3]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Seed the Python, NumPy and TensorFlow random generators so repeated runs of
# this cell start from the same random state
tf.keras.utils.set_random_seed(42)

# Define the number of input features
number_input_features = X_train_scaled.shape[1]

# Define the model architecture.
# The input width is declared with an explicit Input object instead of passing
# `input_dim` to the first Dense layer, which recent Keras versions warn about.
nn = tf.keras.models.Sequential()
nn.add(tf.keras.Input(shape=(number_input_features,)))

# First hidden layer (64 nodes, relu activation)
nn.add(tf.keras.layers.Dense(units=64, activation='relu'))

# Second hidden layer (32 nodes, relu activation)
nn.add(tf.keras.layers.Dense(units=32, activation='relu'))

# Third hidden layer (16 nodes, sigmoid activation for variety)
nn.add(tf.keras.layers.Dense(units=16, activation='sigmoid'))

# Fourth hidden layer (8 nodes, relu activation)
nn.add(tf.keras.layers.Dense(units=8, activation='relu'))

# Output layer (1 node, sigmoid activation for binary classification)
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define callbacks: stop once val_loss has not improved for 10 epochs (restoring
# the best weights), and write the best-so-far model to disk whenever val_loss improves
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint('AlphabetSoupCharity_Optimization.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Train the model, holding out 20% of the training rows for validation
history = nn.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m682/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.6950 - loss: 0.5972
Epoch 1: val_loss improved from inf to 0.54531, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6952 - loss: 0.5970 - val_accuracy: 0.7374 - val_loss: 0.5453
Epoch 2/100
[1m671/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7296 - loss: 0.5524
Epoch 2: val_loss did not improve from 0.54531
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7296 - loss: 0.5524 - val_accuracy: 0.7383 - val_loss: 0.5460
Epoch 3/100
[1m655/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7284 - loss: 0.5540
Epoch 3: val_loss improved from 0.54531 to 0.54277, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7285 - loss: 0.5538 - val_accuracy: 0.7416 - val_loss: 0.5428
Epoch 4/100
[1m661/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7323 - loss: 0.5475
Epoch 4: val_loss improved from 0.54277 to 0.54127, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7322 - loss: 0.5476 - val_accuracy: 0.7440 - val_loss: 0.5413
Epoch 5/100
[1m666/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7334 - loss: 0.5469
Epoch 5: val_loss improved from 0.54127 to 0.54044, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7333 - loss: 0.5469 - val_accuracy: 0.7403 - val_loss: 0.5404
Epoch 6/100
[1m681/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7387 - loss: 0.5367
Epoch 6: val_loss did not improve from 0.54044
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7386 - loss: 0.5368 - val_accuracy: 0.7323 - val_loss: 0.5432
Epoch 7/100
[1m683/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7336 - loss: 0.5472
Epoch 7: val_loss did not improve from 0.54044
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7336 - loss: 0.5472 - val_accuracy: 0.7402 - val_loss: 0.5436
Epoch 8/100
[1m676/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7311 - loss: 0.5474
Epoch 8: val_loss did not improve from 0.54044
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7328 - loss: 0.5469 - val_accuracy: 0.7414 - val_loss: 0.5404
Epoch 11/100
[1m685/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7345 - loss: 0.5415
Epoch 11: val_loss did not improve from 0.54039
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7345 - loss: 0.5415 - val_accuracy: 0.7321 - val_loss: 0.5414
Epoch 12/100
[1m685/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7297 - loss: 0.5448
Epoch 12: val_loss did not improve from 0.54039
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7297 - loss: 0.5448 - val_accuracy: 0.7382 - val_loss: 0.5418
Epoch 13/100
[1m680/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7338 - loss: 0.5407
Epoch 13: val_loss did not improve from 0.54039
[1m686/686[0m [32m━━━━━━━━━━━━━━━━



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7345 - loss: 0.5399 - val_accuracy: 0.7411 - val_loss: 0.5393
Epoch 16/100
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7293 - loss: 0.5454
Epoch 16: val_loss did not improve from 0.53927
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.7293 - loss: 0.5454 - val_accuracy: 0.7411 - val_loss: 0.5411
Epoch 17/100
[1m680/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7365 - loss: 0.5404
Epoch 17: val_loss did not improve from 0.53927
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7364 - loss: 0.5404 - val_accuracy: 0.7418 - val_loss: 0.5410
Epoch 18/100
[1m676/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.7383 - loss: 0.5350
Epoch 18: val_loss did not improve from 0.53927
[1m686/686[0m [32m━━━━━━━━━━━━━━━━



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7348 - loss: 0.5373 - val_accuracy: 0.7429 - val_loss: 0.5392
Epoch 24/100
[1m683/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7446 - loss: 0.5295
Epoch 24: val_loss did not improve from 0.53922
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7445 - loss: 0.5296 - val_accuracy: 0.7383 - val_loss: 0.5416
Epoch 25/100
[1m685/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7377 - loss: 0.5362
Epoch 25: val_loss improved from 0.53922 to 0.53918, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7377 - loss: 0.5362 - val_accuracy: 0.7398 - val_loss: 0.5392
Epoch 26/100
[1m670/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7381 - loss: 0.5406
Epoch 26: val_loss did not improve from 0.53918
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7381 - loss: 0.5405 - val_accuracy: 0.7405 - val_loss: 0.5407
Epoch 27/100
[1m678/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7361 - loss: 0.5362
Epoch 27: val_loss did not improve from 0.53918
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7361 - loss: 0.5362 - val_accuracy: 0.7369 - val_loss: 0.5422
Epoch 28/100
[1m682/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.7367 - loss: 0.5356
Epoch 28: val_loss did not improve from 0.53918
[1m686/686[0m [32m━━━━━━━━━━━━━━━━

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score  # Importing for better clarity in accuracy calculation

# Build a 100-tree random forest and fit it on the scaled training data
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Predict labels for the held-out test rows
y_pred = rf_model.predict(X_test_scaled)

# Fraction of test rows whose predicted label matches the true label
rf_accuracy = accuracy_score(y_test, y_pred)

# Report the accuracy as a percentage
print(f"Random Forest Test Accuracy: {rf_accuracy * 100:.2f}%")


Random Forest Test Accuracy: 71.43%


In [7]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Read the charity data and discard the EIN and NAME columns in one step
application_df = pd.read_csv(
    "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
).drop(columns=['EIN', 'NAME'])

# One-hot encode every object-typed (text) column
categorical_cols = application_df.select_dtypes(include=["object"]).columns
application_df_encoded = pd.get_dummies(application_df, columns=categorical_cols)

# Target (y) is IS_SUCCESSFUL; the remaining columns are the features (X)
y = application_df_encoded["IS_SUCCESSFUL"]
X = application_df_encoded.drop(columns=["IS_SUCCESSFUL"])

# Keep 80% of the rows for training and hold out 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training rows only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed the Python, NumPy and TensorFlow random generators so repeated runs of
# this cell start from the same random state
tf.keras.utils.set_random_seed(42)

# Number of features in the data
number_input_features = X_train_scaled.shape[1]

# Build the neural network model.
# The input width is declared with an explicit Input object instead of passing
# `input_dim` to the first Dense layer, which recent Keras versions warn about.
nn = tf.keras.models.Sequential()
nn.add(tf.keras.Input(shape=(number_input_features,)))
nn.add(tf.keras.layers.Dense(units=64, activation="relu"))  # First layer: 64 nodes
nn.add(tf.keras.layers.Dense(units=32, activation="relu"))  # Second layer: 32 nodes
nn.add(tf.keras.layers.Dense(units=16, activation="sigmoid"))  # Third layer: 16 nodes, sigmoid for variety
nn.add(tf.keras.layers.Dense(units=8, activation="relu"))  # Fourth layer: 8 nodes
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))  # Output layer: 1 node for binary classification (yes/no)

# Show the model structure
nn.summary()

# Compile the model (set up how it learns)
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop training early if val_loss has not improved for 10 epochs, and save the
# best version of the model seen so far
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint("AlphabetSoupCharity_Optimization.h5", save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Train the model (batch_size is not set, so the Keras default is used)
history = nn.fit(
    X_train_scaled, y_train,
    epochs=100,  # Train for up to 100 epochs
    validation_split=0.2,  # Use 20% of training data for validation
    callbacks=[early_stopping, model_checkpoint],  # Early stopping and model checkpoint
    verbose=1  # Show progress during training
)

# Check the performance of the model on the test data
test_loss, test_acc = nn.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Neural Network Test Accuracy: {test_acc * 100:.2f}%")

# Fit a 100-tree random forest on the same scaled training data as a
# point of comparison for the neural network
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Mean accuracy of the forest on the held-out test rows
rf_accuracy = rf_model.score(X_test_scaled, y_test)
print(f"Random Forest Test Accuracy: {rf_accuracy * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m659/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.6692 - loss: 0.6192
Epoch 1: val_loss improved from inf to 0.54791, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.6709 - loss: 0.6177 - val_accuracy: 0.7403 - val_loss: 0.5479
Epoch 2/100
[1m665/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7286 - loss: 0.5587
Epoch 2: val_loss improved from 0.54791 to 0.54367, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7286 - loss: 0.5586 - val_accuracy: 0.7422 - val_loss: 0.5437
Epoch 3/100
[1m668/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7313 - loss: 0.5498
Epoch 3: val_loss did not improve from 0.54367
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7313 - loss: 0.5498 - val_accuracy: 0.7340 - val_loss: 0.5485
Epoch 4/100
[1m680/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7224 - loss: 0.5538
Epoch 4: val_loss did not improve from 0.54367
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7225 - loss: 0.5537 - val_accuracy: 0.7400 - val_loss: 0.5454
Epoch 5/100
[1m661/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7342 - loss: 0.5418
Epoch 5: val_loss improved from 0.54367 to 0.54329, saving model to AlphabetSoupCharity_Op



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7340 - loss: 0.5419 - val_accuracy: 0.7413 - val_loss: 0.5433
Epoch 6/100
[1m678/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7326 - loss: 0.5441
Epoch 6: val_loss improved from 0.54329 to 0.54099, saving model to AlphabetSoupCharity_Optimization.h5




[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7326 - loss: 0.5441 - val_accuracy: 0.7407 - val_loss: 0.5410
Epoch 7/100
[1m683/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7319 - loss: 0.5494
Epoch 7: val_loss did not improve from 0.54099
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7319 - loss: 0.5493 - val_accuracy: 0.7403 - val_loss: 0.5422
Epoch 8/100
[1m663/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7301 - loss: 0.5483
Epoch 8: val_loss did not improve from 0.54099
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7301 - loss: 0.5482 - val_accuracy: 0.7400 - val_loss: 0.5433
Epoch 9/100
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7334 - loss: 0.5439
Epoch 9: val_loss did not improve from 0.54099
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.7348 - loss: 0.5422 - val_accuracy: 0.7411 - val_loss: 0.5405
Epoch 14/100
[1m683/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7360 - loss: 0.5398
Epoch 14: val_loss did not improve from 0.54052
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7360 - loss: 0.5398 - val_accuracy: 0.7329 - val_loss: 0.5471
Epoch 15/100
[1m679/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7351 - loss: 0.5422
Epoch 15: val_loss did not improve from 0.54052
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7351 - loss: 0.5422 - val_accuracy: 0.7400 - val_loss: 0.5450
Epoch 16/100
[1m678/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7347 - loss: 0.5422
Epoch 16: val_loss did not improve from 0.54052
[1m686/686[0m [32m━━━━━━━━━━━━━━━━



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7297 - loss: 0.5459 - val_accuracy: 0.7425 - val_loss: 0.5392
Epoch 19/100
[1m684/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7350 - loss: 0.5404
Epoch 19: val_loss did not improve from 0.53922
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.7350 - loss: 0.5404 - val_accuracy: 0.7402 - val_loss: 0.5416
Epoch 20/100
[1m677/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7376 - loss: 0.5394
Epoch 20: val_loss did not improve from 0.53922
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7375 - loss: 0.5395 - val_accuracy: 0.7407 - val_loss: 0.5431
Epoch 21/100
[1m659/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.7307 - loss: 0.5458
Epoch 21: val_loss did not improve from 0.53922
[1m686/686[0m [32m━━━━━━━━━━━━━━━━



[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7344 - loss: 0.5378 - val_accuracy: 0.7409 - val_loss: 0.5385
Epoch 28/100
[1m664/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - accuracy: 0.7363 - loss: 0.5375
Epoch 28: val_loss did not improve from 0.53853
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7363 - loss: 0.5376 - val_accuracy: 0.7389 - val_loss: 0.5414
Epoch 29/100
[1m669/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7340 - loss: 0.5397
Epoch 29: val_loss did not improve from 0.53853
[1m686/686[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7341 - loss: 0.5396 - val_accuracy: 0.7402 - val_loss: 0.5411
Epoch 30/100
[1m664/686[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7337 - loss: 0.5417
Epoch 30: val_loss did not improve from 0.53853
[1m686/686[0m [32m━━━━━━━━━━━━━━━━

In [8]:
# Import necessary dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Load the charity data
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Drop the non-beneficial ID column (NAME is kept and binned below)
application_df = application_df.drop(["EIN"], axis=1)

# Each binning step below swaps every rare label for "Other" in a single
# vectorised pass (isin + mask). Calling replace() once per rare label, as
# before, re-scanned the whole column for every label.

# Replace NAME values with "Other" for those occurring less than or equal to 5 times
name_counts = application_df['NAME'].value_counts()
names_to_replace = list(name_counts[name_counts <= 5].index)
application_df['NAME'] = application_df['NAME'].mask(
    application_df['NAME'].isin(names_to_replace), "Other"
)

# Replace APPLICATION_TYPE values with "Other" for those occurring less than 500 times
application_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = list(application_counts[application_counts < 500].index)
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(application_types_to_replace), "Other"
)

# Replace CLASSIFICATION values with "Other" for those occurring less than 1000 times
class_counts = application_df['CLASSIFICATION'].value_counts()
classes_to_replace = list(class_counts[class_counts < 1000].index)
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(classes_to_replace), "Other"
)

# Names of the object-typed (categorical) columns; kept for reference only,
# since get_dummies below is called without an explicit column list
application_cat = application_df.dtypes[application_df.dtypes == "object"].index.tolist()

# Perform one-hot encoding for categorical variables
application_with_dummies_df = pd.get_dummies(application_df)

# Split the preprocessed data into features and target (as NumPy arrays)
X = application_with_dummies_df.drop(["IS_SUCCESSFUL"], axis='columns').values
y = application_with_dummies_df["IS_SUCCESSFUL"].values

# Split the data into training and testing sets. No test_size is passed, so
# scikit-learn's default applies: 75% training / 25% testing (not 80/20).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Create the StandardScaler, fit it on the training rows only, and apply the
# same scaling to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed the Python, NumPy and TensorFlow random generators so repeated runs of
# this cell start from the same random state
tf.keras.utils.set_random_seed(78)

# Define the neural network model
# Feature count is read from the scaled training matrix that is actually fed to fit()
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 10

nn = tf.keras.models.Sequential()
# Declare the input width with an explicit Input object instead of passing
# `input_dim` to the first Dense layer, which recent Keras versions warn about
nn.add(tf.keras.Input(shape=(number_input_features,)))
# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, activation="relu"))
# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"))
# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="sigmoid"))
# Output layer (single sigmoid unit for the binary target)
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model for a fixed 100 epochs (no validation split or callbacks here)
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Export the trained model to an HDF5 file
nn.save("AlphabetSoupCharity_Optimization.h5")

# Alternative model: a 128-tree random forest, built and fitted on the scaled
# training data in one expression
rf_model = RandomForestClassifier(n_estimators=128, random_state=78).fit(
    X_train_scaled, y_train
)

# Predicted labels for the held-out test rows
y_pred = rf_model.predict(X_test_scaled)

# Report the share of test rows the forest labelled correctly
print(f"Random Forest Model Accuracy: {accuracy_score(y_test, y_pred):.3f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.7244 - loss: 0.5542
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.7942 - loss: 0.4443
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8004 - loss: 0.4313
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7989 - loss: 0.4290
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8011 - loss: 0.4223
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7961 - loss: 0.4285
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7965 - loss: 0.4258
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7987 - loss: 0.4238
Epoch 9/100
[1m804/804[0m [32



Loss: 0.45031166076660156, Accuracy: 0.788688063621521
Random Forest Model Accuracy: 0.776
