In [5]:
# Import our dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import numpy as np
import pandas as pd
import tensorflow as tf

# Load the raw charity application records from the project-relative CSV file.
charity_csv_path = "resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)

In [2]:
# EIN and NAME are ID columns with no predictive value, so remove them.
id_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=id_columns)

In [3]:
# APPLICATION_TYPE values seen fewer than 500 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
apptype_counts = application_df['APPLICATION_TYPE'].value_counts()
replace_application = list(apptype_counts[apptype_counts < 500].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(replace_application), "Other"
)

In [4]:
# CLASSIFICATION values seen fewer than 1800 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
class_counts = application_df['CLASSIFICATION'].value_counts()
replace_class = list(class_counts[class_counts < 1800].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(replace_class), "Other"
)

In [5]:
# Collect the names of every object-dtype (categorical) column for encoding.
application_cat = [
    column for column, dtype in application_df.dtypes.items() if dtype == 'object'
]

In [6]:
# Create a OneHotEncoder that returns a dense array. scikit-learn renamed the
# `sparse` argument to `sparse_output` and later removed `sparse`, so try the
# new keyword first and fall back for older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the categorical columns. Reusing the source frame's index
# keeps the index-based merge in the following cell row-aligned even when the
# index is not the default 0..n-1 range; the encoded feature names become the
# column labels.
encode_df = pd.DataFrame(
    enc.fit_transform(application_df[application_cat]),
    index=application_df.index,
    columns=enc.get_feature_names_out(application_cat),
)

In [7]:
# Attach the one-hot columns by index, then discard the original categorical columns.
application_df = (
    application_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=application_cat)
)

In [8]:
# Target vector: the IS_SUCCESSFUL column.
target_column = 'IS_SUCCESSFUL'
y = application_df[target_column].values
# Feature matrix: every remaining column except STATUS.
X = application_df.drop(columns=[target_column, 'STATUS']).values

# Hold out a test partition; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [9]:
# Fit a StandardScaler on the training rows only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [10]:
# Network dimensions: input width is the column count of the scaled training
# matrix; two hidden layers of 80 and 30 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

# Build the stack in one call: two ReLU hidden layers feeding a sigmoid output unit.
nn1 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn1.summary()

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                3440      
                                                                 
 dense_1 (Dense)             (None, 30)                2430      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 5,901
Trainable params: 5,901
Non-trainable params: 0
_________________________________________________________________


2022-01-01 16:07:24.366929: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-01 16:07:24.367049: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn1/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [13]:
# Train for 100 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn1.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100


2022-01-01 16:07:24.647063: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-01-01 16:07:24.794324: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 

In [14]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn1.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 16:14:48.550367: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5540 - accuracy: 0.7251 - 909ms/epoch - 3ms/step
Loss: 0.554018497467041, Accuracy: 0.7251311540603638


In [24]:
# Network dimensions: input width is the column count of the scaled training
# matrix; three hidden layers of 160, 40 and 80 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 40
hidden_nodes_layer3 = 80

# Build the stack in one call: three ReLU hidden layers feeding a sigmoid output unit.
nn2 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn2.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_26 (Dense)            (None, 160)               6880      
                                                                 
 dense_27 (Dense)            (None, 40)                6440      
                                                                 
 dense_28 (Dense)            (None, 80)                3280      
                                                                 
 dense_29 (Dense)            (None, 1)                 81        
                                                                 
Total params: 16,681
Trainable params: 16,681
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [26]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn2/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [27]:
# Train for 100 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn2.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
  1/804 [..............................] - ETA: 3:52 - loss: 0.7324 - accuracy: 0.3750

2022-01-01 16:45:28.413091: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 

In [28]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn2.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 16:53:33.122947: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5686 - accuracy: 0.7254 - 958ms/epoch - 4ms/step
Loss: 0.5686084032058716, Accuracy: 0.7253644466400146


In [31]:
# Network dimensions: input width is the column count of the scaled training
# matrix; four hidden layers of 160, 80, 40 and 20 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 40
hidden_nodes_layer4 = 20

# Build the stack in one call: four SELU hidden layers feeding a sigmoid output unit.
nn3 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='selu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='selu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='selu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer4, activation='selu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn3.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_40 (Dense)            (None, 160)               6880      
                                                                 
 dense_41 (Dense)            (None, 80)                12880     
                                                                 
 dense_42 (Dense)            (None, 40)                3240      
                                                                 
 dense_43 (Dense)            (None, 20)                820       
                                                                 
 dense_44 (Dense)            (None, 1)                 21        
                                                                 
Total params: 23,841
Trainable params: 23,841
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [33]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn3/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [34]:
# Train for 100 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn3.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
  1/804 [..............................] - ETA: 4:22 - loss: 1.0974 - accuracy: 0.3125

2022-01-01 16:56:48.174381: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 

In [35]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn3.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 17:14:09.590517: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 2s - loss: 0.5563 - accuracy: 0.7266 - 2s/epoch - 6ms/step
Loss: 0.5563240647315979, Accuracy: 0.7266471982002258


In [36]:
# Reload the raw charity records from disk so preprocessing restarts from scratch.
charity_csv_path = "resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)

In [37]:
# EIN and NAME are ID columns with no predictive value, so remove them.
id_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=id_columns)

In [39]:
# APPLICATION_TYPE values seen fewer than 16 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
apptype_counts = application_df['APPLICATION_TYPE'].value_counts()
replace_application = list(apptype_counts[apptype_counts < 16].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(replace_application), "Other"
)

In [44]:
# CLASSIFICATION values seen fewer than 50 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
class_counts = application_df['CLASSIFICATION'].value_counts()
replace_class = list(class_counts[class_counts < 50].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(replace_class), "Other"
)

In [45]:
# Collect the names of every object-dtype (categorical) column for encoding.
application_cat = [
    column for column, dtype in application_df.dtypes.items() if dtype == 'object'
]

In [46]:
# Create a OneHotEncoder that returns a dense array. scikit-learn renamed the
# `sparse` argument to `sparse_output` and later removed `sparse`, so try the
# new keyword first and fall back for older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the categorical columns. Reusing the source frame's index
# keeps the index-based merge in the following cell row-aligned even when the
# index is not the default 0..n-1 range; the encoded feature names become the
# column labels.
encode_df = pd.DataFrame(
    enc.fit_transform(application_df[application_cat]),
    index=application_df.index,
    columns=enc.get_feature_names_out(application_cat),
)

In [47]:
# Attach the one-hot columns by index, then discard the original categorical columns.
application_df = (
    application_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=application_cat)
)

In [48]:
# Target vector: the IS_SUCCESSFUL column.
target_column = 'IS_SUCCESSFUL'
y = application_df[target_column].values
# Feature matrix: every remaining column except STATUS.
X = application_df.drop(columns=[target_column, 'STATUS']).values

# Hold out a test partition; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [49]:
# Fit a StandardScaler on the training rows only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [50]:
# Network dimensions: input width is the column count of the scaled training
# matrix; two hidden layers of 160 and 80 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80

# Build the stack in one call: two SELU hidden layers feeding a sigmoid output unit.
nn4 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='selu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='selu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn4.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_45 (Dense)            (None, 160)               9120      
                                                                 
 dense_46 (Dense)            (None, 80)                12880     
                                                                 
 dense_47 (Dense)            (None, 1)                 81        
                                                                 
Total params: 22,081
Trainable params: 22,081
Non-trainable params: 0
_________________________________________________________________


In [51]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn4.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [52]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn4/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [53]:
# Train for 50 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn4.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    callbacks=[cp_callback],
)

Epoch 1/50
  1/804 [..............................] - ETA: 3:47 - loss: 0.7222 - accuracy: 0.4375

2022-01-01 17:25:55.043474: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 00045: saving mode

In [54]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn4.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 17:31:53.733716: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5526 - accuracy: 0.7273 - 1s/epoch - 5ms/step
Loss: 0.5526371598243713, Accuracy: 0.7273468971252441


In [56]:
# Network dimensions: input width is the column count of the scaled training
# matrix; four hidden layers of 160, 80, 40 and 20 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 40
hidden_nodes_layer4 = 20

# Build the stack in one call: four ReLU hidden layers feeding a sigmoid output unit.
nn5 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='relu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer4, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn5.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_49 (Dense)            (None, 160)               9120      
                                                                 
 dense_50 (Dense)            (None, 80)                12880     
                                                                 
 dense_51 (Dense)            (None, 40)                3240      
                                                                 
 dense_52 (Dense)            (None, 20)                820       
                                                                 
 dense_53 (Dense)            (None, 1)                 21        
                                                                 
Total params: 26,081
Trainable params: 26,081
Non-trainable params: 0
_________________________________________________________________


In [57]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn5.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [58]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn5/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [59]:
# Train for 50 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn5.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    callbacks=[cp_callback],
)

Epoch 1/50


2022-01-01 17:43:01.287027: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 00045: saving mode

In [60]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn5.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 17:47:15.937413: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5600 - accuracy: 0.7256 - 993ms/epoch - 4ms/step
Loss: 0.5600066781044006, Accuracy: 0.7255976796150208


In [19]:
# Reload the raw charity records from disk so preprocessing restarts from scratch.
charity_csv_path = "resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)

In [20]:
# EIN and NAME are ID columns with no predictive value, so remove them.
id_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=id_columns)

In [21]:
# APPLICATION_TYPE values seen fewer than 16 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
apptype_counts = application_df['APPLICATION_TYPE'].value_counts()
replace_application = list(apptype_counts[apptype_counts < 16].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(replace_application), "Other"
)

In [22]:
# CLASSIFICATION values seen fewer than 50 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
class_counts = application_df['CLASSIFICATION'].value_counts()
replace_class = list(class_counts[class_counts < 50].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(replace_class), "Other"
)

In [24]:
# AFFILIATION values seen fewer than 15000 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
affil_counts = application_df['AFFILIATION'].value_counts()
replace_affil = list(affil_counts[affil_counts < 15000].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['AFFILIATION'] = application_df['AFFILIATION'].mask(
    application_df['AFFILIATION'].isin(replace_affil), "Other"
)

In [25]:
# Collect the names of every object-dtype (categorical) column for encoding.
application_cat = [
    column for column, dtype in application_df.dtypes.items() if dtype == 'object'
]

In [26]:
# Create a OneHotEncoder that returns a dense array. scikit-learn renamed the
# `sparse` argument to `sparse_output` and later removed `sparse`, so try the
# new keyword first and fall back for older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the categorical columns. Reusing the source frame's index
# keeps the index-based merge in the following cell row-aligned even when the
# index is not the default 0..n-1 range; the encoded feature names become the
# column labels.
encode_df = pd.DataFrame(
    enc.fit_transform(application_df[application_cat]),
    index=application_df.index,
    columns=enc.get_feature_names_out(application_cat),
)

In [27]:
# Attach the one-hot columns by index, then discard the original categorical columns.
application_df = (
    application_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=application_cat)
)

In [28]:
# Target vector: the IS_SUCCESSFUL column.
target_column = 'IS_SUCCESSFUL'
y = application_df[target_column].values
# Feature matrix: every remaining column except STATUS.
X = application_df.drop(columns=[target_column, 'STATUS']).values

# Hold out a test partition; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [29]:
# Fit a StandardScaler on the training rows only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [30]:
# Network dimensions: input width is the column count of the scaled training
# matrix; two hidden layers of 160 and 80 units.
n_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80

# Build the stack in one call: two SELU hidden layers feeding a sigmoid output unit.
nn6 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='selu'),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='selu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts.
nn6.summary()

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 160)               8640      
                                                                 
 dense_1 (Dense)             (None, 80)                12880     
                                                                 
 dense_2 (Dense)             (None, 1)                 81        
                                                                 
Total params: 21,601
Trainable params: 21,601
Non-trainable params: 0
_________________________________________________________________


2022-01-01 18:17:28.206010: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-01 18:17:28.206133: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [31]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn6.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [32]:
# Write checkpoints inside this model's own folder so that training runs of
# different models do not overwrite each other's weight files.
checkpoint_dir = 'checkpoints/nn6/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq`; confirm against the installed TensorFlow version.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [33]:
# Train for 50 epochs with the checkpoint callback; keep the returned history object.
fit_model = nn6.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    callbacks=[cp_callback],
)

2022-01-01 18:17:28.406152: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/50


2022-01-01 18:17:28.550467: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 00045: saving mode

In [34]:
# Score the trained network on the held-out test partition and report both metrics.
test_metrics = nn6.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 18:23:20.932771: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5493 - accuracy: 0.7268 - 1s/epoch - 5ms/step
Loss: 0.5492773652076721, Accuracy: 0.7267638444900513


In [35]:
# Reload the raw charity records from disk so preprocessing restarts from scratch.
charity_csv_path = "resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)

In [36]:
# EIN and NAME are ID columns with no predictive value, so remove them.
id_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=id_columns)

In [37]:
# APPLICATION_TYPE values seen fewer than 150 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
apptype_counts = application_df['APPLICATION_TYPE'].value_counts()
replace_application = list(apptype_counts[apptype_counts < 150].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    application_df['APPLICATION_TYPE'].isin(replace_application), "Other"
)

In [38]:
# CLASSIFICATION values seen fewer than 700 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
class_counts = application_df['CLASSIFICATION'].value_counts()
replace_class = list(class_counts[class_counts < 700].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    application_df['CLASSIFICATION'].isin(replace_class), "Other"
)

In [39]:
# AFFILIATION values seen fewer than 15000 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
affil_counts = application_df['AFFILIATION'].value_counts()
replace_affil = list(affil_counts[affil_counts < 15000].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['AFFILIATION'] = application_df['AFFILIATION'].mask(
    application_df['AFFILIATION'].isin(replace_affil), "Other"
)

In [40]:
# USE_CASE values seen fewer than 5000 times are pooled into a single
# "Other" label, which reduces the categories the later one-hot encoding produces.
usecase_counts = application_df['USE_CASE'].value_counts()
replace_usecase = list(usecase_counts[usecase_counts < 5000].index)

# Relabel all rare values in one vectorized pass instead of rescanning the
# column once per rare value.
application_df['USE_CASE'] = application_df['USE_CASE'].mask(
    application_df['USE_CASE'].isin(replace_usecase), "Other"
)

In [41]:
# Bin rare ORGANIZATION values: categories seen fewer than 10000 times become "Other"
org_counts = application_df['ORGANIZATION'].value_counts()
replace_org = org_counts.loc[org_counts < 10000].index.tolist()

# Relabel every rare category in a single vectorized pass
is_rare_org = application_df['ORGANIZATION'].isin(replace_org)
application_df['ORGANIZATION'] = application_df['ORGANIZATION'].mask(is_rare_org, "Other")

In [42]:
# Bin rare INCOME_AMT values: categories seen fewer than 3000 times become "Other"
inc_counts = application_df['INCOME_AMT'].value_counts()
replace_inc = inc_counts.loc[inc_counts < 3000].index.tolist()

# Relabel every rare category in a single vectorized pass
is_rare_inc = application_df['INCOME_AMT'].isin(replace_inc)
application_df['INCOME_AMT'] = application_df['INCOME_AMT'].mask(is_rare_inc, "Other")

In [43]:
# Collect the names of all object-dtype columns; these are the ones to one-hot encode
column_dtypes = application_df.dtypes
application_cat = column_dtypes[column_dtypes == 'object'].index.tolist()

In [44]:
# One-hot encode every categorical column.
# The encoder is left at its default (sparse) output and densified with
# .toarray(): the `sparse=` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and later removed, so passing it fails on current releases.
enc = OneHotEncoder()
encoded_values = enc.fit_transform(application_df[application_cat]).toarray()

# Label the encoded columns with the generated feature names and reuse the
# source row index so the index-based merge in the next cell lines up row for row.
encode_df = pd.DataFrame(
    encoded_values,
    columns=enc.get_feature_names_out(application_cat),
    index=application_df.index,
)

In [45]:
# Attach the one-hot columns by row index, then discard the original categorical columns
merged_df = pd.merge(application_df, encode_df, left_index=True, right_index=True)
application_df = merged_df.drop(columns=application_cat)

In [46]:
# Target array: the IS_SUCCESSFUL column
y = application_df['IS_SUCCESSFUL'].values

# Feature array: every remaining column except STATUS, which this experiment also excludes
feature_df = application_df.drop(columns=['IS_SUCCESSFUL', 'STATUS'])
X = feature_df.values

# Train/test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [47]:
# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (X_scaler.transform(split) for split in (X_train, X_test))

In [48]:
# nn6 architecture: two SELU hidden layers (160 -> 80 units) feeding a single sigmoid output unit.
# The input width is taken from the number of columns in the scaled training data.
n_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80

nn6 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input dimension)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='selu'),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='selu'),
    # Output layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Print the layer-by-layer structure and parameter counts
nn6.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 160)               5440      
                                                                 
 dense_4 (Dense)             (None, 80)                12880     
                                                                 
 dense_5 (Dense)             (None, 1)                 81        
                                                                 
Total params: 18,401
Trainable params: 18,401
Non-trainable params: 0
_________________________________________________________________


In [49]:
# Configure nn6 for training: Adam optimizer, binary cross-entropy loss, accuracy reported as the metric
nn6.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [50]:
# Keep nn6's checkpoints in their own directory. Previously the directory was
# created but the weights were written to the shared 'checkpoints/' folder, so
# each model's run overwrote the files saved by the others.
checkpoint_dir = 'checkpoints/nn6/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is a deprecated ModelCheckpoint argument; it is kept here
# to preserve the every-5-epochs cadence without depending on the batch count.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [51]:
# Fit nn6 on the scaled training data for 50 epochs, checkpointing through cp_callback
training_callbacks = [cp_callback]
fit_model = nn6.fit(x=X_train_scaled, y=y_train, epochs=50, callbacks=training_callbacks)

Epoch 1/50
  7/804 [..............................] - ETA: 6s - loss: 0.6625 - accuracy: 0.6607  

2022-01-01 18:43:05.993314: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 00045: saving mode

In [52]:
# Score nn6 on the scaled test split; evaluate() yields the loss followed by the accuracy metric
test_metrics = nn6.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 18:48:55.452509: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 1s - loss: 0.5583 - accuracy: 0.7237 - 1s/epoch - 5ms/step
Loss: 0.5582652688026428, Accuracy: 0.7237317562103271


In [53]:
# Reload the raw charity CSV so this experiment starts from unmodified data
charity_csv_path = "resources/charity_data.csv"
application_df = pd.read_csv(charity_csv_path)

In [54]:
# Remove the identifier columns 'EIN' and 'NAME' before any feature engineering
application_df = application_df.drop(['EIN', 'NAME'], axis='columns')

In [55]:
# Collect the names of all object-dtype columns; these are the ones to one-hot encode
column_dtypes = application_df.dtypes
application_cat = column_dtypes[column_dtypes == 'object'].index.tolist()

In [56]:
# One-hot encode every categorical column (no rare-value binning in this experiment).
# The encoder is left at its default (sparse) output and densified with
# .toarray(): the `sparse=` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and later removed, so passing it fails on current releases.
enc = OneHotEncoder()
encoded_values = enc.fit_transform(application_df[application_cat]).toarray()

# Label the encoded columns with the generated feature names and reuse the
# source row index so the index-based merge in the next cell lines up row for row.
encode_df = pd.DataFrame(
    encoded_values,
    columns=enc.get_feature_names_out(application_cat),
    index=application_df.index,
)

In [57]:
# Attach the one-hot columns by row index, then discard the original categorical columns
merged_df = pd.merge(application_df, encode_df, left_index=True, right_index=True)
application_df = merged_df.drop(columns=application_cat)

In [58]:
# Target array: the IS_SUCCESSFUL column
y = application_df['IS_SUCCESSFUL'].values

# Feature array: every remaining column (only the target is removed in this experiment)
feature_df = application_df.drop(columns=['IS_SUCCESSFUL'])
X = feature_df.values

# Train/test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [59]:
# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (X_scaler.transform(split) for split in (X_train, X_test))

In [65]:
# nn7 architecture: four SELU hidden layers (160 -> 120 -> 80 -> 40 units) feeding a single sigmoid output unit.
# The input width is taken from the number of columns in the scaled training data.
n_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 120
hidden_nodes_layer3 = 80
hidden_nodes_layer4 = 40

nn7 = tf.keras.models.Sequential()

# First hidden layer (also declares the input dimension)
nn7.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=n_features, activation='selu'))

# Second, third and fourth hidden layers share the same activation and differ only in width
for layer_width in (hidden_nodes_layer2, hidden_nodes_layer3, hidden_nodes_layer4):
    nn7.add(tf.keras.layers.Dense(units=layer_width, activation='selu'))

# Output layer
nn7.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Print the layer-by-layer structure and parameter counts
nn7.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 160)               18720     
                                                                 
 dense_10 (Dense)            (None, 120)               19320     
                                                                 
 dense_11 (Dense)            (None, 80)                9680      
                                                                 
 dense_12 (Dense)            (None, 40)                3240      
                                                                 
 dense_13 (Dense)            (None, 1)                 41        
                                                                 
Total params: 51,001
Trainable params: 51,001
Non-trainable params: 0
_________________________________________________________________


In [66]:
# Configure nn7 for training: Adam optimizer, binary cross-entropy loss, accuracy reported as the metric
nn7.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [67]:
# Keep nn7's checkpoints in their own directory. Previously the directory was
# created but the weights were written to the shared 'checkpoints/' folder, so
# each model's run overwrote the files saved by the others.
checkpoint_dir = 'checkpoints/nn7/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save weights only, every 5 epochs, with the epoch number in the file name.
# NOTE(review): `period` is a deprecated ModelCheckpoint argument; it is kept here
# to preserve the every-5-epochs cadence without depending on the batch count.
cp_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'weights.{epoch:02d}.hdf5'),
    verbose=1,
    save_weights_only=True,
    period=5
)



In [68]:
# Fit nn7 on the scaled training data for 200 epochs, checkpointing through cp_callback
training_callbacks = [cp_callback]
fit_model = nn7.fit(x=X_train_scaled, y=y_train, epochs=200, callbacks=training_callbacks)

Epoch 1/200
  1/804 [..............................] - ETA: 4:10 - loss: 0.6892 - accuracy: 0.5938

2022-01-01 18:59:09.647608: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 00005: saving model to checkpoints/weights.05.hdf5
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 00010: saving model to checkpoints/weights.10.hdf5
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 00015: saving model to checkpoints/weights.15.hdf5
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 00030: saving model to checkpoints/weights.30.hdf5
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 00035: saving model to checkpoints/weights.35.hdf5
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 00040: saving model to checkpoints/weights.40.hdf5
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 

In [69]:
# Score nn7 on the scaled test split; evaluate() yields the loss followed by the accuracy metric
test_metrics = nn7.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2022-01-01 19:32:57.273867: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


268/268 - 2s - loss: 0.5815 - accuracy: 0.7273 - 2s/epoch - 6ms/step
Loss: 0.5815345644950867, Accuracy: 0.7273468971252441


In [70]:
# Persist the trained nn7 model to an HDF5 file in the working directory
nn7.save(filepath='AlphabetSoupCharity_Optimization.h5')