In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read charity_data.csv into a DataFrame and preview the first rows.
# pandas is already imported at the top of this cell, so it is not re-imported here.
application_df = pd.read_csv("Resources/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
# Remove the ID columns 'EIN' and 'NAME', which are treated as non-beneficial here.
# `drop` returns a new frame, so `application_df` itself is left untouched.
app_df_dropped = application_df.drop(["EIN", "NAME"], axis="columns")
app_df_dropped.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
# Tally the rows per APPLICATION_TYPE value; these tallies are used for binning.
application_type_column = app_df_dropped["APPLICATION_TYPE"]
app_type_counts = application_type_column.value_counts()

In [4]:
# Choose a cutoff value (count < 100) and collect the application types to be replaced
# in `application_types_to_replace`.
# The list is built by filtering on the count itself rather than slicing off the
# first 9 index positions, so it stays correct if counts tie or if the number of
# categories above the cutoff changes.
application_type_cutoff = 100
rare_application_types = app_type_counts[app_type_counts < application_type_cutoff]
application_types_to_replace = rare_application_types.index.tolist()

In [5]:
# Replace in dataframe: every application type listed in
# `application_types_to_replace` is relabelled as "Other".
# A single vectorised isin/mask pass does the work of the former per-label loop,
# which rescanned and reassigned the whole column once for each label.
is_rare_application_type = app_df_dropped["APPLICATION_TYPE"].isin(application_types_to_replace)
app_df_dropped["APPLICATION_TYPE"] = app_df_dropped["APPLICATION_TYPE"].mask(
    is_rare_application_type, "Other"
)

In [6]:
# Tally the rows per CLASSIFICATION value; these tallies are used for binning.
classification_column = app_df_dropped["CLASSIFICATION"]
class_counts = classification_column.value_counts()

In [7]:
# Choose a cutoff value (count < 500) and collect the classifications to be replaced
# in `classifications_to_replace`.
# The list is built by filtering on the count itself rather than slicing off the
# first 6 index positions, so it stays correct if counts tie or if the number of
# categories above the cutoff changes.
classification_cutoff = 500
rare_classifications = class_counts[class_counts < classification_cutoff]
classifications_to_replace = rare_classifications.index.tolist()

In [8]:
# Replace in dataframe: every classification listed in
# `classifications_to_replace` is relabelled as "Other".
# A single vectorised isin/mask pass does the work of the former per-label loop,
# which rescanned and reassigned the whole column once for each label.
is_rare_classification = app_df_dropped["CLASSIFICATION"].isin(classifications_to_replace)
app_df_dropped["CLASSIFICATION"] = app_df_dropped["CLASSIFICATION"].mask(
    is_rare_classification, "Other"
)

In [9]:
# One-hot encode the categorical columns with `pd.get_dummies`.
# Columns not listed here are passed through unchanged.
categorical_columns = [
    "APPLICATION_TYPE",
    "AFFILIATION",
    "CLASSIFICATION",
    "USE_CASE",
    "ORGANIZATION",
    "INCOME_AMT",
    "SPECIAL_CONSIDERATIONS",
]
encoded_df = pd.get_dummies(app_df_dropped, columns=categorical_columns)

In [10]:
# Separate the encoded frame into the target array (y) and the feature array (X)
y = encoded_df["IS_SUCCESSFUL"].values
X = encoded_df.drop("IS_SUCCESSFUL", axis=1).values

# Partition features and target into training and testing sets;
# random_state is fixed so the same split is produced on every run.
split_arrays = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_arrays

In [11]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit on the training features only, so the test set does not influence the scaling
X_scaler = scaler.fit(X_train)

# Apply the fitted scaling to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Define the model - a deep neural net with three hidden layers and a single sigmoid output.
# Number of nodes in each hidden layer
hl1_units = 16
hl2_units = 16
hl3_units = 16

# Take the input width from the scaled training matrix instead of hard-coding 45,
# so the model still builds if the binning/encoding yields a different column count.
number_input_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential()

# First hidden layer (also declares the number of input features)
nn.add(tf.keras.layers.Dense(units=hl1_units, activation="relu", input_dim=number_input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hl2_units, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hl3_units, activation="relu"))

# Output layer: one sigmoid unit for the binary IS_SUCCESSFUL target
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                736       
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dense_2 (Dense)             (None, 16)                272       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1,297
Trainable params: 1,297
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Train the model on the scaled training data for 50 epochs and keep the returned history object
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# Evaluate the trained model on the held-out, scaled test data and report both metrics
model_loss, model_accuracy = nn.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5504 - accuracy: 0.7318 - 268ms/epoch - 1ms/step
Loss: 0.5503658056259155, Accuracy: 0.7317784428596497


In [16]:
# Export the trained model to an .h5 file in the working directory
nn.save(filepath="AlphabetSoupCharityOpt2.h5")