In [9]:
# Import dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Display the first few rows of the dataset
application_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [10]:
# Check for existing columns
print("Columns in dataset:", application_df.columns)

Columns in dataset: Index(['EIN', 'NAME', 'APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION',
       'USE_CASE', 'ORGANIZATION', 'STATUS', 'INCOME_AMT',
       'SPECIAL_CONSIDERATIONS', 'ASK_AMT', 'IS_SUCCESSFUL'],
      dtype='object')


In [11]:
# Drop non-beneficial columns
application_df = application_df.drop(columns=['EIN', 'NAME'])

# Replace rare application types with 'Other'
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
threshold = 500
application_types_to_replace = application_type_counts[application_type_counts < threshold].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, 'Other')

# Replace rare classifications with 'Other'
classification_counts = application_df['CLASSIFICATION'].value_counts()
cutoff_value = 2000
classifications_to_replace = classification_counts[classification_counts < cutoff_value].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, 'Other')

# Convert categorical data to dummy variables
application_df_dummies = pd.get_dummies(application_df)

# Define the target variable (y) and features (X)
y = application_df_dummies['IS_SUCCESSFUL']
X = application_df_dummies.drop(columns=['IS_SUCCESSFUL'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert categorical columns to dummy variables
X_train_dummies = pd.get_dummies(X_train)
X_test_dummies = pd.get_dummies(X_test)

# Align the dummy variables between training and testing set
X_train_dummies, X_test_dummies = X_train_dummies.align(X_test_dummies, join='left', axis=1, fill_value=0)

# Initialize the scaler
scaler = StandardScaler()

# Fit and transform the training data, transform the test data
X_train_scaled = scaler.fit_transform(X_train_dummies)
X_test_scaled = scaler.transform(X_test_dummies)


In [12]:
# Define a more complex model with dropout
nn = tf.keras.models.Sequential()

# First hidden layer with dropout
nn.add(tf.keras.layers.Dense(units=128, activation='relu', input_dim=X_train_scaled.shape[1]))
nn.add(tf.keras.layers.Dropout(0.5))

# Second hidden layer with dropout
nn.add(tf.keras.layers.Dense(units=64, activation='relu'))
nn.add(tf.keras.layers.Dropout(0.5))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=32, activation='relu'))

# Output layer
nn.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with early stopping
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = nn.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=2)

# Evaluate the optimized model
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Optimized Model with Dropout Loss: {model_loss}, Optimized Model Accuracy: {model_accuracy}")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
686/686 - 6s - 9ms/step - accuracy: 0.6842 - loss: 0.6191 - val_accuracy: 0.7340 - val_loss: 0.5612
Epoch 2/100
686/686 - 2s - 3ms/step - accuracy: 0.7174 - loss: 0.5841 - val_accuracy: 0.7343 - val_loss: 0.5564
Epoch 3/100
686/686 - 2s - 3ms/step - accuracy: 0.7191 - loss: 0.5789 - val_accuracy: 0.7394 - val_loss: 0.5583
Epoch 4/100
686/686 - 3s - 4ms/step - accuracy: 0.7196 - loss: 0.5731 - val_accuracy: 0.7369 - val_loss: 0.5551
Epoch 5/100
686/686 - 1s - 2ms/step - accuracy: 0.7218 - loss: 0.5701 - val_accuracy: 0.7376 - val_loss: 0.5604
Epoch 6/100
686/686 - 4s - 6ms/step - accuracy: 0.7223 - loss: 0.5687 - val_accuracy: 0.7362 - val_loss: 0.5561
Epoch 7/100
686/686 - 4s - 6ms/step - accuracy: 0.7224 - loss: 0.5680 - val_accuracy: 0.7345 - val_loss: 0.5565
Epoch 8/100
686/686 - 2s - 2ms/step - accuracy: 0.7214 - loss: 0.5683 - val_accuracy: 0.7349 - val_loss: 0.5615
Epoch 9/100
686/686 - 2s - 3ms/step - accuracy: 0.7243 - loss: 0.5639 - val_accuracy: 0.7362 - val_loss:

In [13]:
# Save the optimized model
nn.save('AlphabetSoupCharity_Optimization.h5')

