## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read charity_data.csv into a DataFrame.
# pandas is already imported as `pd` at the top of this cell, so it is not imported again here.
app_df = pd.read_csv("charity_data.csv")

# Show the first rows as the cell output.
app_df.head()

In [None]:
# Build the working frame `df` from `app_df` with the 'EIN' and 'NAME'
# identifier columns removed. The result is bound to a new name; `app_df`
# is not reassigned here.
df = app_df.drop(['EIN', 'NAME'], axis=1)

In [None]:
# Display the working DataFrame obtained after dropping 'EIN' and 'NAME'.
df

In [None]:
# Count the distinct values found in each column of `df`.
df.nunique(axis=0)

In [None]:
# Tally how often each APPLICATION_TYPE value occurs; these counts are used for binning.
application_types = df.loc[:, 'APPLICATION_TYPE'].value_counts()
application_types

In [None]:
# Collect every application type that occurs fewer than 500 times
# (stored under the name `application_types_to_replace`).
application_types_to_replace = application_types[application_types < 500].index.tolist()

# Relabel all of those infrequent types as "Other" in one pass over the column
df['APPLICATION_TYPE'] = df['APPLICATION_TYPE'].mask(
    df['APPLICATION_TYPE'].isin(application_types_to_replace),
    "Other",
)

# Recount the categories to confirm the binning
df['APPLICATION_TYPE'].value_counts()

In [None]:
# Tally how often each CLASSIFICATION value occurs; these counts are used for binning.
classification_types = df.loc[:, 'CLASSIFICATION'].value_counts()
classification_types

In [None]:
# List [classification, count] pairs for every classification that appears more than once
[[label, count] for label, count in classification_types[classification_types > 1].items()]

In [None]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
# The cutoff is the mean count per classification. It is computed once here
# instead of once per element inside the comprehension.
classification_cutoff = classification_types.mean()
classifications_to_replace = [
    index for index, row in classification_types.items() if row < classification_cutoff
]

# Replace in dataframe
# The binning must be written to `df`, the frame that is one-hot encoded and
# split in the following cells. Writing it to `app_df` left
# df['CLASSIFICATION'] unbinned, so every rare classification became its own
# dummy column.
is_rare_classification = df['CLASSIFICATION'].isin(classifications_to_replace)
df.loc[is_rare_classification, 'CLASSIFICATION'] = "Other"

# NOTE(review): binning here reduces the number of one-hot columns produced
# later. Confirm that the saved model loaded further down was trained on
# features binned the same way, otherwise its input width will not match.

# Check to make sure binning was successful
df['CLASSIFICATION'].value_counts()

In [None]:
# One-hot encode the categorical columns with `pd.get_dummies`, then put the
# encoded columns to the right of the STATUS and ASK_AMT columns.
categorical_columns = [
    'APPLICATION_TYPE',
    'AFFILIATION',
    'CLASSIFICATION',
    'USE_CASE',
    'ORGANIZATION',
    'INCOME_AMT',
    'SPECIAL_CONSIDERATIONS',
]
dummies = pd.get_dummies(df[categorical_columns])
processed_df = pd.concat([df[['STATUS', 'ASK_AMT']], dummies], axis="columns")

In [None]:
# Features are all preprocessed columns; the target is the IS_SUCCESSFUL column of `df`
X, y = processed_df, df['IS_SUCCESSFUL']

# Hold out 20% of the rows for testing; random_state=42 makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the dimensions of the two feature splits
print(f"X_train Shape: {X_train.shape}, X_test Shape: {X_test.shape}")

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit it on the training features only; the fitted scaler is kept as `X_scaler`
X_scaler = scaler.fit(X_train)

# Apply the same fitted scaling to the training and testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Load the Keras model stored in 'AlphabetSoupCharity.h5'.
# NOTE(review): the loaded model's input width presumably has to equal the
# number of columns in X_train_scaled — confirm before training.
model = tf.keras.models.load_model(filepath='AlphabetSoupCharity.h5')

In [None]:
# Compile the loaded model: binary cross-entropy loss, Adam optimizer,
# with accuracy and mean squared error reported as metrics
model.compile(
    optimizer="adam",
    loss="BinaryCrossentropy",
    metrics=['accuracy', 'mse'],
)

In [None]:
# Train for 100 epochs on the scaled training data, passing the scaled test
# split as validation data; the return value of `fit` is kept as `fit_model`
fit_model = model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
    validation_data=(X_test_scaled, y_test),
)

In [None]:
# Evaluate the model on the scaled test set and print the first two returned
# values, labelled as loss and accuracy
evaluate_metrics = model.evaluate(x=X_test_scaled, y=y_test)
print(f"Test Loss: {evaluate_metrics[0]}, Test Accuracy: {evaluate_metrics[1]}")


In [None]:
# Write the trained model to 'optimized_model2.h5'
model.save(filepath='optimized_model2.h5')