In [11]:
# Importing the Pandas library for data manipulation and analysis
import pandas as pd

# Importing train_test_split from sklearn to split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

# Importing the RandomForestClassifier from sklearn, which is an ensemble learning method for classification tasks
from sklearn.ensemble import RandomForestClassifier

# Importing accuracy_score, f1_score, precision_score, and recall_score from sklearn to evaluate the performance of the model
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Importing OneHotEncoder from sklearn to convert categorical variables into a format that can be provided to ML algorithms
from sklearn.preprocessing import OneHotEncoder

In [12]:
# Load the dataset
df = pd.read_csv('Churn_Modelling.csv')

In [13]:
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [14]:
# Preprocess the data
X = df.drop(['Exited', 'RowNumber', 'CustomerId', 'Surname'], axis=1)
y = df['Exited']

In [15]:
# One-hot encode the categorical variables
encoder = OneHotEncoder()
X_encoded = encoder.fit_transform(X).toarray()

In [16]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [17]:
# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

In [18]:
# Make predictions on the test set
y_pred = rf_model.predict(X_test)

In [19]:
# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print('Random Forest Model Performance:')
print(f'Accuracy: {accuracy:.2f}')
print(f'F1-score: {f1:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

Random Forest Model Performance:
Accuracy: 0.84
F1-score: 0.39
Precision: 0.87
Recall: 0.25
