## REPORT

In [3]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from ydata_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.tree import ExtraTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import seaborn as sns
import matplotlib.pyplot as plt
from io import BytesIO
import base64
from IPython.core.display import HTML

# Load the raw dataset from the local CSV file.
df = pd.read_csv('CHURN.csv')

# Remove the columns that are excluded from modelling in one call.
# (Raises KeyError if any of them is missing, same as the per-column `del`.)
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname', 'Geography'])

# Discard every row that contains at least one missing value.
df = df.dropna()

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Columns holding string categories that must become integer codes.
var = ['Gender']

# NOTE: `le` is re-fitted for each column, so after the loop it only keeps
# the mapping of the last column in `var`.
for i in var:
    df[i] = le.fit_transform(df[i]).astype(int)

# Generate basic data profiling report of the cleaned frame.
prof = ProfileReport(df)
prof.to_file(output_file='Basic_report.html')

# Separate the feature matrix from the target column `Exited`.
X = df.drop("Exited", axis=1)
y = df["Exited"]

# Hold out the test set BEFORE any oversampling. Fitting SMOTE on the full
# dataset would create synthetic rows interpolated from samples that later
# land in the test set, leaking test information into training and inflating
# every reported metric. `stratify=y` keeps the original class ratio in both
# folds so the test set reflects the real class distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Apply SMOTE to the training fold only to handle class imbalance.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train a single extremely randomized tree (ExtraTreeClassifier, not a
# random forest). The seed makes the random splits reproducible across runs.
clf = ExtraTreeClassifier(random_state=42)
clf.fit(X_resampled, y_resampled)

# Predict on the untouched, non-resampled test fold.
y_pred = clf.predict(X_test)

# Calculate accuracy. The test fold keeps the original class imbalance, so
# read this together with the per-class classification report.
accuracy = accuracy_score(y_test, y_pred)

# Build the confusion matrix from the held-out labels and the predictions.
cm = confusion_matrix(y_test, y_pred)

# Draw it as an annotated heatmap on a dedicated figure/axes pair.
fig, ax = plt.subplots()
sns.heatmap(cm, ax=ax, annot=True, fmt='d', cmap='Blues')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')

# Render the figure as PNG into an in-memory buffer, then release the figure.
buf = BytesIO()
fig.savefig(buf, format='png')
plt.close(fig)

# Embed the PNG bytes as a base64 data URI so the HTML needs no image file.
png_bytes = buf.getvalue()
img_str = base64.b64encode(png_bytes).decode('utf-8')
img_tag = f'<img src="data:image/png;base64,{img_str}" />'

# Generate the classification report as a dict and reshape it into a table:
# after the transpose, each report entry is a row and each metric a column.
class_report = classification_report(y_test, y_pred, output_dict=True)
class_report_df = pd.DataFrame(class_report).transpose()

# Convert classification report dataframe to an HTML <table>.
class_report_html = class_report_df.to_html()

# Create a separate, self-contained report HTML for the metrics. The charset
# declaration matches the encoding used when the file is written below.
metrics_html = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Metrics Report</title>
</head>
<body>
    <h1>Metrics Report</h1>
    <h2>Confusion Matrix</h2>
    {img_tag}
    <h2>Classification Report</h2>
    {class_report_html}
    <h2>Accuracy</h2>
    <p>{accuracy}</p>
</body>
</html>
"""

# Save the metrics report to a new HTML file. The explicit encoding keeps the
# output identical across platforms instead of using the locale default.
with open('Metrics_report.html', 'w', encoding='utf-8') as file:
    file.write(metrics_html)

print("Metrics report saved as Metrics_report.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Metrics report saved as Metrics_report.html
