In [None]:

# Exploratory Data Analysis (EDA)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the three raw churn datasets from CSV files (paths are relative to the
# notebook's working directory), in the same order as the names on the left.
ecommerce_df, bank_churn_df, orange_telecom_df = (
    pd.read_csv(csv_path)
    for csv_path in ('E Commerce Dataset.csv', 'Bank_churn.csv', 'orange_telecom.csv')
)

# Reusable EDA helper (applied to the E-Commerce dataset below)
def perform_eda(df, target_col, tenure_col='tenure'):
    """Draw and display a three-panel EDA overview of ``df``.

    Panels, left to right:
      1. count plot of ``target_col``;
      2. histogram (20 bins, KDE overlay) of ``tenure_col`` with NaNs dropped;
      3. annotated correlation heatmap of the numeric columns.

    A panel whose data is unavailable (no ``tenure_col`` column, or no numeric
    columns to correlate) shows a short note instead of being left blank or
    raising from inside seaborn.

    Args:
        df: pandas DataFrame to explore.
        target_col: name of the column whose value counts are plotted.
        tenure_col: name of the tenure column. Defaults to ``'tenure'``, the
            name that was previously hard-coded.

    Returns:
        None. The figure is shown via ``plt.show()``.

    Raises:
        ValueError: if ``target_col`` is not a column of ``df``.
    """
    # Validate before creating the figure so a typo does not leave an empty
    # figure behind in the notebook output.
    if target_col not in df.columns:
        raise ValueError(
            f"target_col {target_col!r} is not a column of the DataFrame; "
            f"available columns: {list(df.columns)}"
        )

    def _note_missing(ax, message):
        # Replace an unusable panel with a centred explanatory note.
        ax.text(0.5, 0.5, message, ha='center', va='center', transform=ax.transAxes)
        ax.set_axis_off()

    # Explicit axes instead of the pyplot state machine: every plot call names
    # the panel it draws on.
    fig, (target_ax, tenure_ax, corr_ax) = plt.subplots(1, 3, figsize=(15, 6))

    # Panel 1: class balance of the target
    sns.countplot(x=target_col, data=df, ax=target_ax)
    target_ax.set_title(f'{target_col} Distribution')

    # Panel 2: tenure distribution (only when the column exists)
    if tenure_col in df.columns:
        sns.histplot(df[tenure_col].dropna(), bins=20, kde=True, ax=tenure_ax)
        tenure_ax.set_title('Tenure Distribution')
    else:
        _note_missing(tenure_ax, f"No {tenure_col!r} column")

    # Panel 3: pairwise correlation of numeric columns
    corr = df.corr(numeric_only=True)
    if corr.empty:
        # Nothing to draw: the frame has no numeric columns to correlate.
        _note_missing(corr_ax, 'No numeric columns to correlate')
    else:
        sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=corr_ax)
        corr_ax.set_title('Correlation Heatmap')

    fig.tight_layout()
    plt.show()

# Render the EDA overview for the E-Commerce data, with 'churn' as the target column
perform_eda(df=ecommerce_df, target_col='churn')


In [None]:

# Feature Engineering

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

def preprocess_data(df, target_column):
    """Split ``df`` into features/target and turn the features into a model-ready matrix.

    Numeric feature columns are mean-imputed and standardised; categorical
    feature columns (``object`` or ``category`` dtype) are imputed with the
    most frequent value and one-hot encoded. Feature columns of any other
    dtype are not passed to the transformer and are therefore absent from the
    output; a ``UserWarning`` lists them so the omission is visible.

    Args:
        df: pandas DataFrame holding both the features and the target.
        target_column: name of the target column in ``df``.

    Returns:
        Tuple ``(X_processed, y)``: the transformed feature matrix returned by
        ``ColumnTransformer.fit_transform`` and the untouched target Series.

    Raises:
        KeyError: if ``target_column`` is not a column of ``df`` (raised by
            ``DataFrame.drop``).

    Notes:
        * The preprocessor is fitted on every row of ``df``. Callers that split
          the returned matrix into train/test afterwards evaluate on rows that
          already contributed to the imputation and scaling statistics.
        * The fitted preprocessor is not returned, so the same transformation
          cannot be re-applied to new raw data from this function's outputs.
    """
    import warnings  # local import: keeps the block self-contained

    X = df.drop(columns=[target_column])
    y = df[target_column]

    # 'number' covers every numeric width (int32, float32, uint8, ...), not
    # only int64/float64; 'category' is treated like 'object' strings.
    numerical_columns = X.select_dtypes(include='number').columns.tolist()
    categorical_columns = X.select_dtypes(include=['object', 'category']).columns.tolist()

    # ColumnTransformer drops unlisted columns by default; surface that
    # instead of losing features silently (e.g. bool or datetime columns).
    handled_columns = set(numerical_columns) | set(categorical_columns)
    ignored_columns = [col for col in X.columns if col not in handled_columns]
    if ignored_columns:
        warnings.warn(
            "preprocess_data: columns with unsupported dtypes are excluded "
            f"from the feature matrix: {ignored_columns}",
            UserWarning,
            stacklevel=2,
        )

    categorical_pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    numerical_pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numerical_pipeline, numerical_columns),
        ('cat', categorical_pipeline, categorical_columns)
    ])

    X_processed = preprocessor.fit_transform(X)
    return X_processed, y

# Turn each raw dataset into a (feature matrix, target) pair; all three calls
# use 'churn' as the target column.
ecommerce_processed, ecommerce_target = preprocess_data(
    df=ecommerce_df, target_column='churn'
)
bank_churn_processed, bank_churn_target = preprocess_data(
    df=bank_churn_df, target_column='churn'
)
orange_telecom_processed, orange_telecom_target = preprocess_data(
    df=orange_telecom_df, target_column='churn'
)


In [None]:

# Model Selection and Training

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

def train_and_evaluate_model(X, y, model_name="Logistic Regression"):
    """Fit the requested classifier on an 80/20 split and score it on the held-out part.

    Args:
        X: feature matrix accepted by ``train_test_split`` and the estimator.
        y: target labels aligned with the rows of ``X``. The metrics below use
            scikit-learn's binary defaults, so a binary target is expected.
        model_name: ``"Logistic Regression"`` (default) or ``"Random Forest"``.

    Returns:
        Tuple ``(model, metrics)`` where ``model`` is the fitted estimator and
        ``metrics`` is a dict with keys ``'accuracy'``, ``'precision'``,
        ``'recall'`` and ``'auc_roc'`` computed on the test split.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    # Factories are lazy so the name can be validated before any work is done.
    model_factories = {
        "Logistic Regression": lambda: LogisticRegression(max_iter=1000),
        # Seeded so repeated runs give the same forest, matching the seeded split.
        "Random Forest": lambda: RandomForestClassifier(random_state=42),
    }
    if model_name not in model_factories:
        # Previously an unknown name left `model` unbound and failed later
        # with an UnboundLocalError.
        raise ValueError(
            f"Unsupported model_name {model_name!r}; "
            f"expected one of {sorted(model_factories)}"
        )

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = model_factories[model_name]()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # ROC AUC is a ranking metric: feed it the positive-class probability
    # (column 1 = the larger class label), not the thresholded predictions.
    y_score = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auc_roc = roc_auc_score(y_test, y_score)

    return model, {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'auc_roc': auc_roc}

# Fit a Random Forest on the E-Commerce features and report its test-split metrics
ecommerce_model, ecommerce_results = train_and_evaluate_model(
    X=ecommerce_processed,
    y=ecommerce_target,
    model_name="Random Forest",
)
print("E-Commerce Results:", ecommerce_results)


In [None]:

# Model Evaluation

# Same Random Forest training/evaluation as for E-Commerce, now for the
# Bank Churn and Orange Telecom datasets (trained in that order).
bank_churn_model, bank_churn_results = train_and_evaluate_model(
    X=bank_churn_processed,
    y=bank_churn_target,
    model_name="Random Forest",
)
orange_telecom_model, orange_telecom_results = train_and_evaluate_model(
    X=orange_telecom_processed,
    y=orange_telecom_target,
    model_name="Random Forest",
)

print("Bank Churn Results:", bank_churn_results)
print("Orange Telecom Results:", orange_telecom_results)


In [None]:

# Model Export

import pickle

# Serialise each fitted model to its own pickle file in the working directory.
# Only the estimators are written; nothing else is bundled into the files.
for pkl_path, fitted_model in (
    ('ecommerce_model.pkl', ecommerce_model),
    ('bank_churn_model.pkl', bank_churn_model),
    ('orange_telecom_model.pkl', orange_telecom_model),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(fitted_model, f)

print("Models exported successfully!")
