<a href="https://colab.research.google.com/github/vaish5rise/ML-models-on-Iranian-churn-dataset/blob/main/DT%2CRF%2C_KNN%2CSVC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


In [None]:
# Colab-specific upload helper; this import is only expected to resolve when
# the notebook runs inside Google Colab.
from google.colab import files
# Ask for a file upload. The next cell treats the first key of `uploaded`
# as the name of the CSV file to load.
uploaded = files.upload()


Saving Iranian churn dataset.csv to Iranian churn dataset (2).csv


In [None]:
# Read the uploaded CSV file (the dataset is a .csv, loaded with pd.read_csv)
if not uploaded:
    # An empty upload result (e.g. the upload was cancelled) would otherwise
    # surface as an opaque IndexError when picking the first file name.
    raise ValueError(
        "No file was uploaded; re-run the upload cell and select the churn dataset CSV."
    )

# The first key of the upload result is used as the file name to read.
file_name = next(iter(uploaded))
churn_data = pd.read_csv(file_name)

# Display first few rows of the dataset
print(churn_data.head())



   Call  Failure  Complains  Subscription  Length  Charge  Amount  \
0              8          0                    38               0   
1              0          0                    39               0   
2             10          0                    37               0   
3             10          0                    38               0   
4              3          0                    38               0   

   Seconds of Use  Frequency of use  Frequency of SMS  \
0            4370                71                 5   
1             318                 5                 7   
2            2453                60               359   
3            4198                66                 1   
4            2393                58                 2   

   Distinct Called Numbers  Age Group  Tariff Plan  Status  Age  \
0                       17          3            1       1   30   
1                        4          2            1       2   25   
2                       24          3    

In [None]:

# Separate the target column ('Churn') from the predictor columns
y = churn_data['Churn']
X = churn_data.drop('Churn', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
#  Train the models
#
# Each model is fitted on the training split and then used to predict the
# held-out test split; the *_pred arrays are consumed by the evaluation cell.

# Decision Tree (DT) -- tree splits are unaffected by feature scale, so the
# raw features are used as-is.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Random Forest (RF) -- also scale-invariant; raw features are fine.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# K-Nearest Neighbors (KNN)
# KNN compares rows by distance, so columns with large numeric ranges
# (e.g. 'Seconds of Use') would dominate small-range columns. Standardize the
# features first. The scaler lives inside the pipeline so it is fitted on the
# training split only and merely applied to the test split (no leakage).
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)

# Support Vector Classifier (SVC)
# SVMs are not scale-invariant either; standardizing the inputs is the
# recommended practice and also helps the linear kernel converge.
svc_model = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=42))
svc_model.fit(X_train, y_train)
svc_pred = svc_model.predict(X_test)



           Model  Accuracy  Precision    Recall  F1 Score
0  Decision Tree  0.930159   0.817308  0.772727  0.794393
1  Random Forest  0.939683   0.846154  0.800000  0.822430
2            KNN  0.836508   0.543210  0.400000  0.460733
3            SVC  0.874603   0.674157  0.545455  0.603015


In [None]:
# Step 4: Evaluation and Metrics

# Function to calculate evaluation metrics
def evaluate_model(y_true, y_pred):
    """Score one set of predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by a model, aligned with ``y_true``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``, each computed by the
        corresponding scoring function called with ``(y_true, y_pred)``.
    """
    # Order matters: callers index the returned tuple positionally.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(score(y_true, y_pred) for score in scorers)

# Score every model's test-set predictions (same order as the table rows below)
dt_metrics, rf_metrics, knn_metrics, svc_metrics = (
    evaluate_model(y_test, predictions)
    for predictions in (dt_pred, rf_pred, knn_pred, svc_pred)
)

# Step 5: Compare evaluation measures in a single table
# One row per model; each metrics tuple is (accuracy, precision, recall, f1).
results = pd.DataFrame(
    [dt_metrics, rf_metrics, knn_metrics, svc_metrics],
    columns=['Accuracy', 'Precision', 'Recall', 'F1 Score'],
)
results.insert(0, 'Model', ['Decision Tree', 'Random Forest', 'KNN', 'SVC'])

# Show the comparison table
print(results)

           Model  Accuracy  Precision    Recall  F1 Score
0  Decision Tree  0.930159   0.817308  0.772727  0.794393
1  Random Forest  0.939683   0.846154  0.800000  0.822430
2            KNN  0.836508   0.543210  0.400000  0.460733
3            SVC  0.874603   0.674157  0.545455  0.603015
