# EMAIL SPAM DETECTION WITH MACHINE LEARNING

### We’ve all been the recipient of spam emails before. Spam mail, or junk mail, is a type of email that is sent to a massive number of users at one time, frequently containing cryptic messages, scams, or most dangerously, phishing content.



In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.naive_bayes import BernoulliNB,MultinomialNB

from sklearn.model_selection import GridSearchCV,KFold
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,classification_report,confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from gensim.models import Word2Vec
import time

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv(r"C:\Users\ADMIN\Downloads\oasis\codes\spam.csv",encoding='latin-1')
df = df[["v1","v2"]]
df.head()

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


#### Since there are a lot of null values in the remaining 2 columns ,so we will not take them in the dataframe. 

In [3]:
df.shape

(5572, 2)

#### In total there are 5572 rows and 2 columns in the dataframe 

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 0 to 5571
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   v1      5572 non-null   object
 1   v2      5572 non-null   object
dtypes: object(2)
memory usage: 87.2+ KB


In [5]:
df[df.duplicated()==True]

Unnamed: 0,v1,v2
102,ham,As per your request 'Melle Melle (Oru Minnamin...
153,ham,As per your request 'Melle Melle (Oru Minnamin...
206,ham,"As I entered my cabin my PA said, '' Happy B'd..."
222,ham,"Sorry, I'll call later"
325,ham,No calls..messages..missed calls
...,...,...
5524,spam,You are awarded a SiPix Digital Camera! call 0...
5535,ham,"I know you are thinkin malaria. But relax, chi..."
5539,ham,Just sleeping..and surfing
5553,ham,Hahaha..use your brain dear


#### In total there is 403 duplicate rows present in the dataframe, so we can drop these duplicted rows.

In [6]:
df.drop_duplicates(keep="first",inplace=True)

In [7]:
df.duplicated().sum()

0

In [8]:
df = df.reset_index(drop=True)

In [9]:
df.shape

(5169, 2)

In [10]:
def clean_str(string):
    try:
        string = re.sub(r'^https?:\/\/<>.*[\r\n]*', '', string, flags=re.MULTILINE)
        string = re.sub(r"[^A-Za-z]", " ", string)         
        words = string.strip().lower().split()    
        words = [w for w in words if len(w)>=1]
        return " ".join(words)
    except:
        return ""

In [11]:
df["v3"] = df['v2'].apply(clean_str)

In [12]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [13]:
stopword = stopwords.words("english")

In [14]:
df["v3"] = df["v3"].apply(lambda x: " ".join([i  for i in x.split()   if i not in stopword]))

In [15]:
df.head()

Unnamed: 0,v1,v2,v3
0,ham,"Go until jurong point, crazy.. Available only ...",go jurong point crazy available bugis n great ...
1,ham,Ok lar... Joking wif u oni...,ok lar joking wif u oni
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,free entry wkly comp win fa cup final tkts st ...
3,ham,U dun say so early hor... U c already then say...,u dun say early hor u c already say
4,ham,"Nah I don't think he goes to usf, he lives aro...",nah think goes usf lives around though


In [16]:
df.tail()

Unnamed: 0,v1,v2,v3
5164,spam,This is the 2nd time we have tried 2 contact u...,nd time tried contact u u pound prize claim ea...
5165,ham,Will Ì_ b going to esplanade fr home?,b going esplanade fr home
5166,ham,"Pity, * was in mood for that. So...any other s...",pity mood suggestions
5167,ham,The guy did some bitching but I acted like i'd...,guy bitching acted like interested buying some...
5168,ham,Rofl. Its true to its name,rofl true name


In [17]:
x = df["v3"]
y = df["v1"]

### Creating Userdefined Function

In [18]:
def model_cal(model_df,model_name,model,x,y):
    xtrain,xtest,ytrain,ytest = train_test_split(x,y,random_state=10,test_size=0.30)
    xtrain,xtest,ytrain,ytest
    model.fit(xtrain,ytrain)
    ypred_train = model.predict(xtrain)
    ypred_test = model.predict(xtest)
    train_acc = accuracy_score(ytrain,ypred_train)
    train_f1 = f1_score(ytrain,ypred_train,average="weighted")
    train_rec = recall_score(ytrain,ypred_train,average="weighted")
    train_pres = precision_score(ytrain,ypred_train,average="weighted")
    test_acc = accuracy_score(ytest,ypred_test)
    test_f1 = f1_score(ytest,ypred_test,average="weighted")
    test_rec = recall_score(ytest,ypred_test,average="weighted")
    test_pres = precision_score(ytest,ypred_test,average="weighted")
    model_df.loc[model_name,:]=[train_acc,train_f1,train_rec,train_pres,test_acc,test_f1,test_rec,test_pres]
    print(classification_report(ytrain,ypred_train))
    print(classification_report(ytest,ypred_test))
    return model_df

###  CountVectorizer Method

In [19]:
col = pd.MultiIndex.from_product([["Train","Test"],["Accuracy","F1 Score","Recall","Precision"]])
model_score_cv = pd.DataFrame(columns=col)
model_score_cv.index.name = "Model Name"
model_score_cv

Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2


In [20]:
cv=CountVectorizer(ngram_range=(1, 2))

In [21]:
x_vec = cv.fit_transform(x)

### Labeling target

In [22]:
label = {"ham":0,"spam":1}
y = y.replace(label)

#### Logistic Regression

In [23]:
lr = LogisticRegression(random_state=10)
model_cal(model_score_cv,"Logistic Regression",lr,x_vec,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       1.00      0.98      0.99       450

    accuracy                           1.00      3618
   macro avg       1.00      0.99      1.00      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.97      1.00      0.98      1348
           1       0.99      0.78      0.87       203

    accuracy                           0.97      1551
   macro avg       0.98      0.89      0.93      1551
weighted avg       0.97      0.97      0.97      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247


#### BernoulliNB

In [24]:
bnb = BernoulliNB()
model_cal(model_score_cv,"BernoulliNB",bnb,x_vec,y)

              precision    recall  f1-score   support

           0       0.93      1.00      0.96      3168
           1       0.99      0.46      0.63       450

    accuracy                           0.93      3618
   macro avg       0.96      0.73      0.79      3618
weighted avg       0.94      0.93      0.92      3618

              precision    recall  f1-score   support

           0       0.90      0.99      0.95      1348
           1       0.89      0.27      0.42       203

    accuracy                           0.90      1551
   macro avg       0.89      0.63      0.68      1551
weighted avg       0.90      0.90      0.88      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837


#### MultinomialNB

In [25]:
mnb = MultinomialNB()
model_cal(model_score_cv,"MultinomialNB",mnb,x_vec,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       0.98      0.99      0.99       450

    accuracy                           1.00      3618
   macro avg       0.99      0.99      0.99      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.99      0.98      0.98      1348
           1       0.86      0.95      0.90       203

    accuracy                           0.97      1551
   macro avg       0.93      0.96      0.94      1551
weighted avg       0.98      0.97      0.97      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.996407,0.996409,0.996407,0.996411,0.973565,0.974112,0.973565,0.975337


#### Decision Tree

In [26]:
dt = DecisionTreeClassifier(random_state=10)
model_cal(model_score_cv,"Decision Tree",dt,x_vec,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       1.00      1.00      1.00       450

    accuracy                           1.00      3618
   macro avg       1.00      1.00      1.00      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.96      0.99      0.98      1348
           1       0.93      0.76      0.84       203

    accuracy                           0.96      1551
   macro avg       0.95      0.88      0.91      1551
weighted avg       0.96      0.96      0.96      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.996407,0.996409,0.996407,0.996411,0.973565,0.974112,0.973565,0.975337
Decision Tree,1.0,1.0,1.0,1.0,0.961315,0.959587,0.961315,0.960548


#### Random Forest

In [27]:
rf = RandomForestClassifier(random_state=10)
model_cal(model_score_cv,"Random Forest",rf,x_vec,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       1.00      1.00      1.00       450

    accuracy                           1.00      3618
   macro avg       1.00      1.00      1.00      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.95      1.00      0.98      1348
           1       1.00      0.66      0.80       203

    accuracy                           0.96      1551
   macro avg       0.98      0.83      0.89      1551
weighted avg       0.96      0.96      0.95      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.996407,0.996409,0.996407,0.996411,0.973565,0.974112,0.973565,0.975337
Decision Tree,1.0,1.0,1.0,1.0,0.961315,0.959587,0.961315,0.960548
Random Forest,1.0,1.0,1.0,1.0,0.955513,0.951513,0.955513,0.957679


#### AdaBoost

In [28]:
adb = AdaBoostClassifier(random_state=10)
model_cal(model_score_cv,"AdaBoost",adb,x_vec,y)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      3168
           1       0.98      0.89      0.93       450

    accuracy                           0.98      3618
   macro avg       0.98      0.94      0.96      3618
weighted avg       0.98      0.98      0.98      3618

              precision    recall  f1-score   support

           0       0.96      0.99      0.98      1348
           1       0.92      0.76      0.83       203

    accuracy                           0.96      1551
   macro avg       0.94      0.87      0.90      1551
weighted avg       0.96      0.96      0.96      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.998065,0.998059,0.998065,0.998069,0.969697,0.96814,0.969697,0.970247
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.996407,0.996409,0.996407,0.996411,0.973565,0.974112,0.973565,0.975337
Decision Tree,1.0,1.0,1.0,1.0,0.961315,0.959587,0.961315,0.960548
Random Forest,1.0,1.0,1.0,1.0,0.955513,0.951513,0.955513,0.957679
AdaBoost,0.983416,0.983065,0.983416,0.983345,0.960026,0.958344,0.960026,0.959041


### TfidfVectorizer Method

In [29]:
tdidf = TfidfVectorizer(binary=True,ngram_range=(1,2))
x_tdidf = tdidf.fit_transform(x)

In [30]:
col = pd.MultiIndex.from_product([["Train","Test"],["Accuracy","F1 Score","Recall","Precision"]])
model_score_tdidf = pd.DataFrame(columns=col)
model_score_tdidf.index.name = "Model Name"
model_score_tdidf

Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2


#### Logistic Regression

In [31]:
lr = LogisticRegression(random_state=10)
model_cal(model_score_tdidf,"Logistic Regression",lr,x_tdidf,y)

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3168
           1       0.98      0.56      0.72       450

    accuracy                           0.94      3618
   macro avg       0.96      0.78      0.84      3618
weighted avg       0.95      0.94      0.94      3618

              precision    recall  f1-score   support

           0       0.91      1.00      0.95      1348
           1       0.99      0.33      0.49       203

    accuracy                           0.91      1551
   macro avg       0.95      0.66      0.72      1551
weighted avg       0.92      0.91      0.89      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372


#### BernoulliNB

In [32]:
bnb = BernoulliNB()
model_cal(model_score_tdidf,"BernoulliNB",bnb,x_tdidf,y)

              precision    recall  f1-score   support

           0       0.93      1.00      0.96      3168
           1       0.99      0.46      0.63       450

    accuracy                           0.93      3618
   macro avg       0.96      0.73      0.79      3618
weighted avg       0.94      0.93      0.92      3618

              precision    recall  f1-score   support

           0       0.90      0.99      0.95      1348
           1       0.89      0.27      0.42       203

    accuracy                           0.90      1551
   macro avg       0.89      0.63      0.68      1551
weighted avg       0.90      0.90      0.88      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837


#### MultinomialNB

In [33]:
mnb = MultinomialNB()
model_cal(model_score_tdidf,"MultinomialNB",mnb,x_tdidf,y)

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      3168
           1       1.00      0.74      0.85       450

    accuracy                           0.97      3618
   macro avg       0.98      0.87      0.92      3618
weighted avg       0.97      0.97      0.97      3618

              precision    recall  f1-score   support

           0       0.92      1.00      0.96      1348
           1       1.00      0.41      0.59       203

    accuracy                           0.92      1551
   macro avg       0.96      0.71      0.77      1551
weighted avg       0.93      0.92      0.91      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.967938,0.965854,0.967938,0.969071,0.923275,0.908991,0.923275,0.929499


#### Decision Tree

In [34]:
dt = DecisionTreeClassifier(random_state=10)
model_cal(model_score_tdidf,"Decision Tree",dt,x_tdidf,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       1.00      1.00      1.00       450

    accuracy                           1.00      3618
   macro avg       1.00      1.00      1.00      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1348
           1       0.83      0.77      0.80       203

    accuracy                           0.95      1551
   macro avg       0.90      0.87      0.89      1551
weighted avg       0.95      0.95      0.95      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.967938,0.965854,0.967938,0.969071,0.923275,0.908991,0.923275,0.929499
Decision Tree,1.0,1.0,1.0,1.0,0.94971,0.948942,0.94971,0.948486


#### Random Forest

In [35]:
rf = RandomForestClassifier(random_state=10)
model_cal(model_score_tdidf,"Random Forest",rf,x_tdidf,y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3168
           1       1.00      1.00      1.00       450

    accuracy                           1.00      3618
   macro avg       1.00      1.00      1.00      3618
weighted avg       1.00      1.00      1.00      3618

              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1348
           1       0.99      0.66      0.79       203

    accuracy                           0.95      1551
   macro avg       0.97      0.83      0.88      1551
weighted avg       0.96      0.95      0.95      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.967938,0.965854,0.967938,0.969071,0.923275,0.908991,0.923275,0.929499
Decision Tree,1.0,1.0,1.0,1.0,0.94971,0.948942,0.94971,0.948486
Random Forest,1.0,1.0,1.0,1.0,0.954223,0.950108,0.954223,0.956089


#### AdaBoost

In [36]:
adb = AdaBoostClassifier(random_state=10)
model_cal(model_score_tdidf,"AdaBoost",adb,x_tdidf,y)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      3168
           1       0.98      0.89      0.93       450

    accuracy                           0.98      3618
   macro avg       0.98      0.94      0.96      3618
weighted avg       0.98      0.98      0.98      3618

              precision    recall  f1-score   support

           0       0.97      0.99      0.98      1348
           1       0.93      0.77      0.84       203

    accuracy                           0.96      1551
   macro avg       0.95      0.88      0.91      1551
weighted avg       0.96      0.96      0.96      1551



Unnamed: 0_level_0,Train,Train,Train,Train,Test,Test,Test,Test
Unnamed: 0_level_1,Accuracy,F1 Score,Recall,Precision,Accuracy,F1 Score,Recall,Precision
Model Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Logistic Regression,0.944444,0.937683,0.944444,0.946741,0.91167,0.891775,0.91167,0.918372
BernoulliNB,0.932007,0.920754,0.932007,0.93615,0.900064,0.875961,0.900064,0.898837
MultinomialNB,0.967938,0.965854,0.967938,0.969071,0.923275,0.908991,0.923275,0.929499
Decision Tree,1.0,1.0,1.0,1.0,0.94971,0.948942,0.94971,0.948486
Random Forest,1.0,1.0,1.0,1.0,0.954223,0.950108,0.954223,0.956089
AdaBoost,0.983693,0.983373,0.983693,0.983595,0.962605,0.961032,0.962605,0.961864
