## Dataset 

https://www.kaggle.com/uciml/sms-spam-collection-dataset



In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

In [4]:
df1 = pd.read_csv("../input/sms-spam-collection-dataset/spam.csv", encoding="latin-1")

In [5]:
df1

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...,,,
5568,ham,Will Ì_ b going to esplanade fr home?,,,
5569,ham,"Pity, * was in mood for that. So...any other s...",,,
5570,ham,The guy did some bitching but I acted like i'd...,,,


In [6]:
df1.rename(columns={'v1':'class',"v2":"message"},inplace=True)
df1.head()

Unnamed: 0,class,message,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [7]:
nedict = {'ham':0, 
        'spam':1}

# apply using map
df1['class'] = df1['class'].map(nedict)

In [8]:
df1

Unnamed: 0,class,message,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,0,"Go until jurong point, crazy.. Available only ...",,,
1,0,Ok lar... Joking wif u oni...,,,
2,1,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,0,U dun say so early hor... U c already then say...,,,
4,0,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...,,,
5568,0,Will Ì_ b going to esplanade fr home?,,,
5569,0,"Pity, * was in mood for that. So...any other s...",,,
5570,0,The guy did some bitching but I acted like i'd...,,,


In [9]:
df1.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)

In [10]:
df1

Unnamed: 0,class,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...
5568,0,Will Ì_ b going to esplanade fr home?
5569,0,"Pity, * was in mood for that. So...any other s..."
5570,0,The guy did some bitching but I acted like i'd...


In [11]:
X = df1['message']
y = df1['class']

In [12]:
cv = CountVectorizer()
X = cv.fit_transform(X) # Fit the Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


In [13]:
from sklearn import svm
clf1 = svm.SVC()
clf1.fit(X_train,y_train)
clf1.score(X_test,y_test)
y_pred = clf1.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      1.00      0.99      1587
           1       1.00      0.83      0.91       252

    accuracy                           0.98      1839
   macro avg       0.98      0.92      0.95      1839
weighted avg       0.98      0.98      0.98      1839



In [14]:
from sklearn.ensemble import RandomForestClassifier

clf2 = RandomForestClassifier()
clf2.fit(X_train,y_train)
clf2.score(X_test,y_test)
y_pred = clf2.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.97      1.00      0.99      1587
           1       1.00      0.82      0.90       252

    accuracy                           0.98      1839
   macro avg       0.99      0.91      0.94      1839
weighted avg       0.98      0.98      0.97      1839



In [15]:
#Naive Bayes Classifier
clf3 = MultinomialNB()
clf3.fit(X_train,y_train)
clf3.score(X_test,y_test)
y_pred = clf3.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1587
           1       0.93      0.92      0.92       252

    accuracy                           0.98      1839
   macro avg       0.96      0.95      0.96      1839
weighted avg       0.98      0.98      0.98      1839



In [16]:
from sklearn.neighbors import KNeighborsClassifier

clf4= KNeighborsClassifier(n_neighbors=3)
clf4.fit(X_train,y_train)
clf4.score(X_test,y_test)
y_pred = clf4.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96      1587
           1       1.00      0.47      0.64       252

    accuracy                           0.93      1839
   macro avg       0.96      0.73      0.80      1839
weighted avg       0.93      0.93      0.92      1839



In [17]:
from sklearn import tree
clf5 = tree.DecisionTreeClassifier()
clf5.fit(X_train,y_train)
clf5.score(X_test,y_test)
y_pred = clf5.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98      1587
           1       0.85      0.86      0.86       252

    accuracy                           0.96      1839
   macro avg       0.91      0.92      0.92      1839
weighted avg       0.96      0.96      0.96      1839



In [18]:
from sklearn.neural_network import MLPClassifier
clf5 = MLPClassifier()
clf5.fit(X_train,y_train)
clf5.score(X_test,y_test)
y_pred = clf5.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1587
           1       1.00      0.86      0.92       252

    accuracy                           0.98      1839
   macro avg       0.99      0.93      0.96      1839
weighted avg       0.98      0.98      0.98      1839



In [19]:
from sklearn.ensemble import AdaBoostClassifier
clf6 = AdaBoostClassifier()
clf6.fit(X_train,y_train)
clf6.score(X_test,y_test)
y_pred = clf6.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

           0       0.97      0.99      0.98      1587
           1       0.93      0.83      0.88       252

    accuracy                           0.97      1839
   macro avg       0.95      0.91      0.93      1839
weighted avg       0.97      0.97      0.97      1839

