In [28]:
## Spam Classification Using SVM
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC

In [29]:
## Loading the data
df=pd.read_csv('spam.csv')
df.head()

Unnamed: 0,Label,EmailText
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [30]:
## Value count of Labels
df.Label.value_counts()

ham     4825
spam     747
Name: Label, dtype: int64

In [31]:
## Encoding Spam to 1 and ham to 0
df['Label']=df['Label'].map({'spam':1,'ham':0})

In [32]:

df.Label.value_counts()

0    4825
1     747
Name: Label, dtype: int64

In [33]:
X=df['EmailText']
Y=df['Label']

In [34]:
## Spliting data into train and test
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,stratify=Y)

In [35]:
## Taking word into matrix
vec=CountVectorizer(stop_words='english')
x_train=vec.fit_transform(x_train)

In [36]:
x_test=vec.transform(x_test)

In [37]:
x_test

<1393x7059 sparse matrix of type '<class 'numpy.int64'>'
	with 9557 stored elements in Compressed Sparse Row format>

In [38]:
## SVM Classifier
sv=SVC()
sv.fit(x_train,y_train)

SVC()

In [39]:
print('Accuracy :',sv.score(x_test,y_test))

Accuracy : 0.9820531227566404


In [50]:
## GridSearchCV using SVC classifier
from sklearn.model_selection import GridSearchCV

tuned_parameters = {'kernel': ['rbf','linear'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]}


grid=GridSearchCV(SVC(),tuned_parameters)

In [45]:
grid.fit(x_train,y_train)

print(grid.best_params_)
#Step5: Test Accuracy
print(grid.score(x_test,y_test))

{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.9856424982053122


In [49]:
## Review prediction
print(grid.predict(vec.transform(['Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...'])))

[0]
