# Text Classification  
Requirements: `pip install tensorflow pandas numpy scikit-learn`

In [1]:
import pandas as pd
import numpy as np
# Load the SMS corpus and derive a numeric `target` column from the text
# label: spam -> 1, ham -> 0 (a label outside this mapping becomes NaN).
df = pd.read_csv('./dataset/spam.csv').assign(
    target=lambda frame: frame['label'].map({'spam': 1, 'ham': 0})
)
print(df.head())

  label                                                sms  target
0   ham  Go until jurong point, crazy.. Available only ...       0
1   ham                      Ok lar... Joking wif u oni...       0
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...       1
3   ham  U dun say so early hor... U c already then say...       0
4   ham  Nah I don't think he goes to usf, he lives aro...       0


In [2]:
# Pull the message texts and their numeric targets out of the frame as NumPy
# arrays, then preview the first three entries of each.
text_train, y_train = (np.asarray(df[column]) for column in ('sms', 'target'))
print(text_train[:3], y_train[:3], sep='\n')


['Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...'
 'Ok lar... Joking wif u oni...'
 "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"]
[0 0 1]


In [3]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features: drop English stop words and ignore terms that appear
# in fewer than 5 messages. The vocabulary is learned and the corpus encoded
# in a single call; the result is a sparse document-term count matrix.
vect = CountVectorizer(stop_words="english", min_df=5)
X_train = vect.fit_transform(text_train)
print(X_train.shape, y_train.shape, y_train[:5], sep='\n')

(5572, 1602)
(5572,)
[0 0 1 0 0]


In [4]:
from tensorflow import keras
from tensorflow.keras import layers

# Feed-forward classifier over the bag-of-words vectors.
# The input width is read from X_train rather than hard-coded, so the network
# stays in sync with the vectorizer if the corpus, `min_df` or the stop-word
# list changes (the vocabulary size is not a fixed property of the data).
model = keras.Sequential([
    layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    # One softmax unit per class (0 = ham, 1 = spam).
    layers.Dense(2, activation='softmax')
])

# Sparse categorical cross-entropy accepts the integer class ids in y_train
# directly, so the labels do not need to be one-hot encoded.
model.compile(optimizer=keras.optimizers.Adam(),
             loss=keras.losses.SparseCategoricalCrossentropy(),
             metrics=['accuracy'])
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               820736    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 258       
Total params: 985,218
Trainable params: 985,218
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Train for 10 epochs in mini-batches of 64; Keras holds out the last 30% of
# the rows (taken before shuffling) as the validation set.
# The SciPy sparse count matrix is converted to a dense float32 array first:
# NumPy arrays are a documented input type for `fit`, and `validation_split`
# needs inputs it can slice, which newer TensorFlow releases do not accept as
# SciPy sparse matrices. Casting while still sparse avoids a dense int64 copy.
history = model.fit(
    X_train.astype('float32').toarray(),
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.3,
    verbose=2,
)

Train on 3900 samples, validate on 1672 samples
Epoch 1/10
3900/3900 - 1s - loss: 0.2241 - acc: 0.9233 - val_loss: 0.0541 - val_acc: 0.9827
Epoch 2/10
3900/3900 - 1s - loss: 0.0290 - acc: 0.9923 - val_loss: 0.0555 - val_acc: 0.9833
Epoch 3/10
3900/3900 - 0s - loss: 0.0060 - acc: 0.9987 - val_loss: 0.0744 - val_acc: 0.9839
Epoch 4/10
3900/3900 - 0s - loss: 0.0036 - acc: 0.9995 - val_loss: 0.0729 - val_acc: 0.9839
Epoch 5/10
3900/3900 - 0s - loss: 0.0034 - acc: 0.9995 - val_loss: 0.0771 - val_acc: 0.9839
Epoch 6/10
3900/3900 - 0s - loss: 0.0033 - acc: 0.9992 - val_loss: 0.0796 - val_acc: 0.9839
Epoch 7/10
3900/3900 - 1s - loss: 0.0032 - acc: 0.9992 - val_loss: 0.0908 - val_acc: 0.9844
Epoch 8/10
3900/3900 - 1s - loss: 0.0030 - acc: 0.9995 - val_loss: 0.0836 - val_acc: 0.9839
Epoch 9/10
3900/3900 - 0s - loss: 0.0031 - acc: 0.9995 - val_loss: 0.1049 - val_acc: 0.9839
Epoch 10/10
3900/3900 - 0s - loss: 0.0028 - acc: 0.9995 - val_loss: 0.0973 - val_acc: 0.9844


In [6]:
# Classify an unseen message: encode it with the fitted vectorizer, then take
# the index of the largest softmax output as the class (0 = ham, 1 = spam).
# `Sequential.predict_classes` is deprecated and was removed in
# TensorFlow 2.6; argmax over `predict` is the documented replacement and
# also works on older versions.
sms_test = ['Hi Paul, would you come around tonight']

test_str  = vect.transform(sms_test)

# Densify the single-row sparse vector so `predict` receives a NumPy array.
np.argmax(model.predict(test_str.toarray()), axis=-1)

array([0], dtype=int64)

In [7]:
# Classify a second unseen message: encode it with the fitted vectorizer,
# then take the index of the largest softmax output (0 = ham, 1 = spam).
# `Sequential.predict_classes` is deprecated and was removed in
# TensorFlow 2.6; argmax over `predict` is the documented replacement and
# also works on older versions.
sms_test = ['Free SMS service for anyone']
test_str  = vect.transform(sms_test)

# Densify the single-row sparse vector so `predict` receives a NumPy array.
np.argmax(model.predict(test_str.toarray()), axis=-1)

array([1], dtype=int64)