In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Model
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding, GRU, SimpleRNN
from keras.optimizers import RMSprop
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils.data_utils import pad_sequences
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
%matplotlib inline

from sklearn.metrics import classification_report, confusion_matrix

In [20]:
data = pd.read_csv('./dataset_spam.csv')
data.tail()

Unnamed: 0,content,label
6663,f a n s b e t i n g agent bola terpercaya di i...,spam
6664,pasti ada campur tangan ulah orang dalam karen...,normal
6665,mauuu puunyaa kullittt puutiiihh secaaraa inns...,spam
6666,bagi aku kaaa dikit aja,normal
6667,mila sipitt allah tuh benci sama orang yg meru...,normal


In [21]:
X = data.content
Y = data.label
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(Y)
Y = Y.reshape(-1, 1)

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [22]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

In [23]:
max_words = 1000
max_len = 150
tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X_train)
sequence = tok.texts_to_sequences(X_train)
sequence_matrix = pad_sequences(sequence, maxlen=max_len)

In [24]:
def RNN():
    inputs = Input(name='inputs', shape=[max_len])
    layer = Embedding(max_words, 50, input_length=max_len)(inputs)
    layer = LSTM(64)(layer)
    layer = Dense(256, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1, name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model

In [25]:
model = RNN()
model.summary()
model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, 150)]             0         
                                                                 
 embedding_3 (Embedding)     (None, 150, 50)           50000     
                                                                 
 lstm_3 (LSTM)               (None, 64)                29440     
                                                                 
 FC1 (Dense)                 (None, 256)               16640     
                                                                 
 activation_6 (Activation)   (None, 256)               0         
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 out_layer (Dense)           (None, 1)                 257 

In [26]:
# model.fit(sequence_matrix, batch_size=128, epochs=10, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])

model.fit(sequence_matrix, Y_train,batch_size=128, epochs=10,
          validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


<keras.callbacks.History at 0x1eaa8763910>

In [29]:
test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = pad_sequences(test_sequences, maxlen=max_len)

In [30]:
accr = model.evaluate(test_sequences_matrix, Y_test)



In [31]:
print('Test set\nLoss: {:0.3f}\nAccuracy: {:0.3f}'.format(accr[0],accr[1]))

Test set
Loss: 0.198
Accuracy: 0.949


# Basic NLP Metrics

In [32]:
y_predictions = model.predict(test_sequences_matrix)



In [33]:
confusion_matrix = confusion_matrix(Y_test, np.rint(y_predictions))

In [34]:
confusion_matrix

array([[1694,   12],
       [  90,  205]], dtype=int64)

In [35]:
print(classification_report(Y_test, np.rint(y_predictions)))

              precision    recall  f1-score   support

           0       0.95      0.99      0.97      1706
           1       0.94      0.69      0.80       295

    accuracy                           0.95      2001
   macro avg       0.95      0.84      0.89      2001
weighted avg       0.95      0.95      0.95      2001



# Advances NLP Metrics

In [36]:
from nltk.translate.bleu_score import sentence_bleu

In [37]:
reference = [['budi', 'pergi', 'ke', 'sekolah']]
candidate = ['budi', 'pergi', 'ke', 'sekolah']
score = sentence_bleu(reference, candidate)
print(score)

1.0


In [38]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["aku", "suka", "makan", "nasi", "goreng", "di", "tempat", "ini"]

score = sentence_bleu(reference, candidate)
print(score)

0.5410822690539396


In [39]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["aku", "suka", "makan", "nasi", "goreng", "di", "tempat", "ini"]

score = sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
print(score)

0.75


In [40]:
print('Individual 1-gram: %f' % sentence_bleu(reference, candidate, weights=(1, 0, 0, 0)))
print('Individual 2-gram: %f' % sentence_bleu(reference, candidate, weights=(0, 1, 0, 0)))
print('Individual 3-gram: %f' % sentence_bleu(reference, candidate, weights=(0, 0, 1, 0)))
print('Individual 4-gram: %f' % sentence_bleu(reference, candidate, weights=(0, 0, 0, 1)))

Individual 1-gram: 0.750000
Individual 2-gram: 0.571429
Individual 3-gram: 0.500000
Individual 4-gram: 0.400000


In [41]:
score = sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))
print(score)

0.5410822690539396


In [42]:
print('Cumulative 1-gram: %f' % sentence_bleu(reference, candidate, weights=(1, 0, 0, 0)))
print('Cumulative 2-gram: %f' % sentence_bleu(reference, candidate, weights=(0.5, 0.5, 0, 0)))
print('Cumulative 3-gram: %f' % sentence_bleu(reference, candidate, weights=(0.33, 0.33, 0.33, 0)))

print('Cumulative 4-gram: %f' % sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25)))

Cumulative 1-gram: 0.750000
Cumulative 2-gram: 0.654654
Cumulative 3-gram: 0.601489
Cumulative 4-gram: 0.541082


In [43]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"]
score = sentence_bleu(reference, candidate)
print(score)

1.0


In [44]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["saya", "suka", "makan", "nasi", "goreng", "di", "tempat", "ini"]
score = sentence_bleu(reference, candidate)
print(score)

0.7071067811865475


In [45]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["saya", "suka", "beli", "nasi", "goreng", "di", "tempat", "ini"]
score = sentence_bleu(reference, candidate)
print(score)

5.87583260478785e-78


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [46]:
reference = [["saya", "suka", "makan", "nasi", "goreng", "di", "restoran", "ini"] ]
candidate = ["q", "w", "e", "r", "t", "y", "u", "i"]
score = sentence_bleu(reference, candidate)
print(score)

0


In [47]:
import evaluate
rouge = evaluate.load('rouge')
predictions = ["Hari ini saya belajar NLP"]
references = [
              ["Hari ini saya belajar NLP di rumah"]
             ]
results = rouge.compute(predictions=predictions, references=references)
print(results)

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

{'rouge1': 0.8333333333333333, 'rouge2': 0.8, 'rougeL': 0.8333333333333333, 'rougeLsum': 0.8333333333333333}


In [48]:
rouge = evaluate.load('rouge')
predictions = ["Hari ini saya belajar NLP di rumah"]
references = [
              ["Hari ini saya belajar NLP di rumah"]
             ]
results = rouge.compute(predictions=predictions, references=references)
print(results)

{'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0}


In [49]:
rouge = evaluate.load('rouge')
predictions = ["Hari ini saya belajar di rumah"]
references = [
              ["Hari ini saya belajar NLP di rumah"]
             ]
results = rouge.compute(predictions=predictions, references=references)
print(results)

{'rouge1': 0.923076923076923, 'rouge2': 0.7272727272727272, 'rougeL': 0.923076923076923, 'rougeLsum': 0.923076923076923}
