Library yang digunakan

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\anasb\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Load dataset


In [3]:
data = pd.read_csv('dataset_sms_spam_v1.csv')
data.head()

Unnamed: 0,teks,label
0,[PROMO] Beli paket Flash mulai 1GB di MY TELKO...,2
1,2.5 GB/30 hari hanya Rp 35 Ribu Spesial buat A...,2
2,"2016-07-08 11:47:11.Plg Yth, sisa kuota Flash ...",2
3,"2016-08-07 11:29:47.Plg Yth, sisa kuota Flash ...",2
4,4.5GB/30 hari hanya Rp 55 Ribu Spesial buat an...,2


## Text Preprocessing

Case Folding


In [4]:
import re
#membuat fungai untuk case folding
def casefolding(text):
    text = text.lower()                             #mengubah kalimat jadi huruf kecil
    text = re.sub(r'https?://\S+|www\.S+', '',text) #menghapus url dari kalimat
    text = re.sub(r'[-+]?[0-9]+', '', text) 
    text = re.sub(r'[^\w\s]', '', text) #menghapus angka dari kalimat
    text = text.strip()                     #menghapus tanda baca
    return text

In [5]:
#membandingkan before dan after case flding
raw_sample = data['teks'].iloc[2]
case_folding = casefolding(raw_sample)

print('Raw data\t : ',raw_sample)
print('Case Folding\t : ', case_folding)

Raw data	 :  2016-07-08 11:47:11.Plg Yth, sisa kuota Flash Anda 478KB. Download MyTelkomsel apps di http://tsel.me/tsel utk cek kuota&beli paket Flash atau hub *363#
Case Folding	 :  plg yth sisa kuota flash anda kb download mytelkomsel apps di  utk cek kuotabeli paket flash atau hub


# Word Normalization


In [7]:
key_norm = pd.read_csv('key_norm.csv')

def text_normalize(text):
    text = ' '.join([key_norm[key_norm['singkat'] == word]['hasil'].values[0]
                     if (key_norm['singkat'] == word).any()
                     else word for word in text.split()])
    text = str.lower(text)
    return text


In [8]:
# membandingkan before dan after word normalization

raw_data = data['teks'].iloc[696]
word_normal = text_normalize(case_folding)

print('Raw Data\t :', raw_data)
print('Word Normalize\t :', word_normal)

Raw Data	 : Btw magicomnya yg sedang Gais, gaada yg gede
Word Normalize	 : pelanggan yang terhormat sisa kuota flash anda kb download mytelkomsel apps di untuk cek kuotabeli paket flash atau hubungi


FIltering (Stopword removal)

In [9]:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

stopwords_ind = stopwords.words('indonesian')


In [10]:
len(stopwords_ind)

758

In [11]:
stopwords_ind

['ada',
 'adalah',
 'adanya',
 'adapun',
 'agak',
 'agaknya',
 'agar',
 'akan',
 'akankah',
 'akhir',
 'akhiri',
 'akhirnya',
 'aku',
 'akulah',
 'amat',
 'amatlah',
 'anda',
 'andalah',
 'antar',
 'antara',
 'antaranya',
 'apa',
 'apaan',
 'apabila',
 'apakah',
 'apalagi',
 'apatah',
 'artinya',
 'asal',
 'asalkan',
 'atas',
 'atau',
 'ataukah',
 'ataupun',
 'awal',
 'awalnya',
 'bagai',
 'bagaikan',
 'bagaimana',
 'bagaimanakah',
 'bagaimanapun',
 'bagi',
 'bagian',
 'bahkan',
 'bahwa',
 'bahwasanya',
 'baik',
 'bakal',
 'bakalan',
 'balik',
 'banyak',
 'bapak',
 'baru',
 'bawah',
 'beberapa',
 'begini',
 'beginian',
 'beginikah',
 'beginilah',
 'begitu',
 'begitukah',
 'begitulah',
 'begitupun',
 'bekerja',
 'belakang',
 'belakangan',
 'belum',
 'belumlah',
 'benar',
 'benarkah',
 'benarlah',
 'berada',
 'berakhir',
 'berakhirlah',
 'berakhirnya',
 'berapa',
 'berapakah',
 'berapalah',
 'berapapun',
 'berarti',
 'berawal',
 'berbagai',
 'berdatangan',
 'beri',
 'berikan',
 'berikut'

Membuat stopwords removal

In [13]:
# menambahkan kata dalam stopworkd
more_stopword = ['tsel', 'gb', 'rb', 'btw']
stopwords_ind = stopwords_ind + more_stopword

def remove_stop_word(text):
    clean_words = []
    text = text.split()
    for word in text:
        if word not in stopwords_ind:
            clean_words.append(word)
    return " ".join(clean_words)

In [14]:
raw_sample = data['teks'].iloc[696]
case_folding = casefolding(raw_sample)
stopword_removal = remove_stop_word(case_folding)

print('Raw Data \t\t :', raw_data)
print('Case Folding \t\t :', case_folding)
print('Stopword Removal \t\t', stopword_removal)

Raw Data 		 : Btw magicomnya yg sedang Gais, gaada yg gede
Case Folding 		 : btw magicomnya yg sedang gais gaada yg gede
Stopword Removal 		 magicomnya yg gais gaada yg gede


Stemming


In [15]:
!pip -q install sastrawi



In [16]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

factory = StemmerFactory()
stemmer = factory.create_stemmer()

def stemming(text):
    text = stemmer.stem(text)
    return text


In [17]:
raw_sample = data['teks'].iloc[696]
case_folding = casefolding(raw_sample)
stopword_removal = remove_stop_word(case_folding)
text_stemming = stemming(stopword_removal)

print('Raw Data \t\t :', raw_data)
print('Case Folding \t\t :', case_folding)
print('Stopword Removal \t\t', stopword_removal)
print('Stemming \t\t :', text_stemming)

Raw Data 		 : Btw magicomnya yg sedang Gais, gaada yg gede
Case Folding 		 : btw magicomnya yg sedang gais gaada yg gede
Stopword Removal 		 magicomnya yg gais gaada yg gede
Stemming 		 : magicomnya yg gais gaada yg gede


text preprocessing pipeline

In [18]:
def text_preprocessing_process(text):
    text = casefolding(text)
    text = text_normalize(text)
    text = remove_stop_word(text)
    text = stemming(text)
    return text

In [19]:
%%time
data['clean_teks']= data['teks'].apply(text_preprocessing_process)

CPU times: total: 2min 8s
Wall time: 8min 9s


In [20]:
data

Unnamed: 0,teks,label,clean_teks
0,[PROMO] Beli paket Flash mulai 1GB di MY TELKO...,2,promo beli paket flash my telkomsel app extra ...
1,2.5 GB/30 hari hanya Rp 35 Ribu Spesial buat A...,2,rupiah ribu spesial pilih aktif promo sd novem...
2,"2016-07-08 11:47:11.Plg Yth, sisa kuota Flash ...",2,langgan hormat sisa kuota flash kb download my...
3,"2016-08-07 11:29:47.Plg Yth, sisa kuota Flash ...",2,langgan hormat sisa kuota flash kb download my...
4,4.5GB/30 hari hanya Rp 55 Ribu Spesial buat an...,2,rupiah ribu spesial pilih aktif buru skb
...,...,...,...
1138,"Yooo sama2, oke nanti aku umumin di grup kelas",0,yooo oke umumin grup kelas
1139,😁 sebelumnya ga ad nulis kerudung. Kirain warn...,0,nulis kerudung kirain warna jins
1140,Mba mau kirim 300 ya,0,mbak kirim ya
1141,nama1 beaok bwrangkat pagi...mau cas atay tra...,0,nama beaok bwrangkat pagimau cas atay tranfer


In [21]:
data.to_csv('clean_data.csv')

Feature Engineering

In [22]:
# pisahkan kolom feature dan target
x = data['clean_teks']
y = data['label']


In [23]:
x

0       promo beli paket flash my telkomsel app extra ...
1       rupiah ribu spesial pilih aktif promo sd novem...
2       langgan hormat sisa kuota flash kb download my...
3       langgan hormat sisa kuota flash kb download my...
4                rupiah ribu spesial pilih aktif buru skb
                              ...                        
1138                           yooo oke umumin grup kelas
1139                     nulis kerudung kirain warna jins
1140                                        mbak kirim ya
1141        nama beaok bwrangkat pagimau cas atay tranfer
1142                                       nomor bri nama
Name: clean_teks, Length: 1143, dtype: object

In [24]:
y

0       2
1       2
2       2
3       2
4       2
       ..
1138    0
1139    0
1140    0
1141    0
1142    0
Name: label, Length: 1143, dtype: int64

Feature Extraction (TF-IDF dan N-Gram)


In [25]:
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

vec_TF_IDF = TfidfVectorizer(ngram_range=(1,1))
vec_TF_IDF.fit(x)

x_tf_idf = vec_TF_IDF.transform(x)

pickle.dump(vec_TF_IDF.vocabulary_,open("feature_tf-idf.sav", "wb"))


In [26]:
#menampilkan vocabulary dari tf_idf
vec_TF_IDF.vocabulary_

{'promo': 2313,
 'beli': 323,
 'paket': 2106,
 'flash': 871,
 'my': 1898,
 'telkomsel': 2893,
 'app': 162,
 'extra': 842,
 'kuota': 1567,
 'lte': 1670,
 'telpon': 2896,
 'mnthr': 1849,
 'buru': 480,
 'cek': 521,
 'tselmemytsel': 3031,
 'sk': 2709,
 'rupiah': 2521,
 'ribu': 2470,
 'spesial': 2768,
 'pilih': 2193,
 'aktif': 66,
 'sd': 2575,
 'november': 2007,
 'langgan': 1595,
 'hormat': 1117,
 'sisa': 2702,
 'kb': 1415,
 'download': 753,
 'mytelkomsel': 1900,
 'apps': 167,
 'kuotabeli': 1568,
 'hubung': 1141,
 'skb': 2710,
 'ekstra': 805,
 'pulsa': 2350,
 'internet': 1223,
 'bulan': 466,
 'sjk': 2708,
 'augsept': 217,
 'detail': 665,
 'iring': 1245,
 'tarif': 2859,
 'panjang': 2119,
 'hits': 1106,
 'armada': 180,
 'curi': 600,
 'hati': 1070,
 'tekan': 2887,
 'okcall': 2059,
 'informasi': 1193,
 'eks': 802,
 'loh': 1657,
 'internetan': 1224,
 'pakai': 2104,
 'volume': 3147,
 'ultima': 3084,
 'mbhr': 1756,
 'harga': 1058,
 'tariflokasi': 2861,
 'tselmefl': 3029,
 'coboy': 568,
 'jr': 1332

In [30]:
#melihat jumlah feature
print(len(vec_TF_IDF.get_feature_names_out()))

3415


In [31]:
# melihat fitur apa saja yang ada di dalam corpus
print(vec_TF_IDF.get_feature_names_out())

['aa' 'aamiiiin' 'aamiin' ... 'zjt' 'zona' 'ztkm']


In [33]:
x1 = vec_TF_IDF.transform(x).toarray()
data_tabular_tf_idf = pd.DataFrame(x1,columns=vec_TF_IDF.get_feature_names_out())
data_tabular_tf_idf

Unnamed: 0,aa,aamiiiin,aamiin,ab,abadi,abai,abbee,abdul,acara,acaratks,...,yudisium,yuk,yuks,yuni,yunit,zalora,zarkasi,zjt,zona,ztkm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
data_tabular_tf_idf.iloc[10:20,60:70]

Unnamed: 0,akang,akangteteh,akbar,akreditasi,akses,aksi,aktif,aktifasi,aktivasi,aktivitas
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0.0,0.0,0.0,0.0,0.0,0.0,0.14944,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,0.0,0.0,0.0,0.0,0.262305,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.244053,0.0,0.382416,0.0
15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Feature Elecetion

In [37]:
x_train = np.array(data_tabular_tf_idf)
y_train = np.array(y)

In [39]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
chi2_feature = SelectKBest(chi2, k=3000)
x_kbest_feature = chi2_feature.fit_transform(x_train, y_train)

# untuk reduced features
print('Original Feature Number', x_train.shape[1])
print('Reduced feature Number', x_kbest_feature.shape[1])

Original Feature Number 3415
Reduced feature Number 3000


In [42]:
Data = pd.DataFrame(chi2_feature.scores_,columns=['Nilai'])
Data

Unnamed: 0,Nilai
0,0.843018
1,0.419698
2,1.558607
3,0.686416
4,0.759870
...,...
3410,1.126664
3411,0.503012
3412,0.686416
3413,2.918687


In [43]:
# Menampilkan feature beserta nilainya

feature = vec_TF_IDF.get_feature_names_out()
feature

Data['Fitur'] = feature
Data

Unnamed: 0,Nilai,Fitur
0,0.843018,aa
1,0.419698,aamiiiin
2,1.558607,aamiin
3,0.686416,ab
4,0.759870,abadi
...,...,...
3410,1.126664,zalora
3411,0.503012,zarkasi
3412,0.686416,zjt
3413,2.918687,zona


In [44]:
#Mengurutkan nilai feature terbaik
Data.sort_values(by='Nilai', ascending=False)

Unnamed: 0,Nilai,Fitur
2106,48.939838,paket
1567,45.025343,kuota
1031,43.690947,hadiah
2196,36.979908,pin
323,33.962373,beli
...,...,...
1536,0.044714,kopi
307,0.044468,bca
1712,0.031575,maksimal
3139,0.012716,via


In [45]:
mask = chi2_feature.get_support()
mask

array([ True,  True,  True, ...,  True,  True,  True])

In [46]:
# Menampilkan fitur yang terpilih berdasarkan nilai mask atau nilai tertinggi yang sudah diteta[kan pada chi square

new_feature=[]
for bool, f in zip(mask, feature):
    if bool:
        new_feature.append(f)
    selected_feature=new_feature
selected_feature


['aa',
 'aamiiiin',
 'aamiin',
 'ab',
 'abadi',
 'abai',
 'abbee',
 'abdul',
 'acaratks',
 'account',
 'ada',
 'adapromo',
 'adi',
 'adik',
 'adison',
 'admin',
 'administrasi',
 'adminlte',
 'ado',
 'adrian',
 'adu',
 'aduh',
 'advertising',
 'aea',
 'aesthetic',
 'afbe',
 'affc',
 'afr',
 'afrika',
 'agam',
 'agen',
 'agendain',
 'agenpulsa',
 'ags',
 'agst',
 'agsts',
 'agt',
 'agtskinfodlj',
 'agua',
 'agun',
 'agus',
 'agust',
 'agustuskunjungi',
 'ahaha',
 'ahub',
 'aidzin',
 'aigoo',
 'air',
 'aja',
 'ajaa',
 'ajaaa',
 'ajabri',
 'ajak',
 'ajeng',
 'akang',
 'akbar',
 'akreditasi',
 'akses',
 'aksi',
 'aktif',
 'aktifasi',
 'aktivasi',
 'aktivitas',
 'akucintaislam',
 'akumulasi',
 'akun',
 'akurasi',
 'akurat',
 'alaikum',
 'alaikumsaya',
 'alaiqum',
 'alam',
 'alamat',
 'alamsyah',
 'alesannya',
 'algoritma',
 'alhamdulillah',
 'alhuda',
 'ali',
 'aliando',
 'all',
 'allah',
 'allahaamiin',
 'alphard',
 'alquran',
 'alur',
 'aman',
 'amanda',
 'ambil',
 'amin',
 'ampuun',
 'an

In [47]:
# membuat vocabulary baru berdasarkan feature yang terseleksi

new_selected_feature = {}

for (k,v) in vec_TF_IDF.vocabulary_.items():
    if k in selected_feature:
        new_selected_feature[k] = v
        
new_selected_feature

{'promo': 2313,
 'beli': 323,
 'paket': 2106,
 'flash': 871,
 'my': 1898,
 'telkomsel': 2893,
 'app': 162,
 'extra': 842,
 'kuota': 1567,
 'lte': 1670,
 'telpon': 2896,
 'mnthr': 1849,
 'buru': 480,
 'cek': 521,
 'tselmemytsel': 3031,
 'sk': 2709,
 'rupiah': 2521,
 'ribu': 2470,
 'spesial': 2768,
 'pilih': 2193,
 'aktif': 66,
 'sd': 2575,
 'november': 2007,
 'langgan': 1595,
 'hormat': 1117,
 'sisa': 2702,
 'kb': 1415,
 'download': 753,
 'mytelkomsel': 1900,
 'apps': 167,
 'kuotabeli': 1568,
 'hubung': 1141,
 'skb': 2710,
 'ekstra': 805,
 'pulsa': 2350,
 'internet': 1223,
 'bulan': 466,
 'sjk': 2708,
 'augsept': 217,
 'detail': 665,
 'iring': 1245,
 'tarif': 2859,
 'panjang': 2119,
 'hits': 1106,
 'armada': 180,
 'curi': 600,
 'hati': 1070,
 'tekan': 2887,
 'okcall': 2059,
 'informasi': 1193,
 'eks': 802,
 'loh': 1657,
 'internetan': 1224,
 'pakai': 2104,
 'volume': 3147,
 'ultima': 3084,
 'mbhr': 1756,
 'harga': 1058,
 'tariflokasi': 2861,
 'tselmefl': 3029,
 'coboy': 568,
 'jr': 1332

In [48]:
len(new_selected_feature)

3000

In [54]:
pickle.dump(new_selected_feature,open("new_selected_feature_tf-idf.sav","wb"))

In [55]:
# menampilkan fitur-fitur yang diseleksi

data_selected_feature = pd.DataFrame(x_kbest_feature, columns=selected_feature)
data_selected_feature

Unnamed: 0,aa,aamiiiin,aamiin,ab,abadi,abai,abbee,abdul,acaratks,account,...,yudisium,yuk,yuks,yuni,yunit,zalora,zarkasi,zjt,zona,ztkm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Modeling

In [56]:
selected_x = x_kbest_feature
selected_x

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [57]:
import random
from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import MultinomialNB

In [59]:
x = selected_x
y = data.label

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [60]:
print('Banyaknya x_train : ', len(x_train))
print('Banyaknya x_test : ', len(x_test))
print('Banyaknya y_train : ', len(y_train))
print('Banyaknya y_test : ', len(y_test))

Banyaknya x_train :  914
Banyaknya x_test :  229
Banyaknya y_train :  914
Banyaknya y_test :  229


In [65]:
# proses training menggunskan naive bayes
text_algorithm = MultinomialNB()

In [66]:
model = text_algorithm.fit(x_train, y_train)

In [68]:
# membuat model prediksi

data_input = ("promo beli paket flash my telkomsel app extra kuota lte extra telpon mnthr buru cek tselmemytsel sk")
data_input = text_preprocessing_process(data_input)

tfidf = TfidfVectorizer

loaded_vec = TfidfVectorizer(decode_error="replace", vocabulary=set(pickle.load(open("new_selected_feature_tf-idf.sav", "rb"))))
hasil = model.predict(loaded_vec.fit_transform([data_input]))

if(hasil==0):
    s = "SMS Normal"
elif(hasil==1):
    s = "SMS Penipuan"
else:
    s = "SMS Promo"
    
print("Hasil Prediksi : \n", s)

Hasil Prediksi : 
 SMS Promo


## Evaluasi Model

In [69]:
# library yang dibutuhkan
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

predicted = model.predict(x_test)

CM = confusion_matrix(y_test, predicted)

print(classification_report(y_test, predicted))

              precision    recall  f1-score   support

           0       0.95      0.96      0.95       126
           1       0.92      0.89      0.91        66
           2       0.84      0.84      0.84        37

    accuracy                           0.92       229
   macro avg       0.90      0.90      0.90       229
weighted avg       0.92      0.92      0.92       229



In [70]:
# menyimpan model
pickle.dump(model, open("model_fraud.sav", "wb"))