### **Import Library**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import string, re, requests, csv
from google.colab import drive
from wordcloud import WordCloud
from gensim.corpora import WikiCorpus

In [None]:
from nltk import word_tokenize, sent_tokenize
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
!pip install emoji



### **Load Dataset**

In [None]:
# Load the training split and keep only the text and label columns.
train_data = pd.read_csv('/content/train.csv').loc[:, ['text', 'label']]
train_data.shape

(7967, 2)

In [None]:
train_data['label'].value_counts()

neutral     2926
negative    2775
positive    2266
Name: label, dtype: int64

In [None]:
# Load the test split and keep only the text and label columns.
test_data = pd.read_csv('/content/test.csv').loc[:, ['text', 'label']]
test_data.shape

(1992, 2)

In [None]:
test_data['label'].value_counts()

neutral     732
negative    694
positive    566
Name: label, dtype: int64

### **All Preprocessing**

In [None]:
# comments = data['text']
comments_train = train_data['text']
comments_test = test_data['text']

In [None]:
### 1. Replace Username
def replace_username(data):
  """Replace every @mention in `data` with the literal token "@username".

  A mention is "@" followed by a handle of 1 to 30 letters, digits or
  underscores; the handle may contain single (non-consecutive) dots but
  cannot end in one.

  Args:
    data: Text to process.

  Returns:
    The text with each matched mention replaced by "@username".
  """
  # Raw string: "\." inside a plain string literal is an invalid escape
  # sequence and triggers a warning on current Python versions.
  pattern = r"(?:@)([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\.(?!\.))){0,28}(?:[A-Za-z0-9_]))?)"
  return re.sub(pattern, "@username", data)

### 2. Cleansing Data
def cleansing(data):
    """Reduce raw text to lowercase ASCII words separated by single spaces.

    Steps, in order: lowercase, turn every `string.punctuation` character
    into a space, drop all non-ASCII characters, turn newlines into spaces,
    delete digits, and collapse runs of whitespace.

    Args:
        data: Text to clean.

    Returns:
        The cleaned text.
    """
    # lowercase, then blank out punctuation so it still separates words
    punct_to_space = {ord(ch): ' ' for ch in string.punctuation}
    text = data.lower().translate(punct_to_space)

    # keep ASCII characters only
    text = text.encode('ascii', 'ignore').decode('utf-8')
    text = re.sub(r'[^\x00-\x7f]', r'', text)

    # newlines become spaces
    text = text.replace('\n', ' ')

    # digits are deleted outright (no space is inserted in their place)
    text = re.sub(r'[0-9]', '', text)

    # collapse whitespace and trim both ends
    return ' '.join(text.split())

# ### 3. Remove Emoji  
import sys
def remove_emoji(data):
    """Replace each run of characters from the listed Unicode ranges with one space.

    Args:
        data: Text to process.

    Returns:
        The text with every matched run replaced by a single space.
    """
    char_class = (
        "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"  # NOTE: very wide range, covers far more than emoji
        "]+"
    )
    return re.sub(char_class, r' ', data, flags=re.UNICODE)

### 3. Convert Emoji
import emoji
import functools
import operator
import re

# Lookup table: emoji character -> meaning text, taken from the 'emoji' and
# 'makna' columns of the CSV. Later duplicates overwrite earlier ones.
df_emoji = pd.read_csv('emoji_to_text.csv')
UNICODE_EMO = dict(zip(df_emoji['emoji'], df_emoji['makna']))


def _split_on_emojis(text):
    """Cut `text` into alternating non-emoji / emoji pieces, in order."""
    if hasattr(emoji, 'get_emoji_regexp'):
        # emoji < 2.0: the regex has a capturing group, so split() keeps the emojis.
        return emoji.get_emoji_regexp().split(text)

    # emoji >= 2.0 removed get_emoji_regexp(); rebuild the same pieces from
    # the match spans reported by emoji_list().
    pieces = []
    cursor = 0
    for match in emoji.emoji_list(text):
        pieces.append(text[cursor:match['match_start']])
        pieces.append(match['emoji'])
        cursor = match['match_end']
    pieces.append(text[cursor:])
    return pieces


def convert_emojis(text):
    """Replace emojis listed in `UNICODE_EMO` with their textual meaning.

    Every emoji is first isolated as its own whitespace-separated token.
    Each emoji that has an entry in `UNICODE_EMO` is then replaced by its
    meaning with commas and colons removed and words joined by "_".

    Args:
        text: Text that may contain emojis.

    Returns:
        The converted text, lowercased, with whitespace runs collapsed.
    """
    # isolate emojis as separate tokens
    tokens = [tok for piece in _split_on_emojis(text) for tok in piece.split()]
    text = ' '.join(tokens)

    # Literal replacement: emoji keys are not valid regex fragments in general
    # (e.g. a keycap starting with "*"), and the meaning text must not be
    # interpreted as a regex replacement template.
    for emot, meaning in UNICODE_EMO.items():
        label = "_".join(meaning.replace(",", "").replace(":", "").split())
        text = text.replace(emot, label)
    return text.lower()
  
### 4. Normalize Kata Alay
# CONSTRUCT KAMUS ALAY
# Two public slang -> formal word lists: a tab-separated file read without a
# header row, and a CSV from which the 'slang' and 'formal' columns are taken.
text_path1 = 'https://raw.githubusercontent.com/ramaprakoso/analisis-sentimen/master/kamus/kbba.txt'
text_path2 = 'https://raw.githubusercontent.com/nasalsabila/kamus-alay/master/colloquial-indonesian-lexicon.csv'
kamus_alay1 = pd.read_csv(text_path1, delimiter="\t", header=None, names=['slang', 'formal'])
kamus_alay2 = pd.read_csv(text_path2)

# Drop rows with a missing side: read_csv turns cells such as "nan" or "null"
# into NaN, and a NaN value would later break the ' '.join() in normalize_text.
kamus_alay = (
    pd.concat([kamus_alay1, kamus_alay2[['slang', 'formal']]])
    .dropna(subset=['slang', 'formal'])
    .reset_index(drop=True)
)

# Later rows overwrite earlier ones for a repeated slang word, exactly as a
# row-by-row assignment would.
dict_alay = dict(zip(kamus_alay['slang'], kamus_alay['formal']))

def normalize_text(data):
  """Swap every token that has an entry in `dict_alay` for its mapped value.

  The text is split with `word_tokenize`; tokens without an entry are kept
  unchanged, and all tokens are joined back with single spaces.
  """
  return ' '.join(dict_alay.get(token, token) for token in word_tokenize(data))


### 5. Remove Stopwords
# CONSTRUCT STOPWORDS
# Remote stop-word lists (presumably one entry per line) plus NLTK's Indonesian list.
rama_stopword = "https://raw.githubusercontent.com/ramaprakoso/analisis-sentimen/master/kamus/stopword.txt"
yutomo_stopword = "https://raw.githubusercontent.com/yasirutomo/python-sentianalysis-id/master/data/feature_list/stopwordsID.txt"
fpmipa_stopword = "https://raw.githubusercontent.com/onlyphantom/elangdev/master/elang/word2vec/utils/stopwords-list/fpmipa-stopwords.txt"
sastrawi_stopword = "https://raw.githubusercontent.com/onlyphantom/elangdev/master/elang/word2vec/utils/stopwords-list/sastrawi-stopwords.txt"
aliakbar_stopword = "https://raw.githubusercontent.com/onlyphantom/elangdev/master/elang/word2vec/utils/stopwords-list/aliakbars-bilp.txt"
pebahasa_stopword = "https://raw.githubusercontent.com/onlyphantom/elangdev/master/elang/word2vec/utils/stopwords-list/pebbie-pebahasa.txt"
elang_stopword = "https://raw.githubusercontent.com/onlyphantom/elangdev/master/elang/word2vec/utils/stopwords-id.txt"
nltk_stopword = stopwords.words('indonesian')

path_stopwords = [rama_stopword, yutomo_stopword, fpmipa_stopword, sastrawi_stopword,
                  aliakbar_stopword, pebahasa_stopword, elang_stopword]

# CUSTOM STOPWORDS
other = '''
admin mimin min minkes kalo nya username
'''

# Merge all lists. Start from a copy so the NLTK list is not extended in place.
stopwords_l = list(nltk_stopword)
for path in path_stopwords:
    response = requests.get(path, timeout=30)
    # Fail loudly instead of adding the words of an HTTP error page as stopwords.
    response.raise_for_status()
    # splitlines() also handles "\r\n"; strip padding and skip blank lines so
    # every entry can actually match a token.
    stopwords_l += [line.strip() for line in response.text.splitlines() if line.strip()]

st_words = set(stopwords_l)
other_stopword = set(other.split())

stop_words = st_words | other_stopword

def remove_stopword(text, stop_words=stop_words):
    """Drop every token of `text` that is contained in `stop_words`.

    Args:
        text: Text to filter; it is split with `word_tokenize`.
        stop_words: Collection of tokens to remove. Defaults to the
            module-level `stop_words` as it was when this function was defined.

    Returns:
        The remaining tokens joined by single spaces.
    """
    kept = []
    for token in word_tokenize(text):
        if token in stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)

def preprocessing(data):
  """Run the full text-cleaning pipeline on one comment.

  Order: replace @mentions, convert emojis to text, cleanse, normalise
  slang words, remove stopwords.

  Args:
    data: Raw comment text.

  Returns:
    The preprocessed text.
  """
  data = replace_username(data)
  # Emojis must be converted before cleansing(): cleansing() drops every
  # non-ASCII character, so running it first leaves no emoji to convert.
  # The "_" separators that convert_emojis() inserts are punctuation and are
  # turned into spaces by cleansing().
  data = convert_emojis(data)
  data = cleansing(data)
  data = normalize_text(data)
  data = remove_stopword(data)

  return data

In [None]:
# Always start from the raw text columns so that re-running this cell does
# not preprocess already-preprocessed text.
comments_train = train_data['text'].apply(preprocessing)
comments_test = test_data['text'].apply(preprocessing)

In [None]:
comments_train.head()

0                                                     
1                                     turun maju canik
2        prosentase kematian covid warga kota semarang
3    rapid test test swab pcr kawan-kawan dinkes se...
4                                      area pedurungan
Name: text, dtype: object

In [None]:
comments_test.head()

0                             vaksin massal cek vaksin
1             kak coba coba ulang terima kasih infonya
2                           tugu graha padma isoman cc
3    terimakasih infonya berharap update informasi ...
4                                            ayo turun
Name: text, dtype: object

# **Feature Extraction**

In [None]:
# One-hot encode both splits against the category list of the training labels
# so the columns line up even if a class is absent from one split.
label_categories = pd.Categorical(train_data['label']).categories
unseen_labels = set(test_data['label'].dropna()) - set(label_categories)
assert not unseen_labels, f"test labels missing from training data: {unseen_labels}"

# dtype is pinned because the get_dummies default differs between pandas versions.
y_train = pd.get_dummies(
    pd.Categorical(train_data['label'], categories=label_categories), dtype='uint8'
).values
y_test = pd.get_dummies(
    pd.Categorical(test_data['label'], categories=label_categories), dtype='uint8'
).values

y_train.shape, y_test.shape

((7967, 3), (1992, 3))

In [None]:
y_train[1], y_train[0], y_train[5]

(array([0, 1, 0], dtype=uint8),
 array([1, 0, 0], dtype=uint8),
 array([0, 0, 1], dtype=uint8))

## **TF-IDF**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over 1- to 3-grams, capped at 5000 features.
tfidf_options = dict(
    min_df=2,
    max_df=0.95,
    max_features=5000,
    ngram_range=(1, 3),
    sublinear_tf=True,
)
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit on the training comments only, then reuse the fitted vectorizer for test.
comments_train_tfidf = vectorizer.fit_transform(comments_train).toarray()
comments_test_tfidf = vectorizer.transform(comments_test).toarray()

comments_train_tfidf.shape, comments_test_tfidf.shape

((7967, 5000), (1992, 5000))

In [None]:
# reshape to (samples, 1, features), since the LSTM layer expects 3-D input
def _add_timestep_axis(matrix):
    """Reshape an array to (first dimension, 1, last dimension)."""
    return matrix.reshape(matrix.shape[0], 1, matrix.shape[-1])

comments_train_tfidf = _add_timestep_axis(comments_train_tfidf)
comments_test_tfidf = _add_timestep_axis(comments_test_tfidf)

comments_train_tfidf.shape, comments_test_tfidf.shape

((7967, 1, 5000), (1992, 1, 5000))

In [None]:
!pip install -q -U keras-tuner

[?25l[K     |███▍                            | 10 kB 20.3 MB/s eta 0:00:01[K     |██████▊                         | 20 kB 27.3 MB/s eta 0:00:01[K     |██████████                      | 30 kB 23.4 MB/s eta 0:00:01[K     |█████████████▍                  | 40 kB 17.9 MB/s eta 0:00:01[K     |████████████████▊               | 51 kB 5.9 MB/s eta 0:00:01[K     |████████████████████            | 61 kB 6.3 MB/s eta 0:00:01[K     |███████████████████████▍        | 71 kB 5.7 MB/s eta 0:00:01[K     |██████████████████████████▊     | 81 kB 6.3 MB/s eta 0:00:01[K     |██████████████████████████████  | 92 kB 6.7 MB/s eta 0:00:01[K     |████████████████████████████████| 98 kB 3.5 MB/s 
[?25h

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping 
from keras_tuner.engine.hyperparameters import HyperParameters
from keras_tuner.tuners import RandomSearch 
from keras_tuner.tuners import Hyperband
from keras.utils.vis_utils import plot_model

**Before Tuning**

In [None]:
# Baseline network: one LSTM layer, dropout, then a softmax over the 3 classes.
model = Sequential([
    LSTM(units=32, input_shape=comments_train_tfidf.shape[1:]),
    Dropout(0.2),
    Dense(3, activation='softmax'),
])
opt = Adam()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# Stop once val_loss has not improved for 10 epochs. restore_best_weights
# rolls the model back to its best epoch when the stop triggers; without it
# the model evaluated afterwards keeps the weights from 10 epochs past the
# best validation loss.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=10,
    restore_best_weights=True,
)

history = model.fit(
    comments_train_tfidf,
    y_train,
    epochs=50,
    validation_split=0.2,
    batch_size=32,
    verbose=1,
    callbacks=[es]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 00014: early stopping


In [None]:
history.history

{'accuracy': [0.5683351755142212,
  0.7274439334869385,
  0.7745174765586853,
  0.8043307662010193,
  0.8275537490844727,
  0.842303454875946,
  0.8557978868484497,
  0.8667817115783691,
  0.8777655959129333,
  0.8879648447036743,
  0.891260027885437,
  0.8948689699172974,
  0.8981641530990601,
  0.8994194269180298],
 'loss': [1.0341700315475464,
  0.791092038154602,
  0.6245718002319336,
  0.5246053338050842,
  0.4585339426994324,
  0.4119190275669098,
  0.37551066279411316,
  0.34695711731910706,
  0.32383260130882263,
  0.3067140281200409,
  0.2913402020931244,
  0.28031542897224426,
  0.27242419123649597,
  0.26198309659957886],
 'val_accuracy': [0.6198243498802185,
  0.6907151937484741,
  0.6982434391975403,
  0.7063990235328674,
  0.7038896083831787,
  0.6982434391975403,
  0.6944792866706848,
  0.6932246088981628,
  0.6951066255569458,
  0.6919698715209961,
  0.6938519477844238,
  0.6919698715209961,
  0.6919698715209961,
  0.6875784397125244],
 'val_loss': [0.9260867834091187,


In [None]:
loss, accuracy = model.evaluate(comments_test_tfidf, y_test)



**After Tuning**

In [None]:
# Build out our simple LSTM
# # Model saving callback
# mc = ModelCheckpoint('keras_model', 
#                                  monitor='val_loss', 
#                                  verbose=1, 
#                                  save_best_only=True, 
#                                  mode='auto')

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

# model = Sequential()
# model.add(LSTM(lstm_out, dropout=dropout, input_shape = comments_train_tfidf.shape[1:])) # recurrent dropout?
# model.add(Dense(3,activation='softmax'))
# opt = Adam(learning_rate=lr)
# model.compile(loss = 'categorical_crossentropy', optimizer=opt, metrics = ['accuracy'])
# print(model.summary())

def build_model(hp):
    """Build and compile the TF-IDF LSTM classifier for one tuner trial.

    Search space (three choices each): LSTM units, dropout rate and Adam
    learning rate. The input shape is taken from `comments_train_tfidf`.

    Args:
        hp: Keras Tuner hyperparameter container used to pick the values.

    Returns:
        The compiled model.
    """
    model = Sequential()
    model.add(LSTM(units=hp.Choice('lstm_out',values=[32, 48, 64]), input_shape=comments_train_tfidf.shape[1:]))
    model.add(Dropout(hp.Choice('dropout',values=[0.2, 0.5, 0.8])))
    model.add(Dense(3,activation='softmax'))
    lr = hp.Choice('learning_rate',values=[0.1, 0.01, 0.001])
    opt = Adam(learning_rate=lr)
    model.compile(loss = 'categorical_crossentropy', optimizer=opt, metrics = ['accuracy'])
    # Overwrites model_plot.png on every call.
    plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    # summary() prints by itself and returns None; wrapping it in print()
    # added a stray "None" line to the output.
    model.summary()
    return model

In [None]:
# Random search over the search space declared in build_model; max_trials=27
# matches the 3 x 3 x 3 combinations of its three hp.Choice() lists.
tuner= RandomSearch(
        build_model,
        objective='val_accuracy',  # rank trials by validation accuracy
        max_trials=27,
        executions_per_trial=1,
        directory='randomsearch-tfidf-20211211',  # trial results are stored under this folder
)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 32)                644224    
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 3)                 99        
                                                                 
Total params: 644,323
Trainable params: 644,323
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
tuner.search(
      x=comments_train_tfidf,
      y=y_train,
      epochs=50,
      batch_size=32,
      validation_split=0.2,
      callbacks=[es]
)

Trial 21 Complete [00h 00m 27s]
val_accuracy: 0.6806775331497192

Best val_accuracy So Far: 0.7176913619041443
Total elapsed time: 00h 11m 00s
INFO:tensorflow:Oracle triggered exit


In [None]:
tuner.results_summary()

Results summary
Results in randomsearch-tfidf-20211211/untitled_project
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.8
learning_rate: 0.001
Score: 0.7176913619041443
Trial summary
Hyperparameters:
lstm_out: 64
dropout: 0.8
learning_rate: 0.001
Score: 0.7107904553413391
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.8
learning_rate: 0.001
Score: 0.7101631164550781
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.2
learning_rate: 0.001
Score: 0.7095357775688171
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.5
learning_rate: 0.001
Score: 0.7082810401916504
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.2
learning_rate: 0.01
Score: 0.7076537013053894
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.2
learning_rate: 0.001
Score: 0.7070263624191284
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.5
learning_rate: 0.001
Score: 0.7070263624191284
Trial summary
Hyperparamet

In [None]:
bestHP = tuner.get_best_hyperparameters(num_trials=1)[0]
bestHP.get("lstm_out"), bestHP.get("dropout"), bestHP.get("learning_rate")

(32, 0.8, 0.001)

In [None]:
best_model = tuner.get_best_models(num_models=1)[0]

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                644224    
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 3)                 99        
                                                                 
Total params: 644,323
Trainable params: 644,323
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
loss, accuracy = best_model.evaluate(comments_test_tfidf, y_test)



## **Word2Vec**

In [None]:
# download the pre-trained fastText word vectors for Indonesian
! wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.id.300.bin.gz
# unzip
! gunzip cc.id.300.bin.gz

--2021-12-11 09:21:03--  https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.id.300.bin.gz
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 104.22.74.142, 172.67.9.4, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4507049071 (4.2G) [application/octet-stream]
Saving to: ‘cc.id.300.bin.gz’


2021-12-11 09:23:14 (33.0 MB/s) - ‘cc.id.300.bin.gz’ saved [4507049071/4507049071]



In [None]:
from gensim.models import KeyedVectors
from gensim.models.wrappers import FastText

# load pre-trained word2vec fasttext
word2vec = FastText.load_fasttext_format('cc.id.300.bin')

In [None]:
# tokenize text
def tokenize(sentence):
    """Split `sentence` into a list of tokens with NLTK's `word_tokenize`."""
    tokens = word_tokenize(sentence)
    return tokens

train_text = comments_train.apply(tokenize)
test_text = comments_test.apply(tokenize)

In [None]:
# vectorize
VOCABULARY = word2vec.wv.vocab
def vectorize(tokens):
    """Look up the pre-trained vector of every in-vocabulary token, in order.

    Tokens that are not in `VOCABULARY` are skipped, so the result may be
    shorter than `tokens`.

    Args:
        tokens: Iterable of token strings.

    Returns:
        List of word vectors, one per token found in the vocabulary.
    """
    return [word2vec[token] for token in tokens if token in VOCABULARY]

def avg_vectorize(tokens):
    """Average the pre-trained vectors of all in-vocabulary tokens.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A length-300 array: the mean vector of the tokens found in
        `VOCABULARY`, or all zeros when none of them is found.
    """
    total = np.zeros(300)
    n_found = 0

    for token in tokens:
        if token not in VOCABULARY:
            continue
        # accumulate one vector at a time
        total += word2vec[token]
        n_found += 1

    if n_found == 0:
        return total
    return total / n_found

In [None]:
# check maximum token and count
max_len_choosen = 50

# Number of tokens per training comment.
token_lengths = [len(tok) for tok in train_text]

# Longest comment, and how many comments exceed the candidate maximum length.
max_size_token = max(token_lengths, default=0)
count = sum(1 for length in token_lengths if length > max_len_choosen)

print(f'Maximum length token: {max_size_token}')
print(f'With MAX_LEN {max_len_choosen}, there are/is {count} token/s')

Maximum length token: 150
With MAX_LEN 50, there are/is 24 token/s


In [None]:
# padding
MAX_LEN = 50

def add_padding(word_vec, max_len=None, dim=300):
    """Pad or truncate a list of word vectors to exactly `max_len` entries.

    Args:
        word_vec: List of word vectors for one text.
        max_len: Target number of vectors. Defaults to the module-level MAX_LEN.
        dim: Vector length used for padding when `word_vec` is empty.

    Returns:
        A list of `max_len` vectors: the first `max_len` entries of `word_vec`
        when it is long enough, otherwise `word_vec` followed by zero vectors.
    """
    if max_len is None:
        max_len = MAX_LEN
    if len(word_vec) >= max_len:
        return word_vec[:max_len]

    if len(word_vec) > 0:
        # Pad with zeros of the same shape and dtype as the real vectors;
        # integer zeros would upcast the stacked feature array to float64.
        first = np.asarray(word_vec[0])
        pad = np.zeros(first.shape, dtype=first.dtype)
    else:
        # No vector to copy from. float32 presumably matches the embedding
        # model's vectors - verify against the loaded model.
        pad = np.zeros(dim, dtype=np.float32)
    return list(word_vec) + [pad] * (max_len - len(word_vec))

In [None]:
# feature extraction
def extract_feature(data, ndim=3):
    """Turn one tokenised text into a numeric feature array.

    Args:
        data: List of tokens for one text.
        ndim: 3 to get the padded sequence of word vectors (via `vectorize`
            and `add_padding`), 2 to get the averaged vector (via
            `avg_vectorize`).

    Returns:
        The features as a NumPy array.

    Raises:
        ValueError: If `ndim` is neither 2 nor 3.
    """
    if ndim == 3:
        features = add_padding(vectorize(data))
    elif ndim == 2:
        features = avg_vectorize(data)
    else:
        # Previously this fell through and failed on an unbound local variable.
        raise ValueError(f"ndim must be 2 or 3, got {ndim!r}")
    return np.array(features)

In [None]:
import numpy as np

# extract feature
def _stack_sequence_features(token_lists):
    """Extract the ndim=3 features of every token list into one array."""
    return np.array([extract_feature(tokens, ndim=3) for tokens in token_lists])

comments_train_w2v = _stack_sequence_features(train_text)
comments_test_w2v = _stack_sequence_features(test_text)

comments_train_w2v.shape, comments_test_w2v.shape

((7967, 50, 300), (1992, 50, 300))

In [None]:
!pip install -q -U keras-tuner

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping 
from keras_tuner.engine.hyperparameters import HyperParameters
from keras_tuner.tuners import RandomSearch 
from keras_tuner.tuners import Hyperband
from keras.utils.vis_utils import plot_model

**Before Tuning**

In [None]:
# Baseline network: one LSTM layer, dropout, then a softmax over the 3 classes.
model = Sequential([
    LSTM(units=32, input_shape=comments_train_w2v.shape[1:]),
    Dropout(0.2),
    Dense(3, activation='softmax'),
])
opt = Adam()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# Stop once val_loss has not improved for 10 epochs. restore_best_weights
# rolls the model back to its best epoch when the stop triggers; without it
# the model evaluated afterwards keeps the weights from 10 epochs past the
# best validation loss.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=10,
    restore_best_weights=True,
)

history = model.fit(
    comments_train_w2v,
    y_train,
    epochs=50,
    validation_split=0.2,
    batch_size=32,
    verbose=1,
    callbacks=[es]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 00049: early stopping


In [None]:
loss, accuracy = model.evaluate(comments_test_w2v, y_test)



In [None]:
# Build out our simple LSTM

# Model saving callback
# mc = ModelCheckpoint('keras_model', 
#                                  monitor='val_loss', 
#                                  verbose=1, 
#                                  save_best_only=True, 
#                                  mode='auto')

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

# model2 = Sequential()
# model2.add(LSTM(lstm_out, dropout=dropout, input_shape = train_features.shape[1:])) # recurrent dropout?
# model2.add(Dense(3,activation='softmax'))
# opt = Adam(learning_rate=lr)
# model2.compile(loss = 'categorical_crossentropy', optimizer=opt, metrics = ['accuracy'])
# print(model2.summary())

def build_model(hp):
    """Build and compile the word-vector LSTM classifier for one tuner trial.

    Search space (three choices each): LSTM units, dropout rate and Adam
    learning rate. The input shape is taken from `comments_train_w2v`.

    Args:
        hp: Keras Tuner hyperparameter container used to pick the values.

    Returns:
        The compiled model.
    """
    model = Sequential()
    model.add(LSTM(units=hp.Choice('lstm_out',values=[32, 48, 64]), input_shape=comments_train_w2v.shape[1:]))
    model.add(Dropout(hp.Choice('dropout',values=[0.2, 0.5, 0.8])))
    model.add(Dense(3,activation='softmax'))
    lr = hp.Choice('learning_rate',values=[0.1, 0.01, 0.001])
    opt = Adam(learning_rate=lr)
    model.compile(loss = 'categorical_crossentropy', optimizer=opt, metrics = ['accuracy'])
    # Overwrites model_plot.png on every call.
    plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    # summary() prints by itself and returns None; wrapping it in print()
    # added a stray "None" line to the output.
    model.summary()
    return model

In [None]:
# Random search over the search space declared in build_model; max_trials=27
# matches the 3 x 3 x 3 combinations of its three hp.Choice() lists.
tuner= RandomSearch(
        build_model,
        objective='val_accuracy',  # rank trials by validation accuracy
        max_trials=27,
        executions_per_trial=1,
        directory='randomsearch_w2v-20211211',  # trial results are stored under this folder
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 32)                42624     
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 3)                 99        
                                                                 
Total params: 42,723
Trainable params: 42,723
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
tuner.search(
      x=comments_train_w2v,
      y=y_train,
      epochs=50,
      batch_size=32,
      validation_split=0.2,
      callbacks=[es]
)

Trial 22 Complete [00h 01m 42s]
val_accuracy: 0.5457967519760132

Best val_accuracy So Far: 0.703262209892273
Total elapsed time: 01h 25m 20s
INFO:tensorflow:Oracle triggered exit


In [None]:
tuner.results_summary()

Results summary
Results in randomsearch_w2v-20211211/untitled_project
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.2
learning_rate: 0.01
Score: 0.703262209892273
Trial summary
Hyperparameters:
lstm_out: 64
dropout: 0.5
learning_rate: 0.01
Score: 0.6957340240478516
Trial summary
Hyperparameters:
lstm_out: 64
dropout: 0.8
learning_rate: 0.01
Score: 0.6951066255569458
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.2
learning_rate: 0.001
Score: 0.6932246088981628
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.5
learning_rate: 0.01
Score: 0.6894604563713074
Trial summary
Hyperparameters:
lstm_out: 32
dropout: 0.8
learning_rate: 0.01
Score: 0.6888331174850464
Trial summary
Hyperparameters:
lstm_out: 64
dropout: 0.2
learning_rate: 0.01
Score: 0.6875784397125244
Trial summary
Hyperparameters:
lstm_out: 48
dropout: 0.8
learning_rate: 0.001
Score: 0.683814287185669
Trial summary
Hyperparameters:
lstm

In [None]:
bestHP = tuner.get_best_hyperparameters(num_trials=1)[0]
bestHP.get("lstm_out"), bestHP.get("dropout"), bestHP.get("learning_rate")

(48, 0.2, 0.01)

In [None]:
best_model = tuner.get_best_models(num_models=1)[0]

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 48)                67008     
                                                                 
 dropout (Dropout)           (None, 48)                0         
                                                                 
 dense (Dense)               (None, 3)                 147       
                                                                 
Total params: 67,155
Trainable params: 67,155
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
loss, accuracy = best_model.evaluate(comments_test_w2v, y_test)



In [None]:
best_model.save('/content/best_model_w2v.h5')