In [None]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import Adam
from keras.layers import Dense, LSTM, Embedding
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from sklearn.model_selection import GridSearchCV

**Sentiment Analysis with LSTM**

In [None]:
# max_features: vocabulary size, passed to imdb.load_data(num_words=...) and used as
#               the Embedding layer's input dimension.
# maxlen:       sequence length the Embedding layer is built for (input_length).
max_features, maxlen = 30000, 100

In [None]:
# Load the IMDB reviews as sequences of word ids, restricted to the `max_features`
# most frequent words.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Pad/truncate every review to `maxlen` ids. The shared constant is used instead of a
# literal 100 so the data always matches the Embedding layer's `input_length=maxlen`.
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

In [None]:
def build_model(lstm_units, dropout_rate, recurrent_dropout_rate, learning_rate):
    """Build and compile an Embedding -> LSTM -> sigmoid binary classifier.

    Args:
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Dropout applied to the LSTM inputs.
        recurrent_dropout_rate: Dropout applied to the LSTM recurrent state.
        learning_rate: Step size for the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with binary cross-entropy loss and an
        accuracy metric.
    """
    model = Sequential()
    # Vocabulary size and sequence length come from the notebook-level constants.
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(LSTM(lstm_units, dropout=dropout_rate, recurrent_dropout=recurrent_dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    # Instantiate Adam explicitly: compiling with the string 'adam' silently ignored
    # `learning_rate`, so every learning-rate value in a search trained the same model.
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )
    return model

In [None]:
# Hyper-parameter candidates for the grid search; each key matches a
# `build_model` argument name.
param_grid = dict(
    lstm_units=[64, 128],
    dropout_rate=[0.2, 0.3],
    recurrent_dropout_rate=[0.1, 0.2],
    learning_rate=[0.001, 0.005, 0.01],
)

In [None]:
# Wrap `build_model` in a scikit-learn compatible estimator so it can be handed to
# GridSearchCV. NOTE(review): `model` is rebound to a plain Keras model further down.
model = KerasClassifier(build_fn=build_model, verbose=0)

  model = KerasClassifier(build_fn=build_model, verbose=0)


In [None]:
# Exhaustive 3-fold cross-validated search over `param_grid`.
# The search is fitted on the training split only: tuning on X_test/y_test would leak
# the held-out data into model selection and make later test scores optimistic.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, verbose=1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Show the hyper-parameter combination with the best cross-validated score.
print(grid_result.best_params_)

{'dropout_rate': 0.3, 'learning_rate': 0.005, 'lstm_units': 64, 'recurrent_dropout_rate': 0.1}


In [None]:
# model1 = build_model(64, 0.3, 0.1, 0.005)
# model1.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f8683c66a60>

In [None]:
# score, acc = model.evaluate(X_test, y_test, batch_size=32)
# print('Test score:', score)
# print('Test accuracy:', acc)

Test score: 0.8905755281448364
Test accuracy: 0.8299599885940552


In [None]:
# model2 = build_model(64, 0.3, 0.1, 0.005)
# model2.fit(X_train, y_train, batch_size=32, epochs=6, validation_data=(X_test, y_test))
# score, acc = model2.evaluate(X_test, y_test, batch_size=32)
# print('Test score:', score)
# print('Test accuracy:', acc)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test score: 0.6321529150009155
Test accuracy: 0.8290799856185913


In [None]:
# model3 = build_model(64, 0.3, 0.1, 0.005)
# model3.fit(X_train, y_train, batch_size=64, epochs=6, validation_data=(X_test, y_test))
# score, acc = model3.evaluate(X_test, y_test, batch_size=64)
# print('Test score:', score)
# print('Test accuracy:', acc)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test score: 0.8179002404212952
Test accuracy: 0.8186799883842468


In [None]:
# Train the final network with the combination reported by the grid search.
# This rebinds `model` from the KerasClassifier wrapper to a plain Keras model.
model = build_model(
    lstm_units=64,
    dropout_rate=0.3,
    recurrent_dropout_rate=0.1,
    learning_rate=0.005,
)
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Report loss and accuracy on the held-out split.
score, acc = model.evaluate(X_test, y_test, batch_size=64)
print('Test score:', score)
print('Test accuracy:', acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test score: 0.9100337624549866
Test accuracy: 0.8150399923324585


In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 128)          3840000   
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 3,889,473
Trainable params: 3,889,473
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Mount Google Drive when running on Colab. Outside Colab the `google.colab`
# import fails, and the ImportError is deliberately ignored so the notebook can
# still run against local files.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    pass

Mounted at /content/drive


In [None]:
import os
def get_root_dir():
    """Return the project directory on the mounted drive if it exists, else './'."""
    drive_project_dir = '/content/drive/MyDrive/BT4222-Project'
    return drive_project_dir if os.path.exists(drive_project_dir) else './'

# Resolve the project root once, show it, and make it the working directory.
project_root = get_root_dir()
print(project_root)
os.chdir(project_root)

/content/drive/MyDrive/BT4222-Project


In [None]:
import pandas as pd

# Build the CSV path from the resolved project root rather than a hard-coded Colab
# path, so the cell also works when the notebook runs outside Colab. On Colab this
# resolves to the same absolute path as before.
val_df = pd.read_csv(os.path.join(get_root_dir(), 'processed-data', 'val.csv'))

# Keep the first five entries of the `summary` column for a quick qualitative check.
summary = val_df['summary'][:5]

In [None]:
# The model's Embedding layer was trained on IMDB word ids, so new text must be
# encoded with the IMDB vocabulary. Fitting a fresh Tokenizer on the summaries assigns
# ids that are unrelated to that vocabulary, which makes the predictions meaningless.
# Conventions of `imdb.load_data` defaults: 1 = start-of-sequence, 2 = out-of-vocabulary,
# and every rank from `imdb.get_word_index()` is shifted by 3.
IMDB_START_ID, IMDB_OOV_ID, IMDB_INDEX_FROM = 1, 2, 3
# Contains characters the Tokenizer filters out, so it can never collide with a real word.
IMDB_OOV_TOKEN = '<imdb-oov>'

# The Tokenizer is used only for its text splitting/lookup; its vocabulary is replaced
# with the shifted IMDB index, and ids >= max_features fall back to the OOV id.
tk = Tokenizer(num_words=max_features, oov_token=IMDB_OOV_TOKEN)
tk.word_index = {
    word: rank + IMDB_INDEX_FROM for word, rank in imdb.get_word_index().items()
}
tk.word_index[IMDB_OOV_TOKEN] = IMDB_OOV_ID

# Prepend the start id, as `imdb.load_data` does for every training review.
sq = [[IMDB_START_ID] + ids for ids in tk.texts_to_sequences(summary)]
# Pad to the same length the model was trained with.
pad_sq = sequence.pad_sequences(sq, maxlen=maxlen)

model.predict(pad_sq)



array([[0.06727437],
       [0.92238164],
       [0.45253885],
       [0.79507846],
       [0.93111026]], dtype=float32)

In [None]:
# Show each of the five scored summaries, followed by a blank line.
for row_label in range(5):
    print(summary[row_label], '\n')

Billboards with anti-U.S. slogans are collected in Iranian capital, state news reports .
Those who put them didn't have permission from Tehran authorities, spokesman says .
This comes amid a thaw in U.S.-Iran relations, though tension and differences remain . 

Palestinian hacker Khalil Shreateh discovered a glitch that allows anyone to post to a stranger's Facebook wall .
Facebook refused to pay him for finding the bug since he used it to post on Mark Zuckerberg's wall .
An online campaign was started to reward Shreateh for his work . 

Henderson looking for starting spot at World Cup to complete impressive season .
Midfielder says he has improved since making his England debut in 2010 .
Had a good season partnering Steven Gerrard in midfield at Liverpool . 

Ten developments in London reportedly being sold to overseas investors .
Buyers see London as an investment - homes offered to foreigners first .
Critics label homes 'safety deposit boxes' as many owners never live there .
Invest