In [3]:
# to load and check model:
from keras import models
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
import re


In [10]:
# Preprocess Tweets Before Feeding to Model
data = pd.read_csv("../data/input/realdonaldtrump_20170120-20191231.csv")
data = data[['id','content', 'date']]
data['content'] = data['content'].apply(lambda x: x.lower())
data['content'] = data['content'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))

In [11]:
data.head

<bound method NDFrame.head of                 id                                            content  \
0     8.224210e+17  it all begins today i will see you at 1100 am ...   
1     8.225020e+17  today we are not merely transferring power fro...   
2     8.225020e+17  power from washington dc and giving it back to...   
3     8.225020e+17  what truly matters is not which party controls...   
4     8.225020e+17  january 20th 2017 will be remembered as the da...   
...            ...                                                ...   
9685  1.212180e+18  thank you to the  dcexaminer washington examin...   
9686  1.212180e+18  thank you steve the greatest witch hunt in us ...   
9687  1.212180e+18  our fantastic first ladyhttpstwittercomflotuss...   
9688  1.212180e+18                                     happy new year   
9689  1.212210e+18                            pictwittercomevaeyd1agv   

                  date  
0       1/20/2017 6:31  
1      1/20/2017 11:51  
2      1/20/2017 1

In [12]:
data.tail

<bound method NDFrame.tail of                 id                                            content  \
0     8.224210e+17  it all begins today i will see you at 1100 am ...   
1     8.225020e+17  today we are not merely transferring power fro...   
2     8.225020e+17  power from washington dc and giving it back to...   
3     8.225020e+17  what truly matters is not which party controls...   
4     8.225020e+17  january 20th 2017 will be remembered as the da...   
...            ...                                                ...   
9685  1.212180e+18  thank you to the  dcexaminer washington examin...   
9686  1.212180e+18  thank you steve the greatest witch hunt in us ...   
9687  1.212180e+18  our fantastic first ladyhttpstwittercomflotuss...   
9688  1.212180e+18                                     happy new year   
9689  1.212210e+18                            pictwittercomevaeyd1agv   

                  date  
0       1/20/2017 6:31  
1      1/20/2017 11:51  
2      1/20/2017 1

In [13]:
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['content'].values)
X = tokenizer.texts_to_sequences(data['content'].values)
X = pad_sequences(X)

In [14]:
# Load in all models to be used 
# Called LSTM_10 because 10 epoch
LSTM_bal = models.load_model('../output/models/LSTM_balanced_10')
LSTM_bal.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 29, 128)           256000    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 29, 128)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 196)               254800    
_________________________________________________________________
dense (Dense)                (None, 2)                 394       
Total params: 511,194
Trainable params: 511,194
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Generate Predictions for each model 
y_pred = LSTM_bal.predict(X)
data['negative_prob'] = pd.Series(y_pred[:,0])
data['positive_prob'] = pd.Series(y_pred[:,1])
data.to_csv('../output/results/LSTM_balanced_results.csv')



In [16]:
# Get Results of Unbalanced Model
LSTM_unbal = models.load_model('../output/models/LSTM_unbalanced')
LSTM_unbal.summary()
# Generate Predictions for each model 
y_pred = LSTM_unbal.predict(X)
data['negative_prob'] = pd.Series(y_pred[:,0])
data['positive_prob'] = pd.Series(y_pred[:,1])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 28, 128)           256000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 28, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 196)               254800    
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 394       
Total params: 511,194
Trainable params: 511,194
Non-trainable params: 0
_________________________________________________________________


In [17]:
# 1 == Negative, 1 == Positive Sentiment
data['outcome'] = np.where(data['negative_prob']> 0.500, -1 ,1)

data.to_csv('../output/results/LSTM_unbalanced_full_results.csv')

In [18]:
data.head

<bound method NDFrame.head of                 id                                            content  \
0     8.224210e+17  it all begins today i will see you at 1100 am ...   
1     8.225020e+17  today we are not merely transferring power fro...   
2     8.225020e+17  power from washington dc and giving it back to...   
3     8.225020e+17  what truly matters is not which party controls...   
4     8.225020e+17  january 20th 2017 will be remembered as the da...   
...            ...                                                ...   
9685  1.212180e+18  thank you to the  dcexaminer washington examin...   
9686  1.212180e+18  thank you steve the greatest witch hunt in us ...   
9687  1.212180e+18  our fantastic first ladyhttpstwittercomflotuss...   
9688  1.212180e+18                                     happy new year   
9689  1.212210e+18                            pictwittercomevaeyd1agv   

                  date  negative_prob  positive_prob  outcome  
0       1/20/2017 6:31       