In [0]:
import pandas as pd
df = pd.read_csv('mod.csv')
df.text=df.text.astype(str)


In [8]:
df = df.drop(columns='Unnamed: 0')
df.head()

Unnamed: 0,sentiment,text
0,positive,"Thank you, Ellen. We have a strong 2018, with ..."
1,positive,Stock-based compensation totaled $2.3 billion....
2,neutral,"For opening remarks, I will be turning the cal..."
3,positive,[Operator Instructions]. Our first question wi...
4,positive,"Hey, thanks. So I guess the question I have an..."


In [9]:
df['sentiment'] = df['sentiment'].replace({'positive': 0,'negative':1,'neutral':2})
df.head()

Unnamed: 0,sentiment,text
0,0,"Thank you, Ellen. We have a strong 2018, with ..."
1,0,Stock-based compensation totaled $2.3 billion....
2,2,"For opening remarks, I will be turning the cal..."
3,0,[Operator Instructions]. Our first question wi...
4,0,"Hey, thanks. So I guess the question I have an..."


In [0]:
#RNN input requires array data type,
X, y = (df['text'].values, df['sentiment'].values)

###Text data has to be integer encoded before feeding it into the RNN model. This can be easily achieved by using basic tools from the Keras library:

In [0]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
tk = Tokenizer(lower = True)
tk.fit_on_texts(X)
X_seq = tk.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=1000, padding='post')

In [12]:
X_pad

array([[ 106,   10, 3962, ...,    0,    0,    0],
       [ 705,  273,  875, ...,    0,    0,    0],
       [  13,  917,  467, ...,    0,    0,    0],
       ...,
       [  71,   38,   11, ...,    0,    0,    0],
       [ 489,   23,   22, ...,    0,    0,    0],
       [ 140, 3037,  350, ...,    0,    0,    0]], dtype=int32)

In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size = 0.2, random_state = 1)

In [0]:
import numpy as np
from keras.utils import to_categorical
num_classes = len(np.unique(y_train))
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

###Single layer simpleRNN
where return_sequence is set to False (default)

In [16]:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout, SimpleRNN
vocabulary_size = len(tk.word_counts.keys())+1
max_words = 1000
batch_size = 32
embedding_size = 32
model = Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(SimpleRNN(32))
model.add(Dense(3, activation='softmax'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 1000, 32)          205120    
_________________________________________________________________
simple_rnn_18 (SimpleRNN)    (None, 32)                2080      
_________________________________________________________________
dense_11 (Dense)             (None, 3)                 99        
Total params: 207,299
Trainable params: 207,299
Non-trainable params: 0
_________________________________________________________________


In [0]:
from tensorboardcolab import *
tbc=TensorBoardColab()

Wait for 8 seconds...
TensorBoard link:
https://bfe96253.ngrok.io


In [0]:
model.fit(X_train, y_train,validation_data=(X_test,y_test),batch_size=batch_size,epochs=20,shuffle=True,callbacks=[TensorBoardColabCallback(tbc)])

###Two layer RNN

In [0]:
model = Sequential()
model.add(Embedding(vocabulary_size, 32,input_length=max_words))
model.add(SimpleRNN(32, return_sequences=True))
#model.add(SimpleRNN(32, return_sequences=True))
#model.add(SimpleRNN(32, return_sequences=True))
model.add(SimpleRNN(32))  # This last layer only returns the last outputs.
model.add(Dense(3, activation='softmax'))

model.summary()
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_14 (Embedding)     (None, 1000, 32)          205120    
_________________________________________________________________
simple_rnn_19 (SimpleRNN)    (None, 1000, 32)          2080      
_________________________________________________________________
simple_rnn_20 (SimpleRNN)    (None, 32)                2080      
_________________________________________________________________
dense_12 (Dense)             (None, 3)                 99        
Total params: 209,379
Trainable params: 209,379
Non-trainable params: 0
_________________________________________________________________


In [0]:
from tensorboardcolab import *
tbc=TensorBoardColab()

Wait for 8 seconds...
TensorBoard link:
https://5bba38b5.ngrok.io


In [0]:
model.fit(X_train, y_train,validation_data=(X_test,y_test),batch_size=batch_size,epochs=20,shuffle=True,callbacks=[TensorBoardColabCallback(tbc)])

Train on 1317 samples, validate on 330 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fbb2f3a85c0>

In [0]:
from sklearn.metrics import confusion_matrix
y_pred = model.predict(X_train)
matrix = confusion_matrix(y_train.argmax(axis=1), y_pred.argmax(axis=1))

In [0]:
y_pred

array([[0.38553756, 0.13320945, 0.4812529 ],
       [0.38538548, 0.13324724, 0.48136726],
       [0.38540816, 0.13321693, 0.48137492],
       ...,
       [0.38542792, 0.13323006, 0.48134205],
       [0.3854251 , 0.13318935, 0.4813856 ],
       [0.38540575, 0.13322994, 0.48136428]], dtype=float32)

In [0]:
df['sentiment'].value_counts()

2    833
0    657
1    157
Name: sentiment, dtype: int64

###LSTM

In [20]:
from keras.layers import LSTM
model = Sequential()
model.add(Embedding(vocabulary_size, 32,input_length=max_words))
model.add(LSTM(32))
model.add(Dense(3, activation='softmax'))

model.summary()
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 1000, 32)          205120    
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 99        
Total params: 213,539
Trainable params: 213,539
Non-trainable params: 0
_________________________________________________________________


In [21]:
from tensorboardcolab import *
tbc=TensorBoardColab()

Wait for 8 seconds...
TensorBoard link:
https://271c5f94.ngrok.io


In [22]:
model.fit(X_train, y_train,validation_data=(X_test,y_test),batch_size=batch_size,epochs=20,shuffle=True,callbacks=[TensorBoardColabCallback(tbc)])

Instructions for updating:
Use tf.cast instead.
Train on 1317 samples, validate on 330 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7feb94b3f278>

In [0]:
from sklearn.metrics import confusion_matrix
y_pred = model.predict(X_train)
matrix = confusion_matrix(y_train.argmax(axis=1), y_pred.argmax(axis=1))

In [24]:
print(matrix)

[[  0   0 533]
 [  0   0 126]
 [  0   0 658]]
