## Data Preparation

In [121]:
# importing data
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 2

# Read the preprocessed tweets; the file is tab-separated despite its .csv extension.
data = pd.read_csv(
    "data/data_preprocessed.csv",
    sep='\t',
)

# Show (rows, columns) as the cell output.
data.shape

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


(14133, 24)

In [122]:
from sklearn.model_selection import train_test_split

# Model input is the preprocessed tweet text; the target is the sentiment label.
text = data['text_preprocessed']
y = data['airline_sentiment']

# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
text_train, text_test, y_train, y_test = train_test_split(
    text,
    y,
    test_size=0.10,
    random_state=37,
)

# Display the sizes of both partitions.
text_train.shape, text_test.shape

((12719,), (1414,))

We sample a few raw tweets from the held-out test set to see how our model would perform in a real production environment.

In [124]:
random_state = 12

# Draw the five test-set rows ONCE and take both the raw tweet and its label
# from the same sampled rows. Sampling the two columns in separate calls only
# lines up while both calls share the exact same index and seed; a single draw
# guarantees that every tweet is paired with its own label.
sampled_rows = data.loc[text_test.index, ['text', 'airline_sentiment']].sample(
    5, random_state=random_state
)

tweet_samples = sampled_rows['text'].values
labels = sampled_rows['airline_sentiment'].values

# Show the (tweet, true label) pairs.
list(zip(tweet_samples, labels))

[('@united any chance you could look into my problem from earlier?',
  'neutral'),
 ('@USAirways Yes &amp; have already spent 1K+ on the card &amp; paid the $89 annual fee, but the 50K miles have not shown up. Want to fly AUS --&gt; ATL',
  'negative'),
 ("@SouthwestAir Boooo!!!!!!   Don't be like the other airlines!!  http://t.co/WHAGPknnLF",
  'negative'),
 ("@AmericanAir Oh,i already have turned  itover to them, but apparently losing someones bag on their honeymoon doesn't require accountability",
  'negative'),
 ("@SouthwestAir @LeeAnnHealey I was all, yeah sale fares! I got places to fly! Oh damn, right, I live where SWA doesn't fly. #why",
  'negative')]

Here we import our model and the necessary parameters. Of course, this step would be transparent to an end user.

In [125]:
import pickle

# Restore the pickled vocabulary object used to build the model below.
# NOTE(review): pickle.load can execute arbitrary code - only open files you trust.
with open('vocab_to_int', 'rb') as vocab_file:
    vocab_to_int = pickle.load(vocab_file)

In [126]:
from models import SentimentRNN

# Hyperparameters for the network; they have to match the checkpoint loaded later.
# NOTE(review): the +1 presumably reserves index 0 for padding - confirm against preprocessing.
vocab_size = len(vocab_to_int) + 1
output_size = 3
embedding_dim = 200
hidden_dim = 256
n_layers = 3

# Build the network and print its layer summary.
net = SentimentRNN(
    vocab_size,
    output_size,
    embedding_dim,
    hidden_dim,
    n_layers,
    vocab_to_int,
)

print(net)

SentimentRNN(
  (embedding): Embedding(11809, 200)
  (lstm): LSTM(200, 256, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.3)
  (fc): Linear(in_features=256, out_features=3, bias=True)
)


In [127]:
import torch

# Restore the trained weights. map_location='cpu' lets a checkpoint that was
# saved from a GPU run be read on a CPU-only machine; load_state_dict then
# copies the values into the parameters `net` already owns.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
checkpoint = torch.load('model_pure_200dims.pt', map_location='cpu')
net.load_state_dict(checkpoint['model_state_dict'])

# Inference only: switch to eval mode so the dropout layers listed in the
# model summary are disabled and the predicted probabilities are deterministic.
# Harmless if net.predict() already does this itself. The trailing semicolon
# keeps the notebook from echoing the module repr.
net.eval();

In [128]:
# Materialise the sampled tweets as a plain list so they can be indexed below.
tweets = list(tweet_samples)

# Run the model on every tweet and keep the first element of each prediction.
# NOTE(review): 30 is presumably the padded sequence length expected by predict() - confirm.
probabilities = [net.predict(tweet, 30)[0] for tweet in tweets]

### Analysing tweet probabilities

In [130]:
# Report the predicted class probabilities for sampled tweet 0 next to its true label.
tweet_n = 0
probs = probabilities[tweet_n]

print("Tweet: {}".format(tweets[tweet_n]))
print('-' * 50)
# The message assumes index order 0/1/2 = negative/neutral/positive.
print(
    "Probability of being Negative: {} - Neutral: {} - Positive: {}".format(
        probs[0], probs[1], probs[2]
    )
)
print("True Label: {}".format(labels[tweet_n]))

Tweet: @united any chance you could look into my problem from earlier?
--------------------------------------------------
Probability of being Negative: 0.10169485211372375 - Neutral: 0.8809473514556885 - Positive: 0.017357753589749336
True Label: neutral


In [116]:
# Report the predicted class probabilities for sampled tweet 1 next to its true label.
# NOTE(review): the saved output of this cell comes from an earlier execution
# (In [116]) and shows a different tweet than tweet_samples[1]; re-run to refresh it.
tweet_n = 1
probs = probabilities[tweet_n]

print("Tweet: {}".format(tweets[tweet_n]))
print('-' * 50)
# The message assumes index order 0/1/2 = negative/neutral/positive.
print(
    "Probability of being Negative: {} - Neutral: {} - Positive: {}".format(
        probs[0], probs[1], probs[2]
    )
)
print("True Label: {}".format(labels[tweet_n]))

Tweet: @SouthwestAir we are having a real issue trying to get thru to you.  On hold for 2 hours &amp; got disconnected!  Please help!
--------------------------------------------------
Probability of being Negative: 0.9789091348648071 - Neutral: 0.015689147636294365 - Positive: 0.005401642993092537
True Label: negative


In [131]:
# Report the predicted class probabilities for sampled tweet 2 next to its true label.
tweet_n = 2
probs = probabilities[tweet_n]

print("Tweet: {}".format(tweets[tweet_n]))
print('-' * 50)
# The message assumes index order 0/1/2 = negative/neutral/positive.
print(
    "Probability of being Negative: {} - Neutral: {} - Positive: {}".format(
        probs[0], probs[1], probs[2]
    )
)
print("True Label: {}".format(labels[tweet_n]))

Tweet: @SouthwestAir Boooo!!!!!!   Don't be like the other airlines!!  http://t.co/WHAGPknnLF
--------------------------------------------------
Probability of being Negative: 0.4408182203769684 - Neutral: 0.04991302266716957 - Positive: 0.5092687010765076
True Label: negative


In [132]:
# Report the predicted class probabilities for sampled tweet 3 next to its true label.
tweet_n = 3
probs = probabilities[tweet_n]

print("Tweet: {}".format(tweets[tweet_n]))
print('-' * 50)
# The message assumes index order 0/1/2 = negative/neutral/positive.
print(
    "Probability of being Negative: {} - Neutral: {} - Positive: {}".format(
        probs[0], probs[1], probs[2]
    )
)
print("True Label: {}".format(labels[tweet_n]))

Tweet: @AmericanAir Oh,i already have turned  itover to them, but apparently losing someones bag on their honeymoon doesn't require accountability
--------------------------------------------------
Probability of being Negative: 0.954486072063446 - Neutral: 0.04026637226343155 - Positive: 0.005247518420219421
True Label: negative


In [133]:
# Report the predicted class probabilities for sampled tweet 4 next to its true label.
tweet_n = 4
probs = probabilities[tweet_n]

print("Tweet: {}".format(tweets[tweet_n]))
print('-' * 50)
# The message assumes index order 0/1/2 = negative/neutral/positive.
print(
    "Probability of being Negative: {} - Neutral: {} - Positive: {}".format(
        probs[0], probs[1], probs[2]
    )
)
print("True Label: {}".format(labels[tweet_n]))

Tweet: @SouthwestAir @LeeAnnHealey I was all, yeah sale fares! I got places to fly! Oh damn, right, I live where SWA doesn't fly. #why
--------------------------------------------------
Probability of being Negative: 0.27486559748649597 - Neutral: 0.13366879522800446 - Positive: 0.5914656519889832
True Label: negative
