In [445]:
# Import libraries--------------------------------------

import pandas as pd
import numpy as np
from scipy.stats import zscore

# ML libraries------------------------------------------

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Metrics----------------------------------------------
from sklearn.metrics import classification_report, confusion_matrix

# library to manage natural language--------------------
import nltk
nltk.download('stopwords')
nltk.download('punkt')

# Manage regular expressions----------------------------
import re

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\xizes\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\xizes\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
df = pd.read_csv('DelayedFlights.csv')

In [4]:
pd.set_option('display.max_columns', None)
df.head(5)

Unnamed: 0.1,Unnamed: 0,Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
0,0,2008,1,3,4,2003.0,1955,2211.0,2225,WN,335,N712SW,128.0,150.0,116.0,-14.0,8.0,IAD,TPA,810,4.0,8.0,0,N,0,,,,,
1,1,2008,1,3,4,754.0,735,1002.0,1000,WN,3231,N772SW,128.0,145.0,113.0,2.0,19.0,IAD,TPA,810,5.0,10.0,0,N,0,,,,,
2,2,2008,1,3,4,628.0,620,804.0,750,WN,448,N428WN,96.0,90.0,76.0,14.0,8.0,IND,BWI,515,3.0,17.0,0,N,0,,,,,
3,4,2008,1,3,4,1829.0,1755,1959.0,1925,WN,3920,N464WN,90.0,90.0,77.0,34.0,34.0,IND,BWI,515,3.0,10.0,0,N,0,2.0,0.0,0.0,0.0,32.0
4,5,2008,1,3,4,1940.0,1915,2121.0,2110,WN,378,N726SW,101.0,115.0,87.0,11.0,25.0,IND,JAX,688,4.0,10.0,0,N,0,,,,,


In [5]:
#df.columns.tolist()
#df['SecurityDelay'].value_counts()

In [6]:
df_drop = df.drop(columns=[
 'Unnamed: 0',
 'Year',
 'Month',
 'DayofMonth',
 'DayOfWeek',
 'DepTime',
 'CRSDepTime',
 'ArrTime',
 'CRSArrTime',
 'UniqueCarrier',
 'FlightNum',
 'TailNum',
 'CRSElapsedTime',
 'Origin',
 'Dest',
 'Cancelled',
 'CancellationCode',
 'Diverted',
 'CarrierDelay',
 'WeatherDelay',
 'NASDelay',
 'SecurityDelay',
 'LateAircraftDelay'])

In [7]:
df_drop.dropna(inplace=True)

In [8]:
df_drop.isna().sum()

ActualElapsedTime    0
AirTime              0
ArrDelay             0
DepDelay             0
Distance             0
TaxiIn               0
TaxiOut              0
dtype: int64

In [9]:
df_sample = df_drop.sample(n=100000)

In [10]:
df_no_outliers = df_sample[(np.abs(zscore(df_sample)) < 3).all(axis=1)]

In [11]:
df_no_outliers.shape

(92689, 7)

In [12]:
df_no_outliers.describe()

Unnamed: 0,ActualElapsedTime,AirTime,ArrDelay,DepDelay,Distance,TaxiIn,TaxiOut
count,92689.0,92689.0,92689.0,92689.0,92689.0,92689.0,92689.0
mean,125.913032,102.867082,35.453333,37.935915,721.415055,6.320243,16.725706
std,61.704251,59.669471,40.03916,37.64955,501.329831,3.454566,9.527149
min,16.0,8.0,-51.0,6.0,30.0,0.0,0.0
25%,78.0,57.0,8.0,12.0,337.0,4.0,10.0
50%,112.0,88.0,23.0,23.0,595.0,5.0,14.0
75%,158.0,133.0,50.0,50.0,967.0,8.0,20.0
max,350.0,314.0,213.0,203.0,2486.0,22.0,61.0


In [13]:
# Select features and target
X = df_no_outliers.drop('ArrDelay', axis=1)
y = df_no_outliers['ArrDelay']

# Nivell 1

## Exercici 1
Agafa el conjunt de dades que vulguis i realitza un pipeline i un gridsearch aplicant l'algorisme de Random Forest.

In [14]:
# Create the steps of the pipeline containing the scaler and the model
steps = [('scaler', StandardScaler()), ('random_forest_regr', RandomForestRegressor())]

In [15]:
# Create pipeline object 
pipeline = Pipeline(steps=steps)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [71]:
random_forest_hiperparameters = {'random_forest_regr__n_estimators':[20, 40, 100], 
                                 'random_forest_regr__criterion':['mse', 'mae'], 
                                 'random_forest_regr__max_depth':[10, 15], 
                                 'random_forest_regr__random_state':[11]}

In [75]:
rand = RandomizedSearchCV(estimator=pipeline, param_distributions=random_forest_hiperparameters, cv=3, n_iter=5)

In [76]:
rand.fit(X_train, y_train)

RandomizedSearchCV(cv=3,
                   estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                             ('random_forest_regr',
                                              RandomForestRegressor())]),
                   n_iter=5,
                   param_distributions={'random_forest_regr__criterion': ['mse',
                                                                          'mae'],
                                        'random_forest_regr__max_depth': [10,
                                                                          15],
                                        'random_forest_regr__n_estimators': [20,
                                                                             40,
                                                                             100],
                                        'random_forest_regr__random_state': [11]})

**For some reason that I couldn´t understand RandomizedSearchCV took 7h(!) to complete. Even using just a few parameters and a sampple of the entire dataset.**

Try the classfier again with the higher number of estimators we determined before in order to check the timie it took to finish.

In [17]:
classifier_r_forest = RandomForestRegressor(n_estimators=100, criterion='mae', max_depth=15, random_state=11)

In [18]:
classifier_r_forest.fit(X_train, y_train)

RandomForestRegressor(criterion='mae', max_depth=15, random_state=11)

## Exercici 2
Agafa un text en anglès que vulguis, i calcula'n la freqüència de les paraules

**Aunque el enunciado del ejercicio pida un texto en inglés, preferí hacerlo usando 5 tweets recientes del perfil de Vox. Por un lado para usar algo más cercano a la realidad española y por otro con vistas a la posibilidad de desarollar un proyecto futuro de análisis de su perfil.**

In [87]:
# Copy and paste 5 recent tweets from Vox profile in twitter
texts = 'Resérvate el 9 y 10 de octubre para acudir a #VIVA21 🇪🇸, un gran acontecimiento que permitirá a los españoles disfrutar de la riqueza cultural de todas las provincias españolas reunidas en un mismo lugar. Atropellan derechos como la izquierda. Mantienen chiringuitos como la izquierda. Promueven el efecto llamada como la izquierda. Engañan a sus electores como la izquierda. Idolatran a las mismas figuras históricas que la izquierda. Los que estéis por la Región de Murcia no podéis faltar a esta cita este viernes en #Cartagena. El Gobierno pretende crear más infraestructuras para la acogida de los ilegales que llegan desde una de las rutas más peligrosas, Argelia. ¡No lo permitáis! #STOPInvasión. Más ideología y menos conocimiento en las aulas quiere decir menos oportunidades para los españoles con pocos recursos. Solo las elites pueden permitirse el lujo de llenar el currículo educativo de chorradas. Sin educación no hay futuro para nuestra gente. Necesitan a nuestros niños. Necesitan su inocencia y su pureza para cambiar el mundo y tranformarlo en algo sórdido, falso y destructivo. No se lo permitiremos. QUE SAQUEN SUS SUCIAS MANOS DE NUESTROS HIJOS.'

In [90]:
# Tokenize words
words = nltk.tokenize.word_tokenize(texts)

In [95]:
len(words)

209

In [83]:
#IGNORE

# Since we have 5 different texts, I´ll create a dictionary to receive them in a more organized fashion
#dic_words = {}

# Tokenize words for each text
#for i, text in enumerate(texts):
#    print(i, text)
#    dic_words['words_' + str(i)] = nltk.tokenize.word_tokenize(text)

0 Resérvate el 9 y 10 de octubre para acudir a #VIVA21 🇪🇸, un gran acontecimiento que permitirá a los españoles disfrutar de la riqueza cultural de todas las provincias españolas reunidas en un mismo lugar.
1 Atropellan derechos como la izquierda. Mantienen chiringuitos como la izquierda. Promueven el efecto llamada como la izquierda. Engañan a sus electores como la izquierda. Idolatran a las mismas figuras históricas que la izquierda.
2 Los que estéis por la Región de Murcia no podéis faltar a esta cita este viernes en #Cartagena. El Gobierno pretende crear más infraestructuras para la acogida de los ilegales que llegan desde una de las rutas más peligrosas, Argelia. ¡No lo permitáis! #STOPInvasión
3 Más ideología y menos conocimiento en las aulas quiere decir menos oportunidades para los españoles con pocos recursos. Solo las elites pueden permitirse el lujo de llenar el currículo educativo de chorradas. Sin educación no hay futuro para nuestra gente.
4 Necesitan a nuestros niños. Ne

In [91]:
# Extract the frequency of each word/punctuation
token_freq = nltk.probability.FreqDist(words)

In [92]:
token_freq.most_common()

[('.', 16),
 ('de', 8),
 ('la', 8),
 ('a', 6),
 ('el', 5),
 ('y', 5),
 ('para', 5),
 ('las', 5),
 ('izquierda', 5),
 ('que', 4),
 ('en', 4),
 ('como', 4),
 ('#', 3),
 (',', 3),
 ('los', 3),
 ('un', 2),
 ('españoles', 2),
 ('no', 2),
 ('más', 2),
 ('lo', 2),
 ('menos', 2),
 ('Necesitan', 2),
 ('su', 2),
 ('Resérvate', 1),
 ('9', 1),
 ('10', 1),
 ('octubre', 1),
 ('acudir', 1),
 ('VIVA21', 1),
 ('🇪🇸', 1),
 ('gran', 1),
 ('acontecimiento', 1),
 ('permitirá', 1),
 ('disfrutar', 1),
 ('riqueza', 1),
 ('cultural', 1),
 ('todas', 1),
 ('provincias', 1),
 ('españolas', 1),
 ('reunidas', 1),
 ('mismo', 1),
 ('lugar', 1),
 ('Atropellan', 1),
 ('derechos', 1),
 ('Mantienen', 1),
 ('chiringuitos', 1),
 ('Promueven', 1),
 ('efecto', 1),
 ('llamada', 1),
 ('Engañan', 1),
 ('sus', 1),
 ('electores', 1),
 ('Idolatran', 1),
 ('mismas', 1),
 ('figuras', 1),
 ('históricas', 1),
 ('Los', 1),
 ('estéis', 1),
 ('por', 1),
 ('Región', 1),
 ('Murcia', 1),
 ('podéis', 1),
 ('faltar', 1),
 ('esta', 1),
 ('cita'

# Nivell 2

## Exercici 1
Treu les stopwords i realitza stemming al teu conjunt de dades.

In [127]:
# Create variable with spanish stopwords
stopwords_spanish = stopwords.words('spanish')

# Create list to hold all words that are not stopwords
texts_no_stop_words = []

# Loop all words and append all non-stopwords to the list
# We´ll also exclude all special characters and punctuations
for word in words:
    if word not in stopwords_spanish and word.isalnum():
        texts_no_stop_words.append(word)

In [128]:
texts_no_stop_words

['Resérvate',
 '9',
 '10',
 'octubre',
 'acudir',
 'VIVA21',
 'gran',
 'acontecimiento',
 'permitirá',
 'españoles',
 'disfrutar',
 'riqueza',
 'cultural',
 'todas',
 'provincias',
 'españolas',
 'reunidas',
 'mismo',
 'lugar',
 'Atropellan',
 'derechos',
 'izquierda',
 'Mantienen',
 'chiringuitos',
 'izquierda',
 'Promueven',
 'efecto',
 'llamada',
 'izquierda',
 'Engañan',
 'electores',
 'izquierda',
 'Idolatran',
 'mismas',
 'figuras',
 'históricas',
 'izquierda',
 'Los',
 'Región',
 'Murcia',
 'podéis',
 'faltar',
 'cita',
 'viernes',
 'Cartagena',
 'El',
 'Gobierno',
 'pretende',
 'crear',
 'infraestructuras',
 'acogida',
 'ilegales',
 'llegan',
 'rutas',
 'peligrosas',
 'Argelia',
 'permitáis',
 'STOPInvasión',
 'Más',
 'ideología',
 'menos',
 'conocimiento',
 'aulas',
 'quiere',
 'decir',
 'menos',
 'oportunidades',
 'españoles',
 'pocos',
 'recursos',
 'Solo',
 'elites',
 'pueden',
 'permitirse',
 'lujo',
 'llenar',
 'currículo',
 'educativo',
 'chorradas',
 'Sin',
 'educación'

In [129]:
len(texts_no_stop_words)

104

In [212]:
nltk.probability.FreqDist(texts_no_stop_words).most_common()

[('izquierda', 5),
 ('españoles', 2),
 ('menos', 2),
 ('Necesitan', 2),
 ('Resérvate', 1),
 ('9', 1),
 ('10', 1),
 ('octubre', 1),
 ('acudir', 1),
 ('VIVA21', 1),
 ('gran', 1),
 ('acontecimiento', 1),
 ('permitirá', 1),
 ('disfrutar', 1),
 ('riqueza', 1),
 ('cultural', 1),
 ('todas', 1),
 ('provincias', 1),
 ('españolas', 1),
 ('reunidas', 1),
 ('mismo', 1),
 ('lugar', 1),
 ('Atropellan', 1),
 ('derechos', 1),
 ('Mantienen', 1),
 ('chiringuitos', 1),
 ('Promueven', 1),
 ('efecto', 1),
 ('llamada', 1),
 ('Engañan', 1),
 ('electores', 1),
 ('Idolatran', 1),
 ('mismas', 1),
 ('figuras', 1),
 ('históricas', 1),
 ('Los', 1),
 ('Región', 1),
 ('Murcia', 1),
 ('podéis', 1),
 ('faltar', 1),
 ('cita', 1),
 ('viernes', 1),
 ('Cartagena', 1),
 ('El', 1),
 ('Gobierno', 1),
 ('pretende', 1),
 ('crear', 1),
 ('infraestructuras', 1),
 ('acogida', 1),
 ('ilegales', 1),
 ('llegan', 1),
 ('rutas', 1),
 ('peligrosas', 1),
 ('Argelia', 1),
 ('permitáis', 1),
 ('STOPInvasión', 1),
 ('Más', 1),
 ('ideología

In [121]:
# Create stem object for spanish words
stem_spanish = nltk.stem.snowball.SpanishStemmer()

In [122]:
# Test
stem_spanish.stem('hola')

'hol'

In [130]:
for word in texts_no_stop_words:
    print(stem_spanish.stem(word))

reservat
9
10
octubr
acud
viva21
gran
acontec
permit
español
disfrut
riquez
cultural
tod
provinci
español
reun
mism
lug
atropell
derech
izquierd
mantien
chiringuit
izquierd
promuev
efect
llam
izquierd
engañ
elector
izquierd
idolatr
mism
figur
histor
izquierd
los
region
murci
pod
falt
cit
viern
cartagen
el
gobiern
pretend
cre
infraestructur
acog
ilegal
lleg
rut
peligr
argeli
permit
stopinvasion
mas
ideolog
men
conoc
aul
quier
dec
men
oportun
español
poc
recurs
sol
elit
pued
permit
luj
llen
curricul
educ
chorr
sin
educ
futur
gent
necesit
niñ
necesit
inocent
purez
cambi
mund
tranform
sord
fals
destruct
no
permit
que
saqu
sus
suci
man
de
nuestr
hij


# Nivell 3

## Exercici 1
Realitza sentiment analysis al teu conjunt de dades.

# Text Classification
I´ll make a simple example to show what´s happening with CountVectorizer and the bag of words.

In [218]:
# Sentiments table: 
# -1 = negative, 
# 0 = neutral, 
# 1 = positive

teste = ["Hi, I´m a man", "Hi, I´m a woman", "I hate you!", "I love you woman!"]
sentimento = [0, 0, -1, 1]
df_test = pd.DataFrame()
df_test["texto"] = teste
df_test["sentimento"] = sentimento
df_test

Unnamed: 0,texto,sentimento
0,"Hi, I´m a man",0
1,"Hi, I´m a woman",0
2,I hate you!,-1
3,I love you woman!,1


In [238]:
# Create vectorizer for english words
vectorizer_eng = CountVectorizer(lowercase=True, stop_words="english")

# Vectorize texts
vectors_eng = vectorizer_eng.fit_transform(df_test["texto"])

# Bag of words shape: we have 4 lines and 5 words (features)
print(vectors_eng.shape)
print(vectors_eng.toarray())

(4, 5)
[[0 1 0 1 0]
 [0 1 0 0 1]
 [1 0 0 0 0]
 [0 0 1 0 1]]


In [264]:
# Print the bag of words and it´s order
# "hate" comes at position 0, "hi" at position 1, and so on...
# As we can see the vectorizer excluded some words that were considered stopwords
print(vectorizer_eng.vocabulary_)       

{'hi': 1, 'man': 3, 'woman': 4, 'hate': 0, 'love': 2}


In [262]:
# Create df from the bag of words, after vectorizing, add text and sentiment columns from original df
df_bow = pd.DataFrame(data=vectors_eng.toarray(), columns=[sorted(vectorizer_eng.vocabulary_)])
df_bow.insert(loc=0, column='text', value=df_test["texto"])
df_bow["sentiment"] = df_test["sentimento"]

df_bow

Unnamed: 0,text,hate,hi,love,man,woman,sentiment
0,"Hi, I´m a man",0,1,0,1,0,0
1,"Hi, I´m a woman",0,1,0,0,1,0
2,I hate you!,1,0,0,0,0,-1
3,I love you woman!,0,0,1,0,1,1


## Analysis with real dataset
Now, for this analysis I´ll use a dataset composed of tweets comments of airliners.

In [266]:
# Read csv into dataframe
df_airliners = pd.read_csv('tweets_public.csv', sep=',')

In [268]:
# Show full text
pd.set_option("max_colwidth", 0)
df_airliners[["airline_sentiment", "text"]].head()

Unnamed: 0,airline_sentiment,text
0,neutral,Trabajar en #Ryanair como #TMA: https://t.co/ruUArBe1tO #empleo
1,neutral,@Iberia @FIONAFERRER Cuando gusten en Cancún se viaja y disfruta de manera sin igual
2,negative,"Sabiais que @Iberia te trata muy bien en santiago de chile?Te cambia el asiento,te manda a volar en el wc trasero,e… https://t.co/uansbOnn69"
3,negative,"NUNCA NUNCA NUNCA pidáis el café de Ryanair.\nBueno, nada que vendan a bordo."
4,positive,@cris_tortu @dakar @Iberia @Mitsubishi_ES @BFGoodrichEU @BurgosTur @ASTIntlogistics @Uremovil @karbium Muchos éxito… https://t.co/Ed9VrKyVU7


In [309]:
# as we can see the sentiment column is a little imbalanced. But for now we´ll leave like this.
df_airliners['airline_sentiment'].value_counts()

negative    3769
neutral     2609
positive    1489
Name: airline_sentiment, dtype: int64

## Preprocessing
Clean tweets from special characters, punctuations and mentions

In [363]:
# Clean tweets 
def cleanTweet(text):
    text = re.sub(r'@[A-Za-z0-9_]+','', text) # Remove @mentions
    text = re.sub(r'#', '', text) # Remove '#' symbol
    text = re.sub(r'RT[\s]+', '', text) # Remove RT and whitespaces after it
    text = re.sub(r'https?:\/\/\S+', '', text) # Remove url´s (http:// or https:// followed by non-whitespaces)
    text = re.sub(r'\d', '', text) # Remove any digit
    return text

In [364]:
# Apply cleaning function
df_airliners['clean_tweets'] = df['text'].apply(lambda tweet: cleanTweet(tweet))

In [365]:
df_airliners.head(1)

Unnamed: 0,airline_sentiment,is_reply,reply_count,retweet_count,text,tweet_coord,tweet_created,tweet_id,tweet_location,user_timezone,clean_tweets
0,neutral,False,0,0,Trabajar en #Ryanair como #TMA: https://t.co/ruUArBe1tO #empleo,,Fri Nov 03 12:05:12 +0000 2017,926419989107798016,,Madrid,Trabajar en Ryanair como TMA: empleo


In [265]:
#IGNORE

# Create tweeter specific tokenizer object
#t_tknzr = nltk.tokenize.TweetTokenizer()

In [269]:
# IGNORE

# Loop through all the tweets in the df, apply lower() and tokenize it
#tweets = [t_tknzr.tokenize(tweet.lower()) for tweet in df_airliners["text"]]

In [356]:
#IGNORE

# Check
#tweets[0]

In [None]:
#IGNORE

# Since the vectorizer below already has a parameter for excludind stopwords, I´ll leave this function to it
# I´m mantaining this cell just as and exercise

# Create spanish stopwords list
stopword_spa = nltk.corpus.stopwords.words("spanish")

# Create list to hold all words that are not stopwords
tokens_no_stp_words = []

# Loop through each tweet and each word check if it´s not a stopword and append it to the list
for tweet in tweets:
    for token in tweet:
        if token not in stopword_spa:
            tokens_no_stp_words.append(token)

#tokens_no_stp_words

# Check if the list tokens_no_stp_words really don´t have some pretty common stopwords
print(tokens_no_stp_words.count("de"))
print(tokens_no_stp_words.count("la"))
print(tokens_no_stp_words.count("en"))
print(tokens_no_stp_words.count("para"))
print(tokens_no_stp_words.count("un"))

## Text Classification

### 1 - Bag of Words approach

In [358]:
# Create vectorizer with a maximum of 1000 words on the bag of words
vectorizer = CountVectorizer(lowercase=True, stop_words=stopword_spa, tokenizer=t_tknzr.tokenize, max_features=1000)

In [366]:
# Create bag of words based on the tweets 
vectors = vectorizer.fit_transform(df_airliners['clean_tweets'])

In [367]:
# Shape of 7867 lines and 1000 words (features)
print(vectors.shape)

(7867, 1000)


In [420]:
# Create 'bag of words' df and insert text and sentiment from original df
df_bow_airliners = pd.DataFrame
df_bow_airliners = pd.DataFrame(data=vectors.toarray(), columns=sorted(vectorizer.vocabulary_.keys()))
df_bow_airliners.insert(loc=0, column='text', value=df_airliners['text'])
df_bow_airliners.insert(loc=1, column='sentiment', value=df_airliners['airline_sentiment'])

In [424]:
df_bow_airliners.head(5)

Unnamed: 0,text,sentiment,!,"""",$,%,',(,),):,*,+,",",-,.,..,...,/,:,:(,:),;,=,>,?,[,],abre,abrir,acaba,acabo,acceder,accidente,acuerdo,además,aerea,aerolinea,aerolineas,aerolínea,aerolíneas,aeropuerto,aeropuertos,agosto,ahora,ahí,air,airbus,aire,aires,aires-madrid,airlines,airways,ala,alemania,alguien,alguna,algún,alicante,allá,allí,amazon,amenaza,american,amigos,aniversario,aniversarioiberia,anuncia,aparece,app,aqui,aquí,argentina,asiento,asientos,asturias,así,atencion,atención,aterrizaje,aterrizar,aumentará,aunque,austriaca,aviación,avianca,avion,aviones,avios,avión,ayer,ayuda,ayudar,ayudarme,azafata,azafatas,aérea,aéreas,aéreo,año,años,aún,b,ba,bajar,bajo,bajó,barajas,barato,baratos,barcelona,bastante,bcn,bebé,before,bici,bicicleta,bien,bilbao,billete,billetes,black,blackfriday,bodega,bogotá,bordo,british,bruselas,buen,buena,buenas,bueno,buenos,busca,buscar,business,c,cabeza,cabina,cada,call,cambiar,cambio,campaña,cancela,cancelaciones,cancelación,cancelado,cancelan,cancelar,cansado,cara,cargo,caro,casa,casi,caso,cede,celebra,celebrarlo,center,centro,check,check-in,checkin,chile,cierra,cierto,cinco,ciudad,claro,clase,click,cliente,clientes,co,cobra,cobran,cobrar,coger,cola,colombia,comercial,comida,compartam,compartir,compañia,compañía,compañías,compra,comprado,comprar,compras,compre,compro,compré,condiciones,conexiones,conexión,conforma,conseguir,consulta,contacto,contesta,contestar,contigo,convertirse,convoca,convocan,correcto,correo,coruña,cosa,cosas,cost,costaría,costo,creo,cualquier,cuanto,cuatro,cuenta,cuento,cuesta,cuidado,culpa,cumple,cuál,cuándo,cuánto,código,cómo,d,da,dado,dais,dan,dar,darme,datos,debe,debería,debo,decir,deja,dejado,dejan,dejar,dejaron,demora,dentro,denuncian,derechos,desastre,descenso,desconvocan,descubre,descuento,descuentos,deseadme,desembarcar,despues,después,destino,destinos,devolucion,devolver,di,dia,diario,dias,dice,dicen,dicho,diciembre,diciendo,diferencia,diferentes,digan,digo,dijeron,dinero,dio,dios,dirección,directa,directo,director,directos,disculpas,dm,domingo,dos,doy,duda,duelodemarcas,décadas,día,días,dólares,dónde,easyjet,eeuu,eh,ejemplo,elegir,elige,email,embarcar,embarque,emergencia,empleo,empresa,empresas,encanta,encima,encontrar,encuentro,enero,enhorabuena,entiendo,entonces,entrar,entrará,enviado,envié,equipaje,equipajes,equipo,error,escala,escribe,escrito,espacio,españa,español,españoles,especial,espera,esperando,esperar,espero,estan,europa,europeos,euros,evitar,excelente,existe,experiencia,explicación,express,ezeiza,f,factura,facturación,facturar,falta,familia,favor,favorito,fecha,fechas,felices,felicidades,felicitaciones,feliz,fiestas,fin,final,flying,forma,formulario,foto,fraude,friday,funciona,fácil,gana,ganas,general,genial,gente,gestión,gracias,gran,grande,grandes,gratis,grupo,gusta,gustaría,gusto,h,habeis,haber,hablando,hablar,hace,hacen,hacer,hacerlo,hacia,haciendo,hacía,hago,harto,hecho,hice,hijo,hijos,historia,hizo,hola,holaargentina,holacolombia,holaeuropa,hombre,hora,horario,horas,horrible,hotel,hoy,huelga,huelgas,i,iag,ib,iba,ibe,iberia,ida,idea,ido,igual,importa,imposible,in,inaugura,incidencia,incl,includ,increíble,indemnización,información,informar,instantaneamente,intentado,intentando,intento,internacional,ir,irlanda,irlandeses,it,italia,jaja,jajaja,jajajaja,jamás,japón,jodamoslos,juan,jueves,junto,juntos,justo,k,karla,l,lado,lamentable,lanza,lanzan,larga,largo,latam,letal,level,liderazgo,lindo,lista,llama,llamada,llamar,llamo,llega,llegada,llegado,llegan,llegando,llegar,llegará,llego,llegue,llegó,lleno,lleva,llevamos,llevar,llevo,londres,low,luego,lufthansa,lugar,línea,m,ma,mad,madre,madrid,mail,mal,mala,maleta,maletas,mallorca,manera,mano,marca,marketing,marzo,mas,matriz,mayor,mañana,md,media,medicamentos,medio,mejor,mejores,menos,mensaje,mensajes,menudo,menú,mercadona,mes,meses,miedo,mientras,mierda,mil,millones,milán,min,minutos,mira,mirando,misma,mismo,mitad,momento,mostrador,motivo,mucha,muchas,mundo,málaga,méxico,móvil,n,nadie,navidad,navidades,necesita,necesito,niki,ninguna,ningún,niños,noche,noches,nombre,normal,norwegian,note,noteabordo,noticia,noticias,noviembre,nueva,nuevas,nueve,nuevo,nuevos,numero,nunca,número,octubre,oferta,ofertas,oficina,oficinas,ofrece,ojalá,ok,online,opción,operado,origen,overbooking,oye,p,pa,paga,pagado,pagar,pagas,pagina,pago,palma,par,parece,paros,parte,partir,parís,pasa,pasado,pasaje,pasajero,pasajeros,pasajes,pasan,pasar,paso,pasó,país,países,pedir,pena,peor,perder,perdida,perdido,perdieron,perdió,perfecto,permite,persona,personal,personas,pesar,pesetas,pide,pie,pierde,piloto,pilotos,pista,plata,plaza,plazas,plus,po,podemos,poder,podría,podrían,podéis,política,pone,poner,porqué,portugal,posible,prat,precio,precios,pregunta,pregunto,premium,primer,primera,primero,prioridad,privado,problema,problemas,procedente,programa,promoción,pronto,propia,próximo,publicidad,pueda,puede,pueden,puedes,puedo,puente,puerta,puerto,pues,puesto,puntos,puntuales,puse,puta,puto,página,pésima,pésimo,q,qu,queda,queja,queremos,quería,quiebra,quiere,quieren,quieres,quiero,quinto,quién,r,razón,realizar,rebajas,recibido,recibí,reclamaciones,reclamación,reclamar,reclamo,reclutará,reconoce,reconocerá,regala,regalando,regalo,regreso,reina,renfe,reserva,reservar,reservas,residente,respecto,responden,responder,respuesta,resulta,retrasa,retrasado,retraso,retrasos,retuiteemos,rey,rico,roma,ropa,rt,ruta,rutas,ryanair,s,sabe,saben,saber,sabes,sabéis,sabía,sacar,sala,sale,salen,salida,salir,salió,saludo,saludos,samsung,san,santiago,santo,sección,seguimos,seguir,segunda,segundo,seguridad,seguro,según,semana,semanales,semanas,sentirte,separados,ser,seria,serio,servicio,sevilla,señora,señores,shanghai,share,si,sido,siempre,siendo,siento,sigo,sigue,siguen,siguiente,sindical,sindicato,sindicatos,sino,siquiera,sirve,sistema,sitio,sola,solo,solucionar,solución,sorpresa,spanair,subir,suerte,supuesto,sábado,sé,sólo,t,tal,tampoco,tan,tarda,tarde,tardes,tarifa,tarjeta,tarjetas,tax,tecnología,telefono,telefónica,teléfono,tema,tener,tenerife,tiempo,tierra,tipo,to,toca,toda,todas,todavía,tokyo,trabajadores,trabajar,trabajo,tras,trata,trato,travelthursday,través,trayecto,tren,tres,tripulación,tripulantes,turismo,turista,turquía,twitter,técnicos,uds,unas,une,unió,usar,usted,ustedes,usuario,v,va,vacaciones,vale,valencia,valija,valijas,vamos,van,varias,varios,vas,vaya,ve,veces,velázquez,venden,venezuela,venta,ventas,veo,ver,verano,verdad,vergonzoso,verguenza,vergüenza,vez,vi,via,viaja,viajado,viajamos,viajando,viajar,viajas,viaje,viajeros,viajes,viajo,vida,video,viene,viernes,vigo,vigor,vip,visitar,visto,volado,volamos,volando,volar,volver,volé,voy,vs,vuela,vueling,vuelo,vuelos,vuelta,vuelve,vuelvo,vía,vídeo,web,x,xd,xq,york,|,¡,¿,última,último,única,único,‍,–,‘,’,“,”,…,€,♀,✈,❤,️,🇪,🇸,🎉,🏻,🏼,🏽,👍,👎,👏,😀,😁,😂,😉,😊,😍,😘,😡,😢,😭,😱,🙄,🙈,🙏,🛫,🤔,🤣,🤦
0,Trabajar en #Ryanair como #TMA: https://t.co/ruUArBe1tO #empleo,neutral,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,@Iberia @FIONAFERRER Cuando gusten en Cancún se viaja y disfruta de manera sin igual,neutral,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Sabiais que @Iberia te trata muy bien en santiago de chile?Te cambia el asiento,te manda a volar en el wc trasero,e… https://t.co/uansbOnn69",negative,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"NUNCA NUNCA NUNCA pidáis el café de Ryanair.\nBueno, nada que vendan a bordo.",negative,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,@cris_tortu @dakar @Iberia @Mitsubishi_ES @BFGoodrichEU @BurgosTur @ASTIntlogistics @Uremovil @karbium Muchos éxito… https://t.co/Ed9VrKyVU7,positive,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [425]:
# IGNORE
# Map sentiment values to numbers
#df_bow_airliners['sentiment_map'] = df_bow_airliners['sentiment'].map({'negative':'-1', 'neutral':'0', 'positive': '1'})

In [423]:
X_train, X_test, y_train, y_test = train_test_split(vectors, df_bow_airliners['sentiment'], test_size=0.2, random_state=11)

In [429]:
classifier_MultiNB = MultinomialNB()

In [430]:
classifier_MultiNB.fit(X_train, y_train)

MultinomialNB()

In [435]:
classifier_MultiNB.score(X_test, y_test)

0.5978398983481575

In [436]:
y_pred = classifier_MultiNB.predict(X_test)

In [439]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.65      0.82      0.73       755
     neutral       0.54      0.35      0.42       513
    positive       0.48      0.47      0.48       306

    accuracy                           0.60      1574
   macro avg       0.56      0.54      0.54      1574
weighted avg       0.58      0.60      0.58      1574



In [440]:
confusion_matrix(y_test, y_pred)

array([[621,  85,  49],
       [232, 177, 104],
       [ 96,  67, 143]], dtype=int64)

To try to improve the results we could preprocess the texts a little more removing some of the special characters that are still present in it.

### 2 - TF-IDF approach

In [448]:
vectorizer_tdidf = TfidfVectorizer(lowercase=True, stop_words=stopword_spa, tokenizer=t_tknzr.tokenize, max_features=1000)

In [449]:
vectors_tfidf = vectorizer_tdidf.fit_transform(df_airliners['clean_tweets'])

In [450]:
vectors_tfidf.shape

(7867, 1000)

In [452]:
# Create 'Tf-idf' df and insert text and sentiment from original df
df_tfidf_airliners = pd.DataFrame
df_tfidf_airliners = pd.DataFrame(data=vectors_tfidf.toarray(), columns=sorted(vectorizer_tdidf.vocabulary_.keys()))
df_tfidf_airliners.insert(loc=0, column='text', value=df_airliners['text'])
df_tfidf_airliners.insert(loc=1, column='sentiment', value=df_airliners['airline_sentiment'])

In [459]:
df_tfidf_airliners.head(1)

Unnamed: 0,text,sentiment,!,"""",$,%,',(,),):,*,+,",",-,.,..,...,/,:,:(,:),;,=,>,?,[,],abre,abrir,acaba,acabo,acceder,accidente,acuerdo,además,aerea,aerolinea,aerolineas,aerolínea,aerolíneas,aeropuerto,aeropuertos,agosto,ahora,ahí,air,airbus,aire,aires,aires-madrid,airlines,airways,ala,alemania,alguien,alguna,algún,alicante,allá,allí,amazon,amenaza,american,amigos,aniversario,aniversarioiberia,anuncia,aparece,app,aqui,aquí,argentina,asiento,asientos,asturias,así,atencion,atención,aterrizaje,aterrizar,aumentará,aunque,austriaca,aviación,avianca,avion,aviones,avios,avión,ayer,ayuda,ayudar,ayudarme,azafata,azafatas,aérea,aéreas,aéreo,año,años,aún,b,ba,bajar,bajo,bajó,barajas,barato,baratos,barcelona,bastante,bcn,bebé,before,bici,bicicleta,bien,bilbao,billete,billetes,black,blackfriday,bodega,bogotá,bordo,british,bruselas,buen,buena,buenas,bueno,buenos,busca,buscar,business,c,cabeza,cabina,cada,call,cambiar,cambio,campaña,cancela,cancelaciones,cancelación,cancelado,cancelan,cancelar,cansado,cara,cargo,caro,casa,casi,caso,cede,celebra,celebrarlo,center,centro,check,check-in,checkin,chile,cierra,cierto,cinco,ciudad,claro,clase,click,cliente,clientes,co,cobra,cobran,cobrar,coger,cola,colombia,comercial,comida,compartam,compartir,compañia,compañía,compañías,compra,comprado,comprar,compras,compre,compro,compré,condiciones,conexiones,conexión,conforma,conseguir,consulta,contacto,contesta,contestar,contigo,convertirse,convoca,convocan,correcto,correo,coruña,cosa,cosas,cost,costaría,costo,creo,cualquier,cuanto,cuatro,cuenta,cuento,cuesta,cuidado,culpa,cumple,cuál,cuándo,cuánto,código,cómo,d,da,dado,dais,dan,dar,darme,datos,debe,debería,debo,decir,deja,dejado,dejan,dejar,dejaron,demora,dentro,denuncian,derechos,desastre,descenso,desconvocan,descubre,descuento,descuentos,deseadme,desembarcar,despues,después,destino,destinos,devolucion,devolver,di,dia,diario,dias,dice,dicen,dicho,diciembre,diciendo,diferencia,diferentes,digan,digo,dijeron,dinero,dio,dios,dirección,directa,directo,director,directos,disculpas,dm,domingo,dos,doy,duda,duelodemarcas,décadas,día,días,dólares,dónde,easyjet,eeuu,eh,ejemplo,elegir,elige,email,embarcar,embarque,emergencia,empleo,empresa,empresas,encanta,encima,encontrar,encuentro,enero,enhorabuena,entiendo,entonces,entrar,entrará,enviado,envié,equipaje,equipajes,equipo,error,escala,escribe,escrito,espacio,españa,español,españoles,especial,espera,esperando,esperar,espero,estan,europa,europeos,euros,evitar,excelente,existe,experiencia,explicación,express,ezeiza,f,factura,facturación,facturar,falta,familia,favor,favorito,fecha,fechas,felices,felicidades,felicitaciones,feliz,fiestas,fin,final,flying,forma,formulario,foto,fraude,friday,funciona,fácil,gana,ganas,general,genial,gente,gestión,gracias,gran,grande,grandes,gratis,grupo,gusta,gustaría,gusto,h,habeis,haber,hablando,hablar,hace,hacen,hacer,hacerlo,hacia,haciendo,hacía,hago,harto,hecho,hice,hijo,hijos,historia,hizo,hola,holaargentina,holacolombia,holaeuropa,hombre,hora,horario,horas,horrible,hotel,hoy,huelga,huelgas,i,iag,ib,iba,ibe,iberia,ida,idea,ido,igual,importa,imposible,in,inaugura,incidencia,incl,includ,increíble,indemnización,información,informar,instantaneamente,intentado,intentando,intento,internacional,ir,irlanda,irlandeses,it,italia,jaja,jajaja,jajajaja,jamás,japón,jodamoslos,juan,jueves,junto,juntos,justo,k,karla,l,lado,lamentable,lanza,lanzan,larga,largo,latam,letal,level,liderazgo,lindo,lista,llama,llamada,llamar,llamo,llega,llegada,llegado,llegan,llegando,llegar,llegará,llego,llegue,llegó,lleno,lleva,llevamos,llevar,llevo,londres,low,luego,lufthansa,lugar,línea,m,ma,mad,madre,madrid,mail,mal,mala,maleta,maletas,mallorca,manera,mano,marca,marketing,marzo,mas,matriz,mayor,mañana,md,media,medicamentos,medio,mejor,mejores,menos,mensaje,mensajes,menudo,menú,mercadona,mes,meses,miedo,mientras,mierda,mil,millones,milán,min,minutos,mira,mirando,misma,mismo,mitad,momento,mostrador,motivo,mucha,muchas,mundo,málaga,méxico,móvil,n,nadie,navidad,navidades,necesita,necesito,niki,ninguna,ningún,niños,noche,noches,nombre,normal,norwegian,note,noteabordo,noticia,noticias,noviembre,nueva,nuevas,nueve,nuevo,nuevos,numero,nunca,número,octubre,oferta,ofertas,oficina,oficinas,ofrece,ojalá,ok,online,opción,operado,origen,overbooking,oye,p,pa,paga,pagado,pagar,pagas,pagina,pago,palma,par,parece,paros,parte,partir,parís,pasa,pasado,pasaje,pasajero,pasajeros,pasajes,pasan,pasar,paso,pasó,país,países,pedir,pena,peor,perder,perdida,perdido,perdieron,perdió,perfecto,permite,persona,personal,personas,pesar,pesetas,pide,pie,pierde,piloto,pilotos,pista,plata,plaza,plazas,plus,po,podemos,poder,podría,podrían,podéis,política,pone,poner,porqué,portugal,posible,prat,precio,precios,pregunta,pregunto,premium,primer,primera,primero,prioridad,privado,problema,problemas,procedente,programa,promoción,pronto,propia,próximo,publicidad,pueda,puede,pueden,puedes,puedo,puente,puerta,puerto,pues,puesto,puntos,puntuales,puse,puta,puto,página,pésima,pésimo,q,qu,queda,queja,queremos,quería,quiebra,quiere,quieren,quieres,quiero,quinto,quién,r,razón,realizar,rebajas,recibido,recibí,reclamaciones,reclamación,reclamar,reclamo,reclutará,reconoce,reconocerá,regala,regalando,regalo,regreso,reina,renfe,reserva,reservar,reservas,residente,respecto,responden,responder,respuesta,resulta,retrasa,retrasado,retraso,retrasos,retuiteemos,rey,rico,roma,ropa,rt,ruta,rutas,ryanair,s,sabe,saben,saber,sabes,sabéis,sabía,sacar,sala,sale,salen,salida,salir,salió,saludo,saludos,samsung,san,santiago,santo,sección,seguimos,seguir,segunda,segundo,seguridad,seguro,según,semana,semanales,semanas,sentirte,separados,ser,seria,serio,servicio,sevilla,señora,señores,shanghai,share,si,sido,siempre,siendo,siento,sigo,sigue,siguen,siguiente,sindical,sindicato,sindicatos,sino,siquiera,sirve,sistema,sitio,sola,solo,solucionar,solución,sorpresa,spanair,subir,suerte,supuesto,sábado,sé,sólo,t,tal,tampoco,tan,tarda,tarde,tardes,tarifa,tarjeta,tarjetas,tax,tecnología,telefono,telefónica,teléfono,tema,tener,tenerife,tiempo,tierra,tipo,to,toca,toda,todas,todavía,tokyo,trabajadores,trabajar,trabajo,tras,trata,trato,travelthursday,través,trayecto,tren,tres,tripulación,tripulantes,turismo,turista,turquía,twitter,técnicos,uds,unas,une,unió,usar,usted,ustedes,usuario,v,va,vacaciones,vale,valencia,valija,valijas,vamos,van,varias,varios,vas,vaya,ve,veces,velázquez,venden,venezuela,venta,ventas,veo,ver,verano,verdad,vergonzoso,verguenza,vergüenza,vez,vi,via,viaja,viajado,viajamos,viajando,viajar,viajas,viaje,viajeros,viajes,viajo,vida,video,viene,viernes,vigo,vigor,vip,visitar,visto,volado,volamos,volando,volar,volver,volé,voy,vs,vuela,vueling,vuelo,vuelos,vuelta,vuelve,vuelvo,vía,vídeo,web,x,xd,xq,york,|,¡,¿,última,último,única,único,‍,–,‘,’,“,”,…,€,♀,✈,❤,️,🇪,🇸,🎉,🏻,🏼,🏽,👍,👎,👏,😀,😁,😂,😉,😊,😍,😘,😡,😢,😭,😱,🙄,🙈,🙏,🛫,🤔,🤣,🤦
0,Trabajar en #Ryanair como #TMA: https://t.co/ruUArBe1tO #empleo,neutral,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.351906,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.606896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.24531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.669075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [454]:
X_train_idf, X_test_idf, y_train_idf, y_test_idf = train_test_split(vectors, df_tfidf_airliners['sentiment'], test_size=0.2, random_state=11)

In [455]:
classifier_MultiNB.fit(X_train_idf, y_train_idf)

MultinomialNB()

In [461]:
classifier_MultiNB.score(X_test_idf, y_test_idf)

0.5978398983481575

In [462]:
y_pred_idf = classifier_MultiNB.predict(X_test_idf)

In [464]:
print(classification_report(y_test_idf, y_pred_idf))

              precision    recall  f1-score   support

    negative       0.65      0.82      0.73       755
     neutral       0.54      0.35      0.42       513
    positive       0.48      0.47      0.48       306

    accuracy                           0.60      1574
   macro avg       0.56      0.54      0.54      1574
weighted avg       0.58      0.60      0.58      1574



In [463]:
confusion_matrix(y_test_idf, y_pred_idf)

array([[621,  85,  49],
       [232, 177, 104],
       [ 96,  67, 143]], dtype=int64)

Curiously we got exacly the same result as in the Bag Of Words approach. 