# Sentiment Analysis (Model evaluation)
## On Covid related articles

#### Load Dataset 

In [1]:
import pandas as pd
airline_raw = pd.read_csv("./flightglobal_cleaned.csv") 

In [2]:
df = pd.DataFrame(airline_raw)
df = df.dropna()
df = df[df['Topic'].str.contains("Coronavirus")]

airline_Title = df['Title'].fillna("")
airline_Article = df['Article'].fillna("")

In [3]:
df = df.reset_index(drop=True)
df

Unnamed: 0.1,Unnamed: 0,PageLink,Title,Article,Date,Topic
0,1,https://www.flightglobal.com/fleets/tap-to-cut...,TAP to cut more aircraft as losses mount,TAP Air Portugal has disclosed plans to furthe...,30-Jun-20,"Coronavirus, Europe, Fleets, Networks"
1,11,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa's reactivated fleet to reach 380 air...,Lufthansa Group plans to have reactivated half...,30-Jun-20,"Coronavirus, Europe, Fleets, Networks"
2,12,https://www.flightglobal.com/airlines/sas-deta...,SAS details extensive make-or-break recapitali...,Scandinavian operator SAS has detailed a recap...,30-Jun-20,"Air Transport, Coronavirus, Europe, SAS"
3,16,https://www.flightglobal.com/fleets/norwegian-...,Norwegian cancels 97 Boeing aircraft orders,Norwegian has cancelled orders for 97 Boeing a...,30-Jun-20,"Airframers, Airlines, Coronavirus, Europe, Fle..."
4,22,https://www.flightglobal.com/networks/porter-a...,Porter Airlines delays restart to 31 August,Canada's Porter Airlines will restart its oper...,30-Jun-20,"Airlines, Coronavirus, Networks, North America..."
...,...,...,...,...,...,...
703,2078,https://www.flightglobal.com/networks/british-...,British Airways suspends mainland Chinese serv...,British Airways is temporarily suspending all ...,29-Jan-20,"Air Transport, Airlines, Asia Pacific, Coronav..."
704,2082,https://www.flightglobal.com/networks/united-s...,United suspends some flights to China,"Citing a significant drop in load factors, Uni...",29-Jan-20,"Airlines, Asia Pacific, Coronavirus, Networks"
705,2086,https://www.flightglobal.com/airlines/hong-kon...,Hong Kong carriers to halve capacity into China,Hong Kong's four main carriers will halve thei...,29-Jan-20,"Cathay Dragon, Cathay Pacific, China, Coronavi..."
706,2105,https://www.flightglobal.com/air-transport/eas...,EASA issues safety bulletin on Wuhan coronavir...,As the spread of the novel coronavirus (2019-n...,28-Jan-20,"China, Coronavirus, Europe"


### 1) Using Lexicon by Hu and Liu

###### Tokenization and Lowercasing

In [4]:
from nltk.tokenize import word_tokenize

df['Month'] = pd.DatetimeIndex(df['Date']).month
df2 = df[['PageLink','Title','Article','Month','Topic']]

#tokenize
df2['sent'] = df['Article'].apply(word_tokenize)  


#lowercase
def lower(row):
    lower = [w.lower() for w in row['sent']]
    return lower

df2['sent'] = df2.apply(lower,axis=1)

df2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Unnamed: 0,PageLink,Title,Article,Month,Topic,sent
0,https://www.flightglobal.com/fleets/tap-to-cut...,TAP to cut more aircraft as losses mount,TAP Air Portugal has disclosed plans to furthe...,6,"Coronavirus, Europe, Fleets, Networks","[tap, air, portugal, has, disclosed, plans, to..."
1,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa's reactivated fleet to reach 380 air...,Lufthansa Group plans to have reactivated half...,6,"Coronavirus, Europe, Fleets, Networks","[lufthansa, group, plans, to, have, reactivate..."
2,https://www.flightglobal.com/airlines/sas-deta...,SAS details extensive make-or-break recapitali...,Scandinavian operator SAS has detailed a recap...,6,"Air Transport, Coronavirus, Europe, SAS","[scandinavian, operator, sas, has, detailed, a..."
3,https://www.flightglobal.com/fleets/norwegian-...,Norwegian cancels 97 Boeing aircraft orders,Norwegian has cancelled orders for 97 Boeing a...,6,"Airframers, Airlines, Coronavirus, Europe, Fle...","[norwegian, has, cancelled, orders, for, 97, b..."
4,https://www.flightglobal.com/networks/porter-a...,Porter Airlines delays restart to 31 August,Canada's Porter Airlines will restart its oper...,6,"Airlines, Coronavirus, Networks, North America...","[canada, 's, porter, airlines, will, restart, ..."


##### Lemmatization

In [5]:
import nltk
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer

#POS Tagging

#Mapping of NLTK’s POS tags to the format wordnet lemmatizer would accept. 
#The get_wordnet_pos() function defined below does this mapping job.

def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
#     print(tag)
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}
#     print(tag_dict)
    return tag_dict.get(tag, wordnet.NOUN)


#Lemmatization with WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

def lemma(row):
    lem = ([lemmatizer.lemmatize(w, get_wordnet_pos(w)) for w in row['sent']])
    return lem


df2['lem'] = df2.apply(lemma,axis =1)
df2.head()


Unnamed: 0,PageLink,Title,Article,Month,Topic,sent,lem
0,https://www.flightglobal.com/fleets/tap-to-cut...,TAP to cut more aircraft as losses mount,TAP Air Portugal has disclosed plans to furthe...,6,"Coronavirus, Europe, Fleets, Networks","[tap, air, portugal, has, disclosed, plans, to...","[tap, air, portugal, have, disclose, plan, to,..."
1,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa's reactivated fleet to reach 380 air...,Lufthansa Group plans to have reactivated half...,6,"Coronavirus, Europe, Fleets, Networks","[lufthansa, group, plans, to, have, reactivate...","[lufthansa, group, plan, to, have, reactivate,..."
2,https://www.flightglobal.com/airlines/sas-deta...,SAS details extensive make-or-break recapitali...,Scandinavian operator SAS has detailed a recap...,6,"Air Transport, Coronavirus, Europe, SAS","[scandinavian, operator, sas, has, detailed, a...","[scandinavian, operator, sa, have, detailed, a..."
3,https://www.flightglobal.com/fleets/norwegian-...,Norwegian cancels 97 Boeing aircraft orders,Norwegian has cancelled orders for 97 Boeing a...,6,"Airframers, Airlines, Coronavirus, Europe, Fle...","[norwegian, has, cancelled, orders, for, 97, b...","[norwegian, have, cancel, order, for, 97, boei..."
4,https://www.flightglobal.com/networks/porter-a...,Porter Airlines delays restart to 31 August,Canada's Porter Airlines will restart its oper...,6,"Airlines, Coronavirus, Networks, North America...","[canada, 's, porter, airlines, will, restart, ...","[canada, 's, porter, airline, will, restart, i..."


##### Load Positive and Negative Lexicons

In [6]:
pos_lexicon = './positive-words.txt'
neg_lexicon = './negative-words.txt' 


# Read the positive sentiment lexicon.
pos_dict = {}
f = open(pos_lexicon, 'r', encoding = "ISO-8859-1")
for line in f:
    line = line.strip()
    pos_dict[line] = 1
f.close()

# Read the negative sentiment lexicon.
neg_dict = {}
f = open(neg_lexicon, 'r', encoding = "ISO-8859-1")
for line in f:
    line = line.strip()
    neg_dict[line] = 1
f.close()

##### Scoring and Sentiment Labeling

In [7]:
def f(row):
    score = 0
    for word in row['lem']:
    
# If the word w is inside the positive lexicon, then increase the score by 1.
        if word in pos_dict:
            score = score + 1

# If the word w is inside the negative lexicon, then decrease the score by 1.

        elif word in neg_dict:
            score = score - 1

    return score


def f1(row):
    if row['score'] > 0:
        sentiment = 'positive'
        
    elif row['score'] <0:
        sentiment = 'negative'
        
    else:
        sentiment = 'neutral'
    
    
    return sentiment

In [8]:
df2['score'] = df2.apply(f, axis=1)
df2['sentiment_lexicon'] = df2.apply(f1, axis=1)

df2

Unnamed: 0,PageLink,Title,Article,Month,Topic,sent,lem,score,sentiment_lexicon
0,https://www.flightglobal.com/fleets/tap-to-cut...,TAP to cut more aircraft as losses mount,TAP Air Portugal has disclosed plans to furthe...,6,"Coronavirus, Europe, Fleets, Networks","[tap, air, portugal, has, disclosed, plans, to...","[tap, air, portugal, have, disclose, plan, to,...",4,positive
1,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa's reactivated fleet to reach 380 air...,Lufthansa Group plans to have reactivated half...,6,"Coronavirus, Europe, Fleets, Networks","[lufthansa, group, plans, to, have, reactivate...","[lufthansa, group, plan, to, have, reactivate,...",0,neutral
2,https://www.flightglobal.com/airlines/sas-deta...,SAS details extensive make-or-break recapitali...,Scandinavian operator SAS has detailed a recap...,6,"Air Transport, Coronavirus, Europe, SAS","[scandinavian, operator, sas, has, detailed, a...","[scandinavian, operator, sa, have, detailed, a...",1,positive
3,https://www.flightglobal.com/fleets/norwegian-...,Norwegian cancels 97 Boeing aircraft orders,Norwegian has cancelled orders for 97 Boeing a...,6,"Airframers, Airlines, Coronavirus, Europe, Fle...","[norwegian, has, cancelled, orders, for, 97, b...","[norwegian, have, cancel, order, for, 97, boei...",-3,negative
4,https://www.flightglobal.com/networks/porter-a...,Porter Airlines delays restart to 31 August,Canada's Porter Airlines will restart its oper...,6,"Airlines, Coronavirus, Networks, North America...","[canada, 's, porter, airlines, will, restart, ...","[canada, 's, porter, airline, will, restart, i...",-4,negative
...,...,...,...,...,...,...,...,...,...
703,https://www.flightglobal.com/networks/british-...,British Airways suspends mainland Chinese serv...,British Airways is temporarily suspending all ...,1,"Air Transport, Airlines, Asia Pacific, Coronav...","[british, airways, is, temporarily, suspending...","[british, airway, be, temporarily, suspend, al...",-3,negative
704,https://www.flightglobal.com/networks/united-s...,United suspends some flights to China,"Citing a significant drop in load factors, Uni...",1,"Airlines, Asia Pacific, Coronavirus, Networks","[citing, a, significant, drop, in, load, facto...","[cite, a, significant, drop, in, load, factor,...",8,positive
705,https://www.flightglobal.com/airlines/hong-kon...,Hong Kong carriers to halve capacity into China,Hong Kong's four main carriers will halve thei...,1,"Cathay Dragon, Cathay Pacific, China, Coronavi...","[hong, kong, 's, four, main, carriers, will, h...","[hong, kong, 's, four, main, carrier, will, ha...",-10,negative
706,https://www.flightglobal.com/air-transport/eas...,EASA issues safety bulletin on Wuhan coronavir...,As the spread of the novel coronavirus (2019-n...,1,"China, Coronavirus, Europe","[as, the, spread, of, the, novel, coronavirus,...","[a, the, spread, of, the, novel, coronavirus, ...",1,positive


##### Breakdown of Results

In [9]:
#Find proportion of sentiments
df2['sentiment_lexicon'].value_counts(normalize = True)

negative    0.468927
positive    0.446328
neutral     0.084746
Name: sentiment_lexicon, dtype: float64

In [10]:
#See sentiments by month
pd.crosstab(df2.Month, df2.sentiment_lexicon,  margins=True)

sentiment_lexicon,negative,neutral,positive,All
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,7,0,3,10
2,23,2,3,28
3,138,22,110,270
4,92,18,109,219
5,36,15,43,94
6,36,3,48,87
All,332,60,316,708


### 2) Using SentiWordNet

In [11]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk import sent_tokenize, word_tokenize, pos_tag
import nltk
# nltk.download('sentiwordnet')
lemmatizer = WordNetLemmatizer()

##### Functions for SentiWordNet

In [12]:
def penn_to_wn(tag):
#Convert between the PennTreebank tags to simple Wordnet tags

    if tag.startswith('J'):
        return wn.ADJ
    elif tag.startswith('N'):
        return wn.NOUN
    elif tag.startswith('R'):
        return wn.ADV
    elif tag.startswith('V'):
        return wn.VERB
    return None


#Score first synset of each word
def sentiment_sentiwordnet(row):
    sentiment = 0
    tokens_count = 0

    tagged_sentence = pos_tag(row['sent'])

    for word, tag in tagged_sentence:
        wn_tag = penn_to_wn(tag)
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV,wn.VERB):
            continue

        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            continue

        synsets = wn.synsets(lemma, pos=wn_tag)
        if not synsets:
            continue

        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())
        word_sent = swn_synset.pos_score() - swn_synset.neg_score()

        if word_sent != 0:
            sentiment += word_sent
            tokens_count += 1

    if tokens_count == 0:
        return 0
    sentiment = sentiment/tokens_count
    return sentiment



#Sentiment label
def f1(row):
    if row['sentiment score'] > 0.01:
        sentiment = 'positive'
        
    elif row['sentiment score'] <-0.01:
        sentiment = 'negative'
        
    else:
        sentiment = 'neutral'
    
    
    return sentiment

##### Tokenization and Lowercasing

In [13]:
df1 = df[['Month','PageLink','Article']]

#tokenize
df1['sent'] = df1['Article'].apply(word_tokenize)  


#lowercase
def lower(row):
    lower = [w.lower() for w in row['sent']]
    return lower

df1['sent'] = df1.apply(lower,axis=1)

df1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


Unnamed: 0,Month,PageLink,Article,sent
0,6,https://www.flightglobal.com/fleets/tap-to-cut...,TAP Air Portugal has disclosed plans to furthe...,"[tap, air, portugal, has, disclosed, plans, to..."
1,6,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa Group plans to have reactivated half...,"[lufthansa, group, plans, to, have, reactivate..."
2,6,https://www.flightglobal.com/airlines/sas-deta...,Scandinavian operator SAS has detailed a recap...,"[scandinavian, operator, sas, has, detailed, a..."
3,6,https://www.flightglobal.com/fleets/norwegian-...,Norwegian has cancelled orders for 97 Boeing a...,"[norwegian, has, cancelled, orders, for, 97, b..."
4,6,https://www.flightglobal.com/networks/porter-a...,Canada's Porter Airlines will restart its oper...,"[canada, 's, porter, airlines, will, restart, ..."


##### Sentiment scoring

In [14]:
df1['sentiment score'] = df1.apply(sentiment_sentiwordnet,axis=1)
df1['sentiment_swn'] = df1.apply(f1,axis=1)

df1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Month,PageLink,Article,sent,sentiment score,sentiment_swn
0,6,https://www.flightglobal.com/fleets/tap-to-cut...,TAP Air Portugal has disclosed plans to furthe...,"[tap, air, portugal, has, disclosed, plans, to...",0.057927,positive
1,6,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa Group plans to have reactivated half...,"[lufthansa, group, plans, to, have, reactivate...",0.077703,positive
2,6,https://www.flightglobal.com/airlines/sas-deta...,Scandinavian operator SAS has detailed a recap...,"[scandinavian, operator, sas, has, detailed, a...",0.058594,positive
3,6,https://www.flightglobal.com/fleets/norwegian-...,Norwegian has cancelled orders for 97 Boeing a...,"[norwegian, has, cancelled, orders, for, 97, b...",0.053241,positive
4,6,https://www.flightglobal.com/networks/porter-a...,Canada's Porter Airlines will restart its oper...,"[canada, 's, porter, airlines, will, restart, ...",0.115196,positive
...,...,...,...,...,...,...
703,1,https://www.flightglobal.com/networks/british-...,British Airways is temporarily suspending all ...,"[british, airways, is, temporarily, suspending...",-0.044643,negative
704,1,https://www.flightglobal.com/networks/united-s...,"Citing a significant drop in load factors, Uni...","[citing, a, significant, drop, in, load, facto...",-0.153846,negative
705,1,https://www.flightglobal.com/airlines/hong-kon...,Hong Kong's four main carriers will halve thei...,"[hong, kong, 's, four, main, carriers, will, h...",-0.032500,negative
706,1,https://www.flightglobal.com/air-transport/eas...,As the spread of the novel coronavirus (2019-n...,"[as, the, spread, of, the, novel, coronavirus,...",0.006579,neutral


##### Breakdown of Results

In [15]:
#Find proportion of sentiments
df1['sentiment_swn'].value_counts(normalize = True)

positive    0.690678
negative    0.206215
neutral     0.103107
Name: sentiment_swn, dtype: float64

In [16]:
#See sentiments by month
pd.crosstab(df1.Month, df1.sentiment_swn,  margins=True)

sentiment_swn,negative,neutral,positive,All
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5,3,2,10
2,6,5,17,28
3,67,27,176,270
4,41,21,157,219
5,12,5,77,94
6,15,12,60,87
All,146,73,489,708


### 3) Using Vader

###### Lowercasing and Sentiment scoring

In [17]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

sia = SIA()
resultsTitle = []
resultsArticle = []

for Article in airline_Article:
    Article = Article.lower()
    pol_score = sia.polarity_scores(Article)
    pol_score['airline_Article'] = Article
    resultsArticle.append(pol_score)

In [18]:
dfArticle = pd.DataFrame.from_records(resultsArticle)
dfArticle = dfArticle .rename({"neg": "Neg_Base_A", "neu": "Neu_Base_A", "pos": "Pos_Base_A", "compound": "Compound_Base_A"}, axis=1)
dfArticle

Unnamed: 0,Neg_Base_A,Neu_Base_A,Pos_Base_A,Compound_Base_A,airline_Article
0,0.044,0.905,0.051,0.5267,tap air portugal has disclosed plans to furthe...
1,0.016,0.936,0.048,0.8990,lufthansa group plans to have reactivated half...
2,0.040,0.858,0.102,0.9744,scandinavian operator sas has detailed a recap...
3,0.057,0.908,0.036,-0.8706,norwegian has cancelled orders for 97 boeing a...
4,0.044,0.891,0.065,0.5605,canada's porter airlines will restart its oper...
...,...,...,...,...,...
703,0.070,0.876,0.054,-0.4357,british airways is temporarily suspending all ...
704,0.025,0.912,0.063,0.8151,"citing a significant drop in load factors, uni..."
705,0.030,0.941,0.029,-0.2732,hong kong's four main carriers will halve thei...
706,0.042,0.876,0.082,0.9325,as the spread of the novel coronavirus (2019-n...


In [19]:
airline_raw2 = pd.concat([df, dfArticle], axis=1)
airline_raw2 = airline_raw2.drop(['airline_Article'], axis=1)
airline_raw2['Date'] = pd.to_datetime(airline_raw2['Date'])
airline_raw2['Month'] = airline_raw2['Date'].dt.month

airline_raw2['Senti_Base_A'] = 'Neutral'
airline_raw2.loc[airline_raw2['Compound_Base_A'] > 0, 'Senti_Base_A'] = 'Positive'
airline_raw2.loc[airline_raw2['Compound_Base_A'] < 0, 'Senti_Base_A'] = 'Negative'

In [20]:
airline_raw2

Unnamed: 0.1,Unnamed: 0,PageLink,Title,Article,Date,Topic,Month,Neg_Base_A,Neu_Base_A,Pos_Base_A,Compound_Base_A,Senti_Base_A
0,1,https://www.flightglobal.com/fleets/tap-to-cut...,TAP to cut more aircraft as losses mount,TAP Air Portugal has disclosed plans to furthe...,2020-06-30,"Coronavirus, Europe, Fleets, Networks",6,0.044,0.905,0.051,0.5267,Positive
1,11,https://www.flightglobal.com/fleets/lufthansas...,Lufthansa's reactivated fleet to reach 380 air...,Lufthansa Group plans to have reactivated half...,2020-06-30,"Coronavirus, Europe, Fleets, Networks",6,0.016,0.936,0.048,0.8990,Positive
2,12,https://www.flightglobal.com/airlines/sas-deta...,SAS details extensive make-or-break recapitali...,Scandinavian operator SAS has detailed a recap...,2020-06-30,"Air Transport, Coronavirus, Europe, SAS",6,0.040,0.858,0.102,0.9744,Positive
3,16,https://www.flightglobal.com/fleets/norwegian-...,Norwegian cancels 97 Boeing aircraft orders,Norwegian has cancelled orders for 97 Boeing a...,2020-06-30,"Airframers, Airlines, Coronavirus, Europe, Fle...",6,0.057,0.908,0.036,-0.8706,Negative
4,22,https://www.flightglobal.com/networks/porter-a...,Porter Airlines delays restart to 31 August,Canada's Porter Airlines will restart its oper...,2020-06-30,"Airlines, Coronavirus, Networks, North America...",6,0.044,0.891,0.065,0.5605,Positive
...,...,...,...,...,...,...,...,...,...,...,...,...
703,2078,https://www.flightglobal.com/networks/british-...,British Airways suspends mainland Chinese serv...,British Airways is temporarily suspending all ...,2020-01-29,"Air Transport, Airlines, Asia Pacific, Coronav...",1,0.070,0.876,0.054,-0.4357,Negative
704,2082,https://www.flightglobal.com/networks/united-s...,United suspends some flights to China,"Citing a significant drop in load factors, Uni...",2020-01-29,"Airlines, Asia Pacific, Coronavirus, Networks",1,0.025,0.912,0.063,0.8151,Positive
705,2086,https://www.flightglobal.com/airlines/hong-kon...,Hong Kong carriers to halve capacity into China,Hong Kong's four main carriers will halve thei...,2020-01-29,"Cathay Dragon, Cathay Pacific, China, Coronavi...",1,0.030,0.941,0.029,-0.2732,Negative
706,2105,https://www.flightglobal.com/air-transport/eas...,EASA issues safety bulletin on Wuhan coronavir...,As the spread of the novel coronavirus (2019-n...,2020-01-28,"China, Coronavirus, Europe",1,0.042,0.876,0.082,0.9325,Positive


##### Breakdown of Results

In [21]:
#Find proportion of sentiments
airline_raw2['Senti_Base_A'].value_counts(normalize = True)

Positive    0.552260
Negative    0.446328
Neutral     0.001412
Name: Senti_Base_A, dtype: float64

In [22]:
pd.crosstab(airline_raw2.Month, airline_raw2.Senti_Base_A)

Senti_Base_A,Negative,Neutral,Positive
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5,0,5
2,20,0,8
3,136,0,134
4,94,1,124
5,32,0,62
6,29,0,58


In [23]:
pd.crosstab(airline_raw2.Month, airline_raw2.Senti_Base_A).apply(lambda r: round(r/r.sum(),2), axis=1)

Senti_Base_A,Negative,Neutral,Positive
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.5,0.0,0.5
2,0.71,0.0,0.29
3,0.5,0.0,0.5
4,0.43,0.0,0.57
5,0.34,0.0,0.66
6,0.33,0.0,0.67


### 4) Using TextBlob

##### Sentiment scoring

In [24]:
from textblob import TextBlob

resultsArticletb = []
pol_score_article = {}

for Article in airline_Article:
    Article = Article.lower()
    article = TextBlob(Article)
    pol_score_article['polarity'] = article.sentiment.polarity
    pol_score_article['subjectivity'] = article.sentiment.subjectivity
    pol_score_article['airline_Article'] = Article
    resultsArticletb.append(pol_score_article.copy())


In [25]:
dfArticletb = pd.DataFrame.from_records(resultsArticletb)
dfArticletb = dfArticletb.rename({"polarity": "Polarity_Base_A", "subjectivity": "Subjectivity_Base_A"}, axis=1)

In [26]:
airline_raw2_tb = pd.concat([df, dfArticletb], axis=1)
airline_raw2_tb = airline_raw2_tb.drop(['airline_Article'], axis=1)
airline_raw2_tb['Date'] = pd.to_datetime(airline_raw2_tb['Date'])
airline_raw2_tb['Month'] = airline_raw2_tb['Date'].dt.month

airline_raw2_tb['Senti_Base_A'] = 'Neutral'
airline_raw2_tb.loc[airline_raw2_tb['Polarity_Base_A'] > 0, 'Senti_Base_A'] = 'Positive'
airline_raw2_tb.loc[airline_raw2_tb['Polarity_Base_A'] < 0, 'Senti_Base_A'] = 'Negative'

##### Breakdown of Results

In [27]:
#Find proportion of sentiments
airline_raw2_tb['Senti_Base_A'].value_counts(normalize = True)

Positive    0.865819
Negative    0.128531
Neutral     0.005650
Name: Senti_Base_A, dtype: float64

In [28]:
pd.crosstab(airline_raw2_tb.Month, airline_raw2_tb.Senti_Base_A)

Senti_Base_A,Negative,Neutral,Positive
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0,9
2,4,0,24
3,38,1,231
4,35,2,182
5,8,0,86
6,5,1,81


In [29]:
pd.crosstab(airline_raw2_tb.Month, airline_raw2_tb.Senti_Base_A).apply(lambda r: round(r/r.sum(),2), axis=1)

Senti_Base_A,Negative,Neutral,Positive
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.1,0.0,0.9
2,0.14,0.0,0.86
3,0.14,0.0,0.86
4,0.16,0.01,0.83
5,0.09,0.0,0.91
6,0.06,0.01,0.93
