In [1]:
from nltk.corpus import sentiwordnet as swn
import spacy
# Load the spaCy pipeline once; the cells below reuse this `nlp` object
# to turn each headline string into a parsed document.
nlp = spacy.load("en_core_web_md")

In [2]:
import statistics

# Entity-type labels that count as "viral" for a headline.
# Tokens carrying any other label (or no label at all) are ignored.
viral_named_ents = {
    'PERSON', 'NORP', 'ORG',
    'GPE', 'PRODUCT', 'EVENT',
    'WORK_OF_ART', 'LAW', 'MONEY',
}


def has_viral_named_ents(doc):
    """Tell whether any token in ``doc`` belongs to a viral entity type.

    Args:
        doc: Iterable of tokens exposing an ``ent_type_`` string attribute.

    Returns:
        True as soon as one token's ``ent_type_`` is listed in
        ``viral_named_ents``; False if none is (including for an empty doc).
    """
    return any(token.ent_type_ in viral_named_ents for token in doc)

def has_comp_sup(doc):
    """Tell whether ``doc`` contains a comparative or superlative form.

    A token qualifies when either:
      * its fine-grained tag (``tag_``) is one of JJR, JJS, RBR, RBS, or
      * its lemma is "more"/"most" and its dependency label is ``advmod``.

    Args:
        doc: Iterable of tokens exposing ``tag_``, ``lemma_`` and ``dep_``.

    Returns:
        True if at least one token qualifies, otherwise False.
    """
    degree_tags = ('JJR', 'JJS', 'RBR', 'RBS')
    degree_words = ('more', 'most')
    return any(
        token.tag_ in degree_tags
        or (token.lemma_ in degree_words and token.dep_ == 'advmod')
        for token in doc
    )

# Maps the coarse part-of-speech tags read from `tok.pos_` to the
# single-letter POS codes passed to `swn.senti_synsets`.
pos_tags_mapping = {'NOUN': 'n', 'VERB': 'v', 'ADJ': 'a', 'ADV': 'r'}
def has_emotional_words(doc, threshold=0.5, max_senses=5):
    """Tell whether ``doc`` contains at least one strongly polar word.

    For every token whose ``pos_`` appears in ``pos_tags_mapping``, the
    token's lemma is looked up in SentiWordNet. The positive and negative
    scores of the first ``max_senses`` synsets (in the order returned by
    ``swn.senti_synsets``) are averaged separately; the token counts as
    emotional when either average reaches ``threshold``.

    Args:
        doc: Iterable of tokens exposing ``pos_`` and ``lemma_``.
        threshold: Minimum mean positive or mean negative score for a word
            to count as emotional. Defaults to 0.5.
        max_senses: How many leading synsets to average over; expected to
            be a positive integer. Defaults to 5.

    Returns:
        True as soon as one token qualifies, otherwise False. Tokens with
        an unmapped part of speech, or with no synsets, never qualify.
    """
    for tok in doc:
        swn_tag = pos_tags_mapping.get(tok.pos_)
        if swn_tag is None:
            # Part of speech not in the mapping: no lookup is attempted.
            continue
        senses = list(swn.senti_synsets(tok.lemma_, swn_tag))[:max_senses]
        if not senses:
            # Nothing to average; statistics.mean would raise on empty data.
            continue
        pos_scores = [sense.pos_score() for sense in senses]
        neg_scores = [sense.neg_score() for sense in senses]
        if (statistics.mean(pos_scores) >= threshold
                or statistics.mean(neg_scores) >= threshold):
            return True
    return False

In [3]:
# Sanity check on a sample headline; the recorded result is True.
has_viral_named_ents(nlp('Apple buys a new startup!'))

True

In [4]:
# Sanity check: a headline with a superlative ("most adorable"); recorded result is True.
has_comp_sup(nlp('Ten most adorable cats in the world!'))

True

In [5]:
# Sanity check: a headline with an emotionally loaded word; recorded result is True.
has_emotional_words(nlp('Five hated rockstars'))

True

In [6]:
# Counter-example: a neutral headline; recorded result is False.
has_emotional_words(nlp('Here are some ordinary cats for you.'))

False

In [7]:
# Read the headlines file into a list with one entry per line.
# Each entry keeps its trailing newline, exactly as read from the file.
with open("../../../tasks/02-structural-linguistics/data/examiner-headlines.txt") as f:
    examiner = list(f)

# Preview the first five headlines.
examiner[:5]

['Halep enters Rogers Cup final in straight sets win over Errani\n',
 "The phantoms of St. Mary's\n",
 "Talladega turmoil could spell trouble for NASCAR's Chase field\n",
 'Burn those calories! Try the Very Steep Trail.\n',
 "It's the end of the world... and I feel fine\n"]

In [8]:
def calc_percentage(headlines=None):
    """Compute the share of headlines that exhibit each of three features.

    Every headline is parsed with ``nlp`` and checked with
    ``has_viral_named_ents``, ``has_comp_sup`` and ``has_emotional_words``.

    Args:
        headlines: Iterable of headline strings. When omitted, the
            notebook-level ``examiner`` list is used, so existing
            ``calc_percentage()`` calls behave as before.

    Returns:
        A tuple ``(viral_nes, comp_sup, emotional)`` where each element is
        the fraction of headlines for which the matching check was truthy.
        Returns ``(0.0, 0.0, 0.0)`` when there are no headlines, instead of
        raising ``ZeroDivisionError``.
    """
    if headlines is None:
        headlines = examiner

    viral_nes = 0
    comp_sup = 0
    emotional = 0
    # Counted while iterating so that generators and other one-shot
    # iterables work as well as lists.
    total = 0
    for line in headlines:
        # Parse once and share the document between the three checks.
        doc = nlp(line)
        total += 1
        if has_viral_named_ents(doc):
            viral_nes += 1
        if has_comp_sup(doc):
            comp_sup += 1
        if has_emotional_words(doc):
            emotional += 1

    if total == 0:
        return (0.0, 0.0, 0.0)
    return (viral_nes / total, comp_sup / total, emotional / total)

In [9]:
# Run the full pass over the headlines (the pipeline is applied to every
# line) and unpack the three fractions for reporting.
named_ents, comp_sup, emotional = calc_percentage()

In [10]:
# Report the fraction of headlines showing each feature.
print(f"Named entities: {named_ents}")
print(f"Comparative or superlative adjectives/adverbs: {comp_sup}")
print(f"Emotional words: {emotional}")

Named entities: 0.6914
Comparative or superlative adjectives/adverbs: 0.047
Emotional words: 0.1058
