### 2.2. Вірусні новини

In [1]:
import spacy
# Load the spaCy pipeline once and reuse it: the scoring cell further down runs `nlp`
# over every headline and reads named entities (`doc.ents`) and POS tags
# (`token.pos_`, `token.tag_`) from the resulting docs.
# NOTE(review): the 'en_core_web_lg' package must already be installed in the kernel's
# environment — confirm the setup instructions mention it.
nlp = spacy.load('en_core_web_lg')

In [2]:
# loads a simplified version of the original SentiWordNet_3.0
import collections
import re
from typing import Tuple

# Sentiment lexicons: key -> score. `sentiment` looks entries up with keys of the
# form "<POS>_<lowercased word>". Counters are used so that a missing key reads
# as 0 instead of raising KeyError.
PosDict = collections.Counter()
NegDict = collections.Counter()

# Explicit encoding so the load does not depend on the platform's locale default.
with open('SentiWordNet_3_Simplified_.txt', encoding='utf-8') as fo:
    lines = fo.readlines()

# Every data line must consist of exactly three tab-separated fields:
#   <key>   <positive score>   <negative score>
for line in lines:
    # A blank (e.g. trailing) line carries no entry; skip it instead of aborting the load.
    if not line.strip():
        continue

    fields = [field.strip() for field in re.split(r'\t+', line)]
    try:
        POS_word, pos_score, neg_score = fields
    except ValueError as err:
        # Unpacking fails with ValueError when the field count is wrong. Catching only
        # that keeps KeyboardInterrupt / SystemExit from being turned into a ValueError.
        raise ValueError(f'Line formatted incorrectly: {line}\n') from err

    # float() raises ValueError on its own if a score is not numeric.
    pos_score = float(pos_score)
    neg_score = float(neg_score)

    # Zero scores are not stored: the Counter default already yields 0 for them.
    if pos_score:
        PosDict[POS_word] = pos_score
    if neg_score:
        NegDict[POS_word] = neg_score

#### Іменовані сутності

In [3]:
def contains_NE(doc) -> bool:
    "tells whether doc holds at least one named entity of a tracked type"

    # GPE is deliberately left out: it shows up in almost every title
    tracked_labels = ('MONEY', 'NORP', 'PERSON', 'PRODUCT', 'WORK_OF_ART', 'LAW')
    return any(entity.label_ in tracked_labels for entity in doc.ents)

#### Емоційне забарвлення

In [4]:
def sentiment(doc, pos_dict=None, neg_dict=None) -> Tuple[float, float]:
    """Return the summed positive and negative lexicon scores for doc.

    Only tokens whose coarse POS is ADJ, ADV, NOUN or VERB are scored. Each such
    token is looked up under the key "<POS>_<lowercased token text>"; tokens that
    are missing from a lexicon contribute 0.

    Args:
        doc: iterable of tokens exposing `.pos_` and `.text` (e.g. a spaCy Doc).
        pos_dict: mapping key -> positive score; defaults to the module-level PosDict.
        neg_dict: mapping key -> negative score; defaults to the module-level NegDict.

    Returns:
        (pos_score, neg_score) as floats; (0.0, 0.0) when nothing matched.
    """
    # Resolve the defaults at call time so the function follows the current globals,
    # while still allowing callers to inject their own lexicons.
    if pos_dict is None:
        pos_dict = PosDict
    if neg_dict is None:
        neg_dict = NegDict

    scored_pos = ('ADJ', 'ADV', 'NOUN', 'VERB')
    pos_score = 0.0
    neg_score = 0.0

    for token in doc:
        if token.pos_ in scored_pos:
            # NOTE(review): the lookup uses the lower-cased surface form, so an inflected
            # word only matches if the lexicon lists that exact form — confirm whether
            # lemmas were intended here.
            POS_word = token.pos_ + '_' + token.text.lower()
            # .get() never inserts keys and also works for plain dicts.
            pos_score += pos_dict.get(POS_word, 0.0)
            neg_score += neg_dict.get(POS_word, 0.0)

    return pos_score, neg_score

#### Ступені порівняння

In [5]:
def contains_superlative(doc) -> bool:
    "tells whether any token of doc is tagged as a comparative (JJR) or superlative (JJS) adjective"

    graded_tags = ('JJR', 'JJS')
    return any(tok.tag_ in graded_tags for tok in doc)

#### Розрахунок

In [6]:
# --- Thresholds used below (previously inline magic numbers) ---------------------
NE_BONUS = 1                      # added to the viral score when a tracked NE is present
SUPERLATIVE_BONUS = 0.5           # added when a comparative/superlative adjective is present
EMOTIONAL_THRESHOLD = 1.2         # "emotional" if the positive OR negative score exceeds this
MOST_POSITIVE_THRESHOLD = 2       # listed as "most positive" above this positive score
# NOTE(review): the negative list keeps only scores in the half-open window [1.8, 2),
# i.e. titles with a negative score of 2 or more are NOT listed. That looks like a way
# to keep the list short, but it contradicts the name "most negative" — confirm intent.
MOST_NEGATIVE_MIN = 1.8
MOST_NEGATIVE_MAX = 2
VIRAL_THRESHOLD = 2               # counted as viral above this total score
MOST_VIRAL_THRESHOLD = 3.2        # listed as "most viral" above this total score

# Explicit encoding: the headlines contain non-ASCII characters, so the platform's
# locale default must not decide how they are decoded. Blank lines are dropped so they
# are neither scored nor counted in the percentages' denominator. Titles keep their
# trailing newline, exactly as readlines() returned them (later cells strip it).
with open('examiner-headlines.txt', encoding='utf-8') as fo:
    titles = [line for line in fo if line.strip()]

NE_count: int = 0
superlative_count: int = 0
emotional_count: int = 0
viral_count: int = 0

most_positive_titles = []
most_negative_titles = []
most_viral_titles = []

# nlp.pipe processes the texts in batches and yields one Doc per input text, in input
# order, which avoids the per-call overhead of invoking nlp() once per title.
for title, doc in zip(titles, nlp.pipe(titles)):

    # Total score = NE bonus + positive score + negative score + superlative bonus.
    viral_score: float = 0.0

    # 1. named entities
    if contains_NE(doc):
        NE_count += 1
        viral_score += NE_BONUS

    # 2. sentiment — both polarities raise the viral score
    pos_score, neg_score = sentiment(doc)
    if pos_score > EMOTIONAL_THRESHOLD or neg_score > EMOTIONAL_THRESHOLD:
        emotional_count += 1
    if pos_score > MOST_POSITIVE_THRESHOLD:
        most_positive_titles.append(title)
    if MOST_NEGATIVE_MIN <= neg_score < MOST_NEGATIVE_MAX:
        most_negative_titles.append(title)
    viral_score += pos_score + neg_score

    # 3. comparative or superlative
    if contains_superlative(doc):
        superlative_count += 1
        viral_score += SUPERLATIVE_BONUS

    # 4. is it viral?
    if viral_score > VIRAL_THRESHOLD:
        viral_count += 1
    if viral_score > MOST_VIRAL_THRESHOLD:
        most_viral_titles.append(title)

#### Статистика

In [7]:
# Summary statistics: every count is shown together with its share of all titles.
total_titles = len(titles)


def _count_with_share(count: int) -> str:
    "formats count as 'N (P%)' relative to all titles, always with one decimal place"
    # An empty title list would otherwise raise ZeroDivisionError; report 0.0% instead.
    share = count / total_titles * 100 if total_titles else 0.0
    return f'{count} ({share:.1f}%)'


# All four lines share one formatter; the last two used to print the raw float repr
# (e.g. 3.9000000000000004) because their format spec was missing.
print(f'Viral titles: {_count_with_share(viral_count)}')
print('Titles containing:')
print(f'- potentially viral NEs: {_count_with_share(NE_count)}')
print(f'- highly emotional words: {_count_with_share(emotional_count)}')
print(f'- comparative or superlative adjectives: {_count_with_share(superlative_count)}')

Viral titles: 399 (8.0%)
Titles containing:
- potentially viral NEs: 1939 (38.8%)
- highly emotional words: 200 (4.0%)
- comparative or superlative adjectives: 195 (3.9%)


#### ⚡️ Most viral titles:

In [8]:
# List the collected most-viral headlines as bullets, one per line.
for title in most_viral_titles:
    headline = title.strip()  # drop the trailing newline kept from the input file
    print(f'∙ {headline}')

∙ Tweety: Adorable hound mix pup is out of time at high-kill upstate shelter
∙ Not just for breakfast! Fun ways to enjoy Barbara's gluten-free cereals
∙ Best exercises for sexy, shapely, and lean legs
∙ It's time for HBO to force Floyd Jr's hand: Either press him to fight or kick him to the curb (pg 2)
∙ Historical indie Christian film 'Alone Yet Not Alone' in select theaters today
∙ Jennifer Lopez and Pitbull attract lowest common denominator in 'Dance Again'
∙ Democrats cannot seem to catch a break in good news for November
∙ Black Ferguson men protect white business owner's store with AR-15s: Loyalty
∙ von Kármán Lecture - CATASTROPHE and earth's evolution: when bad things happen to good planets
∙ Hams: Shy but sweet Siamese beauty is running out of time at high-kill shelter
∙ 10 ways to get 'lucky' in love on St. Paddys after infidelity
∙ Approach life with mildness, gentleness of spirit, and humility
∙ Commitment to health with natural beauty products; Sweet potato eye mask
∙ Glen

#### 🙂 Most positive titles:

In [9]:
# List the collected most-positive headlines, each prefixed with '+'.
for title in most_positive_titles:
    headline = title.strip()  # drop the trailing newline kept from the input file
    print(f'+ {headline}')

+ Senior care workshops: Center for Elders' Independence offers free senior health series
+ Best exercises for sexy, shapely, and lean legs
+ The Beach Huts are a great way to enjoy our beautiful LI beaches and dine on quality local seafood
+ Beautiful hair, Beautiful skin, Beautiful brains!
+ Nothing Til Blood signs with Strike First Records, releases first music video
+ Hams: Shy but sweet Siamese beauty is running out of time at high-kill shelter
+ Approach life with mildness, gentleness of spirit, and humility
+ Country Fried Rock gives you good music for a great cause
+ Commitment to health with natural beauty products; Sweet potato eye mask
+ The truth, the whole truth and nothing but the truth
+ Teen Mom: Final season 'For the Best' recap


#### ☹️ Most negative titles:

In [10]:
# List the collected most-negative headlines, each prefixed with '-'.
for title in most_negative_titles:
    headline = title.strip()  # drop the trailing newline kept from the input file
    print(f'- {headline}')

- The loss of a dog's companion can cause mourning and depression
- Illegal alien cop killer has extensive rap sheet, reports watchdog group
- Gopher: Beautiful male pit, heartworm negative, sits on death row at GCACS
- Historical indie Christian film 'Alone Yet Not Alone' in select theaters today
- Dangerous Medicine: social media, bad advice and natural medicines
- Hero dog saves 10-year-old dog buried alive during walk with owner
- Philly dog owners: Protect your pups from parvo
- Be aware of a plant that is most dangerous to pets like your sweet Chihuahua dog
