In [13]:
import os
import requests
import re
from textblob import TextBlob
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from GoogleNews import GoogleNews


In [2]:
# SenticNet API keys come from the environment so they never appear in the
# notebook itself; a variable that is not set yields None.
SUBJECTIVITY_API_KEY, POLARITY_API_KEY, INTENSITY_API_KEY = map(
    os.environ.get,
    ('SUBJECTIVITY_API_KEY', 'POLARITY_API_KEY', 'INTENSITY_API_KEY'),
)


- Notes from the professor on using the SenticNet API
    - here is a sample use for English: sentic.net/api/en/KEY.py?text=senticnet+is+pretty+cool

    - input text does not require any special formatting so feel free to use spaces instead of '+' or '%20'

    - ampersand, hashtag, semicolons, and braces ('&', '#', ';', '{', '}'), however, are illegal characters

    - hence, they should be replaced with colons (':') or removed entirely in the preprocessing phase

    - please note that:

    - 1) API keys are case-sensitive

    - 2) API keys will be valid for about one month

    - 3) API keys are personal and confidential

    - do not share nor use them from different devices or IP addresses

    - or else they will get terminated earlier

    - the capacity limit for our server is 8000 characters 

    - so our recommendation is to cap your input at about 1000 words

    - if you need to process bigger texts, you will have to split them into smaller parts

    - this is also a good idea in case you want to perform a finer-grained analysis of your input

    - all APIs, in fact, are designed to give you an overall judgement about the whole input

    - for more details, split your text into paragraphs or sentences and feed them to the API one by one

In [12]:
# Sample news article used as input for the sentiment-analysis cells below.
# The text is kept as pasted, including the "Advertisement · Scroll to continue"
# placeholders, which the preprocessing step strips out.
# NOTE(review): the "DECEMBER 2020" entry about Dennis Muilenburg sits between
# OCTOBER 2019 and JANUARY 2020 in the timeline, so it looks like a typo for
# DECEMBER 2019 - confirm against the source article before correcting.
news_txt = """
The latest black eye for Boeing's top-selling 737 MAX aircraft occurred Friday when a cabin panel blowout forced an Alaska Airlines (ALK.N) flight to make an emergency landing.
U.S. regulators ordered a temporary grounding of certain planes for safety checks. That's not as severe a move as the grounding of all MAX-family jets worldwide nearly five years ago following a pair of fatal crashes.

Here is a timeline of recent issues surrounding Boeing's (BA.N) MAX planes:
Advertisement · Scroll to continue
OCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.

NOVEMBER 2018: The FAA and Boeing say they are evaluating the need for software or design changes to 737 MAX jets following the Lion Air crash.

MARCH 2019: An Ethiopian Airlines MAX crashes, killing all 157 people on board. China's aviation regulator becomes the first in the world to ground the MAX, followed by others including the U.S. Federal Aviation Administration.

Advertisement · Scroll to continue
APRIL 2019: The FAA forms an international team to review the safety of the 737 MAX. Boeing cuts monthly production by nearly 20%.

JULY 2019: Boeing posts its largest-ever quarterly loss.

SEPTEMBER 2019: Boeing's board of directors creates a permanent safety committee to oversee development, manufacturing and operation of its aircraft.

OCTOBER 2019: Boeing fires Kevin McAllister, the top executive of its commercial airplanes division.

Advertisement · Scroll to continue
DECEMBER 2020: The company fires CEO Dennis Muilenburg in the wake of the twin crashes.

JANUARY 2020: Boeing suspends 737 production, its biggest assembly-line halt in more than 20 years.

MAY 2020: Boeing resumes 737 MAX production at a "low rate."

JUNE 2020: Boeing begins a series of long-delayed flight tests of its redesigned 737 MAX with regulators at the controls.

SEPTEMBER 2020: An 18-month investigation by a U.S. House of Representatives panel finds Boeing failed in its design and development of the MAX as well as its transparency with the FAA, and that the FAA failed in oversight and certification.

NOVEMBER 2020: The U.S. FAA lifts the grounding order, allowing the 737 MAX to fly again.

DECEMBER 2020: Congress passes legislation to reform how the FAA certifies new airplanes, including requiring manufacturers to disclose certain safety-critical information to the FAA.

JANUARY 2021: The European Union Aviation Safety Agency approves the MAX's return to service in Europe.

MARCH 2021: China's aviation regulator says major safety concerns with the MAX needed to be "properly addressed" before conducting flight tests.

APRIL 2021: Boeing halts 737 MAX deliveries after electrical problems re-ground part of the fleet.
NOVEMBER 2021: Current and former Boeing company directors reach a $237.5 million settlement with shareholders to settle lawsuits over safety oversight of the 737 MAX.

OCTOBER 2022: The FAA tells Boeing that some key documents submitted as part of the certification review of the 737 MAX 7 are incomplete and others need a reassessment.

DECEMBER 2022: Congress agrees to extend a deadline for new standards for modern cockpit alerts stemming from the 2020 legislation after intense lobbying from Boeing.

APRIL 2023: Boeing pauses deliveries of some 737 MAXs to deal with a new supplier quality problem involving noncompliant fittings.
JULY 2023: Boeing's first delivery of the 737 MAX 7 is delayed to 2024.

AUGUST 2023: Boeing identifies a new 737 MAX supplier quality problem involving improperly drilled holes on the aft pressure bulkhead.

SEPTEMBER 2023: Boeing 737 MAX deliveries fall to their lowest levels since August 2021.
DECEMBER 2023: Boeing makes its first direct delivery of a 787 Dreamliner to China since 2019, seen as a precursor to China potentially unfreezing deliveries of the 737 MAX.
JANUARY 2024: An Alaskan Air flight is forced to conduct an emergency landing after a cabin panel blowout on a brand-new 737 MAX 9 plane. The U.S. FAA grounds certain 737 MAX 9 aircraft for safety checks.
"""

# Maximum number of characters the SenticNet server accepts per request.
char_limit = 8000

# Remove the characters the SenticNet API treats as illegal: & # ; { }
news_txt2 = re.sub(r'[&#;{}]', '', news_txt)

# Drop the ad placeholders that were pasted along with the article text.
news_txt3 = news_txt2.replace('Advertisement · Scroll to continue', '')
display(news_txt3)

# One passage per non-blank line, with surrounding whitespace trimmed.
# A passage longer than char_limit is cut into consecutive chunks of at most
# char_limit characters so that no single API request exceeds the server
# capacity; passages within the limit are kept whole.
passages2 = [
    paragraph[start:start + char_limit]
    for paragraph in map(str.strip, news_txt3.split('\n'))
    if paragraph
    for start in range(0, len(paragraph), char_limit)
]

passages2

'\nThe latest black eye for Boeing\'s top-selling 737 MAX aircraft occurred Friday when a cabin panel blowout forced an Alaska Airlines (ALK.N) flight to make an emergency landing.\nU.S. regulators ordered a temporary grounding of certain planes for safety checks. That\'s not as severe a move as the grounding of all MAX-family jets worldwide nearly five years ago following a pair of fatal crashes.\n\nHere is a timeline of recent issues surrounding Boeing\'s (BA.N) MAX planes:\n\nOCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.\n\nNOVEMBER 2018: The FAA and Boeing say they are evaluating the need for software or design changes to 737 MAX jets following the Lion Air crash.\n\nMARCH 2019: An Ethiopian Airlines MAX crashes, killing all 157 people on board. China\'s aviation regulator becomes the first in the world to ground the MAX, followed by others including the U.S. Federal Aviation Administration.\n\n\nAPRIL 2019: The FAA forms an international

["The latest black eye for Boeing's top-selling 737 MAX aircraft occurred Friday when a cabin panel blowout forced an Alaska Airlines (ALK.N) flight to make an emergency landing.",
 "U.S. regulators ordered a temporary grounding of certain planes for safety checks. That's not as severe a move as the grounding of all MAX-family jets worldwide nearly five years ago following a pair of fatal crashes.",
 "Here is a timeline of recent issues surrounding Boeing's (BA.N) MAX planes:",
 'OCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.',
 'NOVEMBER 2018: The FAA and Boeing say they are evaluating the need for software or design changes to 737 MAX jets following the Lion Air crash.',
 "MARCH 2019: An Ethiopian Airlines MAX crashes, killing all 157 people on board. China's aviation regulator becomes the first in the world to ground the MAX, followed by others including the U.S. Federal Aviation Administration.",
 'APRIL 2019: The FAA forms an internationa

# Sentiment analysis
- SenticNet
- TextBlob
- nltk
- BERT

TODO:
- Check whether the results from the different libraries need to be unified (they are reported on different scales)
- Subjectivity is not included yet

In [4]:
# Minimal client for the SenticNet web API: one HTTP request per call.

def analyse(text, LANG, KEY, timeout=30):
    """Send ``text`` to a SenticNet API endpoint and return the label it answers with.

    Parameters
    ----------
    text : str
        Passage to analyse. It is passed as the ``text`` query parameter and
        URL-encoded by ``requests``, so spaces and punctuation need no manual
        escaping.
    LANG : str
        Language code placed in the URL path, e.g. ``'en'``.
    KEY : str
        API key placed in the URL path; the key determines which SenticNet
        service (polarity, intensity, subjectivity, ...) responds.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    str
        The response body with leading/trailing whitespace removed,
        e.g. ``'POSITIVE'`` or ``'55'``.

    Raises
    ------
    ValueError
        If ``KEY`` is empty or ``None`` (e.g. the environment variable holding
        it is not set).
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    if not KEY:
        raise ValueError('SenticNet API key is missing; set the corresponding environment variable')

    api_url = f'https://sentic.net/api/{LANG}/{KEY}.py'

    # Let requests build the query string so the text is encoded correctly.
    response = requests.get(api_url, params={'text': text}, timeout=timeout)
    response.raise_for_status()

    # Decode the body as text and trim whitespace instead of slicing the repr
    # of the raw bytes, which only worked when the body ended in exactly "\n".
    return response.text.strip()

# Try each SenticNet service once, each on a different passage:
# polarity on passage 3, intensity on passage 5, subjectivity on passage 8.
res1, res2, res3 = [
    analyse(passages2[index], 'en', api_key)
    for index, api_key in (
        (3, POLARITY_API_KEY),
        (5, INTENSITY_API_KEY),
        (8, SUBJECTIVITY_API_KEY),
    )
]

display(res1, res2, res3)



'POSITIVE'

'55'

'SUBJECTIVE'

In [5]:
# TextBlob baseline on a single timeline entry.
sentence = 'OCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.'

blob = TextBlob(sentence)

# `sentiment` is a (polarity, subjectivity) pair, so it can be unpacked directly.
polarity, subjectivity = blob.sentiment

print(
    f"Polarity: {polarity}",
    f"Subjectivity: {subjectivity}",
    sep="\n",
)

Polarity: 0.0
Subjectivity: 0.0


In [6]:


# VADER needs its lexicon on disk. Fetch it only when it is missing so this
# cell runs on a fresh environment without re-downloading on every execution.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

sentence = 'OCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.'

sid = SentimentIntensityAnalyzer()
# polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound' entries.
sentiment_scores = sid.polarity_scores(sentence)

print(sentiment_scores)

# VADER compound score: https://stackoverflow.com/questions/40325980/how-is-the-vader-compound-polarity-score-calculated-in-python-nltk

{'neg': 0.239, 'neu': 0.761, 'pos': 0.0, 'compound': -0.6597}


In [11]:
# `pipeline` is already imported from transformers in the notebook's import
# cell, so it is not imported again here.

# Load the sentiment analysis pipeline with the chosen BERT model
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
sentiment_analysis = pipeline("sentiment-analysis", model=model_name)

# Example sentence
sentence = 'OCTOBER 2018: A Lion Air MAX plane crashes in Indonesia, killing all 189 people on board.'

# Get sentiment prediction: a list with one {'label', 'score'} dict per input
result = sentiment_analysis(sentence)

# Print the result
print(result)

# label is 1 to 5 stars. 1 is very negative, 5 is very positive
# Score is the confidence, between 0 to 1

config.json: 100%|██████████| 953/953 [00:00<00:00, 1.35MB/s]
pytorch_model.bin: 100%|██████████| 669M/669M [00:11<00:00, 59.6MB/s] 
  return self.fget.__get__(instance, owner)()
tokenizer_config.json: 100%|██████████| 39.0/39.0 [00:00<00:00, 877B/s]
vocab.txt: 100%|██████████| 872k/872k [00:00<00:00, 1.54MB/s]
special_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 284kB/s]


[{'label': '1 star', 'score': 0.8441336154937744}]
