## Import Libraries

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Import News API and Natural Language Toolkit
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load APIs

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
newsapi = NewsApiClient(api_key=os.environ["news_api"])

## Create Headline and Sentiment Analyzer Functions

In [5]:
# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp("2021-01-14", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-12-14", tz="America/New_York").isoformat()

# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

In [6]:
# Instantiate Sentiment Analyzer
Analyzer = SentimentIntensityAnalyzer()

In [7]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(Analyzer.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

## Facebook

In [None]:
# Get Facebook headlines
fb_headlines, dates = get_headlines("facebook")

In [None]:
fb_avg = headline_sentiment_summarizer_avg(fb_headlines)

In [None]:
fb_sentiment_df = pd.DataFrame({ "Avg_Score" : fb_avg })
fb_sentiment_df.index = pd.to_datetime(dates)
fb_sentiment_df.head()

In [None]:
fb_sentiment_df.to_csv("Sentiment_data/fb_sentiment_data.csv")

## Tesla

In [None]:
# Get Tesla
tsla_headlines, _ = get_headlines("tesla")

In [None]:
tsla_avg = headline_sentiment_summarizer_avg(tsla_headlines)

In [None]:
tsla_sentiment_df = pd.DataFrame({ "Avg_Score" : tsla_avg })
tsla_sentiment_df.index = pd.to_datetime(dates)
tsla_sentiment_df.head()

In [None]:
tsla_sentiment_df.to_csv("Sentiment_data/tsla_sentiment_data.csv")

## Microsoft

In [None]:
# Get Microsoft headlines
msft_headlines, _ = get_headlines("microsoft")

In [None]:
msft_avg = headline_sentiment_summarizer_avg(msft_headlines)

In [None]:
msft_sentiment_df = pd.DataFrame({ "Avg_Score" : msft_avg })
msft_sentiment_df.index = pd.to_datetime(dates)
msft_sentiment_df.head()

In [None]:
msft_sentiment_df.to_csv("Sentiment_data/msft_sentiment_data.csv")

## Amazon

In [8]:
# Get Amazon
amzn_headlines, dates = get_headlines("amazon")

Fetching news about 'amazon'
******************************
retrieving news from: 2021-01-14 00:00:00
retrieving news from: 2021-01-13 00:00:00
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news 

In [9]:
amzn_avg = headline_sentiment_summarizer_avg(amzn_headlines)

In [10]:
amzn_sentiment_df = pd.DataFrame({ "Avg_Score" : amzn_avg })
amzn_sentiment_df.index = pd.to_datetime(dates)
amzn_sentiment_df.head()

Unnamed: 0,Avg_Score
2021-01-14,0.078675
2021-01-13,-0.01317
2021-01-12,0.147265
2021-01-11,0.019735
2021-01-10,-0.05361


In [12]:
amzn_sentiment_df.to_csv("../Sentiment_data/new_amzn_sentiment_data.csv")

## Apple

In [13]:
# Get Apple
aapl_headlines, dates = get_headlines("apple")

Fetching news about 'apple'
******************************
retrieving news from: 2021-01-14 00:00:00
retrieving news from: 2021-01-13 00:00:00
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news f

In [14]:
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)

In [15]:
aapl_sentiment_df = pd.DataFrame({ "Avg_Score" : aapl_avg })
aapl_sentiment_df.index = pd.to_datetime(dates)
aapl_sentiment_df.head()

Unnamed: 0,Avg_Score
2021-01-14,-0.02651
2021-01-13,0.245915
2021-01-12,0.05227
2021-01-11,0.124295
2021-01-10,0.01428


In [16]:
aapl_sentiment_df.to_csv("../Sentiment_data/new_aapl_sentiment_data.csv")