## Import Libraries

In [25]:
import os
import pandas as pd
from datetime import datetime, timedelta
from pathlib import Path

In [26]:
# Import News API and Natural Language Toolkit
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load APIs

In [27]:
from dotenv import load_dotenv
load_dotenv()

True

In [28]:
newsapi = NewsApiClient(api_key=os.environ["news_api"])

## Create Headline and Sentiment Analyzer Functions

In [29]:
# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp("2021-01-12", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-12-12", tz="America/New_York").isoformat()

# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        try:
            articles = newsapi.get_everything(
                q=keyword,
                from_param=str(date),
                to=str(date),
                language="en",
                sort_by="relevancy",
                page=1,
            )
            headlines = []
            for i in range(0, len(articles["articles"])):
                headlines.append(articles["articles"][i]["title"])
            all_headlines.append(headlines)
            all_dates.append(date)
            date = date - timedelta(days=1)
        except:
            print("Done")
            break
    return all_headlines, all_dates

In [30]:
# Instantiate Sentiment Analyzer
Analyzer = SentimentIntensityAnalyzer()

In [31]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(Analyzer.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

## Amazon

In [18]:
# Get Amazon
amzn_headlines, _ = get_headlines("amazon")

Fetching news about 'amazon'
******************************
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news 

In [19]:
amzn_avg = headline_sentiment_summarizer_avg(amzn_headlines)

In [32]:
amzn_sentiment_df = pd.DataFrame({ "Avg_Score" : amzn_avg })
amzn_sentiment_df.index = pd.to_datetime(amzn_sentiment_df.index)
amzn_sentiment_df.head()

Unnamed: 0,Avg_Score
1970-01-01 00:00:00.000000000,0.147265
1970-01-01 00:00:00.000000001,0.019735
1970-01-01 00:00:00.000000002,-0.05361
1970-01-01 00:00:00.000000003,0.22792
1970-01-01 00:00:00.000000004,0.12679


In [41]:
amzn_sentiment_df.to_csv(Path("../Sentiment_data/amzn_sentiment_data.csv"))

## Apple

In [37]:
# Get Apple
aapl_headlines, dates = get_headlines("apple")

Fetching news about 'apple'
******************************
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news f

In [38]:
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)

In [39]:
aapl_sentiment_df = pd.DataFrame({ "Avg_Score" : aapl_avg })
aapl_sentiment_df.index = pd.to_datetime(aapl_sentiment_df.index)
aapl_sentiment_df.head()

Unnamed: 0,Avg_Score
1970-01-01 00:00:00.000000000,0.05227
1970-01-01 00:00:00.000000001,0.124295
1970-01-01 00:00:00.000000002,0.01428
1970-01-01 00:00:00.000000003,-0.070925
1970-01-01 00:00:00.000000004,0.02112


In [42]:
aapl_sentiment_df.to_csv("../Sentiment_data/aapl_sentiment_data.csv")