## Import Libraries

In [None]:
import os
import pandas as pd
from datetime import datetime, timedelta

In [None]:
# Import News API and Natural Language Toolkit
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load APIs

In [None]:
from dotenv import load_dotenv
# Load environment variables via python-dotenv before the API key is read from os.environ
load_dotenv()

In [None]:
# Build the News API client; os.environ[...] raises KeyError if "news_api" is not set
newsapi = NewsApiClient(api_key=os.environ["news_api"])

## Create Headline and Sentiment Analyzer Functions

In [None]:
# Fixed reporting window as ISO-8601 strings in New York time:
# the newest day to report on, and the day one month earlier.
current_date, past_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2021-01-12", "2020-12-12")
)

# Use newsapi client to get the most relevant headlines per day in the past month
def get_headlines(keyword):
    """Fetch one page of English headlines about ``keyword`` for each day.

    Walks backwards one day at a time from ``current_date`` down to, but not
    including, ``past_date`` (module-level ISO strings) and issues one
    ``newsapi.get_everything`` request per day, sorted by relevancy.

    Args:
        keyword: Search term passed to the API as ``q``.

    Returns:
        A ``(all_headlines, all_dates)`` tuple of equal-length lists ordered
        newest day first. ``all_headlines[i]`` is the list of article titles
        returned for the ``datetime`` stored in ``all_dates[i]``. Titles are
        kept exactly as returned by the API (possibly ``None``).

    Note:
        No page size is requested, so the number of headlines per day is
        whatever the API returns for page 1.
    """
    all_headlines = []
    all_dates = []
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        # Send a plain YYYY-MM-DD string: str(date) would produce
        # "YYYY-MM-DD 00:00:00", which is not a valid ISO 8601 date/datetime.
        day = date.strftime("%Y-%m-%d")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=day,
            to=day,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append(
            [article["title"] for article in articles["articles"]]
        )
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

In [None]:
# Instantiate Sentiment Analyzer once; the summarizer function reuses this instance
Analyzer = SentimentIntensityAnalyzer()

In [None]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Return the mean VADER compound score of each day's headlines.

    Args:
        headlines: Iterable of per-day collections of headline strings.
            ``None`` entries are skipped.

    Returns:
        A list with one float per day, in input order. A day that has no
        scorable headline (empty, or only ``None`` titles) yields
        ``float("nan")`` so the result keeps one entry per day.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            Analyzer.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # Avoid ZeroDivisionError on days without usable headlines.
            sentiment.append(float("nan"))
    return sentiment

## Facebook

In [None]:
# Get Facebook headlines (one list of titles per day) and the matching dates
fb_headlines, dates = get_headlines("facebook")

In [None]:
# Average the compound sentiment of each day's Facebook headlines
fb_avg = headline_sentiment_summarizer_avg(fb_headlines)

In [None]:
# One row per day: daily average score indexed by the fetch dates
fb_sentiment_df = pd.DataFrame(
    {"Avg_Score": fb_avg},
    index=pd.to_datetime(dates),
)
fb_sentiment_df.head()

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("Sentiment_data", exist_ok=True)
fb_sentiment_df.to_csv("Sentiment_data/fb_sentiment_data.csv")

## Tesla

In [None]:
# Get Tesla headlines; keep the dates returned by this call so the Tesla cells
# do not depend on another section having been run first
tsla_headlines, dates = get_headlines("tesla")

In [None]:
# Average the compound sentiment of each day's Tesla headlines
tsla_avg = headline_sentiment_summarizer_avg(tsla_headlines)

In [None]:
# One row per day: daily average score indexed by the fetch dates
tsla_sentiment_df = pd.DataFrame(
    {"Avg_Score": tsla_avg},
    index=pd.to_datetime(dates),
)
tsla_sentiment_df.head()

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("Sentiment_data", exist_ok=True)
tsla_sentiment_df.to_csv("Sentiment_data/tsla_sentiment_data.csv")

## Microsoft

In [None]:
# Get Microsoft headlines; keep the dates returned by this call so the Microsoft
# cells do not depend on another section having been run first
msft_headlines, dates = get_headlines("microsoft")

In [None]:
# Average the compound sentiment of each day's Microsoft headlines
msft_avg = headline_sentiment_summarizer_avg(msft_headlines)

In [None]:
# One row per day: daily average score indexed by the fetch dates
msft_sentiment_df = pd.DataFrame(
    {"Avg_Score": msft_avg},
    index=pd.to_datetime(dates),
)
msft_sentiment_df.head()

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("Sentiment_data", exist_ok=True)
msft_sentiment_df.to_csv("Sentiment_data/msft_sentiment_data.csv")

## Amazon

In [None]:
# Get Amazon headlines; keep the dates returned by this call so the Amazon
# cells do not depend on another section having been run first
amzn_headlines, dates = get_headlines("amazon")

In [None]:
# Average the compound sentiment of each day's Amazon headlines
amzn_avg = headline_sentiment_summarizer_avg(amzn_headlines)

In [None]:
# One row per day: daily average score indexed by the fetch dates
amzn_sentiment_df = pd.DataFrame(
    {"Avg_Score": amzn_avg},
    index=pd.to_datetime(dates),
)
amzn_sentiment_df.head()

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("Sentiment_data", exist_ok=True)
amzn_sentiment_df.to_csv("Sentiment_data/amzn_sentiment_data.csv")

## Apple

In [None]:
# Get Apple headlines (one list of titles per day) and the matching dates
aapl_headlines, dates = get_headlines("apple")

In [None]:
# Average the compound sentiment of each day's Apple headlines
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)

In [None]:
# One row per day: daily average score indexed by the fetch dates
aapl_sentiment_df = pd.DataFrame(
    {"Avg_Score": aapl_avg},
    index=pd.to_datetime(dates),
)
aapl_sentiment_df.head()

In [None]:
# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("Sentiment_data", exist_ok=True)
aapl_sentiment_df.to_csv("Sentiment_data/aapl_sentiment_data.csv")