# Correlating Returns

In [1]:
import alpaca_trade_api as tradeapi
import pandas as pd
from newsapi.newsapi_client import NewsApiClient
from datetime import date, datetime, timedelta
import os
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [2]:
# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWSAPI_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [37]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = '1D'

# Get current date and the date from one month ago
current_date = date.today()
past_date = date.today() - timedelta(weeks=4)

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=current_date,
    end=past_date,
    after=None,
    until=None,
).df

df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
2019-11-05 00:00:00-05:00,257.05,258.19,256.32,257.15,17738372
2019-11-06 00:00:00-05:00,256.84,257.49,255.365,257.24,15328064
2019-11-07 00:00:00-05:00,258.74,260.35,258.11,259.5,22267200
2019-11-08 00:00:00-05:00,258.69,260.44,256.85,260.14,15896858
2019-11-11 00:00:00-05:00,258.3,262.47,258.28,262.2,19401290


In [38]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df.drop(columns=['open', 'high', 'low', 'volume'], inplace=True)

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

df.head()

Unnamed: 0,close
2019-11-05,257.15
2019-11-06,257.24
2019-11-07,259.5
2019-11-08,260.14
2019-11-11,262.2


In [39]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change()
aapl_returns.head()

Unnamed: 0,close
2019-11-05,
2019-11-06,0.00035
2019-11-07,0.008786
2019-11-08,0.002466
2019-11-11,0.007919


In [40]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []
    date = current_date
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > past_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates



In [41]:
# Get first topic
aapl_headlines, dates = get_headlines("apple")

Fetching news about 'apple'
******************************
retrieving news from: 2020-03-30
retrieving news from: 2020-03-29
retrieving news from: 2020-03-28
retrieving news from: 2020-03-27
retrieving news from: 2020-03-26
retrieving news from: 2020-03-25
retrieving news from: 2020-03-24
retrieving news from: 2020-03-23
retrieving news from: 2020-03-22
retrieving news from: 2020-03-21
retrieving news from: 2020-03-20
retrieving news from: 2020-03-19
retrieving news from: 2020-03-18
retrieving news from: 2020-03-17
retrieving news from: 2020-03-16
retrieving news from: 2020-03-15
retrieving news from: 2020-03-14
retrieving news from: 2020-03-13
retrieving news from: 2020-03-12
retrieving news from: 2020-03-11
retrieving news from: 2020-03-10
retrieving news from: 2020-03-09
retrieving news from: 2020-03-08
retrieving news from: 2020-03-07
retrieving news from: 2020-03-06
retrieving news from: 2020-03-05
retrieving news from: 2020-03-04
retrieving news from: 2020-03-03


In [42]:
# Get second topic
trade_headlines, _ = get_headlines("trade")

Fetching news about 'trade'
******************************
retrieving news from: 2020-03-30
retrieving news from: 2020-03-29
retrieving news from: 2020-03-28
retrieving news from: 2020-03-27
retrieving news from: 2020-03-26
retrieving news from: 2020-03-25
retrieving news from: 2020-03-24
retrieving news from: 2020-03-23
retrieving news from: 2020-03-22
retrieving news from: 2020-03-21
retrieving news from: 2020-03-20
retrieving news from: 2020-03-19
retrieving news from: 2020-03-18
retrieving news from: 2020-03-17
retrieving news from: 2020-03-16
retrieving news from: 2020-03-15
retrieving news from: 2020-03-14
retrieving news from: 2020-03-13
retrieving news from: 2020-03-12
retrieving news from: 2020-03-11
retrieving news from: 2020-03-10
retrieving news from: 2020-03-09
retrieving news from: 2020-03-08
retrieving news from: 2020-03-07
retrieving news from: 2020-03-06
retrieving news from: 2020-03-05
retrieving news from: 2020-03-04
retrieving news from: 2020-03-03


In [43]:
# Get third topic
economy_headlines, _ = get_headlines("economy")

Fetching news about 'economy'
******************************
retrieving news from: 2020-03-30
retrieving news from: 2020-03-29
retrieving news from: 2020-03-28
retrieving news from: 2020-03-27
retrieving news from: 2020-03-26
retrieving news from: 2020-03-25
retrieving news from: 2020-03-24
retrieving news from: 2020-03-23
retrieving news from: 2020-03-22
retrieving news from: 2020-03-21
retrieving news from: 2020-03-20
retrieving news from: 2020-03-19
retrieving news from: 2020-03-18
retrieving news from: 2020-03-17
retrieving news from: 2020-03-16
retrieving news from: 2020-03-15
retrieving news from: 2020-03-14
retrieving news from: 2020-03-13
retrieving news from: 2020-03-12
retrieving news from: 2020-03-11
retrieving news from: 2020-03-10
retrieving news from: 2020-03-09
retrieving news from: 2020-03-08
retrieving news from: 2020-03-07
retrieving news from: 2020-03-06
retrieving news from: 2020-03-05
retrieving news from: 2020-03-04
retrieving news from: 2020-03-03


In [44]:
# Get fourth topic
iphone_headlines, _ = get_headlines("iphone")

Fetching news about 'iphone'
******************************
retrieving news from: 2020-03-30
retrieving news from: 2020-03-29
retrieving news from: 2020-03-28
retrieving news from: 2020-03-27
retrieving news from: 2020-03-26
retrieving news from: 2020-03-25
retrieving news from: 2020-03-24
retrieving news from: 2020-03-23
retrieving news from: 2020-03-22
retrieving news from: 2020-03-21
retrieving news from: 2020-03-20
retrieving news from: 2020-03-19
retrieving news from: 2020-03-18
retrieving news from: 2020-03-17
retrieving news from: 2020-03-16
retrieving news from: 2020-03-15
retrieving news from: 2020-03-14
retrieving news from: 2020-03-13
retrieving news from: 2020-03-12
retrieving news from: 2020-03-11
retrieving news from: 2020-03-10
retrieving news from: 2020-03-09
retrieving news from: 2020-03-08
retrieving news from: 2020-03-07
retrieving news from: 2020-03-06
retrieving news from: 2020-03-05
retrieving news from: 2020-03-04


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 500 requests over a 24 hour period (250 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [20]:
# Get fifth topic
gold_headlines, _ = get_headlines("gold")

Fetching news about 'gold'
******************************
retrieving news from: 2020-03-30
retrieving news from: 2020-03-29
retrieving news from: 2020-03-28
retrieving news from: 2020-03-27
retrieving news from: 2020-03-26
retrieving news from: 2020-03-25
retrieving news from: 2020-03-24
retrieving news from: 2020-03-23
retrieving news from: 2020-03-22
retrieving news from: 2020-03-21
retrieving news from: 2020-03-20
retrieving news from: 2020-03-19
retrieving news from: 2020-03-18
retrieving news from: 2020-03-17
retrieving news from: 2020-03-16
retrieving news from: 2020-03-15
retrieving news from: 2020-03-14
retrieving news from: 2020-03-13
retrieving news from: 2020-03-12
retrieving news from: 2020-03-11
retrieving news from: 2020-03-10
retrieving news from: 2020-03-09
retrieving news from: 2020-03-08
retrieving news from: 2020-03-07
retrieving news from: 2020-03-06
retrieving news from: 2020-03-05
retrieving news from: 2020-03-04
retrieving news from: 2020-03-03


In [45]:
# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [46]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment



In [67]:
# Get averages of each topics sentiment
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)
trade_avg = headline_sentiment_summarizer_avg(trade_headlines)
economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
iphone_avg = headline_sentiment_summarizer_avg(iphone_headlines)
gold_avg = headline_sentiment_summarizer_avg(gold_headlines)


In [70]:
# Combine Sentiment Averages into DataFrame
topic_sentiments = pd.DataFrame(
    {
        "aapl_avg": aapl_avg,
        "trade_avg": trade_avg,
        "economy_avg": economy_avg,
        "iphone_avg": iphone_avg,
        "gold_avg": gold_avg,
    }
)


In [71]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
topic_sentiments.index = pd.to_datetime(dates)

In [72]:
# Merge with AAPL returns
topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how="any")

display(topic_sentiments)

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
2020-03-03,-0.031825,0.120545,0.08998,-0.16222,0.261435,0.13025
2020-03-04,0.04649,0.08213,-0.0085,-0.023815,0.03266,0.02118
2020-03-05,-0.032501,-0.029005,-0.15175,-0.017475,0.08253,0.125445
2020-03-06,-0.013451,0.053715,-0.085335,-0.07122,0.03782,0.112
2020-03-09,-0.07921,0.119485,-0.00603,-0.26786,0.104155,0.051715
2020-03-10,0.073659,0.122495,0.1304,-0.052925,0.0169,0.22252
2020-03-11,-0.036368,0.09772,-0.042475,-0.141795,0.048175,0.01982
2020-03-12,-0.099165,0.045705,-0.13623,-0.227705,0.06136,0.076135
2020-03-13,0.12121,0.094535,-0.176075,-0.302805,0.13366,0.15684
2020-03-16,-0.13159,0.143135,-0.03796,-0.06884,0.096685,-0.09735


In [66]:
# Correlate the headlines' sentiment to returns
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
close,1.0,0.11691,0.095208,0.284496,0.051821,0.291429
aapl_avg,0.11691,1.0,0.38043,0.178111,0.444952,-0.137073
trade_avg,0.095208,0.38043,1.0,0.342088,0.251751,0.164495
economy_avg,0.284496,0.178111,0.342088,1.0,-0.121478,-0.296394
iphone_avg,0.051821,0.444952,0.251751,-0.121478,1.0,0.010724
gold_avg,0.291429,-0.137073,0.164495,-0.296394,0.010724,1.0
