# Correlating Returns

In [2]:
import alpaca_trade_api as tradeapi
import pandas as pd
from newsapi.newsapi_client import NewsApiClient
from datetime import date, datetime, timedelta
import os

from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [3]:
# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWSAPI_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [4]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = '1D'

# Get current date and the date from one month ago
current_date = date.today()
past_date = date.today() - timedelta(weeks=4)

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=current_date,
    end=past_date,
    after=None,
    until=None,
).df

df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-06 00:00:00-05:00,293.79,299.96,292.75,299.78,27487207
2020-01-07 00:00:00-05:00,299.84,300.9,297.48,298.27,24353387
2020-01-08 00:00:00-05:00,297.16,304.4399,297.156,303.15,28984028
2020-01-09 00:00:00-05:00,307.25,310.43,306.2,309.71,38540792
2020-01-10 00:00:00-05:00,310.6,312.67,308.25,310.37,32304453


In [5]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df.drop(columns=['open', 'high', 'low', 'volume'], inplace=True)

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

df.head()

Unnamed: 0,close
2020-01-06,299.78
2020-01-07,298.27
2020-01-08,303.15
2020-01-09,309.71
2020-01-10,310.37


In [6]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change()
aapl_returns.head()

Unnamed: 0,close
2020-01-06,
2020-01-07,-0.005037
2020-01-08,0.016361
2020-01-09,0.021639
2020-01-10,0.002131


In [7]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []
    date = current_date
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > past_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates



In [8]:
# Get first topic
aapl_headlines, dates = get_headlines("apple")


Fetching news about 'apple'
******************************
retrieving news from: 2020-05-28
retrieving news from: 2020-05-27
retrieving news from: 2020-05-26
retrieving news from: 2020-05-25
retrieving news from: 2020-05-24
retrieving news from: 2020-05-23
retrieving news from: 2020-05-22
retrieving news from: 2020-05-21
retrieving news from: 2020-05-20
retrieving news from: 2020-05-19
retrieving news from: 2020-05-18
retrieving news from: 2020-05-17
retrieving news from: 2020-05-16
retrieving news from: 2020-05-15
retrieving news from: 2020-05-14
retrieving news from: 2020-05-13
retrieving news from: 2020-05-12
retrieving news from: 2020-05-11
retrieving news from: 2020-05-10
retrieving news from: 2020-05-09
retrieving news from: 2020-05-08
retrieving news from: 2020-05-07
retrieving news from: 2020-05-06
retrieving news from: 2020-05-05
retrieving news from: 2020-05-04
retrieving news from: 2020-05-03
retrieving news from: 2020-05-02
retrieving news from: 2020-05-01


In [12]:
# Get second topic
trade_headlines, _ = get_headlines("trade")


Fetching news about 'trade'
******************************
retrieving news from: 2020-05-28
retrieving news from: 2020-05-27
retrieving news from: 2020-05-26
retrieving news from: 2020-05-25
retrieving news from: 2020-05-24
retrieving news from: 2020-05-23
retrieving news from: 2020-05-22
retrieving news from: 2020-05-21
retrieving news from: 2020-05-20
retrieving news from: 2020-05-19
retrieving news from: 2020-05-18
retrieving news from: 2020-05-17
retrieving news from: 2020-05-16
retrieving news from: 2020-05-15
retrieving news from: 2020-05-14
retrieving news from: 2020-05-13
retrieving news from: 2020-05-12
retrieving news from: 2020-05-11
retrieving news from: 2020-05-10
retrieving news from: 2020-05-09
retrieving news from: 2020-05-08
retrieving news from: 2020-05-07
retrieving news from: 2020-05-06
retrieving news from: 2020-05-05
retrieving news from: 2020-05-04
retrieving news from: 2020-05-03
retrieving news from: 2020-05-02
retrieving news from: 2020-05-01


In [13]:
# Get third topic
economy_headlines, _ = get_headlines("economy")


Fetching news about 'economy'
******************************
retrieving news from: 2020-05-28
retrieving news from: 2020-05-27
retrieving news from: 2020-05-26
retrieving news from: 2020-05-25
retrieving news from: 2020-05-24
retrieving news from: 2020-05-23
retrieving news from: 2020-05-22
retrieving news from: 2020-05-21
retrieving news from: 2020-05-20
retrieving news from: 2020-05-19
retrieving news from: 2020-05-18
retrieving news from: 2020-05-17
retrieving news from: 2020-05-16
retrieving news from: 2020-05-15
retrieving news from: 2020-05-14
retrieving news from: 2020-05-13
retrieving news from: 2020-05-12
retrieving news from: 2020-05-11
retrieving news from: 2020-05-10
retrieving news from: 2020-05-09
retrieving news from: 2020-05-08
retrieving news from: 2020-05-07
retrieving news from: 2020-05-06
retrieving news from: 2020-05-05
retrieving news from: 2020-05-04
retrieving news from: 2020-05-03
retrieving news from: 2020-05-02
retrieving news from: 2020-05-01


In [14]:
# Get fourth topic
iphone_headlines, _ = get_headlines("iphone")

Fetching news about 'iphone'
******************************
retrieving news from: 2020-05-28
retrieving news from: 2020-05-27
retrieving news from: 2020-05-26
retrieving news from: 2020-05-25
retrieving news from: 2020-05-24
retrieving news from: 2020-05-23
retrieving news from: 2020-05-22
retrieving news from: 2020-05-21
retrieving news from: 2020-05-20
retrieving news from: 2020-05-19
retrieving news from: 2020-05-18
retrieving news from: 2020-05-17
retrieving news from: 2020-05-16
retrieving news from: 2020-05-15
retrieving news from: 2020-05-14
retrieving news from: 2020-05-13
retrieving news from: 2020-05-12
retrieving news from: 2020-05-11
retrieving news from: 2020-05-10
retrieving news from: 2020-05-09
retrieving news from: 2020-05-08
retrieving news from: 2020-05-07
retrieving news from: 2020-05-06
retrieving news from: 2020-05-05
retrieving news from: 2020-05-04
retrieving news from: 2020-05-03
retrieving news from: 2020-05-02
retrieving news from: 2020-05-01


In [15]:
# Get fifth topic
gold_headlines, _ = get_headlines("gold")

Fetching news about 'gold'
******************************
retrieving news from: 2020-05-28
retrieving news from: 2020-05-27
retrieving news from: 2020-05-26
retrieving news from: 2020-05-25
retrieving news from: 2020-05-24
retrieving news from: 2020-05-23
retrieving news from: 2020-05-22
retrieving news from: 2020-05-21
retrieving news from: 2020-05-20
retrieving news from: 2020-05-19
retrieving news from: 2020-05-18
retrieving news from: 2020-05-17
retrieving news from: 2020-05-16
retrieving news from: 2020-05-15
retrieving news from: 2020-05-14
retrieving news from: 2020-05-13
retrieving news from: 2020-05-12
retrieving news from: 2020-05-11
retrieving news from: 2020-05-10
retrieving news from: 2020-05-09
retrieving news from: 2020-05-08
retrieving news from: 2020-05-07
retrieving news from: 2020-05-06
retrieving news from: 2020-05-05
retrieving news from: 2020-05-04
retrieving news from: 2020-05-03
retrieving news from: 2020-05-02
retrieving news from: 2020-05-01


In [16]:
# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [17]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment


In [18]:
# Get averages of each topics sentiment
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)
trade_avg = headline_sentiment_summarizer_avg(trade_headlines)
economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
iphone_avg = headline_sentiment_summarizer_avg(iphone_headlines)
gold_avg = headline_sentiment_summarizer_avg(gold_headlines)


In [19]:
# Combine Sentiment Averages into DataFrame
topic_sentiments = pd.DataFrame(
    {
        "aapl_avg": aapl_avg,
        "trade_avg": trade_avg,
        "economy_avg": economy_avg,
        "iphone_avg": iphone_avg,
        "gold_avg": gold_avg,
    }
)


In [20]:
# Set the index value of the sentiment average DataFrame to be the series of dates.
topic_sentiments.index = pd.to_datetime(dates)

In [21]:
# Merge with AAPL returns
topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how="any")

display(topic_sentiments)

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
2020-05-01,-0.016735,0.17091,-0.00799,-0.1833,0.168095,0.235765
2020-05-04,0.014183,0.04228,-0.012815,-0.09806,0.144365,0.01474
2020-05-05,0.015725,-0.02291,0.022435,0.202385,0.156015,0.158055
2020-05-06,0.009134,-0.01746,-0.01692,-0.05095,0.110515,-0.00168
2020-05-07,0.010815,0.115685,-0.009965,0.120275,0.096245,0.00388
2020-05-08,0.021065,0.086715,-0.01225,-0.06229,0.093315,0.09322
2020-05-11,0.015659,0.03976,0.026455,-0.009245,0.047985,0.12059
2020-05-12,-0.011095,0.04417,-0.01412,-0.04639,0.02504,0.10231
2020-05-13,-0.012295,0.07488,-0.14603,-0.19354,0.06733,0.11194
2020-05-14,0.006565,0.06106,-0.048185,-0.0741,0.190125,-0.09293


In [22]:
# Correlate the headlines' sentiment to returns
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
close,1.0,-0.542671,0.255229,0.0719431,-0.0383285,-0.270828
aapl_avg,-0.542671,1.0,0.141425,0.0202085,0.220245,0.350097
trade_avg,0.255229,0.141425,1.0,0.3377,0.164244,0.104259
economy_avg,0.0719431,0.0202085,0.3377,1.0,0.0102939,-0.141479
iphone_avg,-0.0383285,0.220245,0.164244,0.0102939,1.0,-0.00553019
gold_avg,-0.270828,0.350097,0.104259,-0.141479,-0.00553019,1.0
