In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pathlib import Path
import json
import requests
import datetime

In [33]:
# Load .env enviroment variables
load_dotenv()

# Set  StockNews API Key
newsapi = NewsApiClient(api_key=os.environ["news_api"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_Key")
alpaca_secret_key = os.getenv("Alpaca_secret_key")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')
#Set 

In [34]:
# Set the ticker
ticker = ["GSK"]

# Set timeframe to '1D'
timeframe = "1D"

# Set date and the date for 2008
start_date = pd.Timestamp("2008-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2008-12-31", tz="America/New_York").isoformat()

# Get the entire year of 2008 worth of historical data for GSK
gsk_df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df

# Display data
gsk_df.head()

Unnamed: 0_level_0,GSK,GSK,GSK,GSK,GSK
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2008-08-11 00:00:00-04:00,48.72,48.88,48.39,48.46,668310
2008-08-12 00:00:00-04:00,48.26,48.26,47.66,48.03,1006341
2008-08-13 00:00:00-04:00,47.63,48.14,47.53,47.79,891118
2008-08-14 00:00:00-04:00,47.17,48.0,47.17,47.78,843431
2008-08-15 00:00:00-04:00,47.16,47.72,47.16,47.58,1203574


In [35]:
# Drop Outer Table Level
gsk_df = gsk_df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
gsk_df = gsk_df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
gsk_df.index = gsk_df.index.date

# Display sample data
gsk_df.head()

Unnamed: 0,close
2008-08-11,48.46
2008-08-12,48.03
2008-08-13,47.79
2008-08-14,47.78
2008-08-15,47.58


In [36]:
#Update Column Name
gsk_df['GSK'] = gsk_df['close']
gsk_df1 = gsk_df.drop(columns = 'close')
gsk_df1.head()

Unnamed: 0,GSK
2008-08-11,48.46
2008-08-12,48.03
2008-08-13,47.79
2008-08-14,47.78
2008-08-15,47.58


In [37]:
# Set the ticker
ticker = ["PFE"]

# Set timeframe to '1D'
timeframe = "1D"

# Set date and the date for 2008
start_date = pd.Timestamp("2008-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2008-12-31", tz="America/New_York").isoformat()

# Get the entire year of 2008 worth of historical data for PFE
pfe_df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df

# Display data
pfe_df.head()

Unnamed: 0_level_0,PFE,PFE,PFE,PFE,PFE
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2008-08-11 00:00:00-04:00,19.81,20.11,19.69,19.85,39508859
2008-08-12 00:00:00-04:00,19.83,20.03,19.67,19.72,33634563
2008-08-13 00:00:00-04:00,19.75,19.85,19.56,19.65,32346824
2008-08-14 00:00:00-04:00,19.68,20.09,19.51,19.79,34005676
2008-08-15 00:00:00-04:00,19.87,20.13,19.86,19.98,37347287


In [38]:
# Drop Outer Table Level
pfe_df = pfe_df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
pfe_df = pfe_df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
pfe_df.index = pfe_df.index.date

# Display sample data
pfe_df.head()

Unnamed: 0,close
2008-08-11,19.85
2008-08-12,19.72
2008-08-13,19.65
2008-08-14,19.79
2008-08-15,19.98


In [39]:
#Update Column Name
pfe_df['PFE'] = pfe_df['close']
pfe_df1 = pfe_df.drop(columns = 'close')
pfe_df1.head()

Unnamed: 0,PFE
2008-08-11,19.85
2008-08-12,19.72
2008-08-13,19.65
2008-08-14,19.79
2008-08-15,19.98


In [40]:
#Combine DataFrames
pharma_df = pd.concat([gsk_df1, pfe_df1], axis="columns", join="inner")
pharma_df.head()

Unnamed: 0,GSK,PFE
2008-08-11,48.46,19.85
2008-08-12,48.03,19.72
2008-08-13,47.79,19.65
2008-08-14,47.78,19.79
2008-08-15,47.58,19.98


In [41]:
# Use the `pct_change` function to calculate daily returns
pharma_returns = pharma_df.pct_change().dropna()

# Display sample data
pharma_returns.head()

Unnamed: 0,GSK,PFE
2008-08-12,-0.008873,-0.006549
2008-08-13,-0.004997,-0.00355
2008-08-14,-0.000209,0.007125
2008-08-15,-0.004186,0.009601
2008-08-18,-0.011559,-0.016517


In [42]:
# Fetch news about GSK
gsk_news = newsapi.get_everything(
    q=" GSK AND GlaxoSmithKline",
    language="en"
    
)

# Show the total number of news
gsk_news["totalResults"]

216

In [43]:
# Fetch news about PFE
pfe_news = newsapi.get_everything(
    q=" PFE AND Pfizer",
    language="en"
    
)

# Show the total number of news
pfe_news["totalResults"]

319

In [51]:
gsk_sentiment = []
analyzer = SentimentIntensityAnalyzer()

for article in gsk_news["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        text = article["content"][0:198]
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        
        gsk_sentiment.append({
            "text": text,
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound # Compound sentiment
        })
        
    except AttributeError:
        pass
gsk_sentiment_df = pd.DataFrame(gsk_sentiment)
gsk_sentiment_df['date'] = pd.to_datetime(gsk_sentiment_df['date'])
gsk_sentiment_df.set_index('date', inplace=True)
# Set the reorganized columns for the dataframe
gsk_sentiment_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,20.0,20.0,20.0,20.0
mean,0.03565,0.9528,0.01155,0.096225
std,0.063242,0.072283,0.029523,0.28723
min,0.0,0.751,0.0,-0.5423
25%,0.0,0.927,0.0,0.0
50%,0.0,1.0,0.0,0.0
75%,0.06275,1.0,0.0,0.220025
max,0.192,1.0,0.108,0.7506


In [49]:
pfe_sentiment = []
analyzer = SentimentIntensityAnalyzer()

for article in pfe_news["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        text = article["content"][0:198]
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        
        pfe_sentiment.append({
            "text": text,
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound # Compound sentiment
        })
        
    except AttributeError:
        pass
pfe_sentiment_df = pd.DataFrame(pfe_sentiment)
pfe_sentiment_df['date'] = pd.to_datetime(pfe_sentiment_df['date'])
pfe_sentiment_df.set_index('date', inplace=True)
# Set the reorganized columns for the dataframe
pfe_sentiment_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,20.0,20.0,20.0,20.0
mean,0.067,0.8846,0.0484,0.0549
std,0.103185,0.095642,0.060897,0.511426
min,0.0,0.634,0.0,-0.7506
25%,0.0,0.82525,0.0,-0.322475
50%,0.0,0.909,0.018,0.0
75%,0.10275,0.94125,0.08875,0.4767
max,0.366,1.0,0.192,0.8779


In [52]:
#Save News DF to CSV for further use 
file_path = Path("Data/pfe_news.csv")
pfe_sentiment_df.to_csv(file_path, index=False, encoding='utf-8-sig')

In [53]:
#Save News DF to CSV for further use 
file_path = Path("Data/gsk_news.csv")
gsk_sentiment_df.to_csv(file_path, index=False, encoding='utf-8-sig')

In [63]:
#Save pharma df to CSV for further use 
pharma_path = Path("Data/pharma_returns.csv")
pharma_df.to_csv(pharma_path)

In [54]:
gsk_sentiment_df

Unnamed: 0_level_0,text,positive,neutral,negative,compound
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-07-01,"By Reuters Staff\r\nLONDON, July 1 (Reuters) -...",0.0,1.0,0.0,0.0
2021-07-22,By Reuters Staff\r\nJuly 22 (Reuters) - GlaxoS...,0.0,1.0,0.0,0.0
2021-07-01,By Reuters Staff\r\nJuly 1 (Reuters) - GlaxoSm...,0.0,1.0,0.0,0.0
2021-07-02,By Reuters Staff\r\nFILE PHOTO: A GlaxoSmithKl...,0.0,1.0,0.0,0.0
2021-07-13,A GlaxoSmithKline (GSK) logo is seen at the GS...,0.097,0.903,0.0,0.4215
2021-07-01,"LONDON, July 1 (Reuters) - Activist investor E...",0.0,1.0,0.0,0.0
2021-07-20,"By Reuters Staff\r\n(Adds detail, background)\...",0.0,1.0,0.0,0.0
2021-07-17,By Reuters Staff\r\nJuly 17 (Reuters) - GlaxoS...,0.0,1.0,0.0,0.0
2021-07-02,A GlaxoSmithKline (GSK) logo is seen at the GS...,0.0,1.0,0.0,0.0
2021-07-16,A GlaxoSmithKline (GSK) logo is seen at the GS...,0.0,1.0,0.0,0.0
