In [1]:
# Dependencies
import os
import json
import pandas as pd
import requests as req
import matplotlib.pyplot as plt
import numpy as np
import datetime
import dateutil
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## Alpha Vantage

In [2]:
from alpha import api_key

In [3]:
alpha_url = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=api_key"

In [4]:
stock_data=req.get(alpha_url).json()

In [5]:
print(json.dumps(stock_data, indent = 4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Prices (open, high, low, close) and Volumes",
        "2. Symbol": "MSFT",
        "3. Last Refreshed": "2018-08-30 16:00:01",
        "4. Output Size": "Compact",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "2018-04-11": {
            "1. open": "92.0100",
            "2. high": "93.2900",
            "3. low": "91.4800",
            "4. close": "91.8600",
            "5. volume": "24872110"
        },
        "2018-04-12": {
            "1. open": "92.4300",
            "2. high": "94.1600",
            "3. low": "92.4300",
            "4. close": "93.5800",
            "5. volume": "26758879"
        },
        "2018-04-13": {
            "1. open": "94.0500",
            "2. high": "94.1800",
            "3. low": "92.4400",
            "4. close": "93.0800",
            "5. volume": "23346063"
        },
        "2018-04-16": {
            "1. open": "94.0700",
            "2. high": "94.660

In [9]:
# set dates
stock_dates = []

start_date = datetime.date(2018,4,10)

for i in range(142):
    stock_date = start_date + datetime.timedelta(i)
    stock_dates.append(stock_date.isoformat())

In [13]:
high_prices = []
low_prices = []
open_prices = []
close_prices = []
volume_shares = []
dates = []

for stock_date in stock_dates:
    try:
        open_prices.append(stock_data["Time Series (Daily)"][stock_date]["1. open"])
        high_prices.append(stock_data["Time Series (Daily)"][stock_date]["2. high"])
        low_prices.append(stock_data["Time Series (Daily)"][stock_date]["3. low"])
        close_prices.append(stock_data["Time Series (Daily)"][stock_date]["4. close"])
        volume_shares.append(stock_data["Time Series (Daily)"][stock_date]["5. volume"])
        if stock_date in stock_data["Time Series (Daily)"].keys():
            dates.append(stock_date)
    except:
        pass

In [14]:
Stock_Data = pd.DataFrame({"Date":dates,
                           "S&P Open Price": open_prices,
                           "S&P Low Price": low_prices,
                           "S&P High Prices": high_prices,
                           "S&P Close Price": close_prices,
                           "S&P Volume": volume_shares})

In [15]:
Stock_Data

Unnamed: 0,Date,S&P Open Price,S&P Low Price,S&P High Prices,S&P Close Price,S&P Volume
0,2018-04-11,92.0100,91.4800,93.2900,91.8600,24872110
1,2018-04-12,92.4300,92.4300,94.1600,93.5800,26758879
2,2018-04-13,94.0500,92.4400,94.1800,93.0800,23346063
3,2018-04-16,94.0700,93.4200,94.6600,94.1700,20288083
4,2018-04-17,95.0000,94.8800,96.5400,96.0700,26771000
5,2018-04-18,96.2200,95.5200,96.7200,96.4400,21043287
6,2018-04-19,96.4400,95.3400,97.0700,96.1100,23552541
7,2018-04-20,95.9100,94.0500,96.1100,95.0000,31154377
8,2018-04-23,95.7436,94.6300,96.2900,95.3500,22331829
9,2018-04-24,96.2420,92.4100,96.4700,93.1200,34524799


## New York Times

In [24]:
from nytimes import api_key

In [25]:
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"

n = 200
pages = range(n)
snippet = []
pub_date = []


for page in pages:
    params = {
    "api_key" : api_key,
    "q" : "apple",
    "begin_data" : "20180410",
    "end_data" : "20180829",
    "fl": ["snippet","pub_date"],
    "page": page
}
    try:
        nyt_data = req.get(url, params=params).json()
        for i in range(10):
            snippet.append(nyt_data["response"]["docs"][i]["snippet"])
            pub_date.append(nyt_data["response"]["docs"][i]["pub_date"][:10])
    except:
        pass
    

In [28]:
nyt_data = req.get(url, params=params).json()

In [29]:
analyzer = SentimentIntensityAnalyzer()
sentences = snippet
vader_scores = []
date_nyt = []

for sentence in sentences:
    snt = analyzer.polarity_scores(sentence)
    vader_scores.append(snt["compound"])
    date_nyt.append(pub_date)

In [30]:
Nyt_Data = pd.DataFrame({"Date": date_nyt,
                         "Nyt_VaderScore": vader_scores})

In [31]:
Nyt_Data

Unnamed: 0,Date,Nyt_VaderScore
0,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",-0.8100
1,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.4215
2,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.3291
3,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.0000
4,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",-0.7579
5,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.7906
6,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.0000
7,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.2960
8,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.3182
9,"[2018-08-06, 2018-06-06, 2018-08-28, 2018-08-2...",0.5859


## CNN

In [None]:
from cnn import api_key

In [None]:
cnn_url = "https://newsapi.org/v2/everything?sources=cnn&apiKey=fc8819e4d99247a589aa9a4ca8f649c8"

In [None]:
params = {
    "q" : "apple",
    "begin_data" : "20180410",
    "end_data" : "20180829",
    "date":"YYYY-MM-DD"
}

In [None]:
cnn_data = req.get(cnn_url, params=params).json()

In [None]:
cnn_data

In [None]:
cnn_data["articles"][0]["description"]

In [None]:
new_date=dateExampl.split("T")[0]

In [None]:
new_date

In [None]:
publishedAt = []
description = []
n = 0
for i in cnn_data:
    try:
        publishedAt.append(cnn_data["articles"][0]["publishedAt"])
        description.append(cnn_data["articles"][0]['description'])
        n= n+1
    except:
        pass
    


In [None]:
description

In [None]:
description = []
dates = dates


for date in dates:
    description.append(cnn_data["articles"][0]["description"])

In [None]:
sentences = description
vader_scores_cnn = []

for sentence in sentences:
    snt = analyzer.polarity_scores(sentence)
    vader_scores_cnn.append(snt["compound"])

In [None]:
CNN_Data = pd.DataFrame({"Date": dates,
                         "Cnn_VaderScore": vader_scores_cnn})

In [None]:
CNN_Data

## FOX NEWS

In [None]:
from fox import api_key

In [None]:
fox_url = "https://newsapi.org/v2/everything?sources=fox-news&apiKey=fc8819e4d99247a589aa9a4ca8f649c8"

In [None]:
params = {
    "q" : "apple",
    "begin_data" : "20180410",
    "end_data" : "20180829",
    "date":"YYYY-MM-DD"
}

In [None]:
fox_data=req.get(fox_url, params=params).json()

In [None]:
print(json.dumps(fox_data, indent=4, sort_keys=True))

In [None]:
fox_data["articles"][0]["description"]

In [None]:
description = []
dates = dates


for date in dates:
    description.append(fox_data["articles"][0]["description"])

In [None]:
sentences = description
vader_scores_fox = []

for sentence in sentences:
    snt = analyzer.polarity_scores(sentence)
    vader_scores_fox.append(snt["compound"])

In [None]:
Fox_Data = pd.DataFrame({"Date": dates,
                         "Fox_VaderScore": vader_scores_fox})

In [None]:
Fox_Data

## THE WALL STREET JOURNAL API

In [None]:
from wsj import api_key

In [None]:
wsj_url = "https://newsapi.org/v2/everything?sources=the-wall-street-journal&apiKey=fc8819e4d99247a589aa9a4ca8f649c8"

In [None]:
params = {
    #"api_key" : api_key,
    "q" : "apple",
    "begin_data" : "20180410",
    "end_data" : "20180829",
    "date":"YYYY-MM-DD"
    #"page":100
}
    

In [None]:
wsj_url

In [None]:
analyzer = SentimentIntensityAnalyzer()

In [None]:
wsj_data=req.get(wsj_url, params=params).json()

In [None]:
print(json.dumps(wsj_data, indent=4, sort_keys=True))

In [None]:
wsj_data["articles"][0]["description"]

In [None]:
description = []
dates = dates


for date in dates:
    description.append(wsj_data["articles"][0]["description"])

In [None]:
description

In [None]:
sentences = description
vader_scores_wsj = []

for sentence in sentences:
    snt = analyzer.polarity_scores(sentence)
    vader_scores_wsj.append(snt["compound"])

In [None]:
Wsj_Data = pd.DataFrame({"Date": dates,
                         "Wsj_VaderScore": vader_scores_wsj})

In [None]:
Wsj_Data

## Merged all data

In [None]:
# merged Nyt_Data and CNN_Data on Date
Nyt_CNN = pd.merge(Nyt_Data, CNN_Data, on="Date", how="inner")
# merged Fox_Data and Wsj_Data on Date
Fox_Wsj = pd.merge(Fox_Data, Wsj_Data, on="Date", how="inner")
# merge again the above two merged data (Nyt_CNN and Fox_Wsj)
Nyt_CNN_Fox_Wsj = pd.merge(Nyt_CNN, Fox_Wsj, on="Date", how="inner")

In [None]:
Nyt_CNN_Fox_Wsj

In [None]:
#Lastly this merged with stock data
STOCK_DATA = pd.merge(Stock_Data, Nyt_CNN_Fox_Wsj, on="Date", how="inner")

In [None]:
STOCK_DATA