In [None]:
import io
import os
from time import sleep

import pandas as pd
import requests
from dotenv import load_dotenv

from utils.ipynb_helpers import read_data, write_df, convert_tz
# Create a .env file and add your keys. The cells below read these names from
# the environment: ALPHA_VANTAGE_API_KEY, APCA_API_BASE_URL, APCA_API_KEY_ID,
# APCA_API_SECRET_KEY and POLYGON_API_KEY.
load_dotenv()

# Directory that receives the raw files pulled from the data providers
DATA_RAW = "data/raw"

# --- Ticker groups ---
equities = ["XOM", "CVX", "COP", "BP", "PBR"]
more_equities = ["WTI"]
# Crude oil symbols are currently switched off: 'CL=F' (wti), 'BZ=F' (brent)
crude_oil = []
# Unrelated names; not included in the default `tickers` list below
random = ["TSLA", "AAPL"]

# Default ticker list
tickers = [*equities, *more_equities, *crude_oil]


##### Get Data From Data Provider

In [None]:
# Alpha Vantage credentials -- a key can be requested at
# https://www.alphavantage.co/support/#api-key
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

def add_et_tz(data):
    """Label a timezone-unlabelled datetime index as US/Eastern.

    Args:
        data: DataFrame whose index holds naive timestamps (or values that
            ``pd.to_datetime`` can parse) to be interpreted as Eastern time.

    Returns:
        The same DataFrame object that was passed in; its index is replaced
        in place by a ``US/Eastern`` localized ``DatetimeIndex``.

    Raises:
        TypeError: if the index is already timezone-aware.
    """
    # Localize the DatetimeIndex directly. The previous implementation went
    # through ``to_series(keep_tz=True)``; that keyword was deprecated and
    # later removed from pandas, so the call fails on current versions.
    data.index = pd.to_datetime(data.index).tz_localize('US/Eastern')
    return data

def csv_str_to_df(decoded_content, ticker):
    """Parse a CSV payload into a DataFrame with ``(ticker, field)`` columns.

    Args:
        decoded_content: CSV text whose first line is the header and whose
            first column holds the timestamps.
        ticker: Symbol used as the outer level of the resulting column
            MultiIndex.

    Returns:
        DataFrame indexed by a ``DatetimeIndex`` named ``date``. Columns are a
        two-level MultiIndex ``(ticker, <original column name>)``.
    """
    # Let pandas parse the CSV so numeric fields come back as numbers; the
    # previous manual ``split(',')`` left every value as a string.
    data = pd.read_csv(io.StringIO(decoded_content))

    # Whatever the provider calls its first column, expose it as "date".
    data = data.rename(columns={data.columns[0]: "date"}).set_index("date")
    data.index = pd.to_datetime(data.index)

    # Add timezone -- we assume unlabelled timestamps are sent in Eastern time.
    # ``DatetimeIndex.tz`` is checked directly; the former
    # ``to_series(keep_tz=True)`` call relied on a keyword that has been
    # removed from pandas.
    if data.index.tz is None:
        print("CONVERTING TIME")
        data = add_et_tz(data)
        data = convert_tz(data, time_zone="UTC")

    # Wrap the columns under the ticker so frames for several tickers can be
    # concatenated side by side.
    return pd.concat([data], axis=1, keys=[ticker])

def alpha_vantage_get_ticker_data(ticker, time="1min", year=1, month=1):
    """Get (ticker, year, month) data using Alpha Vantage's time series intraday extended API.

    The request is repeated until the response is either recognised as an
    error (returns ``None``) or treated as CSV data (returns a DataFrame).

    Args:
        ticker: Symbol to request.
        time: Bar interval passed as the ``interval`` query parameter.
        year: Year part of the ``slice`` query parameter (``year{year}``).
        month: Month part of the ``slice`` query parameter (``month{month}``).

    Returns:
        The DataFrame built by ``csv_str_to_df`` for this slice, or ``None``
        when the response is classified as an error and the slice is skipped.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the request timeout. Other ``requests`` errors propagate as well.
    """
    # Responses are classified by their length, as in the original code.
    # NOTE(review): these lengths match specific message bodies; confirm they
    # are still accurate if the provider changes its wording.
    rate_limit_response_len = 236   # "too many requests" reply
    max_error_response_len = 243    # anything this short is not price data
    rate_limit_backoff_s = 60
    # Without a timeout a stalled connection would block the notebook forever.
    request_timeout_s = 30

    CSV_URL = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={ticker}&interval={time}&slice=year{year}month{month}&apikey={ALPHA_VANTAGE_API_KEY}"

    # One session is reused across retries instead of being rebuilt each time.
    with requests.Session() as s:
        while True:
            download = s.get(CSV_URL, timeout=request_timeout_s)
            decoded_content = download.content.decode('utf-8')
            print(f"ticker: {ticker}, y{year} m{month}; response length: {len(decoded_content)}")

            if len(decoded_content) == rate_limit_response_len:
                # API too many requests: wait, then try the same slice again
                sleep(rate_limit_backoff_s)
            elif len(decoded_content) <= max_error_response_len:
                # Token doesn't exist or something
                print(f"Error getting {ticker}, y{year}, m{month}. We are skipping")
                print(decoded_content)
                return None
            else:
                return csv_str_to_df(decoded_content, ticker)


def use_alpha_vantage(tickers, out_file, time="1min"):
    """Get multiple full tickers' data using Alpha Vantage's time series intraday extended API.

    For every ticker, all slices ``year1..year2`` x ``month1..month12`` are
    requested; slices that come back as ``None`` are dropped. Per-ticker
    slices are stacked row-wise, then tickers are joined column-wise.

    Args:
        tickers: Iterable of symbols to download.
        out_file: Path handed to ``write_df``; pass ``None`` to skip writing
            (same convention as ``use_alpaca``).
        time: Bar interval forwarded to ``alpha_vantage_get_ticker_data``.

    Returns:
        The combined DataFrame, with its index named ``date``.

    Raises:
        ValueError: if no data was obtained for any ticker (including an
            empty ``tickers``), since there is nothing to combine.
    """
    dfs = []
    for ticker in tickers:
        t_dfs = []
        for year in range(1, 3):
            for month in range(1, 13):
                df_temp = alpha_vantage_get_ticker_data(ticker, time=time, year=year, month=month)
                if df_temp is not None:
                    t_dfs.append(df_temp)

        if t_dfs:
            dfs.append(pd.concat(t_dfs, axis=0))
        else:
            print(f"Skipped {ticker}.")

    # pd.concat([]) fails with an opaque "No objects to concatenate"; report
    # the actual situation instead (same exception type as before).
    if not dfs:
        raise ValueError("No data was downloaded for any of the requested tickers.")

    df = pd.concat(dfs, axis=1, sort=True)
    df.index.name = 'date'

    if out_file is not None:
        write_df(df, out_file)

    return df

In [None]:
# Alpaca connection settings, read from the environment
APCA_API_BASE_URL = os.getenv("APCA_API_BASE_URL")
APCA_API_KEY_ID = os.getenv("APCA_API_KEY_ID")
APCA_API_SECRET_KEY = os.getenv("APCA_API_SECRET_KEY")

import alpaca_trade_api as tradeapi

def use_alpaca(tickers, alpaca, out_file, timeframe="1Minute", start="2017-01-01"):
    """Fetch bars for several tickers through an Alpaca client and join them.

    Args:
        tickers: Iterable of symbols to request, in order.
        alpaca: Client object exposing ``get_bars(symbol, timeframe, start)``
            whose result has a ``.df`` DataFrame attribute.
        out_file: Path handed to ``write_df``; ``None`` skips writing.
        timeframe: Bar size forwarded to ``get_bars``.
        start: Start date forwarded to ``get_bars``.

    Returns:
        DataFrame with ``(ticker, field)`` columns and an index named ``date``.
    """
    def _labelled_bars(symbol):
        # Download one symbol and nest its columns under the symbol name.
        print("Getting", symbol)
        bars = alpaca.get_bars(symbol, timeframe, start).df
        print("Recieved", symbol)
        bars.index.name = 'date'
        return pd.concat([bars], axis=1, keys=[symbol])

    combined = pd.concat(
        [_labelled_bars(symbol) for symbol in tickers],
        axis=1,
        sort=True,
    )
    combined.index.rename('date', inplace=True)

    if out_file is None:
        return combined

    write_df(combined, out_file)
    return combined

In [None]:
# Polygon credentials, read from the environment
POLYGON_API_KEY = os.getenv("POLYGON_API_KEY")

In [None]:
# Alpha Vantage: download every slice for each symbol in `tickers` (24 requests
# per ticker, pausing 60 s whenever the rate limit is hit) and store the
# combined frame as realdata.csv under DATA_RAW.
df = use_alpha_vantage(tickers, out_file=os.path.join(DATA_RAW, "realdata.csv"))

In [None]:
# Alpaca: connect, confirm the account is reachable, then download hourly bars.
alpaca = tradeapi.REST(key_id=APCA_API_KEY_ID, secret_key=APCA_API_SECRET_KEY, base_url=APCA_API_BASE_URL)
account = alpaca.get_account()
print(account.status)

extended_tickers = ["TTE", "EQNR", "EOG", "ENB", "SLB"]
# Build the Alpaca list under its own name instead of reassigning `tickers`.
# Reassigning made this cell non-idempotent: each re-run appended the extra
# symbols again, and re-running the Alpha Vantage cell afterwards would
# silently pick up the extended list.
alpaca_tickers = tickers + extended_tickers + random
df = use_alpaca(alpaca_tickers, alpaca, os.path.join(DATA_RAW, "realdata_alp_1h.csv"), timeframe="1Hour")

In [None]:
# Preview the first rows of `df`, i.e. the frame assigned by whichever
# download cell above was run last.
df.head()

## Extras

##### Read Data From All-Data CSV (Multi Index Columns)

In [None]:
# Load the all-data CSV (same path the Alpha Vantage cell writes to)
df_all = read_data(os.path.join(DATA_RAW, "realdata.csv"))
# df = read_data("tsla_aapl.csv")
# Show the stored frame next to the in-memory `df`: previews first, then the
# column labels of each.
print(df_all.head(), df.head(), df_all.columns, df.columns, sep="\n")

##### Concatenate two datasets

In [None]:
# Manual switch: set to True to append `df` to the stored frame. Note that the
# result is written back to the same CSV that `df_all` was read from.
run = False
if run:
    # Only merge when the two frames do not already share the same columns.
    if not df.columns.equals(df_all.columns):
        df_new = write_df(
            pd.concat([df_all, df], axis=1),
            os.path.join(DATA_RAW, "realdata.csv"),
        )