In [None]:
import requests
import pandas as pd
from time import sleep
import datetime
import os
from dotenv import load_dotenv
# Create a .env file and add your keys
load_dotenv()


# Ticker groups. `tickers` is the combined list passed to the downloader;
# `random` is kept separate and is not part of it.
equities = ['XOM', 'CVX', 'SHEL', 'COP', 'BP', 'PBR']
more_equities = ['WTI']
crude_oil = []  # currently disabled: ['CL=F', 'BZ=F']  (wti, brent)
random = ["TSLA", "AAPL"]
tickers = [*equities, *more_equities, *crude_oil]


def convert_tz(data, time_zone='US/Eastern'):
    """Convert the timezone-aware index of ``data`` to ``time_zone``.

    The object is modified in place and also returned, so the call can be
    used either as a statement or as an expression.

    Args:
        data: pandas object whose index is a timezone-aware DatetimeIndex.
        time_zone: Target timezone, passed straight to ``tz_convert``.

    Returns:
        The same ``data`` object, with its index expressed in ``time_zone``.

    Raises:
        TypeError: If the index is timezone-naive (raised by ``tz_convert``).
    """
    # Convert the index directly. The previous round-trip through
    # ``Index.to_series(keep_tz=True)`` fails on pandas >= 1.0, where the
    # ``keep_tz`` argument no longer exists.
    data.index = data.index.tz_convert(time_zone)
    return data

def write_df(data, out_file):
    """Write ``data`` to ``out_file`` as CSV with flattened column labels.

    Tuple (multi-level) column labels are joined with "_" so the CSV has a
    single header row; non-tuple labels are written as ``str(label)``.
    If ``out_file`` already exists it is first moved into ``data/old`` under
    a timestamped name, so a previous download is never overwritten.

    Args:
        data: DataFrame to persist. It is not modified.
        out_file: Destination path of the CSV file.

    Returns:
        ``data`` itself, unchanged, so the call can be chained or assigned.
    """
    def _flatten(label):
        # Join the levels of a tuple label; leave scalar labels whole
        # (joining a plain string would interleave "_" between its characters).
        if isinstance(label, tuple):
            return "_".join(str(part) for part in label)
        return str(label)

    # Flatten on a shallow copy so the caller's columns are never touched,
    # even if writing the file fails part-way.
    flat = data.copy(deep=False)
    flat.columns = pd.Index([_flatten(col) for col in data.columns])

    if os.path.exists(out_file):
        # Move current file to data/old
        data_old = "data/old"
        os.makedirs(data_old, exist_ok=True)
        # splitext copes with paths that have no extension or that contain a
        # dot only in a directory component.
        stem, ext = os.path.splitext(out_file)
        stamp = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
        safe_stem = stem.replace('/', '_').replace(os.sep, '_')
        new_file_name = f"{safe_stem}_{stamp}{ext}"
        os.rename(out_file, os.path.join(data_old, new_file_name))

    flat.to_csv(out_file)
    return data

def read_data(out_file="realdata.csv"):
    """Load a CSV with flattened "<first>_<rest>" headers back into a frame.

    The first CSV column becomes the index and is parsed as datetimes. Each
    remaining header is turned back into a tuple label.

    Args:
        out_file: Path of the CSV file to read.

    Returns:
        DataFrame with a datetime index and tuple column labels.
    """
    data = pd.read_csv(out_file, index_col=0)

    # Split on the first "_" only: a field name that itself contains an
    # underscore (for example "trade_count") must stay in one piece, otherwise
    # the labels end up with differing numbers of levels.
    data.columns = data.columns.map(lambda col: tuple(col.split("_", 1)))
    data.index = pd.to_datetime(data.index)
    return data

##### Get Data From Data Provider

In [None]:
# Alpha Vantage API key, read from the environment (None when unset).
# Keys are issued at https://www.alphavantage.co/support/#api-key
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

def add_et_tz(data):
    """Label a timezone-unlabelled index as US/Eastern.

    The wall-clock values are kept as they are; only the timezone label is
    attached. The object is modified in place and also returned.

    Args:
        data: pandas object whose index holds timezone-naive timestamps
            (datetimes or strings that ``pd.to_datetime`` can parse).

    Returns:
        The same ``data`` object with a US/Eastern DatetimeIndex.

    Raises:
        TypeError: If the index is already timezone-aware (raised by
            ``tz_localize``).
    """
    # Localize on the index itself. ``to_series(keep_tz=True)`` was removed in
    # pandas 1.0 and raises TypeError on current versions.
    data.index = pd.to_datetime(data.index).tz_localize('US/Eastern')
    return data

def csv_str_to_df(decoded_content, ticker):
    """Parse one ticker's CSV payload into a DataFrame.

    The first CSV column becomes a datetime index named "date". If the
    timestamps carry no timezone they are assumed to be US/Eastern and are
    converted to UTC. The columns are then nested under ``ticker`` as an
    outer column level.

    Args:
        decoded_content: CSV text with a header row; the first column holds
            the timestamps.
        ticker: Label used as the outer column level.

    Returns:
        DataFrame indexed by "date" with (ticker, field) columns. Value
        columns are parsed by pandas, so numeric fields come back as numbers
        rather than strings.
    """
    from io import StringIO  # local import keeps this block self-contained

    # Let pandas parse the CSV: this handles quoting and line endings and
    # gives numeric dtypes, which splitting on "," by hand did not.
    data = pd.read_csv(StringIO(decoded_content), index_col=0)
    data.index = pd.to_datetime(data.index)
    data.index.name = 'date'

    # Add timezone -- we assume it is sent in with unlabelled eastern time
    # (``DatetimeIndex.tz`` replaces the removed ``to_series(keep_tz=True)``).
    if data.index.tz is None:
        print("CONVERTING TIME")
        data = add_et_tz(data)
        data = convert_tz(data, time_zone="UTC")
    return pd.concat([data], axis=1, keys=[ticker])

def alpha_vantage_get_ticker_data(ticker, time="1min", year=1, month=1):
    """Function to get (ticker, year, month) data using alpha vantage's time series intraday extended API

    The request is repeated until a usable answer arrives. Responses are
    classified by their length, as the original heuristic did:

    * exactly 236 characters: treated as "too many requests"; wait and retry.
    * at most 243 characters: treated as an error reply; the slice is skipped.
    * anything longer: parsed as CSV.

    Args:
        ticker: Symbol to request.
        time: Bar interval sent as the ``interval`` query parameter.
        year: Year part of the ``slice`` query parameter.
        month: Month part of the ``slice`` query parameter.

    Returns:
        The DataFrame built by ``csv_str_to_df``, or None when the slice is
        skipped.
    """
    CSV_URL = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={ticker}&interval={time}&slice=year{year}month{month}&apikey={ALPHA_VANTAGE_API_KEY}"

    rate_limit_length = 236   # length of the "too many requests" reply
    error_max_length = 243    # replies this short are treated as errors
    retry_wait = 60           # seconds to wait before asking again
    request_timeout = 60      # seconds before a stalled request is abandoned

    # One session for all attempts so the connection can be reused.
    with requests.Session() as s:
        while True:
            try:
                # Without a timeout a stalled connection would block forever.
                download = s.get(CSV_URL, timeout=request_timeout)
            except requests.exceptions.Timeout:
                print(f"Timed out getting {ticker}, y{year}, m{month}. Retrying")
                sleep(retry_wait)
                continue

            decoded_content = download.content.decode('utf-8')
            print(f"ticker: {ticker}, y{year} m{month}; response length: {len(decoded_content)}")

            if len(decoded_content) == rate_limit_length:
                # API too many requests
                sleep(retry_wait)
            elif len(decoded_content) <= error_max_length:
                # Token doesn't exist or something
                print(f"Error getting {ticker}, y{year}, m{month}. We are skipping")
                print(decoded_content)
                return None
            else:
                return csv_str_to_df(decoded_content, ticker)


def use_alpha_vantage(tickers, time= "1min", out_file="realdata.csv", years=2, months=12):
    """Function to get multiple full tickers data using alpha vantage's time series intraday extended API

    Every (year, month) slice is requested for each ticker. The slices of one
    ticker are stacked row-wise, and the tickers are then joined column-wise
    on their timestamps.

    Args:
        tickers: Iterable of symbols to download.
        time: Bar interval forwarded to ``alpha_vantage_get_ticker_data``.
        out_file: CSV path the combined frame is written to via ``write_df``.
        years: Number of year slices to request (1..years). Defaults to 2.
        months: Number of month slices per year (1..months). Defaults to 12.

    Returns:
        The combined DataFrame. When no slice could be retrieved an empty
        DataFrame is returned and nothing is written.
    """
    dfs = []
    for ticker in tickers:
        t_dfs = []
        for year in range(1, years + 1):
            for month in range(1, months + 1):
                df_temp = alpha_vantage_get_ticker_data(ticker, time=time, year=year, month=month)
                if df_temp is not None:
                    t_dfs.append(df_temp)

        if t_dfs:
            dfs.append(pd.concat(t_dfs, axis=0))
        else:
            print(f"Skipped {ticker}.")

    if not dfs:
        # pd.concat raises on an empty list, and writing would archive the
        # existing file only to replace it with nothing.
        print("No data retrieved; nothing written.")
        return pd.DataFrame()

    df = pd.concat(dfs, axis=1, sort=True)
    df.index.name = 'date'

    write_df(df, out_file)

    return df

In [None]:
# Alpaca connection settings, read from the environment (None when unset).
APCA_API_BASE_URL, APCA_API_KEY_ID, APCA_API_SECRET_KEY = (
    os.getenv(var_name)
    for var_name in ("APCA_API_BASE_URL", "APCA_API_KEY_ID", "APCA_API_SECRET_KEY")
)

import alpaca_trade_api as tradeapi

def use_alpaca(tickers, alpaca, timeframe="1Minute", out_file="realdata_alp.csv", start="2017-01-01"):
    """Download bars for several tickers through an Alpaca client.

    Each ticker's bars are nested under the ticker as an outer column level,
    and the tickers are then joined column-wise on their timestamps.

    Args:
        tickers: Iterable of symbols to download.
        alpaca: Client object exposing ``get_bars(symbol, timeframe, start)``
            whose result has a ``.df`` DataFrame attribute.
        timeframe: Bar size forwarded to ``get_bars``.
        out_file: CSV path written via ``write_df``; pass None to skip writing.
        start: Start date forwarded to ``get_bars``.

    Returns:
        The combined DataFrame with its index named "date". For an empty
        ``tickers`` an empty DataFrame is returned and nothing is written.
    """
    frames = []
    for ticker in tickers:
        print("Getting", ticker)
        bars = alpaca.get_bars(ticker, timeframe, start).df
        print("Recieved", ticker)
        bars.index.name = 'date'
        frames.append(pd.concat([bars], axis=1, keys=[ticker]))

    if not frames:
        # pd.concat raises on an empty list; return an empty frame instead
        # and leave any existing output file alone.
        print("No tickers requested; nothing written.")
        return pd.DataFrame()

    df = pd.concat(frames, axis=1, sort=True)
    df.index.name = 'date'

    if out_file is not None:
        write_df(df, out_file)

    return df

In [None]:
# Polygon API key, read from the environment (None when unset).
POLYGON_API_KEY = os.getenv("POLYGON_API_KEY")

In [None]:
# Alpaca
# Build the REST client from the APCA_* settings read from the environment.
alpaca = tradeapi.REST(key_id=APCA_API_KEY_ID, secret_key=APCA_API_SECRET_KEY, base_url=APCA_API_BASE_URL)
# Fetch the account and print its status.
account = alpaca.get_account()
print(account.status)

# Extra tickers fetched through Alpaca; the combined frame is bound to `df`
# and also written to realdata_alp.csv.
extended_tickers = ["TTE", "EQNR", "EOG", "ENB", "SLB"]
df = use_alpaca(extended_tickers, alpaca, timeframe="1Minute", out_file="realdata_alp.csv")
# df = use_alpaca([], alpaca, out_file=None, start="2022-11-09    ")


In [None]:
# Alpha Vantage
# Download every ticker in `tickers` and write the result to realdata.csv.
# NOTE(review): this rebinds `df`, replacing the frame assigned by the Alpaca
# cell if that cell ran earlier.
df = use_alpha_vantage(tickers, out_file="realdata.csv")

In [None]:
# ticker = "XOM"
# year = 1
# month = 1

# # Minute
# with requests.Session() as s:
#     download = s.get(f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={'CVX'}&interval={'1min'}&slice=year{'1'}month{'1'}&apikey={ALPHA_VANTAGE_API_KEY}")
#     decoded_content = download.content.decode('utf-8')

#     print(decoded_content)

# # Daily
# with requests.Session() as s:
#     ticker = "XOM"
#     download = s.get(f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/minute/2020-07-22/2020-07-22?adjusted=true&sort=asc&limit=5000&apiKey={POLYGON_API_KEY}")
#     decoded_content = download.content.decode('utf-8')

#     print(decoded_content)

# alpha_vantage_get_ticker_data("CVX")


# symbols = "XOM"
# timeframe = "1Minute"
# start = "2017-01-01"
# end = "2017-01-01"
# data = alpaca.get_bars(symbols, timeframe, start).df
# print(data.columns)
# df_new.to_csv("tsla_aapl.csv")
# write_df(df, out_file="realdata_alp.csv")
# df
# Preview the first rows of whichever frame is currently bound to `df`.
df.head()

##### Read Data From All-Data CSV (Multi Index Columns)

In [None]:
# Reload realdata.csv from disk; read_data rebuilds the tuple column labels.
df_all = read_data("realdata.csv")
# df = read_data("tsla_aapl.csv")
# Print the first rows and the column labels of both frames for comparison.
print(df_all.head())
print(df.head())
print(df_all.columns)
print(df.columns)

##### Concatenate two datasets

In [None]:
# Merge `df` into the all-data frame only when their column sets differ.
if not df.columns.equals(df_all.columns):
    # Keep the concatenated frame in `df_new` and write it as a separate
    # step, so `df_new` never depends on what write_df returns.
    df_new = pd.concat([df_all, df], axis=1)
    write_df(df_new, "realdata.csv")