In [1]:
import pandas as pd
import numpy as np

from dotenv import load_dotenv
import os
from pathlib import Path
import requests
import sys

sys.path.append('../') # Change the python path at runtime
from src.utils import path as path_yq


In [2]:
# Directory the kernel is currently running in; a later cell passes it to
# path_yq.get_root_dir.
cur_dir = Path.cwd()

# Load variables from a .env file (if any) before reading the key from the
# process environment. The key is None when the variable is not set.
load_dotenv()
POLYGON_API_KEY = os.getenv("POLYGON_API_KEY")



# Fetch Tick Data
- TODO: Add an option to load previously saved data from the `data/` directory instead of fetching it from the API every time

In [18]:
# TODO: Scale this up to n years
# Query parameters for the daily aggregate bars request.
ticker = "NVDA"
max_limit = 50000
start_date = "2000-01-01"
end_date = "2024-12-31"

# NOTE: the API key is embedded in this URL, so avoid displaying `api_url`
# in cell output that may be shared or committed.
api_url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit={max_limit}&apiKey={POLYGON_API_KEY}"

# Ask before spending an API call. The answer is stored under its own name
# rather than `str`, which would shadow the builtin for the rest of the session.
confirmation = input("Confirm?")

if confirmation == "Y":
    # The timeout keeps the kernel from hanging forever if the server never answers.
    resp = requests.get(api_url, timeout=30)
    print("Request made.")
else:
    # `resp` is left untouched here, so it may still hold a response from an
    # earlier run of this cell (or be undefined on a fresh kernel).
    print("Request skipped.")

Request made.


In [19]:
# Echo the response object so the notebook shows its HTTP status.
resp

<Response [200]>

In [20]:
# Final column order of the price frame; "Date" becomes the index below.
cols = ["Date", "Open", "High", "Low", "Close", "Volume", "VWAP", "Transactions"]

# Maps the short keys of each record in the response's `results` list to
# readable column names.
column_map = {
    't': 'Timestamp',
    'o': 'Open',
    'h': 'High',
    'l': 'Low',
    'c': 'Close',
    'n': 'Transactions', # Number of trades (market activity)
    'v': 'Volume', # Number of shares traded (intensity of the activity)
    'vw': 'VWAP'
}

if resp.status_code == 200:
    # `.get` returns None when the payload has no `results` key; treat that the
    # same as an empty list so the frame construction below cannot hit a
    # KeyError on the missing 'Timestamp' column.
    dict_list = resp.json().get('results') or []

    if dict_list:
        df = (
            pd.DataFrame(dict_list)
            .rename(columns=column_map)
            # 'Timestamp' is interpreted as milliseconds since the epoch;
            # normalize() resets the time-of-day to midnight (the values stay
            # datetimes, they are not converted to date objects).
            .assign(Date=lambda frame: pd.to_datetime(frame['Timestamp'], unit='ms').dt.normalize())
            .loc[:, cols]
            .set_index("Date")
        )
    else:
        # Leave `df` as it was rather than replacing it with an empty frame.
        print("Request succeeded but returned no results; df was not updated.")
else:
    print(f"Error fetching data: {resp.status_code}, {resp.text}")

In [21]:
# Display the parsed price frame, indexed by Date.
df


Unnamed: 0_level_0,Open,High,Low,Close,Volume,VWAP,Transactions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-04-04,267.28,275.5800,266.1300,273.60,39770782.0,272.5287,499185
2022-04-05,272.54,273.1900,258.2000,259.31,43654352.0,262.9166,639993
2022-04-06,249.34,253.0000,240.0300,244.07,70076148.0,245.2649,988183
2022-04-07,244.41,247.2200,234.7800,242.08,55799236.0,241.5490,714654
2022-04-08,239.17,239.2300,230.6201,231.19,52478064.0,233.2497,671401
...,...,...,...,...,...,...,...
2024-03-22,911.41,947.7799,908.3401,942.89,58641936.0,935.1787,1161163
2024-03-25,939.41,967.6599,935.1000,950.02,55204733.0,954.4802,1113327
2024-03-26,958.51,963.7500,925.0200,925.61,51164758.0,944.5258,984593
2024-03-27,931.12,932.4000,891.2300,902.50,58604623.0,903.2442,1189234


In [23]:
# Locate the project root via the project helper and build the cache file path
# from the query parameters: <root>/data/<ticker>_<start>_<end>.csv
root_dir = path_yq.get_root_dir(cur_dir=cur_dir)
csv_path = Path(root_dir) / "data" / f"{ticker}_{start_date}_{end_date}.csv"

# Get df
if "df" in globals():
    # A frame exists in this kernel session: persist it. Create the target
    # directory first so to_csv does not fail when it is missing.
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(csv_path)
else:
    print("df not defined, trying to fetch from csv")
    if csv_path.exists():
        # Restore the same layout the fetch path produces: "Date" as a parsed
        # datetime index instead of a plain string column.
        df = pd.read_csv(csv_path, index_col="Date", parse_dates=True)
    else:
        print(f"No cached csv found at {csv_path}; df is still undefined.")

Unnamed: 0_level_0,Open,High,Low,Close,Volume,VWAP,Transactions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-04-04,267.28,275.5800,266.1300,273.60,39770782.0,272.5287,499185
2022-04-05,272.54,273.1900,258.2000,259.31,43654352.0,262.9166,639993
2022-04-06,249.34,253.0000,240.0300,244.07,70076148.0,245.2649,988183
2022-04-07,244.41,247.2200,234.7800,242.08,55799236.0,241.5490,714654
2022-04-08,239.17,239.2300,230.6201,231.19,52478064.0,233.2497,671401
...,...,...,...,...,...,...,...
2024-03-22,911.41,947.7799,908.3401,942.89,58641936.0,935.1787,1161163
2024-03-25,939.41,967.6599,935.1000,950.02,55204733.0,954.4802,1113327
2024-03-26,958.51,963.7500,925.0200,925.61,51164758.0,944.5258,984593
2024-03-27,931.12,932.4000,891.2300,902.50,58604623.0,903.2442,1189234
