# Retrieve most recent EOD positions from Quote Media and store in h5 format

In [21]:
import pandas as pd
import numpy as np
import requests
import h5py
from datetime import datetime
import io
import zipfile

# Set up API key and base URL
API_KEY = "tw2sxkKZo_y1UvMcnSux"
BASE_URL = "https://data.nasdaq.com/api/v3/datatables/QUOTEMEDIA"

# Function to download data from API
def get_data(endpoint, params):
    url = f"{BASE_URL}/{endpoint}"
    params["api_key"] = API_KEY
    print(f"sending request {url}")
    response = requests.get(url, params=params)
    print(f"response {response} {response.json}")
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"API request failed with status code {response.status_code}")

# Function to download and process ZIP file
def download_and_process_zip(url):
    print(f"Downloading data from {url}")
    response = requests.get(url)
    if response.status_code == 200:
        z = zipfile.ZipFile(io.BytesIO(response.content))
        csv_filename = z.namelist()[0]  # Assume the first file in the ZIP is the CSV we want
        with z.open(csv_filename) as f:
            df = pd.read_csv(f)
        return df
    else:
        raise Exception(f"Failed to download ZIP file. Status code: {response.status_code}")

# Download tickers
print("Downloading tickers...")
tickers_response = get_data("TICKERS", {"qopts.export": "true"})
tickers_download_link = tickers_response['datatable_bulk_download']['file']['link']
tickers_df = download_and_process_zip(tickers_download_link)

# Download adjusted EOD price data
print("Downloading adjusted EOD price data...")
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")

params = {
    "date.gte": start_date,
    "date.lte": end_date,
    "qopts.columns": "ticker,date,adj_open,adj_high,adj_low,adj_close,adj_volume",
    "qopts.export": "true"
}

price_response = get_data("PRICES", params)
price_download_link = price_response['datatable_bulk_download']['file']['link']
prices_df = download_and_process_zip(price_download_link)

# Convert date column to datetime
prices_df["date"] = pd.to_datetime(prices_df["date"])
prices_df.rename(columns={"adj_open":'open','adj_high':'high','adj_low':'low','adj_close':'close','adj_volume':'volume'},inplace=True)
# Store data in H5 format
print("Storing data in H5 format...")
with h5py.File("quotemedia_eod_data.h5", "w") as f:
    # Store tickers data
    tickers_group = f.create_group("tickers")
    for column in tickers_df.columns:
        if tickers_df[column].dtype == 'object':
            # Convert string columns to ASCII
            ascii_values = [s.encode('ascii', 'ignore') if isinstance(s, str) else b'' for s in tickers_df[column].values]
            tickers_group.create_dataset(column, data=ascii_values, dtype=h5py.special_dtype(vlen=bytes))
        else:
            tickers_group.create_dataset(column, data=tickers_df[column].values)
    
    # Store prices data
    prices_group = f.create_group("prices")
    for column in prices_df.columns:
        if column == "date":
            prices_group.create_dataset(column, data=prices_df[column].astype(int))
        elif prices_df[column].dtype == 'object':
            # Convert string columns to ASCII
            ascii_values = [s.encode('ascii', 'ignore') if isinstance(s, str) else b'' for s in prices_df[column].values]
            prices_group.create_dataset(column, data=ascii_values, dtype=h5py.special_dtype(vlen=bytes))
        else:
            prices_group.create_dataset(column, data=prices_df[column].values)

print("Data successfully stored in quotemedia_eod_data.h5")


Downloading tickers...
sending request https://data.nasdaq.com/api/v3/datatables/QUOTEMEDIA/TICKERS
response <Response [200]> <bound method Response.json of <Response [200]>>
Downloading data from https://aws-gis-link-pro-us-east-1-datahub.s3.amazonaws.com/export/QUOTEMEDIA/TICKERS/QUOTEMEDIA_TICKERS_6d75499fefd916e54334b292986eafcc.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAX5EW3SB5DJBNMMH2%2F20241018%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241018T181745Z&X-Amz-Expires=1800&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEOP%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIHeIBoHP%2F%2BM8rtTgXYKIPL3hWLHokN34Jvdt1DvtyeJkAiEAo9rg%2BLsUcKNocn36ymnCb%2BXRleninoYgxe11kdol%2F0gqjAUISxACGgw1NDM2Mjk3NDIyMDIiDDb%2BhOqCjsPWr50TbCrpBPnKk%2FljdzLv81YG6AOdrpFRffvvMgO9TqvdF854jhJQxPxLrDxFXVp5riGwf0roXmx2FP3S0%2BUW%2F4bjbnha9wecgmX%2F1h3nEvT1AnXDSF8Be8DKZY0opppm7pgqZGJ6p9ZLlussCMexyJPQ1K02kjh58nmShVwYKF8WbioOJXcd5zZZrglh0DV2Q2UqY6rGMco%2FcKTQ1o3T8YAT%2BNH8Uw3xrBd%2FdBF%2BlANXGquK3Iso9%2

In [17]:
prices_df

Unnamed: 0,ticker,date,adj_open,adj_high,adj_low,adj_close,adj_volume
0,JTKWY,2022-03-11,6.1700,7.3200,5.7900,6.7200,9440097.0
1,JTKWY,2022-03-10,6.1600,6.1750,5.9350,6.0700,2261623.0
2,FG_1,2020-06-01,8.1000,8.3900,8.1000,8.3900,3086317.0
3,FLWS,2022-03-09,14.5700,14.9588,14.4100,14.4500,662492.0
4,RENW_,2020-01-29,21.9768,21.9900,21.9700,21.9900,319.0
...,...,...,...,...,...,...,...
44311754,NATO,2024-10-15,25.6000,25.6000,25.5800,25.5800,123.0
44311755,RDACU,2024-10-15,10.0100,10.0200,10.0100,10.0200,13427.0
44311756,STFS,2024-10-15,3.5400,3.5400,2.9900,3.0200,175495.0
44311757,UPB,2024-10-15,23.0600,23.6500,21.9500,23.0000,242502.0


In [16]:
tickers_df

Unnamed: 0,ticker,exchange,company_name
0,BIOA_WS,NYSE,BioAmber Inc. Warrant expiring May 9 2017
1,CDE_WS,NYSE,Coeur D'Alene Mines Corporation
2,FINQ,NASDAQ,Purefunds Solactive FinTech ETF
3,IMED,NASDAQ,PureFunds ETFx HealthTech ETF
4,CVETV,NASDAQ,Covetrus Inc.
...,...,...,...
22977,ATAKR,NASDAQ,Aurora Technology Acquisition Corp. Rights
22978,MURF,NASDAQ,Conduit Pharmaceuticals Inc Com
22979,RACY,NASDAQ,Relativity Acquisition Corp.
22980,ACAX,NASDAQ,Alset Capital Acquisition Corp.


In [5]:
#/home/morgan/repos/edge-seeker/.zipline/custom_data/quotemedia_eod_data.h5
import pandas as pd
from pathlib import Path
import warnings
import numpy as np
from tqdm import tqdm
import logging
import os
zipline_root = os.path.expanduser('~/repos/edge-seeker/.zipline')
custom_data_path = Path(zipline_root, 'custom_data')
print(f'custom_path {custom_data_path}')
pd.read_hdf(custom_data_path / 'quotemedia_eod_data.h5')

custom_path /home/morgan/repos/edge-seeker/.zipline/custom_data


ValueError: Dataset(s) incompatible with Pandas data types, not table, or no datasets found in HDF5 file.