### ***Grabbing Raw Historical Data from Alpaca API***

There's a pull limit for each request to the API, but this loop takes care of that and saves all the history to a CSV file. Just change the symbol, timeframe (or interval), start date, and end date. DON'T CHANGE THE LIMIT: it wouldn't be a big deal, but 10000 is the max, so changing it will only make the download slower. 

CHANGE THE DESIRED DESTINATION FOLDER IN 'folder' VARIABLE @ BOTTOM OF CELL

-- 

Alpaca Reference Data Request URL 

https://docs.alpaca.markets/reference/cryptobars-1 


In [None]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone

# Download historical crypto bars from the Alpaca market-data API and save
# them to a CSV file.
#
# Use these variables to set the parameters for the data you want to download.
# DON'T CHANGE THE LIMIT: 10000 is the maximum page size, so lowering it only
# makes the download slower.
symbol = "ETH/USD"
timeframe = "5Min"
limit = 10000
start_time = datetime(2020, 3, 22, tzinfo=timezone.utc)
end_time = datetime(2025, 3, 22, tzinfo=timezone.utc)  # your desired range
url = "https://data.alpaca.markets/v1beta3/crypto/us/bars?"

start_time_total = start_time  # Save the original start time

# Loop invariants: computed once instead of on every page.
total_duration = (end_time - start_time_total).total_seconds()
headers = {"accept": "application/json"}
timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

all_data = []
page_token = None  # Pagination cursor returned by the API; None on the first page.

while start_time < end_time:
    # The query window stays fixed for the whole download; only the page token
    # changes between requests. Paginating with the token (instead of
    # "last bar + 5 minutes") works for every timeframe and never skips or
    # duplicates bars, and passing "end" keeps the result inside the range.
    params = {
        "symbols": symbol,
        "start": start_time_total.strftime(timestamp_format),
        "end": end_time.strftime(timestamp_format),
        "limit": limit,
        "timeframe": timeframe,
    }
    if page_token:
        params["page_token"] = page_token

    response = requests.get(url, headers=headers, params=params, timeout=30)
    # Fail loudly on HTTP errors (rate limit, bad parameters, ...) instead of
    # hitting an opaque KeyError on the error payload below.
    response.raise_for_status()
    data = response.json()

    bars = (data.get("bars") or {}).get(symbol)
    if not bars:
        break

    all_data.extend(bars)

    # Advance the progress cursor to the last bar received so far.
    start_time = pd.to_datetime(bars[-1]["t"])

    # Track progress
    elapsed = (start_time - start_time_total).total_seconds()
    progress = (elapsed / total_duration) * 100
    print(f"Progress: {progress:.2f}%")

    page_token = data.get("next_page_token")
    if not page_token:
        # No further pages: the requested range has been fully downloaded.
        break

if not all_data:
    raise RuntimeError(
        f"No bars returned for {symbol} ({timeframe}) between "
        f"{start_time_total.isoformat()} and {end_time.isoformat()}"
    )

# Convert to properly formatted DataFrame
df = pd.DataFrame(all_data)
df["t"] = pd.to_datetime(df["t"])
df.set_index("t", inplace=True)
df.rename(
    columns={
        "o": "open",
        "h": "high",
        "l": "low",
        "c": "close",
        "v": "volume",
        "n": "trade_count",
        "vw": "vwap",
    },
    inplace=True,
)

# Save to CSV
folder = "C:\\Users\\braed\\quant-strategies-hub\\momentum-altdata-crypto"
# Create the destination up front so a long download is not lost to a
# missing-directory error at the very end.
os.makedirs(folder, exist_ok=True)
safe_symbol = symbol.replace("/", "_")  # "/" is not valid in a file name
filename = f"{safe_symbol}_{timeframe}.csv"
full_path = os.path.join(folder, filename)
df.to_csv(full_path)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Progress: 0.00%
Progress: 16.03%
Progress: 16.42%
Progress: 16.81%
Progress: 17.20%
Progress: 17.59%
Progress: 17.98%
Progress: 18.37%
Progress: 18.75%
Progress: 19.14%
Progress: 19.53%
Progress: 19.91%
Progress: 20.30%
Progress: 20.69%
Progress: 21.07%
Progress: 21.46%
Progress: 21.84%
Progress: 22.22%
Progress: 22.60%
Progress: 22.98%
Progress: 23.36%
Progress: 23.74%
Progress: 24.12%
Progress: 24.50%
Progress: 24.88%
Progress: 25.28%
Progress: 25.66%
Progress: 26.04%
Progress: 26.42%
Progress: 26.80%
Progress: 27.18%
Progress: 27.57%
Progress: 27.95%
Progress: 28.33%
Progress: 28.71%
Progress: 29.09%
Progress: 29.47%
Progress: 29.85%
Progress: 30.23%
Progress: 30.61%
Progress: 30.99%
Progress: 31.38%
Progress: 31.76%
Progress: 32.14%
Progress: 32.52%
Progress: 32.90%
Progress: 33.28%
Progress: 33.66%
Progress: 34.04%
Progress: 34.42%
Progress: 34.80%
Progress: 35.19%
Progress: 35.57%
Progress: 35.95%
Progress: 36.34%
Progress: 36.72%
Progress: 37.10%
Progress: 37.48%
Progress: 37.87