In [9]:
import pandas as pd
import time
from datetime import datetime as dt, date, timedelta
from polygon import RESTClient
from secret import API_KEY

In [10]:
# Getting API key and client: the key is read from the local `secret` module
# (so it is not written in the notebook) and handed to the Polygon REST client
api_key = API_KEY
client = RESTClient(api_key)

In [11]:
# Query settings passed to every aggregate-bars request
ticker = 'AAPL'
multiplier = 1
timespan = 'minute'
adjusted = True
limit = 50000

# Back-to-back (from, to) date windows, one request per window.
# Each window starts the day after the previous one ends.
date_ranges = [
    # 2023
    ('2023-01-01', '2023-03-01'),
    ('2023-03-02', '2023-05-01'),
    ('2023-05-02', '2023-07-01'),
    ('2023-07-02', '2023-09-01'),
    ('2023-09-02', '2023-11-01'),
    ('2023-11-02', '2024-01-01'),
    # 2024
    ('2024-01-02', '2024-03-01'),
    ('2024-03-02', '2024-05-01'),
    ('2024-05-02', '2024-07-01'),
    ('2024-07-02', '2024-09-01'),
    ('2024-09-02', '2024-11-01'),
    ('2024-11-02', '2024-12-31'),
]

In [12]:
# Collects the bars returned for every date window
data_list = []

# One request per (from, to) window
for from_date, to_date in date_ranges:
    # Pause 12 s ahead of every call; the original note says this avoids an
    # error (presumably API rate limiting - confirm against your plan's limits)
    time.sleep(12)
    # Fetch the aggregate bars for this window
    agg = client.get_aggs(
        ticker=ticker, multiplier=multiplier, timespan=timespan,
        from_=from_date, to=to_date,
        adjusted=adjusted, limit=limit,
    )

    # Append this window's bars to the running list
    data_list += agg

# Build a single dataframe from everything collected
data = pd.DataFrame(data_list)

In [13]:
# Parse the epoch-millisecond timestamps as timezone-aware UTC datetimes
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms', utc=True)

# The session bounds below are New York wall-clock times, so the calendar date
# and clock time must come from the exchange-local timestamp, not from UTC.
# Comparing UTC clock times against 09:30-16:00 would keep mostly pre-market
# bars. tz_convert also takes care of daylight-saving changes.
timestamp_et = data['timestamp'].dt.tz_convert('America/New_York')
data['date'] = timestamp_et.dt.date
data['time'] = timestamp_et.dt.time

# Regular-session open/close, expressed in exchange-local (Eastern) time
market_open = dt.strptime('09:30:00', '%H:%M:%S').time()
market_close = dt.strptime('16:00:00', '%H:%M:%S').time()

# Keep regular-session bars only (both bounds inclusive, as before), then
# remove duplicate rows, drop the unused 'otc' column and renumber the index.
# NOTE(review): the 16:00 bar is kept by the inclusive upper bound - confirm
# that this is wanted.
data_market_hours = (
    data[(data['time'] >= market_open) & (data['time'] <= market_close)]
    .drop_duplicates()
    .drop(columns=['otc'])
    .reset_index(drop=True)
)

# Later cells read the cleaned frame through `data`
data = data_market_hours.copy()

In [14]:
# Write the cleaned frame to disk; the row index is written as the first column
data.to_csv(path_or_buf='raw_data.csv', index=True)