In [25]:
import pandas as pd
from datetime import datetime, timedelta
import holidays
import requests
import logging
import json
import time
from sec_api import QueryApi
from time import sleep
from collections import Counter

In [None]:
# Loading the dataset
file_path = "backtest_data_step4.parquet"
data = pd.read_parquet(file_path)

# News calendar -  06/03/2020 - 17/02/2025

## Macroevents
- ISM Services PMI, Debt Ceiling / Government shutdown Deadlines omitted.

### 1. FOMC meetings and IR decisions - From website https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm

In [28]:
# FOMC Meetings data
fomc_meetings = [
    {"Event_Type": "FOMC Meeting", "Date": "2020-03-15", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-03-19", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-03-23", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-03-31", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-04-29", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-06-10", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-07-29", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-09-16", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-11-05", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2020-12-16", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-01-27", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-03-17", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-04-28", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-06-16", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-07-28", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-09-22", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-11-03", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2021-12-15", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-01-26", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-03-16", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-05-04", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-06-15", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-07-27", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-09-21", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-11-02", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2022-12-14", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-02-01", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-03-22", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-05-03", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-06-14", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-07-26", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-09-20", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-11-01", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2023-12-13", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-01-31", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-03-20", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-05-01", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-06-12", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-07-31", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-09-18", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-11-07", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2024-12-18", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "FOMC Meeting", "Date": "2025-01-29", "Time": "14:00:00", "Ticker": None},
]

### 2. Federal reserve chairman speeches - https://www.federalreserve.gov/newsevents/speeches.htm#

In [29]:
# Chairman Speeches data
speeches = [
    {"Event_Type": "Chairman Speech", "Date": "2020-04-09", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2020-05-13", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2020-05-21", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2020-06-19", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2020-08-27", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2020-10-06", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-02-10", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-03-18", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-05-03", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-08-17", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-08-27", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-09-24", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-11-08", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-11-09", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2021-11-29", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-03-21", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-05-24", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-06-17", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-08-26", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-09-28", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2022-11-30", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-01-10", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-06-29", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-08-25", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-09-28", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-10-19", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-10-25", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-11-08", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-11-09", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2023-12-01", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-04-03", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-05-19", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-08-23", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-09-26", "Time": "10:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-09-30", "Time": "14:00:00", "Ticker": None},
    {"Event_Type": "Chairman Speech", "Date": "2024-11-14", "Time": "14:00:00", "Ticker": None},
]

### 3. U.S. Election Days - usa.gov/election-day, https://en.wikipedia.org/wiki/List_of_elections_in_the_United_States, https://www.britannica.com/topic/list-of-United-States-presidential-elections-2080835

In [30]:
elections = [{
        "Event_Type": "U.S. Presidential Election",
        "Date": "2020-11-03",
        "Time": "19:00:00",
        "Ticker": None,
    },
    {
        "Event_Type": "U.S. Midterm Election",
        "Date": "2022-11-08",
        "Time": "19:00:00",
        "Ticker": None,
    },
    {
        "Event_Type": "U.S. Presidential Election",
        "Date": "2024-11-05",
        "Time": "19:00:00",
        "Ticker": None,
    }
]

### 4. Non-Farm Payrolls, CPI/PPI Releases, GDP Reports, ISM Manufacturing PMI - FRED API 

In [None]:
# FRED API key
API_KEY = "#########################"
BASE_URL = "https://api.stlouisfed.org/fred/"

# logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [32]:
# FRED release IDs for high-impact events
RELEASE_IDS = {
    "Employment Situation": {"id": 50, "event_type": "Non-Farm Payrolls", "time": "08:30:00"},
    "Consumer Price Index": {"id": 10, "event_type": "CPI Release", "time": "08:30:00"},
    "Gross Domestic Product": {"id": 53, "event_type": "GDP Report", "time": "08:30:00"}
}

In [None]:
def get_macro_events(start_date="2020-03-06", end_date="2025-02-15"):
    """Fetch high-impact macro events from FRED API and return as a single list of dictionaries."""
    events = []
    
    for release_name, info in RELEASE_IDS.items():
        release_id = info["id"]
        event_type = info["event_type"]
        event_time = info["time"]
        
        logger.info(f"Fetching release dates for {release_name} (ID: {release_id})")
        url = f"{BASE_URL}release/dates"
        params = {
            "api_key": API_KEY,
            "file_type": "json",
            "release_id": release_id,
            "realtime_start": start_date,
            "realtime_end": end_date,
            "limit": 10000,
            "sort_order": "asc",
            "include_release_dates_with_no_data": "true"
        }
        
        try:
            response = requests.get(url, params=params)
            response.raise_for_status()
            data = response.json()
            releases = data.get("release_dates", [])
            
            for release in releases:
                event_date = release["date"]
                if start_date <= event_date <= end_date:
                    events.append({
                        "Event_Type": event_type,
                        "Date": event_date,
                        "Time": event_time,
                        "Ticker": None
                    })
        except requests.RequestException as e:
            logger.error(f"Error fetching release dates for release_id {release_id}: {e}")
            continue
    
    # Sorting events by date
    events = sorted(events, key=lambda x: x["Date"])
    
    # first 10 events and total count
    print("\nMacro Events List (first 10):")
    for event in events[:10]:
        print(event)
    print(f"\nTotal number of events: {len(events)}")
    
    # Saving to JSON
    with open("macro_events.json", "w") as f:
        json.dump(events, f, indent=4)
    logger.info("Macro events saved to macro_events.json")
    
    return events

if __name__ == "__main__":
    macro_events = get_macro_events()

2025-04-25 18:18:57,829 - INFO - Fetching release dates for Employment Situation (ID: 50)


2025-04-25 18:18:58,292 - INFO - Fetching release dates for Consumer Price Index (ID: 10)
2025-04-25 18:18:58,527 - INFO - Fetching release dates for Gross Domestic Product (ID: 53)
2025-04-25 18:18:58,788 - INFO - Macro events saved to macro_events.json



Macro Events List (first 10):
{'Event_Type': 'Non-Farm Payrolls', 'Date': '2020-03-06', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'CPI Release', 'Date': '2020-03-11', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'GDP Report', 'Date': '2020-03-26', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'Non-Farm Payrolls', 'Date': '2020-04-03', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'CPI Release', 'Date': '2020-04-10', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'GDP Report', 'Date': '2020-04-29', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'Non-Farm Payrolls', 'Date': '2020-05-08', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'Non-Farm Payrolls', 'Date': '2020-05-11', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'CPI Release', 'Date': '2020-05-12', 'Time': '08:30:00', 'Ticker': None}
{'Event_Type': 'GDP Report', 'Date': '2020-05-28', 'Time': '08:30:00', 'Ticker': None}

Total number of events: 195


### 5. ECB/BOE/BOJ Policy Decisions / Not meetings only decisions
- https://www.ecb.europa.eu/press/press_conference/visual-mps/2025/html/mopo_statement_explained_april.en.html
- https://www.bankofengland.co.uk/news/events
- https://www.boj.or.jp/en/mopo/mpmsche_minu/index.htm


In [34]:
ecb_boe_boj_policy_decisions = [
    # ECB Policy Decisions (2020-03-06 to 2025-02-15)
    {"Event_Type": "ECB Policy Decision", "Date": "2020-03-12", "Time": "12:45:00", "Ticker": None},  # COVID-19 measures
    {"Event_Type": "ECB Policy Decision", "Date": "2020-04-30", "Time": "11:45:00", "Ticker": None},  # PEPP expansion
    {"Event_Type": "ECB Policy Decision", "Date": "2020-06-04", "Time": "11:45:00", "Ticker": None},  # PEPP increased
    {"Event_Type": "ECB Policy Decision", "Date": "2020-07-16", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2020-09-10", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2020-10-29", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2020-12-10", "Time": "12:45:00", "Ticker": None},  # PEPP extended
    {"Event_Type": "ECB Policy Decision", "Date": "2021-01-21", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-03-11", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-04-22", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-06-10", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-07-22", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-09-09", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-10-28", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2021-12-16", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2022-02-03", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2022-03-10", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2022-04-14", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2022-06-09", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2022-07-21", "Time": "11:45:00", "Ticker": None},  # First rate hike
    {"Event_Type": "ECB Policy Decision", "Date": "2022-09-08", "Time": "11:45:00", "Ticker": None},  # 0.75% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2022-10-27", "Time": "11:45:00", "Ticker": None},  # 0.75% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2022-12-15", "Time": "12:45:00", "Ticker": None},  # 0.5% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-02-02", "Time": "12:45:00", "Ticker": None},  # 0.5% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-03-16", "Time": "12:45:00", "Ticker": None},  # 0.5% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-05-04", "Time": "11:45:00", "Ticker": None},  # 0.25% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-06-15", "Time": "11:45:00", "Ticker": None},  # 0.25% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-07-27", "Time": "11:45:00", "Ticker": None},  # 0.25% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-09-14", "Time": "11:45:00", "Ticker": None},  # 0.25% hike
    {"Event_Type": "ECB Policy Decision", "Date": "2023-10-26", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2023-12-14", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2024-01-25", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2024-03-07", "Time": "12:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2024-04-11", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2024-06-06", "Time": "11:45:00", "Ticker": None},  # 0.25% cut
    {"Event_Type": "ECB Policy Decision", "Date": "2024-07-18", "Time": "11:45:00", "Ticker": None},
    {"Event_Type": "ECB Policy Decision", "Date": "2024-09-12", "Time": "11:45:00", "Ticker": None},  # 0.25% cut
    {"Event_Type": "ECB Policy Decision", "Date": "2024-10-17", "Time": "11:45:00", "Ticker": None},  # 0.25% cut
    {"Event_Type": "ECB Policy Decision", "Date": "2024-12-12", "Time": "12:45:00", "Ticker": None},  # 0.25% cut
    {"Event_Type": "ECB Policy Decision", "Date": "2025-01-30", "Time": "12:45:00", "Ticker": None},  # 0.25% cut
    # BOE Policy Decisions (2020-03-06 to 2025-02-15)
    {"Event_Type": "BOE Policy Decision", "Date": "2020-03-11", "Time": "12:00:00", "Ticker": None},  # Emergency cut to 0.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2020-03-26", "Time": "12:00:00", "Ticker": None},  # Cut to 0.1%, QE expansion
    {"Event_Type": "BOE Policy Decision", "Date": "2020-05-07", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2020-06-18", "Time": "12:00:00", "Ticker": None},  # QE increased
    {"Event_Type": "BOE Policy Decision", "Date": "2020-08-06", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2020-09-17", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2020-11-05", "Time": "12:00:00", "Ticker": None},  # QE increased
    {"Event_Type": "BOE Policy Decision", "Date": "2020-12-17", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-02-04", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-03-18", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-05-06", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-06-24", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-08-05", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-09-23", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-11-04", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2021-12-16", "Time": "12:00:00", "Ticker": None},  # Hike to 0.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-02-03", "Time": "12:00:00", "Ticker": None},  # Hike to 0.5%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-03-17", "Time": "12:00:00", "Ticker": None},  # Hike to 0.75%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-05-05", "Time": "12:00:00", "Ticker": None},  # Hike to 1%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-06-16", "Time": "12:00:00", "Ticker": None},  # Hike to 1.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-08-04", "Time": "12:00:00", "Ticker": None},  # Hike to 1.75%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-09-22", "Time": "12:00:00", "Ticker": None},  # Hike to 2.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-11-03", "Time": "12:00:00", "Ticker": None},  # Hike to 3%
    {"Event_Type": "BOE Policy Decision", "Date": "2022-12-15", "Time": "12:00:00", "Ticker": None},  # Hike to 3.5%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-02-02", "Time": "12:00:00", "Ticker": None},  # Hike to 4%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-03-23", "Time": "12:00:00", "Ticker": None},  # Hike to 4.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-05-11", "Time": "12:00:00", "Ticker": None},  # Hike to 4.5%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-06-22", "Time": "12:00:00", "Ticker": None},  # Hike to 5%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-08-03", "Time": "12:00:00", "Ticker": None},  # Hike to 5.25%
    {"Event_Type": "BOE Policy Decision", "Date": "2023-09-21", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2023-11-02", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2023-12-14", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-02-01", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-03-21", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-05-09", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-06-20", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-08-01", "Time": "12:00:00", "Ticker": None},  # Cut to 5%
    {"Event_Type": "BOE Policy Decision", "Date": "2024-09-19", "Time": "12:00:00", "Ticker": None},
    {"Event_Type": "BOE Policy Decision", "Date": "2024-11-07", "Time": "12:00:00", "Ticker": None},  # Cut to 4.75%
    {"Event_Type": "BOE Policy Decision", "Date": "2024-12-19", "Time": "12:00:00", "Ticker": None},  # Cut to 4.5%
    # BOJ Policy Decisions (2020-03-06 to 2025-02-15, prioritizing Outlook Reports and major changes)
    {"Event_Type": "BOJ Policy Decision", "Date": "2020-03-16", "Time": "03:00:00", "Ticker": None},  # Emergency QE
    {"Event_Type": "BOJ Policy Decision", "Date": "2020-04-27", "Time": "03:00:00", "Ticker": None},  # Enhanced QE
    {"Event_Type": "BOJ Policy Decision", "Date": "2020-07-15", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2020-10-29", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2021-01-21", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2021-04-27", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2021-07-16", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2021-10-28", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2022-01-18", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2022-04-28", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2022-07-21", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2022-10-28", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2022-12-20", "Time": "03:00:00", "Ticker": None},  # YCC adjustment
    {"Event_Type": "BOJ Policy Decision", "Date": "2023-01-18", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2023-04-28", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2023-07-28", "Time": "03:00:00", "Ticker": None},  # YCC tweak
    {"Event_Type": "BOJ Policy Decision", "Date": "2023-10-31", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2024-01-23", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2024-03-19", "Time": "03:00:00", "Ticker": None},  # End negative rates
    {"Event_Type": "BOJ Policy Decision", "Date": "2024-04-26", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2024-07-31", "Time": "03:00:00", "Ticker": None},  # Rate hike to 0.25%
    {"Event_Type": "BOJ Policy Decision", "Date": "2024-10-31", "Time": "03:00:00", "Ticker": None},  # Outlook Report
    {"Event_Type": "BOJ Policy Decision", "Date": "2025-01-24", "Time": "03:00:00", "Ticker": None}   # Outlook Report
]

In [None]:
with open('macro_events.json', 'r') as file:
    macro_events = json.load(file)

# Combining lists of dictionaries into a single list
all_events = (
    fomc_meetings +
    speeches +
    elections +
    macro_events +
    ecb_boe_boj_policy_decisions
)

# DataFrame from the combined list
macro_news_df = pd.DataFrame(all_events)
# Ensuring the columns are in the desired order
macro_news_df = macro_news_df[['Event_Type', 'Date', 'Time', 'Ticker']]
# Performing date and time transformation
macro_news_df['Date'] = pd.to_datetime(macro_news_df['Date'] + ' ' + macro_news_df['Time'])
macro_news_df['Time'] = macro_news_df['Date'].dt.strftime('%H:%M:%S')
# Sorting by Date for consistency
macro_news_df = macro_news_df.sort_values('Date')
# Resetting index for clean DataFrame
macro_news_df = macro_news_df.reset_index(drop=True)

In [49]:
macro_news_df.to_csv("macro_news_df.csv", index=False)

## Microevents
- SEC Filings, Stock Buybacks, Stock Splits omitted.

In [None]:
unique_tickers = data['Ticker'].unique().tolist()

print(f"Number of unique tickers: {len(unique_tickers)}")
ticker_list = unique_tickers

df = pd.DataFrame({
    "Ticker": ticker_list,
    "Date": "",
    "Time": "",
    "Event_Type": ""
})

# Saving locally as JSON
local_file = "ticker_dataset.json"
df.to_json(local_file, orient="records", indent=4)

Number of unique tickers: 4798


### 1. Earnings Reports
- https://www.dolthub.com/repositories/post-no-preference/earnings/data/master/earnings_calendar

In [None]:
# Loading the CSV
df = pd.read_csv("earnings_calendar_dolthub.csv")

# Filtering for your date range and tickers
df = df[
    (df["date"] >= "2020-03-06") &
    (df["date"] <= "2025-02-15") &
    (df["act_symbol"].isin(ticker_list))
]

# Map 'when' to Time in HH:MM:SS format
def map_time(when_str):
    if pd.isna(when_str):  # Handling NaN values
        return "00:00:00"
    when_str = str(when_str)
    if "After market close" in when_str:
        return "16:30:00"  # Post-market, 4:30 PM ET
    elif "Before market open" in when_str:
        return "07:00:00"  # Pre-market, 7:00 AM ET
    else:
        return "00:00:00"  # Default if timing unclear

df["Time"] = df["when"].apply(map_time)

# Formatting the calendar
df["Event_Type"] = "Earnings Report"
df["Ticker"] = df["act_symbol"]
df["Date"] = df["date"] + " 00:00:00"

# Selecting and reordering columns : Event_Type, Date, Time, Ticker
earnings_calendar = df[["Event_Type", "Date", "Time", "Ticker"]]

# Sorting by Date and Ticker
earnings_calendar = earnings_calendar.sort_values(["Date", "Ticker"])

# Saving
earnings_calendar.to_csv("earnings_calendar.csv", index=False)

### 2. SEC 8 K Fillings - Earnings Guidance and Regulation FD Disclosures
https://sec-api.io/, https://www.sec.gov/search-filings
Sec api key - 106775ac5034e56b271d6a8f4819c8520220b113bac7039b0a641c70c772a251

In [None]:
# Initializing
query_api = QueryApi(api_key="106775ac5034e56b271d6a8f4819c8520220b113bac7039b0a641c70c772a251")

# Normalizing ticker list to uppercase
ticker_list = [ticker.upper() for ticker in ticker_list]

# Initializing the list
earnings_guidance_reg_fd_disclosures = []  # Updated variable name

# Date range and yearly buckets
start_date = datetime(2020, 3, 6)
end_date = datetime(2025, 2, 17)
current_date = start_date

# Batch size (adjusted to stay under 3,500 char limit)
batch_size = 350  # 4,798 ÷ 350 = ~14 batches

# Looping through yearly buckets
call_count = 0
while current_date < end_date:
    # Date range for the current year
    year_end = datetime(current_date.year + 1, 1, 1) - timedelta(days=1)
    if year_end > end_date:
        year_end = end_date
    date_range = f"filedAt:[{current_date.strftime('%Y-%m-%d')} TO {year_end.strftime('%Y-%m-%d')}]"

    print(f"\n=== Processing Year: {current_date.strftime('%Y')} ===")
    print(f"Date Range: {date_range}")

    # Processing tickers in batches
    for i in range(0, len(ticker_list), batch_size):
        batch_tickers = ticker_list[i:i + batch_size]
        print(f"\nBatch {i//batch_size}: Tickers = {batch_tickers[:5]}... (Total {len(batch_tickers)} tickers)")

        # Constructing query with broader item filtering (only 2.02 and 7.01)
        query = {
            "query": f"formType:\"8-K\" AND (items:\"2.02\" OR items:\"7.01\") AND ticker:({' OR '.join(batch_tickers)}) AND ({date_range})",
            "from": "0",
            "size": "50",
            "sort": [{"filedAt": {"order": "desc"}}]
        }

        # Pagination: 1 call per batch to fit within 100 calls
        max_pages = 1
        for page in range(max_pages):
            query["from"] = str(page * 50)
            try:
                filings = query_api.get_filings(query)
                call_count += 1
                total_filings = filings["total"]["value"]
                print(f"Batch {i//batch_size} in {current_date.strftime('%Y')}, Page {page}: Total Filings = {total_filings}, Filings Returned = {len(filings['filings'])}")

                if len(filings['filings']) == 0:
                    print("No filings returned for this batch.")
                    continue

                for idx, filing in enumerate(filings["filings"]):
                    ticker = filing["ticker"].upper()
                    print(f"\nFiling {idx + 1}/{len(filings['filings'])} in Batch {i//batch_size}:")
                    print(f"  Ticker: {ticker}")
                    print(f"  Filed At: {filing['filedAt']}")
                    print(f"  Form Type: {filing['formType']}")
                    print(f"  Description: {filing['description']}")
                    print(f"  Link to Filing: {filing['linkToFilingDetails']}")

                    items = filing.get("items", [])
                    print(f"  Items (Raw): {items}")

                    # Robust parsing of items
                    parsed_items = []
                    for item in items:
                        item_number = item.split(":")[0].strip() if ":" in item else item.strip()
                        number = item_number.replace("Item ", "").strip()
                        parsed_items.append(number)
                    print(f"  Parsed Items: {parsed_items}")
                    print(f"  Parsed Items (Repr): {[repr(item) for item in parsed_items]}")

                    # Debugging condition evaluation
                    event_type = None
                    event_subtype = None
                    print(f"  Checking for '2.02' in {parsed_items}: {'2.02' in parsed_items}")
                    if "2.02" in parsed_items:
                        event_type = "Earnings Guidance"
                        event_subtype = "Earnings Release"
                    print(f"  Checking for '7.01' in {parsed_items}: {'7.01' in parsed_items}")
                    if "7.01" in parsed_items and not event_type:
                        event_type = "Regulation FD Disclosure"
                        description = filing.get("description", "").lower()
                        if "investor day" in description or "capital markets day" in description or "investor conference" in description:
                            event_subtype = "Hosted Investor Event"
                        else:
                            for item in items:
                                if "7.01" in item:
                                    item_desc = item.lower()
                                    if "investor day" in item_desc or "capital markets day" in item_desc or "investor conference" in item_desc:
                                        event_subtype = "Hosted Investor Event"
                                        break
                                    elif "press release" in item_desc or "announc" in item_desc:
                                        event_subtype = "Press Release"
                                        break
                            if not event_subtype:
                                event_subtype = "Other Reg FD Disclosure"

                    print(f"  Event Type Determined: {event_type if event_type else 'None'}")
                    print(f"  Event Subtype Determined: {event_subtype if event_subtype else 'None'}")

                    if event_type:
                        filed_at = pd.to_datetime(filing["filedAt"])
                        date = filed_at.strftime("%Y-%m-%d 00:00:00")
                        time = filed_at.strftime("%H:%M:%S")
                        earnings_guidance_reg_fd_disclosures.append({
                            "Date": date,
                            "Time": time,
                            "Event_Type": event_type,
                            "Event_Subtype": event_subtype,
                            "Ticker": ticker,
                            "Items_Details": items,
                            "Description": filing.get("description", ""),
                            "Link": filing.get("linkToFilingDetails", "")
                        })
                        print(f"  Event Added: {earnings_guidance_reg_fd_disclosures[-1]}")

                if page * 50 + 50 >= min(total_filings, 10000):
                    print("Pagination stopped: Reached end of filings.")
                    break

            except Exception as e:
                print(f"Error processing page {page} for batch {i//batch_size} in {current_date.strftime('%Y')}: {e}")
                sleep(5)
                break

            sleep(1)

    # Moving to next year
    current_date = datetime(year_end.year + 1, 1, 1)

# Saving to a file or print
print(f"Total API Calls Made: {call_count}")
print(f"Total Events: {len(earnings_guidance_reg_fd_disclosures)}")
if earnings_guidance_reg_fd_disclosures:
    print("First 5 Events:")
    for event in earnings_guidance_reg_fd_disclosures[:5]:
        print(event)
    print("\nAll Events:")
    for idx, event in enumerate(earnings_guidance_reg_fd_disclosures):
        print(f"Event {idx + 1}: {event}")
else:
    print("No events captured.")


=== Processing Year: 2020 ===
Date Range: filedAt:[2020-03-06 TO 2020-12-31]

Batch 0: Tickers = ['AACBU', 'AACG', 'AADI', 'AADR', 'AAL']... (Total 350 tickers)
Batch 0 in 2020, Page 0: Total Filings = 639, Filings Returned = 50

Filing 1/50 in Batch 0:
  Ticker: AIMD
  Filed At: 2020-12-30T15:21:07-05:00
  Form Type: 8-K
  Description: Form 8-K - Current report - Item 1.01 Item 3.02 Item 7.01 Item 9.01
  Link to Filing: https://www.sec.gov/Archives/edgar/data/1014763/000165495420014016/amar_8k.htm
  Items (Raw): ['Item 1.01: Entry into a Material Definitive Agreement', 'Item 3.02: Unregistered Sales of Equity Securities', 'Item 7.01: Regulation FD Disclosure', 'Item 9.01: Financial Statements and Exhibits']
  Parsed Items: ['1.01', '3.02', '7.01', '9.01']
  Parsed Items (Repr): ["'1.01'", "'3.02'", "'7.01'", "'9.01'"]
  Checking for '2.02' in ['1.01', '3.02', '7.01', '9.01']: False
  Checking for '7.01' in ['1.01', '3.02', '7.01', '9.01']: True
  Event Type Determined: Regulation FD 

In [None]:
# Saving earnings_guidance_reg_fd_disclosures to a JSON file
#with open("earnings_guidance_reg_fd_disclosures.json", "w") as f:
    #json.dump(earnings_guidance_reg_fd_disclosures, f, indent=4)

In [None]:
# Loading the earnings_guidance_reg_fd_disclosures from JSON
with open("earnings_guidance_reg_fd_disclosures.json", "r") as f:
    earnings_guidance_reg_fd_disclosures = json.load(f)

In [None]:
# Creating the filings list
filings = [
    {
        "Event_Type": event["Event_Type"],
        "Date": event["Date"],
        "Time": event["Time"],
        "Ticker": event["Ticker"]
    }
    for event in earnings_guidance_reg_fd_disclosures
]

In [None]:
# Converting filings to a DataFrame for easier comparison
filings_df = pd.DataFrame(filings)

# Identifying overlaps
# Filtering for Earnings Guidance events in filings and Earnings Reports in earnings_calendar
earnings_guidance_df = filings_df[filings_df["Event_Type"] == "Earnings Guidance"]
earnings_reports_df = earnings_calendar[earnings_calendar["Event_Type"] == "Earnings Report"]

# Merging to find overlaps on Ticker and Date
overlaps = pd.merge(
    earnings_guidance_df,
    earnings_reports_df,
    on=["Ticker", "Date"],
    how="inner",
    suffixes=("_filings", "_calendar")
)

# Printing the overlaps (as before)
if not overlaps.empty:
    print(f"Found {len(overlaps)} overlapping records (same Ticker and Date):")
    for idx, row in overlaps.iterrows():
        print(f"\nOverlap {idx + 1}:")
        print(f"  Ticker: {row['Ticker']}")
        print(f"  Date: {row['Date']}")
        print(f"  Earnings Guidance Time: {row['Time_filings']}")
        print(f"  Earnings Report Time: {row['Time_calendar']}")
        
        # Checking if the times are within the same hour
        guidance_time = pd.to_datetime(row["Time_filings"], format="%H:%M:%S")
        report_time = pd.to_datetime(row["Time_calendar"], format="%H:%M:%S")
        time_diff = abs((guidance_time - report_time).total_seconds()) / 3600  # Difference in hours
        if time_diff <= 1:
            print(f"  Times are within the same hour (difference: {time_diff:.2f} hours)")
        else:
            print(f"  Times are NOT within the same hour (difference: {time_diff:.2f} hours)")
else:
    print("No overlapping records found between Earnings Guidance and Earnings Reports.")

# Updating timestamps in earnings_calendar for overlapping records
# Creating a dictionary mapping (Ticker, Date) to the correct Time from filings
time_updates = {}
for _, row in overlaps.iterrows():
    ticker = row["Ticker"]
    date = row["Date"]
    correct_time = row["Time_filings"]
    time_updates[(ticker, date)] = correct_time

# Updating the Time column in earnings_calendar
earnings_calendar_updated = earnings_calendar.copy()
for idx, row in earnings_calendar_updated.iterrows():
    ticker = row["Ticker"]
    date = row["Date"]
    key = (ticker, date)
    if key in time_updates:
        earnings_calendar_updated.at[idx, "Time"] = time_updates[key]

print("\nUpdated earnings_calendar with corrected timestamps (first 5 rows):")
print(earnings_calendar_updated.head())
print(f"\nTotal records in updated earnings_calendar: {len(earnings_calendar_updated)}")
print("\nFirst 5 records in filings (unchanged):")
for event in filings[:5]:
    print(event)

# Saving the updated earnings_calendar to CSV
earnings_calendar_updated.to_csv("earnings_calendar_updated.csv", index=False)
print("\nSaved updated earnings_calendar to earnings_calendar_updated.csv")


=== Checking for Overlaps Between Earnings Guidance and Earnings Reports ===
Found 867 overlapping records (same Ticker and Date):

Overlap 1:
  Ticker: APDN
  Date: 2020-12-17 00:00:00
  Earnings Guidance Time: 16:06:03
  Earnings Report Time: 16:30:00
  Times are within the same hour (difference: 0.40 hours)

Overlap 2:
  Ticker: AOUT
  Date: 2020-12-15 00:00:00
  Earnings Guidance Time: 16:06:04
  Earnings Report Time: 16:30:00
  Times are within the same hour (difference: 0.40 hours)

Overlap 3:
  Ticker: ADBE
  Date: 2020-12-10 00:00:00
  Earnings Guidance Time: 08:04:03
  Earnings Report Time: 07:00:00
  Times are NOT within the same hour (difference: 1.07 hours)

Overlap 4:
  Ticker: ASO
  Date: 2020-12-10 00:00:00
  Earnings Guidance Time: 07:08:26
  Earnings Report Time: 07:00:00
  Times are within the same hour (difference: 0.14 hours)

Overlap 5:
  Ticker: ALOT
  Date: 2020-12-07 00:00:00
  Earnings Guidance Time: 07:39:31
  Earnings Report Time: 07:00:00
  Times are within

### 3. Product Launches / Service announcements / Partnerships and Milestones
GCP Vertex AI setup

In [None]:
with open('merged_product_launches.json', 'r') as file:
    product_launches = json.load(file)

# Creating product_launches list with required keys
product_launches = [
    {
        "Event_Type": item["Event_Type"],
        "Date": item["Date"],
        "Time": item["Time"],
        "Ticker": item["Ticker"],
        "Description": item["Description"]  
    }
    for item in product_launches
]

In [None]:
start_date = datetime.strptime("2020-01-01", "%Y-%m-%d")
end_date = datetime.strptime("2025-04-25", "%Y-%m-%d")

# Definining high-impact keywords
high_impact_keywords = [
    "acquisition", "merger", "business combination", "definitive agreement",
    "FDA", "NMPA", "approval", "topline results", "Phase 2", "Phase 3",
    "IPO", "financing", "million", "strategic collaboration", "Fast Track",
    "Breakthrough Therapy", "Orphan Drug", "clinical hold", "Complete Response Letter",
    "Emergency Use Authorization", "proof-of-concept", "strategic partnership",
    "joint venture", "discontinuation", "patent issuance", "licensing agreement",
    "regulatory clearance", "significant contract", "uplisting", "divestiture",
    "New Drug Application", "Priority Review", "strategic alliance", "reverse merger",
    "bankruptcy", "public offering", "delisting", "clinical data", "commercial launch",
    "clinical trial", "patient dosing", "study results", "drug designation", "investment",
    "asset sale", "collaboration agreement", "supply agreement", "contract award",
    "expansion plan"
]

# Creating filtered product_launches list
product_launches = [
    {
        "Event_Type": (
            "Acquisition" if any(keyword in item["Description"].lower() for keyword in ["acquisition", "merger", "divestiture", "business combination", "reverse merger", "asset sale"]) else
            "Clinical Trial" if any(keyword in item["Description"].lower() for keyword in ["phase", "topline", "clinical trial", "clinical data", "patient dosing", "study results"]) else
            "Regulatory Approval" if any(keyword in item["Description"].lower() for keyword in ["FDA", "NMPA", "approval", "New Drug Application", "Priority Review", "drug designation", "regulatory clearance"]) else
            item["Event_Type"]
        ),
        "Date": item["Date"],
        "Time": item["Time"],
        "Ticker": item["Ticker"]
    }
    for item in product_launches
    if (
        start_date <= datetime.strptime(item["Date"], "%Y-%m-%d") <= end_date and
        item["Ticker"] in ticker_list and
        (
            item["Event_Type"] in ["Product Launch", "Service Announcement"] or
            any(keyword in item["Description"].lower() for keyword in high_impact_keywords)
        )
    )
]

print(f"Filtered product_launches: {len(product_launches)}")
event_types = Counter(item["Event_Type"] for item in product_launches)
for event_type, count in event_types.items():
    print(f"{event_type}: {count} ({count/len(product_launches)*100:.2f}%)")

Filtered product_launches: 13357
Acquisition: 2908 (21.77%)
Product Launch: 2721 (20.37%)
Service Announcement: 1627 (12.18%)
Regulatory Approval: 556 (4.16%)
Clinical Trial: 1748 (13.09%)
Partnership/Milestone: 3797 (28.43%)


### 4. S&P 500 Inclusions and exclusions
- https://www.spglobal.com/spdji/en/media-center/news-announcements/#indexNews
- https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components
- Scraping for the official website news or wikipedia changes.
- Time - Quarterly usually after hours, to prevent market disruption - Change effective on the next trading session / day.

In [43]:
sp500_inclusions = [
    {"Date": "2020-03-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "IR"},
    {"Date": "2020-03-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "XEC"},
    {"Date": "2020-04-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "HWM"},
    {"Date": "2020-04-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ARNC"},
    {"Date": "2020-04-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "OTIS"},
    {"Date": "2020-04-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CARR"},
    {"Date": "2020-04-06 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "M"},
    {"Date": "2020-04-06 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "RTN"},
    {"Date": "2020-05-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "DPZ"},
    {"Date": "2020-05-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "DXCM"},
    {"Date": "2020-05-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CPRI"},
    {"Date": "2020-05-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "AGN"},
    {"Date": "2020-05-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "WST"},
    {"Date": "2020-05-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "HP"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BIO"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TDY"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TYL"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ADS"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "HOG"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "JWN"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ETSY"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TER"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CTLT"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "HRB"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "COTY"},
    {"Date": "2020-09-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "KSS"},
    {"Date": "2020-10-07 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "POOL"},
    {"Date": "2020-10-07 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ETFC"},
    {"Date": "2020-10-09 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "VNT"},
    {"Date": "2020-10-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "NBL"},
    {"Date": "2020-12-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TSLA"},
    {"Date": "2020-12-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "AIV"},
    {"Date": "2021-01-07 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ENPH"},
    {"Date": "2021-01-07 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "TIF"},
    {"Date": "2021-01-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TRMB"},
    {"Date": "2021-01-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CXO"},
    {"Date": "2021-02-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "MPWR"},
    {"Date": "2021-02-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FTI"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "NXPI"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PENN"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "GNRC"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CZR"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FLS"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SLG"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "XRX"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "VNT"},
    {"Date": "2021-04-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PTC"},
    {"Date": "2021-04-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "VAR"},
    {"Date": "2021-05-14 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CRL"},
    {"Date": "2021-05-14 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FLIR"},
    {"Date": "2021-06-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "OGN"},
    {"Date": "2021-06-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "HFC"},
    {"Date": "2021-07-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "MRNA"},
    {"Date": "2021-07-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ALXN"},
    {"Date": "2021-08-30 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TECH"},
    {"Date": "2021-08-30 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "MXIM"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "MTCH"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CDAY"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BRO"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "PRGO"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "UNM"},
    {"Date": "2021-09-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "NOV"},
    {"Date": "2021-12-14 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "EPAM"},
    {"Date": "2021-12-14 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "KSU"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "SBNY"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "SEDG"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "FDS"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "LEG"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "HBI"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "WU"},
    {"Date": "2022-02-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CEG"},
    {"Date": "2022-02-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "GPS"},
    {"Date": "2022-02-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "NDSN"},
    {"Date": "2022-02-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "XLNX"},
    {"Date": "2022-03-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "MOH"},
    {"Date": "2022-03-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "INFO"},
    {"Date": "2022-04-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CPT"},
    {"Date": "2022-04-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "PBCT"},
    {"Date": "2022-04-11 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "WBD"},
    {"Date": "2022-04-11 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISCA"},
    {"Date": "2022-04-11 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISCK"},
    {"Date": "2022-06-08 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "VICI"},
    {"Date": "2022-06-08 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CERN"},
    {"Date": "2022-06-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "KDP"},
    {"Date": "2022-06-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ON"},
    {"Date": "2022-06-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "UA"},
    {"Date": "2022-06-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "UAA"},
    {"Date": "2022-06-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "IPGP"},
    {"Date": "2022-09-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CSGP"},
    {"Date": "2022-09-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "INVH"},
    {"Date": "2022-09-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "PVH"},
    {"Date": "2022-09-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "PENN"},
    {"Date": "2022-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PCG"},
    {"Date": "2022-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "EQT"},
    {"Date": "2022-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CTXS"},
    {"Date": "2022-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DRE"},
    {"Date": "2022-10-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TRGP"},
    {"Date": "2022-10-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "NLSN"},
    {"Date": "2022-11-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ACGL"},
    {"Date": "2022-11-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "TWTR"},
    {"Date": "2022-12-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "MBC"},
    {"Date": "2022-12-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "FSLR"},
    {"Date": "2022-12-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FBHS"},
    {"Date": "2022-12-19 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "MBC"},
    {"Date": "2022-12-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "STLD"},
    {"Date": "2022-12-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ABMD"},
    {"Date": "2023-01-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "GEHC"},
    {"Date": "2023-01-05 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "VNO"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BG"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PODD"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SBNY"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SIVB"},
    {"Date": "2023-03-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "FICO"},
    {"Date": "2023-03-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "LUMN"},
    {"Date": "2023-05-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "AXON"},
    {"Date": "2023-05-04 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FRC"},
    {"Date": "2023-06-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PANW"},
    {"Date": "2023-06-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISH"},
    {"Date": "2023-08-25 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "KVUE"},
    {"Date": "2023-08-25 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "AAP"},
    {"Date": "2023-09-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BX"},
    {"Date": "2023-09-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ABNB"},
    {"Date": "2023-09-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "LNC"},
    {"Date": "2023-09-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "NWL"},
    {"Date": "2023-10-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "VLTO"},
    {"Date": "2023-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DXC"},
    {"Date": "2023-10-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "HUBB"},
    {"Date": "2023-10-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "LULU"},
    {"Date": "2023-10-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "OGN"},
    {"Date": "2023-10-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ATVI"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "UBER"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "JBL"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BLDR"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SEE"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ALK"},
    {"Date": "2023-12-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SEDG"},
    {"Date": "2024-03-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "SMCI"},
    {"Date": "2024-03-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "DECK"},
    {"Date": "2024-03-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "WHR"},
    {"Date": "2024-03-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ZION"},
    {"Date": "2024-04-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "SOLV"},
    {"Date": "2024-04-02 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "GEV"},
    {"Date": "2024-04-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "XRAY"},
    {"Date": "2024-04-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "VFC"},
    {"Date": "2024-05-08 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "VST"},
    {"Date": "2024-05-08 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "PXD"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "KKR"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "CRWD"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "GDDY"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "RHI"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CMA"},
    {"Date": "2024-06-24 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ILMN"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "PLTR"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "DELL"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "ERIE"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "AAL"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ETSY"},
    {"Date": "2024-09-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "BIO"},
    {"Date": "2024-09-30 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "AMTM"},
    {"Date": "2024-10-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "BBWI"},
    {"Date": "2024-11-26 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TPL"},
    {"Date": "2024-11-26 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "MRO"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "APO"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "WDAY"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "LII"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "QRVO"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "AMTM"},
    {"Date": "2024-12-23 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CTLT"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "BIO"},
    {"Date": "2020-06-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "TYL"},
    {"Date": "2021-12-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Inclusion", "Ticker": "SBNY"},
    {"Date": "2020-10-07 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ETFC"},
    {"Date": "2021-03-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "VNT"},
    {"Date": "2021-05-14 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "FLIR"},
    {"Date": "2021-07-21 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ALXN"},
    {"Date": "2021-08-30 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "MXIM"},
    {"Date": "2022-02-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "XLNX"},
    {"Date": "2022-04-11 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISCA"},
    {"Date": "2022-04-11 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISCK"},
    {"Date": "2022-06-08 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CERN"},
    {"Date": "2022-10-03 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "CTXS"},
    {"Date": "2022-10-12 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "NLSN"},
    {"Date": "2022-11-01 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "TWTR"},
    {"Date": "2022-12-22 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ABMD"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SBNY"},
    {"Date": "2023-03-15 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "SIVB"},
    {"Date": "2023-06-20 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "DISH"},
    {"Date": "2023-10-18 00:00:00", "Time": "17:15:00", "Event_Type": "S&P 500 Exclusion", "Ticker": "ATVI"}
]

In [None]:
# Filtering sp500_inclusions to keep only events where Ticker is in ticker_list
sp500_changes = [event for event in sp500_inclusions if event["Ticker"] in ticker_list]
print(len(sp500_changes))

48


In [45]:
# DataFrame from sp500_changes
df = pd.DataFrame(sp500_changes)
df.sort_values("Date", inplace=True)
print(df)
df.to_csv("sp500_changes.csv", index=False)

                   Date      Time         Event_Type Ticker
0   2020-05-12 00:00:00  17:15:00  S&P 500 Inclusion    DPZ
1   2020-05-12 00:00:00  17:15:00  S&P 500 Inclusion   DXCM
2   2020-09-21 00:00:00  17:15:00  S&P 500 Inclusion   ETSY
3   2020-09-21 00:00:00  17:15:00  S&P 500 Inclusion    TER
4   2020-10-07 00:00:00  17:15:00  S&P 500 Inclusion   POOL
5   2020-12-21 00:00:00  17:15:00  S&P 500 Inclusion   TSLA
6   2021-01-07 00:00:00  17:15:00  S&P 500 Inclusion   ENPH
7   2021-01-21 00:00:00  17:15:00  S&P 500 Inclusion   TRMB
8   2021-02-12 00:00:00  17:15:00  S&P 500 Inclusion   MPWR
12  2021-03-22 00:00:00  17:15:00  S&P 500 Exclusion    XRX
11  2021-03-22 00:00:00  17:15:00  S&P 500 Inclusion    CZR
10  2021-03-22 00:00:00  17:15:00  S&P 500 Inclusion   PENN
9   2021-03-22 00:00:00  17:15:00  S&P 500 Inclusion   NXPI
13  2021-04-20 00:00:00  17:15:00  S&P 500 Inclusion    PTC
14  2021-07-21 00:00:00  17:15:00  S&P 500 Inclusion   MRNA
15  2021-08-30 00:00:00  17:15:00  S&P 5

In [None]:
# Normalizing date format to "YYYY-MM-DD HH:MM:SS"
def normalize_date(date_str):
    try:
        # If the date string lacks a time component, append " 00:00:00"
        if len(date_str.split()) == 1:
            date_str = date_str + " 00:00:00"
        # Parsing and reformatting to ensure consistency
        return pd.to_datetime(date_str).strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        print(f"Error parsing date {date_str}: {e}")
        return None

# Converting each dataset to DataFrame and standardize Date format
# earnings_calendar
earnings_calendar_df = pd.DataFrame(earnings_calendar)
earnings_calendar_df["Date"] = earnings_calendar_df["Date"].apply(normalize_date)

# filings
filings_df = pd.DataFrame(filings)
filings_df["Date"] = filings_df["Date"].apply(normalize_date)

# product_launches
product_launches_df = pd.DataFrame(product_launches)
product_launches_df["Date"] = product_launches_df["Date"].apply(normalize_date)

# sp500_changes
sp500_changes_df = pd.DataFrame(sp500_changes)
sp500_changes_df["Date"] = sp500_changes_df["Date"].apply(normalize_date)

# Drop any rows where Date couldn't be parsed
earnings_calendar_df = earnings_calendar_df.dropna(subset=["Date"])
filings_df = filings_df.dropna(subset=["Date"])
product_launches_df = product_launches_df.dropna(subset=["Date"])
sp500_changes_df = sp500_changes_df.dropna(subset=["Date"])

# Combining into micro_news_df
micro_news_df = pd.DataFrame()

# Concatenating all DataFrames
micro_news_df = pd.concat(
    [earnings_calendar_df, filings_df, product_launches_df, sp500_changes_df],
    ignore_index=True
)

# Ensuring consistent column names and sort by Date and Ticker
micro_news_df = micro_news_df[["Event_Type", "Date", "Time", "Ticker"]]
# Final check
micro_news_df["Date"] = pd.to_datetime(micro_news_df["Date"]).dt.strftime("%Y-%m-%d %H:%M:%S")
micro_news_df.sort_values(["Date", "Ticker"], inplace=True)

# Saving
micro_news_df.to_csv("micro_news_df.csv", index=False)

### Combining

In [52]:
macro_news_df = pd.read_csv("macro_news_df.csv")
micro_news_df = pd.read_csv("micro_news_df.csv")

combined_df = pd.concat([micro_news_df, macro_news_df], ignore_index=True)

combined_df.sort_values(["Date", "Ticker"], inplace=True)

combined_df.to_csv("combined_news_df.csv", index=False)

### Impact day

In [None]:
# Defining U.S. holidays for trading day calculations
us_holidays = holidays.US(years=range(2020, 2026))

# Defininig NASDAQ full closure holidays (2020-2025) within March 6, 2020, to February 15, 2025
nasdaq_holidays = [
    # 2020
    "2020-04-10",  # Good Friday
    "2020-05-25",  # Memorial Day
    "2020-07-03",  # Independence Day (observed)
    "2020-09-07",  # Labor Day
    "2020-11-26",  # Thanksgiving
    "2020-12-25",  # Christmas
    # 2021
    "2021-01-01",  # New Year's Day
    "2021-01-18",  # Martin Luther King Jr. Day
    "2021-02-15",  # Presidents' Day
    "2021-04-02",  # Good Friday
    "2021-05-31",  # Memorial Day
    "2021-07-05",  # Independence Day (observed)
    "2021-09-06",  # Labor Day
    "2021-11-25",  # Thanksgiving
    "2021-12-24",  # Christmas (observed)
    # 2022
    "2022-01-17",  # Martin Luther King Jr. Day
    "2022-02-21",  # Presidents' Day
    "2022-04-15",  # Good Friday
    "2022-05-30",  # Memorial Day
    "2022-06-20",  # Juneteenth (observed)
    "2022-07-04",  # Independence Day
    "2022-09-05",  # Labor Day
    "2022-11-24",  # Thanksgiving
    "2022-12-26",  # Christmas (observed)
    # 2023
    "2023-01-02",  # New Year's Day (observed)
    "2023-01-16",  # Martin Luther King Jr. Day
    "2023-02-20",  # Presidents' Day
    "2023-04-07",  # Good Friday
    "2023-05-29",  # Memorial Day
    "2023-06-19",  # Juneteenth
    "2023-07-04",  # Independence Day
    "2023-09-04",  # Labor Day
    "2023-11-23",  # Thanksgiving
    "2023-12-25",  # Christmas
    # 2024
    "2024-01-01",  # New Year's Day
    "2024-01-15",  # Martin Luther King Jr. Day
    "2024-02-19",  # Presidents' Day
    "2024-03-29",  # Good Friday
    "2024-05-27",  # Memorial Day
    "2024-06-19",  # Juneteenth
    "2024-07-04",  # Independence Day
    "2024-09-02",  # Labor Day
    "2024-11-28",  # Thanksgiving
    "2024-12-25",  # Christmas
    # 2025 (up to Feb 15)
    "2025-01-01",  # New Year's Day
    "2025-01-09",  # National Day of Mourning (Jimmy Carter)
    "2025-01-20",  # Martin Luther King Jr. Day
]

nasdaq_non_trading_dates = [datetime.strptime(date, "%Y-%m-%d").date() for date in nasdaq_holidays]

# Getting the next trading day
def get_next_trading_day(date):
    if isinstance(date, datetime):
        date = date.date()
    elif isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d").date()
    
    next_day = date + timedelta(days=1)
    while (next_day.weekday() >= 5 or next_day in nasdaq_non_trading_dates):
        next_day += timedelta(days=1)
    return next_day

# Calculating Impact_Date
def calculate_impact_date(row):
    # Extracting the event date (without time) and time from the row
    event_date = pd.to_datetime(row["Date"]).date()
    event_time_str = row["Time"]  #  "08:30:00"
    
    # Parsing the event time
    event_time = datetime.strptime(event_time_str, "%H:%M:%S").time()
    
    # NASDAQ market hours
    market_open = datetime.strptime("09:30:00", "%H:%M:%S").time()   # 9:30 AM
    market_close = datetime.strptime("16:00:00", "%H:%M:%S").time()  # 4:00 PM
    
    # Determining the impact date
    if event_time < market_open:
        # Before market open: Impact is the same day
        return event_date
    elif event_time >= market_close:
        # After market close: Impact is the next trading day
        return get_next_trading_day(event_date)
    else:
        # During market hours: Impact is the same day
        return event_date

# Applying, Formatting, Sorting
combined_df["Impact_Date"] = combined_df.apply(calculate_impact_date, axis=1)
combined_df["Impact_Date"] = combined_df["Impact_Date"].apply(lambda x: x.strftime("%Y-%m-%d"))
final_news = combined_df.sort_values(["Date", "Ticker"]).reset_index(drop=True)

In [54]:
final_news.to_csv("final_news.csv", index=False)