In [26]:
import pandas as pd
import re
from datetime import datetime, timedelta
import numpy as np 

import warnings
# Silences every warning category for the rest of the session, which also hides
# pandas deprecation / FutureWarning messages raised by the cells below.
# NOTE(review): consider narrowing this filter to the specific warnings intended.
warnings.filterwarnings('ignore')

In [27]:
# Load the trade-level dataset; the path is resolved relative to the kernel's
# working directory. The cells below read the `ticker_name` and `create_ts`
# columns from it.
df = pd.read_csv('data/df_with_category.csv')

In [28]:
# Keep only rows whose ticker contains the literal text 'CPI'.
# `na=False` treats a missing ticker as "no match" (without it a NaN in the mask
# makes the boolean indexing raise), and `regex=False` matches the text literally
# instead of compiling it as a regular expression.
# `.copy()` detaches the result so later column assignments do not touch `df`.
cpi_tickers = df[df['ticker_name'].str.contains('CPI', na=False, regex=False)].copy()

In [29]:
# Parse the trade timestamps as UTC, then strip the timezone so the column holds
# naive UTC wall-clock times.
cpi_tickers['create_ts'] = (
    cpi_tickers['create_ts']
    .pipe(pd.to_datetime, utc=True)
    .dt.tz_localize(None)
)

In [30]:
# Tickers have the form '<category>-<month>-<threshold>' (e.g. 'KXCPI-25JUL-T0.3').
# Split once instead of three times, and cap the split at two hyphens so that a
# hyphen inside the last component (for instance a signed threshold, if such
# tickers occur) is kept rather than silently cut off.
ticker_parts = cpi_tickers['ticker_name'].str.split('-', n=2)
cpi_tickers['category'] = ticker_parts.str[0]
cpi_tickers['month'] = ticker_parts.str[1]
cpi_tickers['inflation'] = ticker_parts.str[2]

In [31]:
# Maps the month token of a ticker (its second '-'-separated field, e.g. '24OCT')
# to a date string that is later parsed into the `release_date` column.
# NOTE(review): presumably the publication date of the CPI report for that
# reference month; verify the dates against the official release calendar.
# Month tokens that are not listed here map to NaN when this dict is applied.
CPI_REFERENCE = {
    '24OCT': '2024-11-13',
    '24NOV': '2024-12-11', 
    '24DEC': '2025-01-15',
    '25JAN': '2025-02-12',
    '25FEB': '2025-03-12',
    '25MAR': '2025-04-10',
    '25APR': '2025-05-13',
    '25MAY': '2025-06-11',
    '25JUN': '2025-07-15',
    '25JUL': '2025-08-12',
}

In [32]:
# Look up each row's month token in CPI_REFERENCE and parse the resulting date
# string; tokens without an entry end up as NaT.
cpi_tickers['release_date'] = (
    cpi_tickers['month']
    .map(CPI_REFERENCE)
    .pipe(pd.to_datetime)
)


In [33]:
# Discard rows without a release date. Rebinding the name
# (rather than `inplace=True`) keeps the step chainable and avoids mutating a
# frame that an earlier cell may already have displayed.
cpi_tickers = cpi_tickers.dropna(subset=['release_date'])

In [34]:
# Preview the filtered frame (pandas shows only the first and last rows of a long frame).
cpi_tickers

Unnamed: 0,create_ts,ticker_name,contracts_traded,price,category,month,inflation,release_date
306968,2024-10-10 20:48:52,CPIAPPAREL-24OCT-T0.5,150,21,CPIAPPAREL,24OCT,T0.5,2024-11-13
306969,2024-10-10 20:48:52,CPIAPPAREL-24OCT-T1.1,150,56,CPIAPPAREL,24OCT,T1.1,2024-11-13
306970,2024-10-10 20:48:52,CPIAPPAREL-24OCT-T1.1,16,56,CPIAPPAREL,24OCT,T1.1,2024-11-13
306971,2024-10-10 20:48:52,CPIAPPAREL-24OCT-T0.5,16,20,CPIAPPAREL,24OCT,T0.5,2024-11-13
306972,2024-10-10 20:48:52,CPIAPPAREL-24OCT-T0.2,33,44,CPIAPPAREL,24OCT,T0.2,2024-11-13
...,...,...,...,...,...,...,...,...
22689873,2025-08-12 04:00:24,KXCPI-25JUL-T0.3,2276,15,KXCPI,25JUL,T0.3,2025-08-12
22689874,2025-08-12 04:00:24,KXCPI-25JUL-T0.3,200,12,KXCPI,25JUL,T0.3,2025-08-12
22689875,2025-08-12 04:00:23,KXCPI-25JUL-T0.2,2000,53,KXCPI,25JUL,T0.2,2025-08-12
22689877,2025-08-12 04:00:22,KXCPI-25JUL-T0.2,50,53,KXCPI,25JUL,T0.2,2025-08-12


In [35]:
# Total contracts traded per ticker category, largest first.
(
    cpi_tickers
    .groupby('category')['contracts_traded']
    .sum()
    .sort_values(ascending=False)
)

category
KXCPIYOY        19369478
KXCPI            7386065
KXCPICORE         718884
KXCPICOREYOY      473188
KXCPIGAS          105477
KXCPISHELTER       42837
KXCPIAPPAREL       35576
KXCPIUSEDCAR       26022
KXCPIFOOD          25959
CPIYOY             19880
CPI                12235
CPICOREYOY          8021
CPIGAS              4826
CPIAPPAREL          4662
CPICORE             4428
KXCPIYOYBANK        3071
CPIFOOD             1212
CPISHELTER            61
CPIUSEDCAR            47
Name: contracts_traded, dtype: int64

In [36]:
# Work on an independent copy of the 'KXCPIYOY' rows only (the category with the
# most contracts traded in the totals above).
# NOTE(review): a separate 'CPIYOY' category also appears in those totals; it
# looks like an older prefix for the same market. Confirm whether those rows
# should be included here.
cpi_sel = cpi_tickers.loc[cpi_tickers['category'].eq('KXCPIYOY')].copy()

In [37]:
# Strip the first character of the threshold token (e.g. 'T2.7' -> '2.7') and
# store the remainder as a float.
cpi_sel['threshold'] = cpi_sel['inflation'].str.slice(start=1).astype('float64')


In [38]:
# Calendar day immediately preceding each row's release date.
one_day = pd.Timedelta(days=1)
cpi_sel['before_release'] = cpi_sel['release_date'].sub(one_day)


In [66]:
def get_monthly_predictions(curr_month, days=1):
    """Summarise the closing prices of one month's contracts shortly before release.

    Looks only at trades made on the calendar day that lies `days` days before
    the release date, takes the most recent trade of every ticker on that day,
    and reports the two contracts on either side of a price of 50.

    Parameters
    ----------
    curr_month : pandas.DataFrame
        Trades of a single month. Must provide the columns `release_date` and
        `create_ts` (both datetime64), `ticker_name`, `price` and `threshold`.
        The frame is not modified.
    days : int, default 1
        How many days before `release_date` the snapshot is taken.

    Returns
    -------
    pandas.Series
        `release_date`, `before_release`: taken from the first row.
        `highest_prob`, `highest_prob_price`: threshold and price of the
        largest-threshold contract whose last price is above 50.
        `lowest_prob`, `lowest_prob_price`: threshold and price of the
        smallest-threshold contract whose last price is below 50.
        An entry is NaN/NaT when no row qualifies (no trades on the target day,
        nothing on that side of 50, or an empty input).
    """
    ret = {
        'release_date': pd.NaT,
        'before_release': pd.NaT,
        'highest_prob': float('nan'),
        'lowest_prob': float('nan'),
        'highest_prob_price': float('nan'),
        'lowest_prob_price': float('nan'),
    }
    if curr_month.empty:
        return pd.Series(ret)

    # Computed locally: writing a column into `curr_month` would mutate the
    # caller's frame (or a groupby slice of it).
    before_release = curr_month['release_date'] - pd.Timedelta(days=days)
    ret['release_date'] = curr_month['release_date'].iloc[0]
    ret['before_release'] = before_release.iloc[0]

    # Compare datetime64 with datetime64 (both truncated to midnight). Comparing
    # `.dt.date` objects against a datetime64 column is unreliable: recent pandas
    # versions treat a Timestamp and a `datetime.date` as never equal.
    on_target_day = curr_month['create_ts'].dt.normalize() == before_release.dt.normalize()

    # Sort chronologically so that `.last()` really is the latest trade per
    # ticker; the input rows are not guaranteed to be in time order.
    last_day = curr_month[on_target_day].sort_values('create_ts', kind='stable')

    # Order contracts by numeric threshold so the positional picks below do not
    # depend on the lexicographic order of the ticker names.
    final_vals = (
        last_day
        .groupby('ticker_name')
        .last()
        .sort_values('threshold', kind='stable')
    )

    # A price of exactly 50 belongs to neither side.
    above_even = final_vals[final_vals['price'] > 50]
    below_even = final_vals[final_vals['price'] < 50]

    if not above_even.empty:
        highest_prob = above_even.iloc[-1]
        ret['highest_prob'] = highest_prob['threshold']
        ret['highest_prob_price'] = highest_prob['price']
    if not below_even.empty:
        lowest_prob = below_even.iloc[0]
        ret['lowest_prob'] = lowest_prob['threshold']
        ret['lowest_prob_price'] = lowest_prob['price']

    return pd.Series(ret)

# Run the summary once per 'month' group, forwarding days=1 to the function.
# Each returned Series becomes one row of the result, indexed by the month token.
monthly_results = cpi_sel.groupby('month').apply(get_monthly_predictions, days=1)

In [69]:
# Move the 'month' group key out of the index into a regular column.
# NOTE(review): because the same name is reassigned, this cell is not idempotent;
# running it a second time adds an extra 'index' column.
monthly_results = monthly_results.reset_index()

In [74]:
# Parse month tokens such as '25JUL' (two-digit year followed by the month
# abbreviation) into timestamps; the format carries no day, so each value falls
# on the first day of its month.
monthly_results['month'] = pd.to_datetime(monthly_results['month'], format='%y%b')

In [51]:
# NOTE(review): in the visible cells `final_vals` is only assigned inside
# get_monthly_predictions, so this cell appears to depend on leftover kernel
# state (its execution count, 51, predates the function cell, 66). Confirm it
# still runs after Restart & Run All, or remove it.
lowest_prob = final_vals[final_vals['price'] < 50].iloc[0]

In [53]:
# Scratch inspection of the above-50 pick's threshold.
# NOTE(review): `highest_prob` has no top-level assignment in the visible cells,
# so this presumably relies on a variable left over in the kernel. Confirm.
highest_prob['threshold']

2.7

In [57]:
# Scratch inspection of the above-50 pick's price.
# NOTE(review): `highest_prob` has no top-level assignment in the visible cells,
# so this presumably relies on a variable left over in the kernel. Confirm.
highest_prob['price']

65

In [55]:
# Scratch inspection: threshold of the first row priced below 50
# (`lowest_prob` as assigned in the In [51] cell).
lowest_prob['threshold']

2.8

In [58]:
# Scratch inspection: price of the first row priced below 50
# (`lowest_prob` as assigned in the In [51] cell).
lowest_prob['price']

19