In [1]:

import polars as pl
from datetime import datetime

In [2]:
# Load the trade-level CSV into a Polars DataFrame. Per the displayed frame,
# the columns are create_ts, ticker_name, contracts_traded, price, category.
df = pl.read_csv("data/df_with_category.csv")


In [3]:
# Preview the loaded trades (the Polars repr shows only the head and tail rows).
df

create_ts,ticker_name,contracts_traded,price,category
str,str,i64,i64,str
"""2025-01-01 23:59:27-05:00""","""KXINAUG-25-MZ""",29,47,"""US Politics"""
"""2025-01-01 23:59:27-05:00""","""KXINAUG-25-MZ""",25,47,"""US Politics"""
"""2025-01-01 23:59:27-05:00""","""KXINAUG-25-MZ""",50,47,"""US Politics"""
"""2025-01-01 23:59:27-05:00""","""CABINETTULSI-25-FEB01""",10,91,
"""2025-01-01 23:59:25-05:00""","""KXSNOWNYM-25JAN-T8.0""",222,21,"""Weather"""
…,…,…,…,…
"""2025-08-12 00:00:02-04:00""","""KXWTAMATCH-25AUG11RYBMER-RYB""",22,53,"""Sports"""
"""2025-08-12 00:00:02-04:00""","""KXFEDDECISION-25SEP-H0""",1100,20,"""Macroeconomics"""
"""2025-08-12 00:00:01-04:00""","""KXCPI-25JUL-T0.2""",163,52,"""Macroeconomics"""
"""2025-08-12 00:00:01-04:00""","""KXNEWTAYLOR-25""",10,95,"""Pop Culture"""


In [4]:
# Parse the string timestamps into a proper datetime column named "parsed_ts".
# The raw strings carry mixed UTC offsets (-05:00 and -04:00); the month column
# derived from this one is displayed as datetime[μs, UTC], so the values are
# UTC-normalized here.
# NOTE(review): month bucketing downstream therefore uses UTC month boundaries,
# not the local offset in the source strings — confirm that is intended.
df = df.with_columns(
    parsed_ts=pl.col("create_ts").str.to_datetime(),
)

In [5]:
# Aggregate trades into one row per (month, category):
#   - category_volume: total contracts traded in that month/category
#   - trade_count:     number of trade rows in that month/category
# Rows with a null category form their own group (visible in the output below).
# group_by does not guarantee row order, so the result is sorted later.
monthly_analysis = (
    df
    .with_columns(
        # Floor each timestamp to the start of its month to get the grouping key.
        pl.col("parsed_ts").dt.truncate("1mo").alias("month")
    )
    .group_by(["month", "category"])
    .agg(
        pl.col("contracts_traded").sum().alias("category_volume"),
        # pl.len() replaces pl.count(), which is deprecated since Polars 0.20.5
        # and emitted a DeprecationWarning in this cell.
        pl.len().alias("trade_count"),
    )
)


(Deprecated in version 0.20.5)
  pl.count().alias("trade_count")


In [6]:
# Inspect the per-(month, category) aggregates; row order is not sorted at this stage.
monthly_analysis

month,category,category_volume,trade_count
"datetime[μs, UTC]",str,i64,u32
2025-08-01 00:00:00 UTC,"""Finance""",7284207,44617
2025-04-01 00:00:00 UTC,"""Weather""",617806,9466
2025-06-01 00:00:00 UTC,,4496842,59217
2025-06-01 00:00:00 UTC,"""Sports""",631305022,1961290
2025-02-01 00:00:00 UTC,"""Pop Culture""",21773942,249450
…,…,…,…
2025-07-01 00:00:00 UTC,"""Temperature""",12026002,234827
2025-03-01 00:00:00 UTC,"""Macroeconomics""",7336357,53383
2025-08-01 00:00:00 UTC,"""Pop Culture""",4779741,41745
2025-04-01 00:00:00 UTC,"""Macroeconomics""",52362968,183280


In [7]:
# Collapse the per-category volumes into a single total per month; this is the
# denominator for the percentage-share calculation that follows.
monthly_totals = monthly_analysis.group_by("month").agg(
    month_total_volume=pl.col("category_volume").sum(),
)

In [8]:
# Inspect the total traded volume per month (one row per month, unsorted).
monthly_totals

month,month_total_volume
"datetime[μs, UTC]",i64
2025-03-01 00:00:00 UTC,513106674
2025-01-01 00:00:00 UTC,163794962
2025-08-01 00:00:00 UTC,252817590
2025-06-01 00:00:00 UTC,792124144
2025-04-01 00:00:00 UTC,453241714
2025-02-01 00:00:00 UTC,165962441
2025-07-01 00:00:00 UTC,740539131
2025-05-01 00:00:00 UTC,653881696


In [9]:
# Attach each month's total to its category rows and express every category's
# volume as a percentage of that month's total, rounded to 2 decimals.
# The result is ordered by month, then category, so later steps see a stable order.
monthly_volume_pct = (
    monthly_analysis
    .join(monthly_totals, on="month")
    .with_columns(
        volume_percentage=(
            pl.col("category_volume") / pl.col("month_total_volume") * 100
        ).round(2),
    )
    .select(
        "month",
        "category",
        "category_volume",
        "volume_percentage",
        "trade_count",
        "month_total_volume",
    )
    .sort("month", "category")
)

In [10]:
# Inspect the per-month category shares (sorted by month, then category).
monthly_volume_pct

month,category,category_volume,volume_percentage,trade_count,month_total_volume
"datetime[μs, UTC]",str,i64,f64,u32,i64
2025-01-01 00:00:00 UTC,,11995749,7.32,80623,163794962
2025-01-01 00:00:00 UTC,"""Crypto""",29386592,17.94,401451,163794962
2025-01-01 00:00:00 UTC,"""Finance""",5048518,3.08,24608,163794962
2025-01-01 00:00:00 UTC,"""Global Politics""",2001419,1.22,13341,163794962
2025-01-01 00:00:00 UTC,"""Macroeconomics""",3781036,2.31,16395,163794962
…,…,…,…,…,…
2025-08-01 00:00:00 UTC,"""Pop Culture""",4779741,1.89,41745,252817590
2025-08-01 00:00:00 UTC,"""Sports""",169495856,67.04,747358,252817590
2025-08-01 00:00:00 UTC,"""Temperature""",4955938,1.96,81782,252817590
2025-08-01 00:00:00 UTC,"""US Politics""",7210846,2.85,45569,252817590


In [11]:
# Reshape the long (month, category, volume_percentage) table into wide form:
# one row per month and one column per category, holding that category's
# percentage share. Month/category combinations with no trades become 0.
volume_pct_pivot = (
    monthly_volume_pct
    .select(["month", "category", "volume_percentage"])
    .pivot(
        # `on` is the current name of the pivot-column argument; the former
        # `columns=` keyword is deprecated (renamed in Polars 1.0) and is the
        # likely source of the warning this cell emitted.
        on="category",
        index="month",
        values="volume_percentage",
    )
    .fill_null(0)
)

  monthly_volume_pct


In [None]:
# Convert the wide share table to pandas, give the null-category column
# (named 'null' after the pivot) a readable label, and drop the last row.
# NOTE(review): the dropped last row is presumably the incomplete final month
# (the displayed trades end on 2025-08-12) — confirm, and consider filtering
# by month explicitly rather than by position.
data = (
    volume_pct_pivot
    .to_pandas()
    .rename(columns={'null': 'Uncategorized'})
    .iloc[:-1]
)