In [1]:
import dai
import pandas as pd
import numpy as np

In [2]:
# Load the distinct CN trading days from 2005-01-01 onward, oldest first.
trading_days_sql = "SELECT DISTINCT date FROM all_trading_days WHERE market_code = 'CN' AND date >= '2005-01-01'"
df_daily = (
    dai.query(trading_days_sql)
    .df()
    .sort_values("date")
    .reset_index(drop=True)
)
df_daily

Unnamed: 0,date
0,2005-01-04
1,2005-01-05
2,2005-01-06
3,2005-01-07
4,2005-01-10
...,...
5338,2026-12-25
5339,2026-12-28
5340,2026-12-29
5341,2026-12-30


In [3]:
# Bar labels as minutes since midnight:
#   565      -> 09:25 (single bar before the morning session)
#   571..690 -> 09:31..11:30
#   781..900 -> 13:01..15:00
# That is 241 bars per trading day.
minute_offsets = np.concatenate(([565], np.arange(571, 691), np.arange(781, 901)))

# Midnight timestamp of every trading day.
dates = pd.to_datetime(df_daily["date"]).dt.normalize()

# Cross every day with every intraday offset: repeat each day once per offset,
# tile the offsets once per day, then add the two aligned arrays.
day_part = np.repeat(dates.values, len(minute_offsets))
minute_part = pd.to_timedelta(np.tile(minute_offsets, len(dates)), unit="m")
df_minute = (
    pd.DataFrame({"date": day_part + minute_part})
    .sort_values("date")
    .reset_index(drop=True)
)

In [4]:
# String keys identifying the calendar day / clock hour / minute of each bar.
df_minute["dt_day"]    = df_minute["date"].dt.strftime("%Y%m%d")
df_minute["dt_hour"]   = df_minute["date"].dt.strftime("%Y%m%d%H")
df_minute["dt_minute"] = df_minute["date"].dt.strftime("%Y%m%d%H%M")

hh = df_minute["date"].dt.hour
mm = df_minute["date"].dt.minute
tmin = hh * 60 + mm  # minutes since midnight

# Half-day key: suffix "01" before noon, "02" from noon onward.
is_pm = hh.ge(12)
df_minute["dt_halfday"] = df_minute["dt_day"] + np.where(is_pm, "02", "01")

# Trading-hour key (01..04). pd.cut bins are right-closed, and each bar is
# labelled by the minute it ends on, so the hour boundaries must sit ON the
# closing bars 10:30 (630) and 14:00 (840):
#   (.., 10:30] -> 1, (10:30, 11:30] -> 2, (11:30, 14:00] -> 3, (14:00, 15:00] -> 4
# The previous edges 629/839 pushed the 10:30 and 14:00 bars into the next hour,
# contradicting the is_hour_end_trade / minute_of_hour_trade columns built later
# in this notebook, which treat those bars as the last minute of an hour.
hour_idx = pd.cut(tmin, bins=[-1, 630, 690, 840, 900], labels=[1, 2, 3, 4]).astype(int)
df_minute["dt_trade_hour"] = df_minute["dt_day"] + hour_idx.map(lambda x: f"{x:02d}")

# 1-based position of each bar within its day; seq == 1 is the 09:25 bar.
seq = df_minute.groupby("dt_day").cumcount() + 1  # 1..241

def make_bucket(dt_day, seq, step):
    """Build the fixed-width trading-bucket key for every minute bar.

    Parameters
    ----------
    dt_day : pandas.Series of str
        Day key (``YYYYMMDD``) of each bar.
    seq : pandas.Series of int
        1-based position of the bar within its day, where ``seq == 1`` is the
        09:25 bar and ``seq == 2`` is the first continuous bar (09:31).
    step : int
        Bucket width in continuous-session bars (e.g. 30 for 30-minute buckets).

    Returns
    -------
    pandas.Series of str
        ``dt_day`` with a zero-padded (at least two digits) 1-based bucket
        number appended.

    Notes
    -----
    The 09:25 bar is folded into the first bucket instead of consuming one of
    its ``step`` slots. Counting it as a full member shifted every boundary one
    bar early (the 10:00 bar landed in the second 30m bucket) and left the
    15:00 bar alone in an extra trailing bucket, which disagreed with the
    ``is_*_end_trade`` flags that mark 10:00, 10:30, ... as bucket ends.
    """
    # 0-based index among continuous bars; the auction bar is clipped onto the
    # first continuous bar so that both share bucket 1.
    continuous_pos = (seq - 2).clip(lower=0)
    idx = (continuous_pos // step) + 1
    return dt_day + idx.map(lambda x: f"{x:02d}")

# One bucket-key column per bar width, widest first (dt_trade_30m ... dt_trade_5m).
for bucket_width in (30, 20, 15, 10, 5):
    df_minute[f"dt_trade_{bucket_width}m"] = make_bucket(df_minute["dt_day"], seq, bucket_width)

In [5]:
# 0-based position of each bar within its day: 0 is the 09:25 bar, 1..120 the
# morning continuous bars (09:31..11:30), 121..240 the afternoon bars
# (13:01..15:00).
seq0 = df_minute.groupby("dt_day").cumcount()
df_minute["minute_of_day_trade"] = seq0

# Minute within the half-day, 1..120 in both sessions (0 only for the 09:25 bar).
# The afternoon offset is 120, not 121: subtracting 121 numbered 13:01 as 0 and
# 15:00 as 119, out of step with the morning (09:31 -> 1) and with
# minute_of_hour_trade, which numbers 13:01 as 1.
df_minute["minute_of_halfday_trade"] = np.where(seq0.le(120), seq0, seq0 - 120)

# Minute within each fixed-width bucket: 0 for the 09:25 bar, then 1..step
# repeating over the continuous bars.
period_steps = (("hour", 60), ("30m", 30), ("20m", 20), ("15m", 15), ("10m", 10), ("05m", 5))
for period, step in period_steps:
    df_minute[f"minute_of_{period}_trade"] = np.where(seq0.eq(0), 0, ((seq0 - 1) % step) + 1)

# "*_order" columns carry the NEXT bar's "*_trade" value within the same day;
# the last bar of each day has no successor and is filled with 0.
for period in ("day", "halfday") + tuple(name for name, _ in period_steps):
    df_minute[f"minute_of_{period}_order"] = (
        df_minute.groupby("dt_day")[f"minute_of_{period}_trade"].shift(-1).fillna(0).astype(int)
    )

In [6]:
# Clock components of every bar label.
hh = df_minute["date"].dt.hour
mm = df_minute["date"].dt.minute
seq_full = df_minute.groupby("dt_day").cumcount()

# Continuous-session bars: 09:31 onward plus hours 10-11, hours 13-14, and 15:00.
am_session = (hh.eq(9) & mm.ge(31)) | hh.isin([10, 11])
pm_session = hh.isin([13, 14]) | (hh.eq(15) & mm.eq(0))
is_continuous = am_session | pm_session

# 0-based position among the day's continuous bars; -1 for every other bar.
running_count = is_continuous.groupby(df_minute["dt_day"]).cumsum() - 1
seq_trade = running_count.where(is_continuous, -1)
in_session = seq_trade.ge(0)

# Collect all 0/1 "*_trade" flags in their final column order.
trade_flags = {}

# Session boundaries identified by the exact (hour, minute) of the bar.
session_edges = {
    "day": ((9, 31), (15, 0)),
    "am": ((9, 31), (11, 30)),
    "pm": ((13, 1), (15, 0)),
}
for session, (first_bar, last_bar) in session_edges.items():
    trade_flags[f"is_{session}_start_trade"] = (hh.eq(first_bar[0]) & mm.eq(first_bar[1])).astype(int)
    trade_flags[f"is_{session}_end_trade"] = (hh.eq(last_bar[0]) & mm.eq(last_bar[1])).astype(int)

# A half-day boundary is a boundary of either session.
for edge in ("start", "end"):
    trade_flags[f"is_halfday_{edge}_trade"] = (
        trade_flags[f"is_am_{edge}_trade"] | trade_flags[f"is_pm_{edge}_trade"]
    )

# Fixed-width buckets over the continuous bars: a bucket starts where the
# position is a multiple of the width and ends on the bar just before the next
# multiple.
for period, width in (("hour", 60), ("30m", 30), ("20m", 20), ("15m", 15), ("10m", 10), ("05m", 5)):
    trade_flags[f"is_{period}_start_trade"] = (in_session & (seq_trade % width).eq(0)).astype(int)
    trade_flags[f"is_{period}_end_trade"] = (in_session & ((seq_trade + 1) % width).eq(0)).astype(int)

# Call-auction bars: 09:25 in the morning; 14:57 through 15:00 in the afternoon.
trade_flags["is_am_call_auction_trade"] = (hh.eq(9) & mm.eq(25)).astype(int)
trade_flags["is_pm_call_auction_trade"] = ((hh.eq(14) & mm.ge(57)) | (hh.eq(15) & mm.eq(0))).astype(int)

for flag_name, flag_values in trade_flags.items():
    df_minute[flag_name] = flag_values

# Each "*_order" flag is the following bar's "*_trade" flag within the same
# day; the day's last bar has no successor and gets 0.
for flag_name in trade_flags:
    order_name = flag_name[: -len("_trade")] + "_order"
    df_minute[order_name] = df_minute.groupby("dt_day")[flag_name].shift(-1).fillna(0).astype(int)

In [7]:
# Display the finished minute calendar (pandas abbreviates long/wide frames).
df_minute

Unnamed: 0,date,dt_day,dt_hour,dt_minute,dt_halfday,dt_trade_hour,dt_trade_30m,dt_trade_20m,dt_trade_15m,dt_trade_10m,...,is_20m_start_order,is_20m_end_order,is_15m_start_order,is_15m_end_order,is_10m_start_order,is_10m_end_order,is_05m_start_order,is_05m_end_order,is_am_call_auction_order,is_pm_call_auction_order
0,2005-01-04 09:25:00,20050104,2005010409,200501040925,2005010401,2005010401,2005010401,2005010401,2005010401,2005010401,...,1,0,1,0,1,0,1,0,0,0
1,2005-01-04 09:31:00,20050104,2005010409,200501040931,2005010401,2005010401,2005010401,2005010401,2005010401,2005010401,...,0,0,0,0,0,0,0,0,0,0
2,2005-01-04 09:32:00,20050104,2005010409,200501040932,2005010401,2005010401,2005010401,2005010401,2005010401,2005010401,...,0,0,0,0,0,0,0,0,0,0
3,2005-01-04 09:33:00,20050104,2005010409,200501040933,2005010401,2005010401,2005010401,2005010401,2005010401,2005010401,...,0,0,0,0,0,0,0,0,0,0
4,2005-01-04 09:34:00,20050104,2005010409,200501040934,2005010401,2005010401,2005010401,2005010401,2005010401,2005010401,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1287658,2026-12-31 14:56:00,20261231,2026123114,202612311456,2026123102,2026123104,2026123108,2026123112,2026123116,2026123124,...,0,0,0,0,0,0,0,0,0,1
1287659,2026-12-31 14:57:00,20261231,2026123114,202612311457,2026123102,2026123104,2026123108,2026123112,2026123116,2026123124,...,0,0,0,0,0,0,0,0,0,1
1287660,2026-12-31 14:58:00,20261231,2026123114,202612311458,2026123102,2026123104,2026123108,2026123112,2026123116,2026123124,...,0,0,0,0,0,0,0,0,0,1
1287661,2026-12-31 14:59:00,20261231,2026123114,202612311459,2026123102,2026123104,2026123108,2026123112,2026123116,2026123124,...,0,1,0,1,0,1,0,1,0,1


In [8]:
# Save to DAI: delete the existing data source, then write the fresh minute
# calendar under the same id, keyed and indexed on the bar timestamp.
DATASOURCE_ID = "mldt_cn_stock_calendar_minute"

ds = dai.DataSource(DATASOURCE_ID)
ds.delete()

dai.DataSource.write_bdb(data=df_minute, id=DATASOURCE_ID, unique_together=["date"], indexes=["date"])

# Echo the data source handle as the cell output.
dai.DataSource(DATASOURCE_ID)