In [12]:
import tomica
# Print the path the tomica package was loaded from, to check which installation is in use.
print(tomica.__file__)

/Users/zhangyangfa/rust-projects/tomica/tomica/__init__.py


## Usage

In [13]:
import tomica
import polars as pl

# Load the input table from CSV. Later cells read its "instrument", "datetime",
# "close" and "open" columns.
df = pl.read_csv('./data/csidata.csv')

# Method 1: use map_groups with a function that returns a DataFrame
def calculate_factors(group_df):
    """Compute the factor columns for a single group of rows.

    Meant to be used as the callback of ``group_by(...).map_groups``: it is
    handed the rows of one group and builds a new frame from that group's
    ``datetime`` and ``close`` columns plus five factors derived from
    ``close`` with ``tomica.operators``.

    Args:
        group_df: Frame holding one group's rows; must provide the
            ``datetime`` and ``close`` columns.

    Returns:
        A ``pl.DataFrame`` with the columns ``datetime``, ``close``,
        ``return_10d``, ``volatility_20d``, ``ma_5d``, ``sharpe_10d`` and
        ``zscore_20d``, in that order. No other input column (the grouping
        key included) is carried over.
    """
    ops = tomica.operators
    dates = group_df["datetime"]
    close = group_df["close"]

    # Output column name -> (operator, window length), all applied to `close`.
    factor_specs = {
        "return_10d": (ops.ts_returns, 10),
        "volatility_20d": (ops.ts_std, 20),
        "ma_5d": (ops.ts_mean, 5),
        "sharpe_10d": (ops.ts_sharpe, 10),
        "zscore_20d": (ops.ts_zscore, 20),
    }

    columns = {"datetime": dates, "close": close}
    for column_name, (operator, window) in factor_specs.items():
        columns[column_name] = operator(close, window)
    return pl.DataFrame(columns)

# Apply calculate_factors to each instrument's rows; maintain_order=True keeps
# the group order consistent with the input.
result = (
    df.group_by("instrument", maintain_order=True)
    .map_groups(calculate_factors)
)
print(result)

# Method 2: operate directly on whole columns (when no grouping is needed).
# Note: this treats the data of all stocks as one single series.
simple_result = df.with_columns(
    tomica.operators.ts_returns(df["close"], 10).alias("return_10d"),
    tomica.operators.ts_std(df["close"], 20).alias("volatility_20d"),
)
print(simple_result)

shape: (1_240_545, 7)
┌────────────┬───────┬────────────┬────────────────┬──────────┬────────────┬────────────┐
│ datetime   ┆ close ┆ return_10d ┆ volatility_20d ┆ ma_5d    ┆ sharpe_10d ┆ zscore_20d │
│ ---        ┆ ---   ┆ ---        ┆ ---            ┆ ---      ┆ ---        ┆ ---        │
│ str        ┆ f64   ┆ f64        ┆ f64            ┆ f64      ┆ f64        ┆ f64        │
╞════════════╪═══════╪════════════╪════════════════╪══════════╪════════════╪════════════╡
│ 2006-01-04 ┆ 6.28  ┆ null       ┆ null           ┆ 6.28     ┆ null       ┆ null       │
│ 2006-01-05 ┆ 6.32  ┆ null       ┆ 0.028284       ┆ 6.3      ┆ 222.738636 ┆ 0.707107   │
│ 2006-01-06 ┆ 6.41  ┆ null       ┆ 0.066583       ┆ 6.336667 ┆ 95.169036  ┆ 1.101378   │
│ 2006-01-09 ┆ 6.39  ┆ null       ┆ 0.060553       ┆ 6.35     ┆ 104.866799 ┆ 0.660578   │
│ 2006-01-10 ┆ 6.28  ┆ null       ┆ 0.061074       ┆ 6.336    ┆ 103.743466 ┆ -0.916925  │
│ …          ┆ …     ┆ …          ┆ …              ┆ …        ┆ …          ┆ …

## Speed test

In [14]:
import polars as pl
import random
import time
from datetime import datetime

In [15]:
# Benchmark input: one million floats drawn uniformly from [-50, 50], stored as
# a Float64 Series named "foo".
# A dedicated, seeded generator makes the data identical after every kernel
# restart and leaves the global `random` state untouched.
_rng = random.Random(42)
data = pl.Series(
    "foo",
    [_rng.uniform(-50, 50) for _ in range(1_000_000)],
    dtype=pl.Float64,
)

In [16]:
def test_speed(fn, window_size):
    st = time.time()
    try:
        fn(data, window_size)
        print(f'{fn.__name__:<20} {time.time()-st:>8.4f} s')
    except:
        fn(data, data, window_size)
        print(f'{fn.__name__:<20} {time.time()-st:>8.4f} s')

In [17]:
# Benchmark every operator exported by tomica.operators with a window of 10.
for op_name in tomica.operators.__all__:
    func = getattr(tomica.operators, op_name)
    test_speed(func, 10)

ts_delay               0.0001 s
ts_delta               0.0008 s
ts_returns             0.0014 s
ts_sum                 0.0087 s
ts_product             0.0084 s
ts_max                 0.0120 s
ts_min                 0.0113 s
ts_mean                0.0082 s
ts_std                 0.0169 s
ts_rank                0.0956 s
ts_variance            0.0171 s
ts_quantile_up         0.0329 s
ts_quantile_down       0.0330 s
ts_zscore              0.0292 s
ts_robust_zscore       0.0747 s
ts_scale               0.0269 s
ts_sharpe              0.0280 s
ts_av_diff             0.0095 s
ts_min_diff            0.0132 s
ts_max_diff            0.0130 s
ts_cov                 0.0288 s
ts_beta                0.0457 s
ts_pos_count           0.0042 s
ts_neg_count           0.0039 s
decay_n                0.0027 s


In [18]:
# Baseline: time polars' built-in rolling_rank on the "open" column (window 10).
# perf_counter is the monotonic, high-resolution clock meant for measuring
# short durations; time.time() can jump when the system clock is adjusted.
st = time.perf_counter()
df['open'].rolling_rank(10)
print(f'{time.perf_counter()-st:.4f} s')

0.1044 s


In [19]:
# Time tomica's ts_rank on the same "open" column with the same window (10).
# perf_counter is the monotonic, high-resolution clock meant for measuring
# short durations; time.time() can jump when the system clock is adjusted.
st = time.perf_counter()
tomica.operators.ts_rank(df['open'],10)
print(f'{time.perf_counter()-st:.4f} s')

0.0912 s
