In [1]:
import json
import os
from typing import Optional, Tuple
from datetime import datetime
import fire

import numpy as np
from sb3_contrib.ppo_mask import MaskablePPO
from stable_baselines3.common.callbacks import BaseCallback
from alphagen.data.calculator import AlphaCalculator

from alphagen.data.expression import *
from alphagen.models.alpha_pool import AlphaPool, AlphaPoolBase
from alphagen.rl.env.wrapper import AlphaEnv
from alphagen.rl.policy import LSTMSharedNet
from alphagen.utils.random import reseed_everything
from alphagen.rl.env.core import AlphaEnvCore
from alphagen_qlib.calculator import QLibStockDataCalculator

In [2]:
class CustomCallback(BaseCallback):
    def __init__(self,
                 save_freq: int,
                 show_freq: int,
                 save_path: str,
                 valid_calculator: AlphaCalculator,
                 test_calculator: AlphaCalculator,
                 name_prefix: str = 'rl_model',
                 timestamp: Optional[str] = None,
                 verbose: int = 0):
        super().__init__(verbose)
        self.save_freq = save_freq
        self.show_freq = show_freq
        self.save_path = save_path
        self.name_prefix = name_prefix

        self.valid_calculator = valid_calculator
        self.test_calculator = test_calculator

        if timestamp is None:
            self.timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        else:
            self.timestamp = timestamp

    def _init_callback(self) -> None:
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        return True

    def _on_rollout_end(self) -> None:
        assert self.logger is not None
        self.logger.record('pool/size', self.pool.size)
        self.logger.record('pool/significant', (np.abs(self.pool.weights[:self.pool.size]) > 1e-4).sum())
        self.logger.record('pool/best_ic_ret', self.pool.best_ic_ret)
        self.logger.record('pool/eval_cnt', self.pool.eval_cnt)
        ic_test, rank_ic_test = self.pool.test_ensemble(self.test_calculator)
        self.logger.record('test/ic', ic_test)
        self.logger.record('test/rank_ic', rank_ic_test)
        self.save_checkpoint()

    def save_checkpoint(self):
        path = os.path.join(self.save_path, f'{self.name_prefix}_{self.timestamp}', f'{self.num_timesteps}_steps')
        self.model.save(path)   # type: ignore
        if self.verbose > 1:
            print(f'Saving model checkpoint to {path}')
        with open(f'{path}_pool.json', 'w') as f:
            json.dump(self.pool.to_dict(), f)

    def show_pool_state(self):
        state = self.pool.state
        n = len(state['exprs'])
        print('---------------------------------------------')
        for i in range(n):
            weight = state['weights'][i]
            expr_str = str(state['exprs'][i])
            ic_ret = state['ics_ret'][i]
            print(f'> Alpha #{i}: {weight}, {expr_str}, {ic_ret}')
        print(f'>> Ensemble ic_ret: {state["best_ic_ret"]}')
        print('---------------------------------------------')

    @property
    def pool(self) -> AlphaPoolBase:
        return self.env_core.pool

    @property
    def env_core(self) -> AlphaEnvCore:
        return self.training_env.envs[0].unwrapped  # type: ignore

In [3]:
import qlib
from qlib.data import D

# 1min data cn
# freq=1min
qlib.init(provider_uri="~/.qlib/qlib_data/my_data_60min")
#inst = D.list_instruments(D.instruments("all"), freq="15min", as_list=True)
# get 100 symbols
#df = D.features(D.instruments("all"), ["$close", '$open','$high', '$low', '$volume','$vwap'], freq="15min")
# get all symbol data
# df = D.features(D.instruments("all"), ["$close"], freq="1min")

[21480:MainThread](2024-07-16 16:19:02,581) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[21480:MainThread](2024-07-16 16:19:02,921) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[21480:MainThread](2024-07-16 16:19:02,923) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': WindowsPath('C:/Users/in.lucas.lu/.qlib/qlib_data/my_data_60min')}


In [7]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,$close,$open,$high,$low,$volume,$vwap
instrument,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1INCH,2021-01-01 00:00:00,1.3135,1.3623,1.3974,1.2796,1.002782e+06,1.330167
1INCH,2021-01-01 00:15:00,1.3146,1.3135,1.3326,1.3003,5.006487e+05,1.325394
1INCH,2021-01-01 00:30:00,1.3289,1.3133,1.3428,1.3094,3.569408e+05,1.325708
1INCH,2021-01-01 00:45:00,1.3222,1.3283,1.3300,1.3086,2.561110e+05,1.325050
1INCH,2021-01-01 01:00:00,1.3095,1.3202,1.3240,1.2800,7.142080e+05,1.319865
...,...,...,...,...,...,...,...
XTZ,2024-07-12 10:30:00,0.7410,0.7390,0.7410,0.7390,2.865070e+04,3.344620
XTZ,2024-07-12 10:45:00,0.7410,0.7410,0.7410,0.7390,1.628960e+04,3.344615
XTZ,2024-07-12 11:00:00,0.7380,0.7410,0.7410,0.7370,1.736610e+04,3.344609
XTZ,2024-07-12 11:15:00,0.7400,0.7370,0.7400,0.7370,3.666780e+04,3.344597


In [18]:
24* 7

168

In [79]:
device = torch.device('cpu')
close = Feature(FeatureType.CLOSE)
target = Ref(close, -2) / close - 1
freq = '60min'

# You can re-implement AlphaCalculator instead of using QLibStockDataCalculator.
data_train = StockData(instrument='all',
                       start_time='2021-01-14 00:00:00',
                       end_time = '2022-12-31 00:00:00', 
                       max_backtrack_days = 96,
                       max_future_days = 12, device=device)

In [5]:
data_train._get_data()

(tensor([[[4.1827e+01, 3.3950e+04, 9.9771e-03, 1.1002e+03, 2.1636e+00,
           2.2577e-01],
          [4.1123e+01, 3.3918e+04, 9.7789e-03, 1.0812e+03, 2.0676e+00,
           2.2900e-01],
          [4.1868e+01, 3.4108e+04, 1.0224e-02, 1.1103e+03, 2.1761e+00,
           2.3530e-01],
          [4.0676e+01, 3.3288e+04, 9.6506e-03, 1.0632e+03, 2.0587e+00,
           2.2139e-01],
          [2.4133e+05, 6.1064e+03, 4.1364e+08, 9.9324e+04, 3.5382e+05,
           8.5247e+07],
          [3.9563e+01, 3.1836e+04, 9.8556e-03, 9.5114e+02, 2.0570e+00,
           2.3049e-01]],
 
         [[4.1123e+01, 3.3918e+04, 9.7639e-03, 1.0810e+03, 2.0676e+00,
           2.2901e-01],
          [4.0675e+01, 3.3811e+04, 9.6628e-03, 1.0718e+03, 2.0801e+00,
           2.2564e-01],
          [4.1203e+01, 3.4019e+04, 9.9299e-03, 1.0862e+03, 2.1452e+00,
           2.3021e-01],
          [4.0453e+01, 3.3513e+04, 9.6229e-03, 1.0573e+03, 2.0523e+00,
           2.2464e-01],
          [1.0936e+05, 3.2377e+03, 1.5382e+08, 

In [80]:
calculator_train = QLibStockDataCalculator(data_train, target)

In [81]:
data_valid = StockData(instrument='all',
                       start_time='2023-01-01 00:00:00',
                       end_time='2023-12-30 00:00:00', 
                       max_backtrack_days = 96,
                       max_future_days = 12,device=device)
calculator_valid = QLibStockDataCalculator(data_valid, target)

In [10]:
data_valid.n_days + 96 + 12

8820

In [8]:
data_valid._get_data()

(tensor([[[2.4670e+02, 1.6706e+04, 7.3720e-02, 1.2116e+03, 1.1100e+01,
           3.6750e-01],
          [2.4580e+02, 1.6686e+04, 7.3140e-02, 1.2084e+03, 1.1010e+01,
           3.6600e-01],
          [2.4740e+02, 1.6725e+04, 7.3910e-02, 1.2131e+03, 1.1120e+01,
           3.6870e-01],
          [2.4560e+02, 1.6667e+04, 7.2980e-02, 1.2063e+03, 1.1000e+01,
           3.6500e-01],
          [8.6316e+03, 6.2107e+03, 3.2080e+07, 9.9556e+03, 7.4000e+04,
           1.2614e+07],
          [3.2177e+02, 2.9911e+04, 1.6438e-01, 2.1355e+03, 5.9713e+01,
           7.4588e-01]],
 
         [[2.4580e+02, 1.6686e+04, 7.3140e-02, 1.2084e+03, 1.1010e+01,
           3.6600e-01],
          [2.4610e+02, 1.6696e+04, 7.2900e-02, 1.2094e+03, 1.0910e+01,
           3.6390e-01],
          [2.4680e+02, 1.6755e+04, 7.3280e-02, 1.2158e+03, 1.1030e+01,
           3.6680e-01],
          [2.4470e+02, 1.6650e+04, 7.2070e-02, 1.2046e+03, 1.0880e+01,
           3.6270e-01],
          [6.9101e+03, 8.3395e+03, 1.0674e+08, 

In [82]:
data_test = StockData(instrument='all',
                      start_time='2024-01-01 00:00:00',
                      end_time='2024-07-10 00:00:00', 
                      max_backtrack_days = 96,
                      max_future_days = 12,device=device)
calculator_test = QLibStockDataCalculator(data_test, target)

In [12]:
data_test._get_data()

(tensor([[[3.1700e+02, 4.2140e+04, 9.0000e-02, 2.2917e+03, 1.0190e+02,
           6.2070e-01],
          [3.1780e+02, 4.2368e+04, 9.0380e-02, 2.3010e+03, 1.0257e+02,
           6.2260e-01],
          [3.1920e+02, 4.2430e+04, 9.0500e-02, 2.3046e+03, 1.0291e+02,
           6.2290e-01],
          [3.1640e+02, 4.2140e+04, 8.9960e-02, 2.2910e+03, 1.0175e+02,
           6.2000e-01],
          [2.0218e+04, 9.7778e+02, 1.0814e+07, 6.5715e+03, 1.0352e+05,
           6.1588e+06],
          [3.1444e+02, 2.8325e+04, 1.5184e-01, 2.0685e+03, 4.9966e+01,
           6.9156e-01]],
 
         [[3.1780e+02, 4.2368e+04, 9.0390e-02, 2.3010e+03, 1.0259e+02,
           6.2260e-01],
          [3.1780e+02, 4.2171e+04, 8.9930e-02, 2.2898e+03, 1.0130e+02,
           6.2140e-01],
          [3.1900e+02, 4.2433e+04, 9.0490e-02, 2.3034e+03, 1.0293e+02,
           6.2340e-01],
          [3.1700e+02, 4.2157e+04, 8.9790e-02, 2.2896e+03, 1.0111e+02,
           6.2120e-01],
          [1.5944e+04, 7.1070e+02, 1.3889e+07, 

In [88]:
pool_capacity = 20
instruments = 'all'
seed = 12
steps = 200000

In [None]:
pool = AlphaPool(
    capacity=pool_capacity,
    calculator=calculator_train,
    ic_lower_bound=None,
    l1_alpha=5e-3
)
env = AlphaEnv(pool=pool, device=device, print_expr=True)

name_prefix = f"new_{instruments}_{pool_capacity}_{seed}"
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

checkpoint_callback = CustomCallback(
    save_freq=10000,
    show_freq=10000,
    save_path='/path/for/checkpoints',
    valid_calculator=calculator_valid,
    test_calculator=calculator_test,
    name_prefix=name_prefix,
    timestamp=timestamp,
    verbose=1,
)

model = MaskablePPO(
    'MlpPolicy',
    env,
    policy_kwargs=dict(
        features_extractor_class=LSTMSharedNet,
        features_extractor_kwargs=dict(
            n_layers=2, #2
            d_model=128,
            dropout=0.2,
            device=device,
        ),
    ),
    gamma=2.,
    ent_coef=0.01,
    batch_size=256, #128 
    tensorboard_log='/path/for/tb/log',
    device=device,
    verbose=1,
)
model.learn(
    total_timesteps=steps,
    callback=checkpoint_callback,
    tb_log_name=f'{name_prefix}_{timestamp}',
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to /path/for/tb/log\new_all_20_12_20240716182402_1
$volume
Ref(Div(Constant(-0.01),$close),40)
$volume
Mul(Constant(1.0),Add($volume,Constant(-0.5)))
Mul(Div(Ref(Add($low,Constant(-30.0)),20),Constant(0.01)),Mul(EMA($low,40),Sub(Constant(-2.0),Min(Mean($vwap,10),50))))
Div($vwap,$high)




---------------------------------
| pool/              |          |
|    best_ic_ret     | 0.0299   |
|    eval_cnt        | 4        |
|    significant     | 2        |
|    size            | 4        |
| rollout/           |          |
|    ep_len_mean     | 19.2     |
|    ep_rew_mean     | -0.939   |
| test/              |          |
|    ic              | 0.0265   |
|    rank_ic         | 0.0232   |
| time/              |          |
|    fps             | 429      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
$close
Ref(Sub(Div($high,Less(Constant(-0.5),$high)),EMA(Sub(Ref(Div(Constant(0.5),WMA($volume,40)),30),Constant(2.0)),30)),30)
$vwap
Less(Div($open,Constant(-0.5)),$vwap)
-------------------------------------------
| pool/                   |               |
|    best_ic_ret          | 0.0299        |
|    eval_cnt             | 5             |
|    significant          | 2           

In [30]:
from math import isnan

import pandas as pd
from alphagen.trade.base import StockPosition, StockStatus
from alphagen_qlib.calculator import QLibStockDataCalculator

from alphagen_qlib.strategy import TopKSwapNStrategy
from alphagen_qlib.utils import load_alpha_pool_by_path, load_recent_data

  from .autonotebook import tqdm as notebook_tqdm


In [31]:
POOL_PATH = '/path/for/checkpoints/new_all_10_4_20240716085029/20480_steps_pool.json'

In [15]:
cal: np.ndarray = D.calendar(freq='60min')

In [17]:
import pandas as pd

In [18]:
start_index = cal.searchsorted(pd.Timestamp('2024-07-06 07:00:00'))  # type: ignore

In [20]:
end_index = cal.searchsorted(pd.Timestamp('2024-07-09 00:00:00'))

In [19]:
start_index

30761

In [54]:
real_end_time = cal[end_index]

In [27]:
real_start_time

Timestamp('2024-07-02 07:00:00')

In [28]:
cal.searchsorted(real_start_time)

30665

In [29]:
end_index

30826

In [37]:
from typing import List, Union, Optional, Tuple
from enum import IntEnum
import numpy as np
import pandas as pd
import torch

In [38]:
class FeatureType(IntEnum):
    OPEN = 0
    CLOSE = 1
    HIGH = 2
    LOW = 3
    VOLUME = 4
    VWAP = 5

In [40]:
features: Optional[List[FeatureType]] = None

In [41]:
features = features if features is not None else list(FeatureType)

In [47]:
def _load_exprs(exprs: Union[str, List[str]]) -> pd.DataFrame:
    # This evaluates an expression on the data and returns the dataframe
    # It might throw on illegal expressions like "Ref(constant, dtime)"
    from qlib.data.dataset.loader import QlibDataLoader
    from qlib.data import D
    if not isinstance(exprs, list):
        exprs = [exprs]
    cal: np.ndarray = D.calendar(freq='60min')
    start_index = cal.searchsorted(pd.Timestamp(self._start_time))  # type: ignore
    end_index = cal.searchsorted(pd.Timestamp(self._end_time))  # type: ignore
    real_start_time = cal[start_index - self.max_backtrack_days]
    if cal[end_index] != pd.Timestamp(self._end_time):
        end_index -= 1
    real_end_time = cal[end_index + self.max_future_days]
    return (QlibDataLoader(config=features, freq='60min')  # type: ignore
            .load(self._instrument, real_start_time, real_end_time))

In [44]:
features = ['$' + f.name.lower() for f in features]

In [45]:
features

['$open', '$close', '$high', '$low', '$volume', '$vwap']

In [49]:
from qlib.data.dataset.loader import QlibDataLoader

In [56]:
df = (QlibDataLoader(config=features, freq='60min').load('all', real_start_time, real_end_time))

In [57]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,$open,$close,$high,$low,$volume,$vwap
datetime,instrument,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-07-02 07:00:00,BNB,579.599976,578.599976,579.799988,577.299988,8.490010e+03,
2024-07-02 07:00:00,BTC,62866.011719,62629.988281,62880.000000,62580.000000,1.022416e+03,
2024-07-02 07:00:00,DOGE,0.123390,0.123000,0.123390,0.122850,1.398119e+07,
2024-07-02 07:00:00,ETH,3448.719971,3439.000000,3448.719971,3434.169922,9.038815e+03,
2024-07-02 07:00:00,SOL,147.759995,147.429993,147.860001,146.979996,1.116679e+05,
...,...,...,...,...,...,...,...
2024-07-09 00:00:00,BTC,56714.609375,56531.050781,56765.851562,56289.449219,7.773062e+02,
2024-07-09 00:00:00,DOGE,0.107570,0.106720,0.107590,0.106340,2.647937e+07,
2024-07-09 00:00:00,ETH,3019.010010,3023.830078,3035.629883,3004.000000,7.826203e+03,
2024-07-09 00:00:00,SOL,139.809998,138.250000,139.839996,137.899994,1.196869e+05,


In [58]:
df = df.stack().unstack(level=1)

In [71]:
df.head(15)

Unnamed: 0_level_0,instrument,BNB,BTC,DOGE,ETH,SOL,XRP
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-07-02 07:00:00,$open,579.599976,62866.011719,0.12339,3448.719971,147.759995,0.4796
2024-07-02 07:00:00,$close,578.599976,62629.988281,0.123,3439.0,147.429993,0.4786
2024-07-02 07:00:00,$high,579.799988,62880.0,0.12339,3448.719971,147.860001,0.481
2024-07-02 07:00:00,$low,577.299988,62580.0,0.12285,3434.169922,146.979996,0.4781
2024-07-02 07:00:00,$volume,8490.009766,1022.41571,13981190.0,9038.81543,111667.882812,26678740.0
2024-07-02 08:00:00,$open,578.599976,62630.0,0.12299,3439.01001,147.429993,0.4786
2024-07-02 08:00:00,$close,579.400024,62670.011719,0.12322,3441.709961,148.0,0.482
2024-07-02 08:00:00,$high,580.200012,62746.550781,0.12351,3447.0,148.399994,0.4834
2024-07-02 08:00:00,$low,577.299988,62401.230469,0.12278,3431.600098,146.880005,0.4781
2024-07-02 08:00:00,$volume,6634.839844,1141.203613,13184370.0,7711.488281,77030.523438,23406440.0


In [69]:
values = df.values

In [70]:
values

array([[5.7959998e+02, 6.2866012e+04, 1.2339000e-01, 3.4487200e+03,
        1.4775999e+02, 4.7960001e-01],
       [5.7859998e+02, 6.2629988e+04, 1.2300000e-01, 3.4390000e+03,
        1.4742999e+02, 4.7860000e-01],
       [5.7979999e+02, 6.2880000e+04, 1.2339000e-01, 3.4487200e+03,
        1.4786000e+02, 4.8100001e-01],
       ...,
       [5.1100000e+02, 5.6765852e+04, 1.0759000e-01, 3.0356299e+03,
        1.3984000e+02, 4.3160000e-01],
       [5.0720001e+02, 5.6289449e+04, 1.0634000e-01, 3.0040000e+03,
        1.3789999e+02, 4.2870000e-01],
       [6.0993921e+03, 7.7730621e+02, 2.6479372e+07, 7.8262031e+03,
        1.1968687e+05, 1.4320736e+07]], dtype=float32)

In [72]:
values = values.reshape((-1, 5, values.shape[-1]))

In [73]:
len(values)

162

In [68]:
810 / 36

22.5

In [194]:

cal: np.ndarray = D.calendar(freq='60min')
start_index = cal.searchsorted(pd.Timestamp(self._start_time))  # type: ignore
end_index = cal.searchsorted(pd.Timestamp(self._end_time))  # type: ignore
real_start_time = cal[start_index - self.max_backtrack_days]
if cal[end_index] != pd.Timestamp(self._end_time):
    end_index -= 1
real_end_time = cal[end_index + self.max_future_days]

66

In [31]:
data = StockData(instrument='all',
                      start_time='2024-07-06 07:00:00',
                      end_time='2024-07-09 00:00:00', 
                      max_future_days = 0,device=device)

In [33]:
y

DatetimeIndex(['2024-07-02 07:00:00', '2024-07-02 08:00:00',
               '2024-07-02 09:00:00', '2024-07-02 10:00:00',
               '2024-07-02 11:00:00', '2024-07-02 12:00:00',
               '2024-07-02 13:00:00', '2024-07-02 14:00:00',
               '2024-07-02 15:00:00', '2024-07-02 16:00:00',
               ...
               '2024-07-08 15:00:00', '2024-07-08 16:00:00',
               '2024-07-08 17:00:00', '2024-07-08 18:00:00',
               '2024-07-08 19:00:00', '2024-07-08 20:00:00',
               '2024-07-08 21:00:00', '2024-07-08 22:00:00',
               '2024-07-08 23:00:00', '2024-07-09 00:00:00'],
              dtype='datetime64[ns]', name='datetime', length=162, freq=None)

In [32]:
x, y, z = data._get_data()

In [14]:
x.shape[0]

135

In [11]:
6 * 24

144

In [154]:
from typing import List, Optional, Tuple
from torch import Tensor
import torch
from alphagen.data.calculator import AlphaCalculator
from alphagen.data.expression import Expression
from alphagen.utils.correlation import batch_pearsonr, batch_spearmanr
from alphagen.utils.pytorch_utils import normalize_by_day
from alphagen_qlib.stock_data import StockData

In [155]:
def _calc_alpha(self, expr: Expression) -> Tensor:
    return normalize_by_day(expr.evaluate(self.data))

In [168]:
exprs[1]

Mul(Constant(-0.01),Mul($high,$vwap))

In [161]:
len(exprs[1].evaluate(data))

99

In [196]:
len(exprs[1].evaluate(data))

39

In [9]:
135 - 96

39

In [8]:
data.n_days

(135, 39)

In [175]:
exprs, weights = load_alpha_pool_by_path(POOL_PATH)

In [177]:
calculator = QLibStockDataCalculator(data=data, target=None)

In [178]:
ensemble_alpha = calculator.make_ensemble_alpha(exprs, weights)

RuntimeError: Trying to create tensor with negative dimension -6: [-6, 6]

In [145]:
ensemble_alpha

tensor([[-5.1235e-01, -5.6124e-01,  2.1454e+00, -5.4018e-01, -5.0260e-01,
         -2.9005e-02],
        [-3.4501e-02, -1.1241e-01,  5.2527e-03,  1.2860e-01,  3.0895e-02,
         -1.7837e-02],
        [-4.5464e-01,  2.1034e+00, -4.4337e-01, -4.1368e-01, -4.6005e-01,
         -3.3163e-01],
        [-4.7707e-02, -1.4693e-01,  5.3554e-03,  1.9748e-01,  2.1496e-02,
         -2.9694e-02],
        [-1.0370e-02,  3.4857e-01, -2.2415e-01, -4.2439e-02, -4.8880e-02,
         -2.2737e-02],
        [-5.0416e-01, -5.5363e-01,  2.1653e+00, -5.3161e-01, -4.9546e-01,
         -8.0471e-02],
        [-2.9145e-02, -7.3464e-02,  4.8661e-02,  6.9897e-02,  4.6493e-03,
         -2.0597e-02],
        [-3.6975e-01,  2.1864e+00, -7.5323e-02, -3.2783e-01, -3.7709e-01,
         -1.0364e+00],
        [-4.4618e-02, -8.9829e-02,  1.9016e-02,  6.9157e-02,  3.0421e-02,
          1.5852e-02],
        [-2.1795e-02,  3.4021e-01, -2.5312e-01, -4.7209e-02, -5.7502e-02,
          3.9409e-02],
        [-5.0379e-01, -5.5487e

In [146]:
ensemble_alpha.shape

torch.Size([99, 6])

In [131]:
columns = None
if isinstance(ensemble_alpha, list):
    ensemble_alpha = torch.stack(ensemble_alpha, dim=2)
if len(ensemble_alpha.shape) == 2:
    ensemble_alpha = ensemble_alpha.unsqueeze(2)
if columns is None:
    columns = [str(i) for i in range(ensemble_alpha.shape[2])]
n_days, n_stocks, n_columns = ensemble_alpha.shape

In [136]:
ensemble_alpha

tensor([[[-5.1235e-01],
         [-5.6124e-01],
         [ 2.1454e+00],
         [-5.4018e-01],
         [-5.0260e-01],
         [-2.9005e-02]],

        [[-3.4501e-02],
         [-1.1241e-01],
         [ 5.2527e-03],
         [ 1.2860e-01],
         [ 3.0895e-02],
         [-1.7837e-02]],

        [[-4.5464e-01],
         [ 2.1034e+00],
         [-4.4337e-01],
         [-4.1368e-01],
         [-4.6005e-01],
         [-3.3163e-01]],

        [[-4.7707e-02],
         [-1.4693e-01],
         [ 5.3554e-03],
         [ 1.9748e-01],
         [ 2.1496e-02],
         [-2.9694e-02]],

        [[-1.0370e-02],
         [ 3.4857e-01],
         [-2.2415e-01],
         [-4.2439e-02],
         [-4.8880e-02],
         [-2.2737e-02]],

        [[-5.0416e-01],
         [-5.5363e-01],
         [ 2.1653e+00],
         [-5.3161e-01],
         [-4.9546e-01],
         [-8.0471e-02]],

        [[-2.9145e-02],
         [-7.3464e-02],
         [ 4.8661e-02],
         [ 6.9897e-02],
         [ 4.6493e-03],
    

In [148]:
ensemble_alpha = ensemble_alpha.reshape(-1, n_columns)

In [150]:
len(ensemble_alpha)

594

In [151]:
ensemble_alpha.detach().cpu().numpy()

array([[-5.12352705e-01],
       [-5.61243296e-01],
       [ 2.14538074e+00],
       [-5.40176570e-01],
       [-5.02603292e-01],
       [-2.90048588e-02],
       [-3.45005058e-02],
       [-1.12410352e-01],
       [ 5.25267422e-03],
       [ 1.28601134e-01],
       [ 3.08945030e-02],
       [-1.78372674e-02],
       [-4.54636246e-01],
       [ 2.10336113e+00],
       [-4.43368256e-01],
       [-4.13682491e-01],
       [-4.60047513e-01],
       [-3.31626534e-01],
       [-4.77068722e-02],
       [-1.46928862e-01],
       [ 5.35535812e-03],
       [ 1.97478831e-01],
       [ 2.14955024e-02],
       [-2.96937954e-02],
       [-1.03695840e-02],
       [ 3.48571509e-01],
       [-2.24146023e-01],
       [-4.24389318e-02],
       [-4.88796458e-02],
       [-2.27371808e-02],
       [-5.04164398e-01],
       [-5.53631425e-01],
       [ 2.16534281e+00],
       [-5.31613231e-01],
       [-4.95462775e-01],
       [-8.04708153e-02],
       [-2.91454010e-02],
       [-7.34641030e-02],
       [ 4.8

In [147]:
if max_future_days == 0:
    date_index = self._dates[self.max_backtrack_days:]
index = pd.MultiIndex.from_product([date_index, self._stock_ids])
data = data.reshape(-1, n_columns)

NameError: name 'self' is not defined

In [127]:
pd.DataFrame(ensemble_alpha.detach().cpu().numpy(), index=index, columns=columns)

NameError: name 'index' is not defined

In [110]:
df = data.make_dataframe(ensemble_alpha)

ValueError: Shape of passed values is (594, 1), indices imply (828, 1)

In [111]:
828 - 594

234

In [88]:
ensemble_alpha

tensor([[-0.4769, -0.5343,  2.1920, -0.5030, -0.4682, -0.2095],
        [-0.0528, -0.0985,  0.0304,  0.0304,  0.1130, -0.0225],
        [-0.3948,  2.1580, -0.3673, -0.3512, -0.4071, -0.6377],
        ...,
        [ 0.0075,  0.3769, -0.1996, -0.0593, -0.0163, -0.1093],
        [-0.4813, -0.5304,  2.1837, -0.5578, -0.4664, -0.1477],
        [-0.0328, -0.0645,  0.0479,  0.0424,  0.1204, -0.1134]])

In [89]:
data_test._get_data()

(tensor([[[3.1700e+02, 4.2140e+04, 9.0000e-02, 2.2917e+03, 1.0190e+02,
           6.2070e-01],
          [3.1780e+02, 4.2368e+04, 9.0380e-02, 2.3010e+03, 1.0257e+02,
           6.2260e-01],
          [3.1920e+02, 4.2430e+04, 9.0500e-02, 2.3046e+03, 1.0291e+02,
           6.2290e-01],
          [3.1640e+02, 4.2140e+04, 8.9960e-02, 2.2910e+03, 1.0175e+02,
           6.2000e-01],
          [2.0218e+04, 9.7778e+02, 1.0814e+07, 6.5715e+03, 1.0352e+05,
           6.1588e+06],
          [3.1780e+02, 4.2368e+04, 9.0390e-02, 2.3010e+03, 1.0259e+02,
           6.2260e-01]],
 
         [[3.1780e+02, 4.2171e+04, 8.9930e-02, 2.2898e+03, 1.0130e+02,
           6.2140e-01],
          [3.1900e+02, 4.2433e+04, 9.0490e-02, 2.3034e+03, 1.0293e+02,
           6.2340e-01],
          [3.1700e+02, 4.2157e+04, 8.9790e-02, 2.2896e+03, 1.0111e+02,
           6.2120e-01],
          [1.5944e+04, 7.1070e+02, 1.3889e+07, 7.0175e+03, 1.3826e+05,
           3.7779e+06],
          [3.1780e+02, 4.2171e+04, 8.9930e-02, 

In [90]:
import datetime
import json
from typing import List, Tuple
from alphagen.data.expression import *
from alphagen_generic.features import *

from alphagen_qlib.stock_data import StockData

In [114]:
d_list = [1, 2, 3, 4, 5, 6, 7]

In [117]:
len(d_list)

7

In [121]:
q_list = [d_list[i] + 1 for i in range(len(d_list))]

In [124]:
q_list

[2, 3, 4, 5, 6, 7, 8]

In [122]:
sum(q_list)

35

In [91]:
today = datetime.date.today().strftime('%Y-%m-%d %H:%M:%S')

In [54]:
today

'2024-07-15 00:00:00'

In [28]:
import pandas as pd

In [38]:
pd.date_range(start='1/1/2018', end='1/2/2024', freq='M')

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30', '2018-10-31', '2018-11-30', '2018-12-31',
               '2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31',
               '2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30',
               '2022-05-31', '2022-06-30', '2022-07-31', '2022-08-31',
      

In [None]:
'2006-01-31', '2006-02-28', '2006-03-31', '2006-04-30', '2006-05-31', '2006-06-30', '2006-07-31', '2006-08-31', '2006-09-30', '2006-10-31', '2006-11-30', '2006-12-31', '2007-01-31', 
'2007-02-28', '2007-03-31', '2006-01-31', '2006-02-28', '2006-03-31', '2006-04-30', '2006-05-31', '2006-06-30', '2006-07-31', '2006-08-31', '2006-09-30', '2006-10-31', '2006-11-30', '2006-12-31' 