In [1]:
import dolphindb as ddb
import torch
import torch.nn as nn 
from net import SimpleNet, MultiChannNet
from tqdm import tqdm
import os
from dolphindb_tools.dataloader import DDBDataLoader
import numpy as np

In [2]:
def constructDataLoader():
    """Build a ``DDBDataLoader`` over the wide factor test table.

    Opens a DolphinDB session, reads the distinct ``security_code`` and
    ``trade_time`` values of ``wide_factor_table_test`` in
    ``dfs://ai_dataloader``, and uses them as the group and repartition
    schemes of the loader.

    Returns:
        DDBDataLoader: a shuffled loader (``seed=0``) yielding batches of 64
        with ``label`` as the target column, placed on the ``cuda`` device.
    """
    # NOTE(review): host, port and credentials are hard-coded here; consider
    # moving them to configuration or environment variables.
    s = ddb.session()
    s.connect("127.0.0.1", 11281, "admin", "123456")

    # Load the database table.
    dbPath = "dfs://ai_dataloader"
    tbName = "wide_factor_table_test"
    t1 = s.loadTable(tableName=tbName, dbPath=dbPath)

    # One group per security code. The backtick prefix is presumably the
    # DolphinDB symbol-literal form expected by groupScheme -- TODO confirm.
    symbols = t1.exec(['distinct security_code']).toList().tolist()
    symbols = ["`" + i for i in symbols]

    # One repartition per calendar day. De-duplicate and sort the formatted
    # dates: a bare set() has no stable order between interpreter runs, which
    # would make the partition order non-reproducible despite the fixed seed.
    # Zero-padded "%Y.%m.%d" strings sort chronologically.
    times = t1.exec(['distinct trade_time']).toList().tolist()
    times = sorted({i.strftime("%Y.%m.%d") for i in times})

    sql = f"""select * from loadTable("{dbPath}", "{tbName}")"""

    dataloader = DDBDataLoader(
        s, sql, targetCol=["label"], batchSize=64, shuffle=True,
        windowSize=[1, 1], windowStride=[1, 1],
        repartitionCol="date(trade_time)", repartitionScheme=times,
        groupCol="security_code", groupScheme=symbols,
        seed=0, offset=0,
        excludeCol=["label"], device="cuda",
        prefetchBatch=5, prepartitionNum=3
    )
    return dataloader

In [6]:

# Create a random three-dimensional tensor
# tensor = torch.randn(2, 3, 4)

# # Apply batch normalization over the middle dimension
# batch_norm = nn.BatchNorm1d(4)
# # output = batch_norm(tensor)
# output = batch_norm(tensor.transpose(1, 2)).transpose(1, 2)

# 2-D variant: a (3, 2) input, i.e. 3 samples with 2 features each, passed
# through BatchNorm1d(2) so that each feature is normalized across the batch.
tensor = torch.randn(3, 2)
batch_norm = nn.BatchNorm1d(2)
output = batch_norm(tensor)

# Print the tensor before and after batch normalization
# (the labels read "tensor before/after batch normalization").
print("批归一化前的张量：", tensor)
print("批归一化后的张量：", output)

批归一化前的张量： tensor([[-0.5726,  0.5194],
        [ 1.0865, -1.8410],
        [-1.3326, -0.2792]])
批归一化后的张量： tensor([[-0.2967,  1.0742],
        [ 1.3458, -1.3337],
        [-1.0491,  0.2596]], grad_fn=<NativeBatchNormBackward0>)


In [8]:
# Mean of the first feature of the normalized output: transposing puts the
# feature dimension first, so [0] selects that feature across the whole batch.
# After batch normalization this should be close to 0.
output.transpose(0, 1)[0].detach().flatten().numpy().mean()

3.973643e-08

In [41]:
# Standard deviation of one slice taken from the first sample of `output`.
# NOTE(review): output[0].transpose(0, 1) needs output[0] to have at least two
# dimensions, i.e. a 3-D `output` as in the commented-out 3-D experiment. With
# the 2-D `output` that the live batch-norm cell creates, this looks like it
# would raise a dimension error -- confirm which variant this cell targets.
output[0].transpose(0, 1)[0].detach().numpy().std()

0.8689510339124705

In [3]:
def get_model(model_name, model_pth_file):
    """Create the requested network, load its saved weights and move it to the GPU.

    Args:
        model_name: ``"simple_model"`` (``SimpleNet`` with ``features_in=60``)
            or ``"multichann_model"`` (``MultiChannNet`` with ``features_in=6``).
        model_pth_file: path to a state-dict file readable by ``torch.load``.

    Returns:
        The model with the loaded parameters, placed on the ``cuda`` device.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    if model_name == "simple_model":
        model = SimpleNet(features_in=60)
    elif model_name == "multichann_model":
        model = MultiChannNet(features_in=6)
    else:
        # Fail early with a clear message instead of an UnboundLocalError on
        # `model` further down.
        raise ValueError(
            f"unknown model_name {model_name!r}; "
            "expected 'simple_model' or 'multichann_model'"
        )
    model.load_state_dict(torch.load(model_pth_file))
    model = model.to("cuda")
    return model

In [4]:
def apply_and_save(model, factor_name):
    """Run ``model`` over the whole test table and store the predictions as a factor.

    The table ``wide_factor_table_test`` in ``dfs://ai_dataloader`` is loaded
    into a DataFrame and processed in batches of 64 rows. For each batch the
    predictions are appended to ``min_factor`` in ``dfs://MIN_FACTOR_TSDB``
    together with the first two columns of the source rows.

    Args:
        model: network called on a tensor reshaped to ``(-1, 1, 60)`` on the
            ``cuda`` device; it is switched to eval mode here.
        factor_name: value written to the ``factor_code`` column of every row.

    Returns:
        None. The results are written to the DolphinDB table.
    """
    # NOTE(review): host, port and credentials are hard-coded here; consider
    # moving them to configuration or environment variables.
    s = ddb.session()
    s.connect("127.0.0.1", 11281, "admin", "123456")
    try:
        # Load the database table.
        dbPath = "dfs://ai_dataloader"
        tbName = "wide_factor_table_test"
        df = s.loadTable(tableName=tbName,dbPath=dbPath).toDF()

        dbPath_write = "dfs://MIN_FACTOR_TSDB"
        tbName_write = "min_factor"
        batchSize = 64
        # The insert script does not change between batches, so build it once.
        insert_script = "tableInsert{{loadTable('{db}', `{tb})}}".format(db=dbPath_write,tb=tbName_write)

        model.eval()
        with torch.no_grad():
            for li in tqdm(range(0, df.shape[0], batchSize), mininterval=1):
                ri = min(li+batchSize, df.shape[0])

                # Apply the model to the feature columns.
                # Assumes the first two columns are identifiers and the last
                # column is the label, leaving 60 features -- TODO confirm
                # against the table schema.
                x_df = df.iloc[li:ri, 2:-1]
                x = torch.tensor(x_df.values).reshape(-1, 1, 60)
                x = x.to("cuda")
                y_pred = model(x)

                # Save factor values into the table. `.assign` returns a new
                # frame, so no column is ever set on a slice of `df`
                # (avoids pandas' SettingWithCopy hazard).
                basicInfo_df = df.iloc[li:ri, :2].assign(
                    factor_code=factor_name,
                    value=y_pred.flatten().cpu().detach().numpy(),
                )
                s.run(insert_script, basicInfo_df)
    finally:
        # Release the server connection even if inference or an insert fails.
        s.close()
        
        

In [91]:
# Load the saved parameters and create the model object.
# float64 is made the default floating dtype before the model is built,
# presumably so its parameters match the float64 inputs created from the
# DataFrame -- TODO confirm. torch.set_default_dtype replaces the deprecated
# torch.set_default_tensor_type(torch.DoubleTensor).
torch.set_default_dtype(torch.float64)
model_name = "simple_model"
model_pth_file = f"./models/{model_name}_old.pth"
model = get_model(model_name, model_pth_file)

# # get a dataloader
# dataloader = constructDataLoader()

# Score the test table and store the predictions under the model's name.
apply_and_save(model, factor_name=model_name)

100%|██████████| 9237/9237 [06:11<00:00, 24.87it/s]  


In [5]:
# Load the saved parameters and create the model object.
# float64 is made the default floating dtype before the model is built,
# presumably so its parameters match the float64 inputs created from the
# DataFrame -- TODO confirm. torch.set_default_dtype replaces the deprecated
# torch.set_default_tensor_type(torch.DoubleTensor).
torch.set_default_dtype(torch.float64)
model_name = "multichann_model"
model_pth_file = f"./models/{model_name}_old.pth"
model = get_model(model_name, model_pth_file)

# # get a dataloader
# dataloader = constructDataLoader()

# Score the test table and store the predictions under the model's name.
apply_and_save(model, factor_name=model_name)

100%|██████████| 9237/9237 [06:29<00:00, 23.70it/s]


In [23]:
# Build the loader inside this cell: no other live cell assigns `dataloader`
# (the earlier constructDataLoader() calls are commented out), so on a fresh
# kernel the loop below would otherwise fail with a NameError.
dataloader = constructDataLoader()

# Run inference over every batch and collect the raw predictions.
model.eval()
y_pred_list = []
with torch.no_grad():
    for x, y in tqdm(dataloader, mininterval=1):
        x = x.to("cuda")
        y = y.to("cuda")
        y_pred = model(x)
        y_pred_list.append(y_pred)

4762it [02:26, 32.85it/s]Exception ignored in: <function DataManager.__del__ at 0x7ff2b566d4d0>
Traceback (most recent call last):
  File "/home/wangzirui/miniconda3/envs/myenv/lib/python3.7/site-packages/dolphindb_tools/dataloader/datamanager.py", line 56, in __del__
    self.exit()
  File "/home/wangzirui/miniconda3/envs/myenv/lib/python3.7/site-packages/dolphindb_tools/dataloader/datamanager.py", line 63, in exit
    self.join()
  File "/home/wangzirui/miniconda3/envs/myenv/lib/python3.7/site-packages/dolphindb_tools/dataloader/datamanager.py", line 51, in join
    self.back_thread.join()
  File "/home/wangzirui/miniconda3/envs/myenv/lib/python3.7/threading.py", line 1041, in join
    raise RuntimeError("cannot join current thread")
RuntimeError: cannot join current thread
8749it [04:31, 31.84it/s]Exception ignored in: <function DataManager.__del__ at 0x7ff2b566d4d0>
Traceback (most recent call last):
  File "/home/wangzirui/miniconda3/envs/myenv/lib/python3.7/site-packages/dolphind

KeyboardInterrupt: 

In [32]:
# Sanity check: concatenate the first two prediction batches along dim 0 and
# inspect the resulting shape.
torch.cat(y_pred_list[:2]).shape

torch.Size([128, 1, 1])