In [1]:
# Run configuration read by the later cells of this notebook.
use_gpu = True  # selects 'cuda' vs 'cpu' as the device and enables the GPU setup in the imports cell
use_ramdon_split = False  # NOTE(review): "ramdon" is a misspelling of "random"; the split cell reads this exact name, so rename both together
use_dataparallel = True  # together with use_gpu, wraps the model in nn.DataParallel before training

# Model selection: "baseline" | "baseline_large" | "vit"
model_type = "baseline"

In [2]:
import os
import sys
sys.path.insert(0, '..')

if use_gpu:
    # Restrict the visible GPUs before torch (imported further down in this cell) initialises CUDA.
    visible_devices = "0,1,2,3,4,5,6,7"
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices
    # Global batch size: 128 samples for each listed device.
    batch_size = 128 * len(visible_devices.split(","))
else:
    # CPU run: the DataLoader cell still needs batch_size; without this branch it
    # would fail with NameError. Use the per-device size as the batch size.
    batch_size = 128
import time
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split



# Seed torch's global RNG so runs are repeatable.
torch.manual_seed(42)

# Image width/height lookup tables keyed by 5, 20 or 60 — presumably the look-back
# window in days (TODO confirm); the data-loading cell uses key 20 for the
# "monthly_20d" files and reshapes each image to (IMAGE_HEIGHT[20], IMAGE_WIDTH[20]).
IMAGE_WIDTH = {5: 15, 20: 60, 60: 180}
IMAGE_HEIGHT = {5: 32, 20: 64, 60: 96}  

## load data

Here we use the 1993–2000 data (the code below loads years 1993 through 2000 inclusive, because `np.arange(1993, 2001)` excludes 2001) for training and validation; the remaining years are reserved for testing.

In [3]:
# Years to load. np.arange excludes the stop value, so this is 1993..2000 inclusive.
year_list = np.arange(1993,2001,1)

data_dir = "../data/monthly_20d"
# Each yearly file is a flat uint8 buffer viewed as (N, height, width).
image_shape = (-1, IMAGE_HEIGHT[20], IMAGE_WIDTH[20])

images = []
label_df = []
for year in year_list:
    file_prefix = f"20d_month_has_vb_[20]_ma_{year}"
    # Memory-map read-only; the data is only materialised by np.concatenate below.
    images.append(
        np.memmap(os.path.join(data_dir, f"{file_prefix}_images.dat"), dtype=np.uint8, mode='r').reshape(image_shape)
    )
    label_df.append(pd.read_feather(os.path.join(data_dir, f"{file_prefix}_labels_w_delay.feather")))

images = np.concatenate(images)
label_df = pd.concat(label_df)

# Images and labels are paired purely by row position in the split cell, so a
# count mismatch would misalign every sample after the first bad file.
if len(images) != len(label_df):
    raise ValueError(
        "image/label count mismatch: {} images vs {} label rows".format(len(images), len(label_df))
    )

print(images.shape)
print(label_df.shape)

(793019, 64, 60)
(793019, 8)


## build dataset

In [4]:
class MyDataset(Dataset):
    """In-memory dataset that pairs each image with its label by position.

    Args:
        img: Array of images indexed along its first axis. It is copied and
            converted with ``torch.Tensor`` (i.e. to torch's default float dtype).
        label: Array of labels with the same length as ``img``; also converted
            with ``torch.Tensor``.

    Raises:
        ValueError: If ``img`` and ``label`` do not have the same length.
    """

    def __init__(self, img, label):
        # Fail early: a mismatch would otherwise surface as an IndexError deep
        # inside a DataLoader iteration, or go unnoticed if labels are longer.
        if len(img) != len(label):
            raise ValueError(
                "img and label must have the same length, got {} and {}".format(len(img), len(label))
            )
        # .copy() gives torch its own buffer instead of the caller's array
        # (the notebook passes slices of a concatenated memmap-backed array).
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)
        # Public attribute: the random-split branch of the notebook reads dataset.len.
        self.len = len(img)

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return self.len

    def __getitem__(self, idx):
        """Return the ``(image, label)`` tensors stored at position ``idx``."""
        return self.img[idx], self.label[idx]

Split method (a sequential, non-random split is recommended)

In [5]:
# Fraction of samples used for training; the remainder is used for validation.
train_val_ratio = 0.7
# Binary target: True where the Ret_20d column is positive.
labels = (label_df.Ret_20d > 0).values

if not use_ramdon_split:
    # Sequential split in load order: the first train_val_ratio of the rows train,
    # the rest validate. The ratio variable is used here instead of a second
    # hard-coded 0.7 so both branches always agree.
    split_idx = int(images.shape[0] * train_val_ratio)
    train_dataset = MyDataset(images[:split_idx], labels[:split_idx])
    val_dataset = MyDataset(images[split_idx:], labels[split_idx:])
else:
    dataset = MyDataset(images, labels)
    n_train = int(dataset.len * train_val_ratio)
    # Dedicated generator so the random partition is reproducible on its own.
    train_dataset, val_dataset = random_split(
        dataset,
        [n_train, dataset.len - n_train],
        generator=torch.Generator().manual_seed(42),
    )
    del dataset

# Only the training loader shuffles; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

## models

In [6]:
def init_weights(m):
    """Initialise one module in place; intended for use with ``net.apply``.

    ``nn.Linear`` layers get Xavier-uniform weights and a zero bias, and
    ``nn.Conv2d`` layers get Xavier-uniform weights with the bias left
    untouched. Every other module type is ignored.

    Args:
        m: The module visited by ``nn.Module.apply``.
    """
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        # Linear(bias=False) stores bias as None; skip it instead of raising
        # AttributeError.
        if m.bias is not None:
            m.bias.data.fill_(0.)
    elif isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)

In [7]:
from models import baseline, baseline_large, vit

# 'cuda' addresses the first GPU visible through CUDA_VISIBLE_DEVICES (set in the imports cell);
# visible devices are renumbered from cuda:0 in the order they are listed there.
# NOTE(review): the previous comment here described CUDA_VISIBLE_DEVICES="6,7", which does not
# match the "0,1,2,3,4,5,6,7" value this notebook actually sets.
device = 'cuda' if use_gpu else 'cpu'
export_onnx = True

# Pick the model class and the ONNX output path according to model_type.
if model_type == "baseline":
    net = baseline.Net().to(device)
    onnx_path = "../cnn_baseline.onnx"
elif model_type == "baseline_large":
    net = baseline_large.Net().to(device)
    onnx_path = "../cnn_baseline_large.onnx"
elif model_type == "vit":
    net = vit.Net().to(device)
    onnx_path = "../vit.onnx"
else:
    raise ValueError(f"Unknown model_type: {model_type}")

print(f"Using model: {model_type}")
# Re-initialise weights via init_weights (defined in the previous cell) on every submodule.
net.apply(init_weights)

if export_onnx:
    import torch.onnx
    # Dummy input of shape (batch=1, channels=1, 64, 60) used only to trace the graph.
    x = torch.randn([1,1,64,60]).to(device)
    torch.onnx.export(net,               # model being run
                      x,                         # model input (or a tuple for multiple inputs)
                      onnx_path,                 # where to save the model (can be a file or file-like object)
                      export_params=False,        # False: parameter weights are NOT stored in the file (the model is still untrained here)
                      opset_version=10,          # the ONNX version to export the model to
                      do_constant_folding=False,  # whether to execute constant folding for optimization
                      input_names = ['input_images'],   # the model's input names
                      output_names = ['output_prob'], # the model's output names
                      dynamic_axes={'input_images' : {0 : 'batch_size'},    # variable length axes
                                     'output_prob' : {0 : 'batch_size'}})


Using model: baseline


### Profiling

In [8]:
# List every named parameter with its shape, then report the total element count.
named_params = list(net.named_parameters())
for name, parameters in named_params:
    print(name, ':', parameters.size())
count = sum(tensor.numel() for _, tensor in named_params)
print('total_parameters : {}'.format(count))

layer1.0.weight : torch.Size([64, 1, 5, 3])
layer1.0.bias : torch.Size([64])
layer1.1.weight : torch.Size([64])
layer1.1.bias : torch.Size([64])
layer2.0.weight : torch.Size([128, 64, 5, 3])
layer2.0.bias : torch.Size([128])
layer2.1.weight : torch.Size([128])
layer2.1.bias : torch.Size([128])
layer3.0.weight : torch.Size([256, 128, 5, 3])
layer3.0.bias : torch.Size([256])
layer3.1.weight : torch.Size([256])
layer3.1.bias : torch.Size([256])
fc1.1.weight : torch.Size([2, 46080])
fc1.1.bias : torch.Size([2])
total_parameters : 708866


In [9]:
from thop import profile as thop_profile

# The model is profiled on one full training batch, so the operation count printed
# below is for a whole batch rather than a single image.
profile_batch = next(iter(train_dataloader))[0].to(device)
flops, params = thop_profile(net, inputs=(profile_batch,))
for label, value, unit in (('FLOPs', flops/1000**3, 'G'), ('Params', params/1000**2, 'M')):
    print(label + ' = ' + str(value) + unit)

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
FLOPs = 289.75693824G
Params = 0.708866M


In [10]:
from torch.profiler import profile, record_function, ProfilerActivity

# One batch of training images, moved to the model's device, is the profiled workload.
inputs = next(iter(train_dataloader))[0].to(device)

# Trace CPU and CUDA activity for a single forward pass, labelled "model_inference".
traced_activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
with profile(activities=traced_activities, record_shapes=True) as prof, record_function("model_inference"):
    net(inputs)

# Save the Chrome trace, then print the ten entries with the largest total CUDA time.
prof.export_chrome_trace("../trace.json")
summary_table = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)
print(summary_table)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.00%       0.000us         0.00%       0.000us       0.000us      21.841ms       258.62%      21.841ms      21.841ms             1  
                                        model_inference         2.83%     653.708us        99.96%      23.098ms      23.098ms       0.000us         0.00%      12.413ms      12.413ms             1  
         

## train

In [11]:
def train_loop(dataloader, net, loss_fn, optimizer, tb, global_step):
    """Train ``net`` for one pass over ``dataloader``.

    Batches are moved to the module-level ``device``. The loss of every step
    whose global index is a multiple of 100 is written to TensorBoard under
    ``Loss/train_step``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        net: Model to train; switched to training mode.
        loss_fn: Called as ``loss_fn(net(X), y.long())``.
        optimizer: Optimizer stepped once per batch.
        tb: TensorBoard SummaryWriter.
        global_step: Global step count on entry (used for step-level loss logging).

    Returns:
        running_loss: Sample-weighted mean loss over the epoch.
        global_step: The global step count after this epoch.
    """
    net.train()
    mean_loss = 0.0
    seen = 0

    with tqdm(dataloader) as progress:
        for X, y in progress:
            X, y = X.to(device), y.to(device)
            loss = loss_fn(net(X), y.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()

            # Log the loss once every 100 steps.
            if global_step % 100 == 0:
                tb.add_scalar("Loss/train_step", batch_loss, global_step)
            global_step += 1

            # Incremental weighted mean, so a smaller final batch counts proportionally.
            n_batch = len(X)
            mean_loss = (n_batch * batch_loss + mean_loss * seen) / (n_batch + seen)
            seen += n_batch
            progress.set_postfix({'running_loss': mean_loss})

    return mean_loss, global_step

In [12]:
def val_loop(dataloader, net, loss_fn):
    """Evaluate ``net`` on ``dataloader`` without tracking gradients.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        net: Model to evaluate; switched to eval mode.
        loss_fn: Called as ``loss_fn(net(X), y.long())``.

    Returns:
        The sample-weighted mean loss over all batches (0.0 if there are none).
    """
    net.eval()
    mean_loss = 0.0
    seen = 0

    with torch.no_grad(), tqdm(dataloader) as progress:
        for X, y in progress:
            X, y = X.to(device), y.to(device)
            batch_loss = loss_fn(net(X), y.long()).item()

            # Same incremental weighted mean as train_loop.
            n_batch = len(X)
            mean_loss = (n_batch * batch_loss + mean_loss * seen) / (n_batch + seen)
            seen += n_batch

    return mean_loss

In [13]:
# net = torch.load('/home/clidg/proj_2/pt/baseline_epoch_10_train_0.6865865240322523_eval_0.686580_.pt')

In [14]:
# Wrap the model for multi-GPU data parallelism at most once, so re-running this
# cell does not nest wrappers. DistributedDataParallel is included because the
# previous string-based check ('DataParallel' in the type name) matched it too.
already_parallel = isinstance(net, (nn.DataParallel, nn.parallel.DistributedDataParallel))
if use_gpu and use_dataparallel and not already_parallel:
    net = net.to(device)
    # device_ids are indices into the GPUs visible through CUDA_VISIBLE_DEVICES.
    # Enumerate whatever is actually visible instead of assuming exactly eight
    # devices, so changing the visible list does not require editing this cell.
    net = nn.DataParallel(net, device_ids=list(range(torch.cuda.device_count())))

In [15]:
from torch.utils.tensorboard import SummaryWriter

# NOTE(review): the ONNX export names the model output 'output_prob' and the thop
# log registers a Softmax layer, which suggests the model already applies softmax.
# nn.CrossEntropyLoss expects unnormalised logits — verify against the model definition.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)
tb = SummaryWriter()

# Epoch range start and early-stopping bookkeeping used by the training cell.
start_epoch, min_val_loss, last_min_ind = 0, 1e9, -1
early_stopping_epoch = 10

In [16]:
start_time = datetime.datetime.now().strftime('%Y%m%d_%H:%M:%S')
checkpoint_dir = os.path.join('../pt', start_time)
# makedirs also creates ../pt itself when it is missing, where os.mkdir would
# raise FileNotFoundError.
os.makedirs(checkpoint_dir, exist_ok=True)
epochs = 100
global_step = 0  # global step counter, carried across epochs for step-level logging

try:
    for t in range(start_epoch, epochs):
        print(f"Epoch {t}\n-------------------------------")
        time.sleep(0.2)  # presumably lets the header print before the tqdm bar is drawn — TODO confirm
        train_loss, global_step = train_loop(train_dataloader, net, loss_fn, optimizer, tb, global_step)
        val_loss = val_loop(val_dataloader, net, loss_fn)

        # Record the epoch-level loss curves.
        tb.add_scalar("Loss/train_epoch", train_loss, t)
        tb.add_scalar("Loss/val_epoch", val_loss, t)

        # Name checkpoints after the selected model instead of a hard-coded
        # "baseline", so runs of the other model types are not mislabelled.
        # Note: this pickles the whole module object (including the DataParallel
        # wrapper when it is enabled), not just a state_dict.
        checkpoint_name = '{}_epoch_{}_train_{:5f}_val_{:5f}.pt'.format(model_type, t, train_loss, val_loss)
        torch.save(net, os.path.join(checkpoint_dir, checkpoint_name))

        # Early stopping: stop once validation loss has not improved for
        # early_stopping_epoch consecutive epochs.
        if val_loss < min_val_loss:
            last_min_ind = t
            min_val_loss = val_loss
        elif t - last_min_ind >= early_stopping_epoch:
            break
finally:
    # Close the TensorBoard writer even if training is interrupted or raises.
    tb.close()

print('Done!')
print('Best epoch: {}, val_loss: {}'.format(last_min_ind, min_val_loss))

Epoch 0
-------------------------------


100%|██████████| 543/543 [00:24<00:00, 22.22it/s, running_loss=1.03]
100%|██████████| 233/233 [00:07<00:00, 29.74it/s]


Epoch 1
-------------------------------


100%|██████████| 543/543 [00:21<00:00, 25.40it/s, running_loss=0.881]
100%|██████████| 233/233 [00:06<00:00, 35.42it/s]


Epoch 2
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.35it/s, running_loss=0.823]
100%|██████████| 233/233 [00:06<00:00, 35.53it/s]


Epoch 3
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.80it/s, running_loss=0.795]
100%|██████████| 233/233 [00:06<00:00, 35.92it/s]


Epoch 4
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.55it/s, running_loss=0.775]
100%|██████████| 233/233 [00:06<00:00, 35.72it/s]


Epoch 5
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.55it/s, running_loss=0.763]
100%|██████████| 233/233 [00:06<00:00, 35.94it/s]


Epoch 6
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.49it/s, running_loss=0.752]
100%|██████████| 233/233 [00:06<00:00, 35.94it/s]


Epoch 7
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.30it/s, running_loss=0.745]
100%|██████████| 233/233 [00:06<00:00, 36.86it/s]


Epoch 8
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.30it/s, running_loss=0.737]
100%|██████████| 233/233 [00:06<00:00, 35.83it/s]


Epoch 9
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.57it/s, running_loss=0.731]
100%|██████████| 233/233 [00:06<00:00, 36.31it/s]


Epoch 10
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.66it/s, running_loss=0.727]
100%|██████████| 233/233 [00:06<00:00, 36.89it/s]


Epoch 11
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.22it/s, running_loss=0.723]
100%|██████████| 233/233 [00:06<00:00, 35.72it/s]


Epoch 12
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.93it/s, running_loss=0.719]
100%|██████████| 233/233 [00:06<00:00, 36.98it/s]


Epoch 13
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.45it/s, running_loss=0.715]
100%|██████████| 233/233 [00:05<00:00, 40.24it/s]


Epoch 14
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.20it/s, running_loss=0.712]
100%|██████████| 233/233 [00:06<00:00, 35.33it/s]


Epoch 15
-------------------------------


100%|██████████| 543/543 [00:39<00:00, 13.62it/s, running_loss=0.71] 
100%|██████████| 233/233 [00:06<00:00, 34.60it/s]


Epoch 16
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 30.12it/s, running_loss=0.708]
100%|██████████| 233/233 [00:06<00:00, 35.51it/s]


Epoch 17
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.55it/s, running_loss=0.705]
100%|██████████| 233/233 [00:06<00:00, 35.82it/s]


Epoch 18
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 30.11it/s, running_loss=0.703]
100%|██████████| 233/233 [00:06<00:00, 35.63it/s]


Epoch 19
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.32it/s, running_loss=0.701]
100%|██████████| 233/233 [00:06<00:00, 36.21it/s]


Epoch 20
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.25it/s, running_loss=0.699]
100%|██████████| 233/233 [00:05<00:00, 40.64it/s]


Epoch 21
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.21it/s, running_loss=0.697]
100%|██████████| 233/233 [00:06<00:00, 35.67it/s]


Epoch 22
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.78it/s, running_loss=0.696]
100%|██████████| 233/233 [00:06<00:00, 35.48it/s]


Epoch 23
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.86it/s, running_loss=0.695]
100%|██████████| 233/233 [00:06<00:00, 36.65it/s]


Epoch 24
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.19it/s, running_loss=0.694]
100%|██████████| 233/233 [00:06<00:00, 35.43it/s]


Epoch 25
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.49it/s, running_loss=0.692]
100%|██████████| 233/233 [00:06<00:00, 35.91it/s]


Epoch 26
-------------------------------


100%|██████████| 543/543 [00:18<00:00, 29.68it/s, running_loss=0.691]
100%|██████████| 233/233 [00:05<00:00, 40.21it/s]


Epoch 27
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.43it/s, running_loss=0.69] 
100%|██████████| 233/233 [00:06<00:00, 36.25it/s]


Epoch 28
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.72it/s, running_loss=0.689]
100%|██████████| 233/233 [00:06<00:00, 35.62it/s]


Epoch 29
-------------------------------


100%|██████████| 543/543 [00:17<00:00, 30.20it/s, running_loss=0.688]
100%|██████████| 233/233 [00:06<00:00, 36.81it/s]

Done!
Best epoch: 19, val_loss: 0.686746714259012



