In [1]:
from model import util, SimpleGRU
from data import SlidingWinDataset

import torch
from torch import nn
from torch.utils.data import DataLoader

from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error

# Config

In [2]:
# Windowing / batching
SLIDING_WIN = 10   # second argument handed to SlidingWinDataset
BATCH_SIZE = 8     # batch_size for every DataLoader
SHUFFLE = True     # passed as `shuffle=` when the data loaders are built

# Training schedule
NUM_EPOCH = 100

# Reproducibility: weight initialisation and loader shuffling draw from the
# global RNGs, so seed them before anything random is created.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Data Preparation

In [3]:
# Load both splits, dropping the date column ('日期') and casting every
# remaining column to float.
train_data = (
    pd.read_csv('../data/13series_time_stacked_cash/train.csv')
    .drop(columns=['日期'])
    .astype(float)
)
test_data = (
    pd.read_csv('../data/13series_time_stacked_cash/test.csv')
    .drop(columns=['日期'])
    .astype(float)
)

# Hold out the last 20 training rows for validation; everything before them
# remains the training split.
valid_data = train_data.iloc[-20:]
train_data = train_data.iloc[:-20]

# Number of columns; used below as both the model's input and output width.
INPUT_DIM = train_data.shape[1]

In [4]:
# Preview the training split; the rendered table elides the middle rows.
train_data

Unnamed: 0,尾箱余额_9012,尾箱余额_9003,尾箱余额_9049,尾箱余额_9025,尾箱余额_9053,尾箱余额_9077,尾箱余额_9207,尾箱余额_9200,尾箱余额_9164,尾箱余额_9008,尾箱余额_9039,尾箱余额_9472,尾箱余额_9490
0,600.0,270200.0,13900.0,25500.0,184300.0,90500.0,89400.0,271300.0,159800.0,173300.0,221100.0,72600.0,555800.0
1,504000.0,270200.0,456500.0,617000.0,159500.0,56500.0,42700.0,227900.0,87900.0,173300.0,227300.0,101600.0,145800.0
2,467300.0,262500.0,306500.0,565400.0,154100.0,66900.0,195900.0,136600.0,121300.0,169600.0,250300.0,75100.0,234600.0
3,382200.0,108300.0,293200.0,529500.0,259800.0,421400.0,285400.0,14300.0,96600.0,139700.0,140300.0,49000.0,300700.0
4,298700.0,428700.0,293200.0,543600.0,288300.0,432400.0,298800.0,487100.0,266400.0,194000.0,152900.0,307900.0,580300.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,395200.0,370300.0,150900.0,122700.0,356200.0,490400.0,102700.0,243300.0,153800.0,213500.0,202200.0,574300.0,102200.0
356,304200.0,363100.0,91600.0,19700.0,366600.0,600000.0,87200.0,120700.0,160800.0,213500.0,217300.0,619400.0,354100.0
357,210800.0,318900.0,91600.0,616000.0,332300.0,570700.0,45900.0,29600.0,232900.0,0.0,248200.0,655400.0,365300.0
358,101900.0,247800.0,900.0,615400.0,472300.0,553600.0,40200.0,329400.0,165100.0,99800.0,235400.0,319200.0,483800.0


In [5]:
# Wrap each split in a sliding-window dataset.
train_set = SlidingWinDataset(train_data, SLIDING_WIN)
valid_set = SlidingWinDataset(valid_data, SLIDING_WIN)
test_set = SlidingWinDataset(test_data, SLIDING_WIN)

# Only the training loader follows SHUFFLE. The evaluation loaders keep a fixed
# order: test() averages per-batch scores, so shuffling them would make the
# reported numbers depend on the random batch composition.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=SHUFFLE)
valid_loader = DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# Model Definition, Train, Test

In [6]:
HIDDEN_DIM = 10

# Training objective plus the scikit-learn metrics reported during evaluation.
loss_fn = nn.MSELoss()
metrics = [mean_squared_error, root_mean_squared_error, mean_absolute_error]

# The model's output width equals its input width (both INPUT_DIM).
model = SimpleGRU(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM, output_dim=INPUT_DIM)
optimizer = torch.optim.Adam(model.parameters())

  _torch_pytree._register_pytree_node(


In [9]:
@torch.no_grad()
def test() -> np.ndarray:
	results = []
	for b, (X, y) in enumerate(test_loader):
		y_pred = model(X)
		batch_results = [metrics_fn(y, y_pred) for metrics_fn in metrics]

		if loss_fn is not None:
			loss = loss_fn(y_pred, y).item()
			batch_results = [loss] + batch_results

		results.append(batch_results)
	return np.array(results).mean(axis=0)

def train(save_dir, num_epoch):
    """Fit the notebook-level ``model`` on ``train_loader``, logging once per epoch.

    After each full pass over ``train_loader`` the mean training loss and the
    validation scores from ``test(valid_loader, model, metrics, loss_fn)`` are
    handed to ``util.log_train``.

    Args:
        save_dir: Forwarded to ``util.log_train`` as ``save_dir``.
        num_epoch: Number of passes over ``train_loader``.
    """
    for e in tqdm(range(num_epoch)):
        batch_losses = []
        for X, y in train_loader:
            # Size the initial hidden state from the batch itself: the last
            # batch of an epoch can hold fewer than BATCH_SIZE samples.
            # NOTE(review): the (1, batch, hidden) layout is carried over from
            # the original call -- confirm it matches what SimpleGRU expects.
            h0 = torch.zeros(1, X.shape[0], HIDDEN_DIM, dtype=torch.float)
            y_pred = model(X, h0)
            loss = loss_fn(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        # Epoch-level bookkeeping runs once per epoch, after all batches.
        epoch_loss = np.array(batch_losses).mean()
        epoch_valid_results = test(valid_loader, model, metrics, loss_fn).tolist()
        util.log_train(e, model, epoch_loss, epoch_valid_results, save_dir=save_dir)
            

In [10]:
# Run the full schedule; save_dir is forwarded to util.log_train each epoch.
train(save_dir='../log/SimpleGRU/', num_epoch=NUM_EPOCH)

  0%|          | 0/100 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (10) must match the size of tensor b (8) at non-singleton dimension 1