In [1]:
import os
import sys
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_dir)
import numpy as np
import tscv
from common_metrics import *
from model import Model
from preprocessor import Preprocessor



In [2]:
# Synthetic regression data: features and targets are independent uniform
# samples in [0, 1). A fixed seed makes the data (and every metric computed
# from it below) identical after Restart Kernel -> Run All.
SEED = 42
N_SAMPLES = 10_000_000
N_FEATURES = 10

rng = np.random.default_rng(SEED)
x = rng.random((N_SAMPLES, N_FEATURES))
y = rng.random(N_SAMPLES)

In [3]:
class StandardScaler(Preprocessor):
    """Preprocessor that centers and scales each feature column.

    ``fit`` learns the per-column mean and standard deviation; ``transform``
    applies ``(x - mean) / std`` and passes the target through unchanged.
    """

    def fit(self, x, y):
        """Learn the column-wise mean and standard deviation of ``x``.

        Args:
            x: Feature array; statistics are computed along axis 0.
            y: Target values; not used by this scaler.
        """
        self.mean_ = np.mean(x, axis=0)
        std = np.std(x, axis=0)
        # A constant column has zero standard deviation, and dividing by it
        # in ``transform`` would produce NaN/inf. Use a unit scale for such
        # columns so they are simply centered to zero.
        self.std_ = np.where(std == 0, 1.0, std)

    def transform(self, x, y):
        """Standardize ``x`` with the statistics learned by ``fit``.

        Args:
            x: Feature array with the same column layout as the one fitted.
            y: Target values; returned as-is.

        Returns:
            A ``(scaled_x, y)`` tuple.
        """
        return ((x - self.mean_) / self.std_, y)

In [4]:
class CustomLinearModel(Model):
    """Linear model without an intercept term, fitted via the pseudo-inverse."""

    def __init__(self, label):
        """Remember the label that the ``label`` property reports."""
        self._label = label

    @property
    def label(self) -> str:
        """Label passed to the constructor."""
        return self._label

    def fit(self, x, y):
        """Compute ``pinv(x) @ y`` and store the result in ``coef_``."""
        x_pinv = np.linalg.pinv(x)
        self.coef_ = x_pinv @ y

    def predict(self, x):
        """Return ``x @ coef_``; ``fit`` must have been called beforehand."""
        coefficients = self.coef_
        return x @ coefficients

class CustomLinearModel2(Model):
    """Pseudo-inverse linear model whose predictions are shifted by +1."""

    def __init__(self, label):
        """Remember the label that the ``label`` property reports."""
        self._label = label

    @property
    def label(self) -> str:
        """Label passed to the constructor."""
        return self._label

    def fit(self, x, y):
        """Compute ``pinv(x) @ y`` and store the result in ``coef_``."""
        x_pinv = np.linalg.pinv(x)
        self.coef_ = x_pinv @ y

    def predict(self, x):
        """Return ``x @ coef_ + 1``; ``fit`` must have been called beforehand."""
        linear_part = x @ self.coef_
        return linear_part + 1

In [5]:
# Three folds whose training windows start 20,000 rows apart. Each test
# window starts at train_start + train_window, which reproduces the start
# indices 100000, 120000 and 140000.
train_window = 100000
test_window = 20000
fold_train_starts = [0, 20000, 40000]
fold_test_starts = [start + train_window for start in fold_train_starts]

cross_validator = tscv.TimeSeriesCrossValidator(
    x,
    y,
    train_starts=fold_train_starts,
    train_size=train_window,
    test_starts=fold_test_starts,
    test_size=test_window,
    metrics=[MSE(), MAE()],
)

In [6]:
# Evaluate both models without passing `parallelize`, i.e. with whatever
# default cross_validate uses.
default_run_models = [CustomLinearModel("lin"), CustomLinearModel2("lin2")]
cross_validator.cross_validate(
    preprocessor=StandardScaler(),
    models=default_run_models,
)

Unnamed: 0,train_start,train_end,test_start,test_end,model,MSE,MAE
0,0,99999,100000,119999,lin,0.337254,0.503726
1,0,99999,100000,119999,lin2,0.329812,0.496285
0,20000,119999,120000,139999,lin,0.33213,0.498146
1,20000,119999,120000,139999,lin2,0.335844,0.501861
0,40000,139999,140000,159999,lin,0.334175,0.500955
1,40000,139999,140000,159999,lin2,0.33227,0.499051


In [7]:
# Same evaluation with parallelize="none".
# NOTE(review): presumably this runs the folds sequentially; confirm against tscv.
sequential_run_models = [CustomLinearModel("lin"), CustomLinearModel2("lin2")]
cross_validator.cross_validate(
    preprocessor=StandardScaler(),
    models=sequential_run_models,
    parallelize="none",
)

Unnamed: 0,train_start,train_end,test_start,test_end,model,MSE,MAE
0,0,99999,100000,119999,lin,0.337254,0.503726
1,0,99999,100000,119999,lin2,0.329812,0.496285
0,20000,119999,120000,139999,lin,0.33213,0.498146
1,20000,119999,120000,139999,lin2,0.335844,0.501861
0,40000,139999,140000,159999,lin,0.334175,0.500955
1,40000,139999,140000,159999,lin2,0.33227,0.499051


In [8]:
# Same evaluation with parallelize="script" and up to 8 workers.
# NOTE(review): the exact meaning of "script" mode is defined in tscv; confirm there.
script_run_models = [CustomLinearModel("lin"), CustomLinearModel2("lin2")]
cross_validator.cross_validate(
    preprocessor=StandardScaler(),
    models=script_run_models,
    parallelize="script",
    max_workers=8,
)

Unnamed: 0,train_start,train_end,test_start,test_end,model,MSE,MAE
0,0,99999,100000,119999,lin,0.337254,0.503726
1,0,99999,100000,119999,lin2,0.329812,0.496285
0,20000,119999,120000,139999,lin,0.33213,0.498146
1,20000,119999,120000,139999,lin2,0.335844,0.501861
0,40000,139999,140000,159999,lin,0.334175,0.500955
1,40000,139999,140000,159999,lin2,0.33227,0.499051
