In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from abc import abstractmethod
from collections import defaultdict
from functools import lru_cache
from itertools import count
from typing import List, Dict
from typing import Tuple, Any
from sklearn import ensemble
from sklearn.metrics import mean_squared_error
from torch.nn import MSELoss, LSTM, GRU, RNN
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
from importlib import reload
from config import Config
import config
from data import MDataset, Graph, GraphNode, load_graphs, save_dataset_pkl, load_dataset_pkl, save_scalers_pkl, load_scalers_pkl
import data
from base_module import MModule, pad_np_vectors
import base_module
from executor import single_train_loop, nested_detach
import executor
from objects import ModelType
import objects
from metric import MetricUtil
import metric
from logger import init_logging, logging
import logger
import gcn
from gcn import GCNLayer
import transformer
from transformer import TransformerModel
# Re-execute the project modules so that edits made on disk are picked up
# without restarting the kernel.
reload(config)
reload(data)
reload(base_module)
reload(executor)
reload(objects)
reload(metric)
reload(logger)
reload(gcn)
reload(transformer)
# Re-bind the imported names: `from X import name` bindings created before
# reload() keep pointing at the objects of the pre-reload module.
# NOTE(review): GraphNode, save/load_dataset_pkl, save/load_scalers_pkl,
# pad_np_vectors, single_train_loop, nested_detach and logging are imported
# above but not re-imported here, so they keep their pre-reload bindings.
from config import Config
from data import MDataset, Graph, load_graphs
from base_module import MModule
from objects import ModelType
from metric import MetricUtil
from logger import init_logging
from gcn import GCNLayer
from transformer import TransformerModel
init_logging()

datasets_path: /root/guohao/repos/DLT-perf-model/datasets
configs_path: /root/guohao/repos/DLT-perf-model/notebooks/configs
datasets_path: /root/guohao/repos/DLT-perf-model/datasets
configs_path: /root/guohao/repos/DLT-perf-model/notebooks/configs


In [3]:
# Notebook-level switches read by the cells below.
# Environment name: passed to load_graphs() and embedded in every Config entry.
dataset_environment_str = "T4_CPUALL"
# NOTE(review): not referenced by any later visible cell; get_scalers() reads
# conf.dataset_normalizer_cls instead — confirm which one is authoritative.
normalizer_cls = StandardScaler # MinMaxScaler
# Forwarded to load_graphs(use_dummy=...).
dummy = False
# Selects the entry of train_configs that becomes `conf`.
model_type = ModelType.RNN
# Passed to the save/load *_pkl helpers together with the environment.
method_prefix = "SubgraphBased"

In [4]:

# NOTE(review): both calls pass train_or_eval="train", so the evaluation graphs
# are read from the training split and only max_row differs — confirm this is
# intentional and not a copy-paste slip.
eval_graphs = load_graphs(dataset_environment_str,
                            train_or_eval="train",
                            use_dummy=dummy,
                            max_row=200_000)
train_graphs = load_graphs(dataset_environment_str,
                            train_or_eval="train",
                            use_dummy=dummy,
                            max_row=1000_000)

[2023-12-12 17:26:09,506] {data.py:441} INFO - Loading graphs train
[2023-12-12 17:26:09,507] {data.py:412} INFO - Loading merged.csv
[2023-12-12 17:26:09,516] {data.py:415} INFO - Loaded merged.csv, 1000 rows
[2023-12-12 17:26:09,994] {data.py:421} INFO - Loaded rand_5000.207_7.csv, 1000 rows
[2023-12-12 17:26:09,995] {data.py:441} INFO - Loading graphs train
[2023-12-12 17:26:09,995] {data.py:412} INFO - Loading merged.csv
[2023-12-12 17:26:10,030] {data.py:415} INFO - Loaded merged.csv, 10000 rows
[2023-12-12 17:26:10,646] {data.py:421} INFO - Loaded rand_5000.207_7.csv, 1332 rows
[2023-12-12 17:26:11,271] {data.py:421} INFO - Loaded rand_1000.146_7.csv, 1366 rows
[2023-12-12 17:26:12,461] {data.py:421} INFO - Loaded rand_5500.22_7.csv, 2609 rows
[2023-12-12 17:26:13,378] {data.py:421} INFO - Loaded rand_4500.22_7.csv, 1996 rows
[2023-12-12 17:26:13,461] {data.py:421} INFO - Loaded vgg16.85_7.csv, 173 rows
[2023-12-12 17:26:13,612] {data.py:421} INFO - Loaded resnet18.138_7.csv, 315

In [104]:
def _shared_meta_configs() -> Dict[str, Any]:
    """Build a fresh ``meta_configs`` dict; every model entry uses the same values."""
    return {
        "learning_rate": 0.005,
        "meta_learning_rate": 0.001,
        "meta_train_steps": 1000,
        "meta_task_per_step": 8,
        "meta_fast_adaption_step": 5,
        "meta_dataset_train_environment_strs": [dataset_environment_str],
        "meta_dataset_eval_environment_strs": [dataset_environment_str],
    }


def _shared_training_settings(dataset_dummy: bool) -> Dict[str, Any]:
    """Build the trailing keys that are identical across entries apart from ``dataset_dummy``."""
    return {
        "dataset_dummy": dataset_dummy,
        "batch_size": 16,
        "eval_steps": 100,
        "learning_rate": 1e-3,
        "epochs": 100,
        "optimizer": "Adam",
        "meta_configs": _shared_meta_configs(),
    }


def _sequence_model_settings(model: str, model_params: Dict[str, Any]) -> Dict[str, Any]:
    """Raw settings for the LSTM / RNN / GRU entries, which differ only in name and model_params."""
    return {
        "model": model,
        "dataset_environment_str": dataset_environment_str,
        "meta_dataset_environment_strs": [dataset_environment_str],
        "dataset_subgraph_node_size": 10,
        "all_seed": 42,
        "dataset_params": {"duration_summed": False},
        "model_params": model_params,
        **_shared_training_settings(dataset_dummy=True),
    }


# One Config per supported model, keyed by the ModelType member name.
train_configs = {
    ModelType.MLPTestSubgraph.name: Config.from_dict({
        "model": "MLPTestSubgraph",
        "all_seed": 42,
        "dataset_environment_str": dataset_environment_str,
        "dataset_params": {"duration_summed": False},
        **_shared_training_settings(dataset_dummy=False),
    }),
    ModelType.LSTM.name: Config.from_dict(
        _sequence_model_settings("LSTM", {"num_layers": 4, "bidirectional": True})
    ),
    ModelType.RNN.name: Config.from_dict(
        _sequence_model_settings("RNN", {"num_layers": 5, "bidirectional": True, "hidden_size": 64})
    ),
    ModelType.GRU.name: Config.from_dict(
        _sequence_model_settings("GRU", {"num_layers": 5, "bidirectional": True})
    ),
    # NOTE(review): the key is GCNSubgraph but the "model" value is "GCNGrouping" — confirm.
    ModelType.GCNSubgraph.name: Config.from_dict({
        "model": "GCNGrouping",
        "dataset_environment_str": dataset_environment_str,
        "dataset_subgraph_node_size": 10,
        "all_seed": 42,
        "dataset_params": {"duration_summed": False},
        **_shared_training_settings(dataset_dummy=True),
    }),
    ModelType.Transformer.name: Config.from_dict({
        "model": "Transformer",
        "dataset_environment_str": dataset_environment_str,
        "dataset_subgraph_node_size": 10,
        "all_seed": 42,
        "dataset_params": {"duration_summed": False},
        "model_params": {"nlayers": 6, "d_hid": 64, "dropout": 0.0},
        **_shared_training_settings(dataset_dummy=True),
    }),
}

# Active configuration, chosen by the notebook-level `model_type` switch.
conf: Config = train_configs[model_type.name]

In [9]:
def subgraph_features(graph: Graph, subgraph_node_size: int = 10, step: int = 5,
                      dataset_params: Dict | None = None) -> \
        Tuple[List[Dict], List[Dict]]:
    """Build one feature dict and one label dict per subgraph of ``graph``.

    The subgraphs come from ``graph.subgraphs(subgraph_node_size=..., step=...)``.

    Args:
        graph: graph to split; its ``ID`` is copied into every feature/label dict.
        subgraph_node_size: forwarded to ``graph.subgraphs``.
        step: forwarded to ``graph.subgraphs``.
        dataset_params: optional settings; only ``"mode"`` is read (default
            ``"complex"``) and forwarded to ``node.op.to_feature_array``.
            ``None`` is treated as an empty dict.

    Returns:
        ``(X, Y)``, two lists of equal length. Each ``X`` entry holds
        ``x_graph_id``, ``x_node_ids`` (node ids joined with ``"|"``),
        ``x_subgraph_feature`` (padded per-node feature vectors) and
        ``x_adj_matrix``. Each ``Y`` entry holds ``y_graph_id``,
        ``y_nodes_durations`` (a list of ``(duration, gap)`` tuples) and
        ``y_subgraph_durations`` (a 1-tuple with the summed duration + gap).
    """
    params = {} if dataset_params is None else dataset_params
    # Loop-invariant: resolve the feature mode once instead of once per node.
    feature_mode = params.get("mode", "complex")
    subgraphs, _ = graph.subgraphs(subgraph_node_size=subgraph_node_size, step=step)

    def subgraph_feature(nodes: List[GraphNode]):
        """Return the ``(feature, label)`` pair for a single subgraph."""
        node_vectors = [
            np.array(node.op.to_feature_array(mode=feature_mode))
            for node in nodes
        ]
        feature_matrix = np.array(pad_np_vectors(node_vectors))

        # Chain adjacency: node i points at node i + 1 (ones on the first
        # super-diagonal), matching the order in which the nodes are listed.
        adj_matrix = np.eye(len(nodes), k=1)

        feature = {
            "x_graph_id": graph.ID,
            "x_node_ids": "|".join([str(node.node_id) for node in nodes]),
            "x_subgraph_feature": feature_matrix,
            "x_adj_matrix": adj_matrix
        }

        # Labels are derived from this helper's own argument. The previous
        # version read the enclosing loop variable `subgraph`, which only
        # worked because the caller happened to pass that same object.
        nodes_durations = [(node.duration, node.gap) for node in nodes]
        subgraph_duration = sum(node.duration + node.gap for node in nodes)

        label = {
            "y_graph_id": graph.ID,
            "y_nodes_durations": nodes_durations,
            "y_subgraph_durations": (subgraph_duration,)
        }

        return feature, label

    X, Y = list(), list()
    for subgraph in subgraphs:
        x, y = subgraph_feature(subgraph)
        X.append(x)
        Y.append(y)

    return X, Y


def init_dataset(graphs: List[Graph]) -> MDataset:
    """Turn ``graphs`` into an ``MDataset`` of per-subgraph features and labels.

    Subgraph size, step and dataset params are read from the notebook-level
    ``conf``. After all graphs are processed, every ``x_subgraph_feature`` is
    re-padded with ``pad_np_vectors`` to the widest first-row length seen.
    """
    all_features: List[Dict] = []
    all_labels: List[Dict] = []
    widest_row = 0

    for current_graph in graphs:
        graph_features, graph_labels = subgraph_features(
            graph=current_graph,
            subgraph_node_size=conf.dataset_subgraph_node_size,
            step=conf.dataset_subgraph_step,
            dataset_params=conf.dataset_params,
        )
        # Track the widest per-node vector, measured on each subgraph's first row.
        row_widths = [len(item["x_subgraph_feature"][0]) for item in graph_features]
        widest_row = max([widest_row] + row_widths)

        all_features += graph_features
        all_labels += graph_labels

    for item in all_features:
        item["x_subgraph_feature"] = pad_np_vectors(item["x_subgraph_feature"], maxsize=widest_row)

    return MDataset(all_features, all_labels)

# Build the raw (not yet normalised) datasets from the loaded graphs.
train_ds = init_dataset(train_graphs)
eval_ds = init_dataset(eval_graphs)

In [10]:
def get_scalers(raw_train_ds: MDataset):
    """Fit three scalers on the raw training dataset.

    The scaler class is read from ``conf.dataset_normalizer_cls``. Returns a
    tuple ``(x_subgraph_feature_scaler, y_nodes_durations_scaler,
    y_subgraph_durations_scaler)``, each fitted on the matching stacked array.
    """

    def _collect_fit_arrays(ds: MDataset):
        """Stack feature rows, per-node labels and per-subgraph labels of ``ds``."""
        feature_rows = []
        node_duration_rows = []
        subgraph_duration_rows = []

        for feature, label in ds:
            rows = feature["x_subgraph_feature"]
            assert isinstance(rows, list)
            feature_rows.extend(rows)

            node_rows = label["y_nodes_durations"]
            assert isinstance(node_rows, list)
            node_duration_rows.extend(node_rows)

            # One entry per subgraph, so append rather than extend.
            subgraph_duration_rows.append(label["y_subgraph_durations"])

        return [
            np.array(feature_rows),
            np.array(node_duration_rows),
            np.array(subgraph_duration_rows),
        ]

    scaler_cls = conf.dataset_normalizer_cls
    fit_arrays = _collect_fit_arrays(ds=raw_train_ds)

    # Same order as the returned tuple: features, node durations, subgraph durations.
    fitted_scalers = []
    for fit_array in fit_arrays:
        scaler = scaler_cls()
        scaler.fit(fit_array)
        fitted_scalers.append(scaler)

    return tuple(fitted_scalers)

# Fit the scalers on the training data only; `scalers` is read as a global by
# preprocess_dataset() and compute_evaluate_metrics().
scalers = get_scalers(train_ds)
x_subgraph_feature_scaler, y_nodes_durations_scaler, y_subgraph_durations_scaler = scalers


In [11]:

def preprocess_dataset(ds: MDataset) -> MDataset:
    """Normalise ``ds`` with the notebook-level ``scalers`` and convert it to tensors.

    Features and both label kinds are cast to float32 where the original data
    is array-like, transformed by their scaler and wrapped in ``torch.Tensor``.
    The adjacency matrix is only cast, not scaled. Graph ids and node ids are
    copied through unchanged.
    """
    feature_scaler, node_duration_scaler, subgraph_duration_scaler = scalers

    tensor_features = []
    tensor_labels = []

    for feature, label in ds:
        raw_rows = feature["x_subgraph_feature"]
        assert isinstance(raw_rows, list)
        scaled_rows = feature_scaler.transform(np.array(raw_rows).astype(np.float32))

        adjacency = np.array(feature["x_adj_matrix"]).astype(np.float32)

        raw_node_durations = label["y_nodes_durations"]
        assert isinstance(raw_node_durations, list)
        scaled_node_durations = node_duration_scaler.transform(
            np.array(raw_node_durations).astype(np.float32))

        # The scaler expects a 2-D input, so wrap the single label as a
        # one-row batch and take that row back out afterwards.
        single_row_batch = (label["y_subgraph_durations"],)
        scaled_subgraph_durations = subgraph_duration_scaler.transform(single_row_batch)[0]

        tensor_features.append({
            "x_graph_id": feature["x_graph_id"],
            "x_node_ids": feature["x_node_ids"],
            "x_subgraph_feature": torch.Tensor(scaled_rows),
            "x_adj_matrix": torch.Tensor(adjacency)
        })
        tensor_labels.append({
            "y_graph_id": label["y_graph_id"],
            "y_nodes_durations": torch.Tensor(scaled_node_durations),
            "y_subgraph_durations": torch.Tensor(scaled_subgraph_durations)
        })

    return MDataset(tensor_features, tensor_labels)


# Apply the train-fitted scalers to both splits.
preprocessed_train_ds = preprocess_dataset(train_ds)
preprocessed_eval_ds = preprocess_dataset(eval_ds)


In [12]:

# Persist the preprocessed datasets and the fitted scalers; the same
# (environment, method_prefix, split, normalization) arguments are used by the
# load_*_pkl calls in the next cell.
save_dataset_pkl(preprocessed_train_ds, conf.dataset_environment, method_prefix, 'train',
                         conf.dataset_normalization)
save_dataset_pkl(preprocessed_eval_ds, conf.dataset_environment, method_prefix, 'eval',
                         conf.dataset_normalization)
save_scalers_pkl(scalers, conf.dataset_environment, method_prefix, 'train',
                         conf.dataset_normalization)

In [13]:
# Reload the datasets and scalers saved above. This rebinds
# preprocessed_train_ds, preprocessed_eval_ds and `scalers`, so every later
# cell works on the loaded copies rather than the in-memory ones.
preprocessed_train_ds = load_dataset_pkl(conf.dataset_environment, method_prefix, 'train', 
                                         conf.dataset_normalization)
preprocessed_eval_ds = load_dataset_pkl(conf.dataset_environment, method_prefix, 'eval',
                                        conf.dataset_normalization)
scalers = load_scalers_pkl(conf.dataset_environment, method_prefix, 'train',
                           conf.dataset_normalization)
x_subgraph_feature_scaler, y_nodes_durations_scaler, y_subgraph_durations_scaler = scalers

Loading dataset RTX2080Ti_CPU-1 SubgraphBased train Standard
Loading dataset RTX2080Ti_CPU-1 SubgraphBased eval Standard
Loading scalers RTX2080Ti_CPU-1 SubgraphBased train, Standard


In [14]:
def compute_evaluate_metrics(input_batches, output_batches, eval_loss) -> Dict[str, float]:
    """Aggregate per-node predictions into per-graph durations and score them.

    For each batch, the outputs are grouped by ``x_graph_id`` and
    inverse-transformed with the node-durations scaler from the notebook-level
    ``scalers``. Each node's row is summed, and predictions for the same node
    id are averaged within the batch and again across batches. The per-node
    averages are summed per graph and passed to
    ``MetricUtil.compute_duration_metrics`` together with ``eval_graphs``.

    Returns:
        ``{"eval_loss": eval_loss, **duration_metrics}``.
    """
    def compute_graph_nodes_durations(outputs_, node_ids_str_):
        """Map node id -> average predicted duration for one graph's rows of a batch."""
        _, nodes_scaler, _ = scalers
        predictions_by_node = defaultdict(list)
        for joined_ids, output_ in zip(node_ids_str_, outputs_):
            ids = joined_ids.split("|")
            assert len(output_) == len(ids)
            restored: np.ndarray = nodes_scaler.inverse_transform(output_)
            for restored_row, node_id in zip(restored, ids):
                # One row per node; its columns are added into a single duration.
                predictions_by_node[node_id].append(np.sum(restored_row))
        return {node_id: np.average(values) for node_id, values in predictions_by_node.items()}

    graph_id_to_node_to_duration = defaultdict(lambda: defaultdict(list))
    for inputs, outputs in zip(input_batches, output_batches):
        outputs = nested_detach(outputs).cpu().numpy()
        batch_node_ids = inputs["x_node_ids"]

        # Batch row indices belonging to each graph, in ascending order.
        rows_by_graph = defaultdict(list)
        for row_idx, graph_id in enumerate(inputs["x_graph_id"]):
            rows_by_graph[graph_id].append(row_idx)

        for graph_id, row_indices in rows_by_graph.items():
            group_x_node_ids = [batch_node_ids[row_idx] for row_idx in row_indices]
            group_outputs = [outputs[row_idx] for row_idx in row_indices]
            batch_node_averages = compute_graph_nodes_durations(group_outputs, group_x_node_ids)
            for node, duration in batch_node_averages.items():
                graph_id_to_node_to_duration[graph_id][node].append(duration)

    # TODO check this!!!
    graph_id_to_duration_pred = dict()
    for graph_id, node_to_duration in graph_id_to_node_to_duration.items():
        graph_id_to_duration_pred[graph_id] = sum(
            np.average(duration_preds) for duration_preds in node_to_duration.values()
        )

    duration_metrics = MetricUtil.compute_duration_metrics(eval_graphs, graph_id_to_duration_pred)
    return {"eval_loss": eval_loss, **duration_metrics}


In [15]:

def to_device(conf: Config, features, labels):
    """Move the tensor entries of one batch to ``conf.device``.

    Both dicts are updated in place and also returned as ``(features, labels)``.
    """
    for key in ('x_subgraph_feature', 'x_adj_matrix'):
        features[key] = features[key].to(conf.device)
    for key in ('y_nodes_durations', 'y_subgraph_durations'):
        labels[key] = labels[key].to(conf.device)
    return features, labels

In [16]:

class MLPTest_SubgraphModel(MModule):
    """Two-hidden-layer MLP over the flattened subgraph feature matrix.

    The output is reshaped to
    ``(-1, y_nodes_duration_count, y_nodes_duration_size)`` and trained with
    MSE against ``Y["y_nodes_durations"]``.
    """

    def __init__(self, x_node_feature_count, x_node_feature_size, y_nodes_duration_count, y_nodes_duration_size,
                 **kwargs):
        super().__init__(**kwargs)
        self.x_node_feature_count = x_node_feature_count
        self.x_node_feature_size = x_node_feature_size
        self.y_nodes_duration_count = y_nodes_duration_count
        self.y_nodes_duration_size = y_nodes_duration_size

        flat_input_size = x_node_feature_count * x_node_feature_size
        flat_output_size = y_nodes_duration_count * y_nodes_duration_size

        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=flat_input_size, out_features=64)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(in_features=64, out_features=32)
        self.relu2 = nn.ReLU()
        self.output = nn.Linear(32, flat_output_size)
        self.loss_fn = MSELoss()

    def forward(self, X):
        """Run the MLP on ``X["x_subgraph_feature"]`` and reshape to per-node outputs."""
        hidden = X["x_subgraph_feature"]
        pipeline = (self.flatten, self.linear1, self.relu1, self.linear2, self.relu2, self.output)
        for stage in pipeline:
            hidden = stage(hidden)
        return hidden.reshape(-1, self.y_nodes_duration_count, self.y_nodes_duration_size)

    def compute_loss(self, outputs, Y):
        """MSE between ``outputs`` and ``Y["y_nodes_durations"]``."""
        return self.loss_fn(outputs, Y["y_nodes_durations"])

def init_MLPTestSubgraph_model() -> MModule | Any:
    """Build the MLP model, sizing it from the first sample of ``preprocessed_train_ds``."""
    first_feature_matrix = preprocessed_train_ds.features[0]["x_subgraph_feature"]
    first_node_durations = preprocessed_train_ds.labels[0]["y_nodes_durations"]
    return MLPTest_SubgraphModel(
        len(first_feature_matrix),       # nodes per subgraph
        len(first_feature_matrix[0]),    # features per node
        len(first_node_durations),       # label rows per subgraph
        len(first_node_durations[0]),    # values per label row
    )


In [79]:

class LSTMModel(MModule):
    """LSTM over the node sequence of a subgraph, with a per-step linear head.

    The LSTM uses ``hidden_size == feature_size`` and ``batch_first=True``. The
    linear head maps each step's output (``feature_size * num_directions``
    wide) to ``nodes_durations_len`` values.
    """

    def __init__(self, feature_size, nodes_durations_len, num_layers, bidirectional, **kwargs):
        """
        Args:
            feature_size: width of each node feature vector; also the LSTM hidden size.
            nodes_durations_len: number of values predicted per node.
            num_layers: number of stacked LSTM layers.
            bidirectional: whether the LSTM runs in both directions.
            **kwargs: forwarded to ``MModule.__init__``.
        """
        super().__init__(**kwargs)
        self.lstm = LSTM(input_size=feature_size, hidden_size=feature_size, num_layers=num_layers, batch_first=True,
                         bidirectional=bidirectional)
        num_directions = 2 if bidirectional else 1
        self.project = torch.nn.Linear(in_features=feature_size * num_directions, out_features=nodes_durations_len)
        self.loss_fn = MSELoss()

    def forward(self, X):
        """Return per-node predictions for ``X["x_subgraph_feature"]``."""
        # The debug shape prints that ran on every forward pass were removed:
        # they flooded the notebook output during training.
        X = X["x_subgraph_feature"]
        out, _ = self.lstm(X)
        Y = self.project(out)
        return Y

    def compute_loss(self, outputs, Y):
        """MSE between ``outputs`` and ``Y["y_nodes_durations"]``."""
        node_durations = Y["y_nodes_durations"]
        loss = self.loss_fn(outputs, node_durations)
        return loss

def init_LSTM_model() -> MModule | Any:
    """Build an ``LSTMModel`` sized from the first training sample.

    Only the keys listed in the defaults below are taken from
    ``conf.model_params``; any other key there is ignored.
    """
    defaults: Dict[str, Any] = {
        "num_layers": 4,
        "bidirectional": True,
    }

    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    feature_width = len(first_features["x_subgraph_feature"][0])
    label_width = len(first_labels["y_nodes_durations"][0])

    overrides = conf.model_params
    final_params = {key: overrides.get(key, fallback) for key, fallback in defaults.items()}
    print(final_params)

    return LSTMModel(
        feature_size=feature_width,
        nodes_durations_len=label_width,
        **final_params
    )


In [18]:


class GRUModel(MModule):
    """GRU over the node sequence of a subgraph, with a per-step linear head.

    The GRU uses ``hidden_size == feature_size`` and ``batch_first=True``. The
    head maps each step's output to ``nodes_durations_len`` values.
    """

    def __init__(self, feature_size, nodes_durations_len, num_layers, bidirectional, **kwargs):
        super().__init__(**kwargs)
        self.gru = GRU(
            input_size=feature_size,
            hidden_size=feature_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
        )
        # A bidirectional GRU concatenates both directions, doubling the output width.
        direction_count = 2 if bidirectional else 1
        self.project = nn.Linear(in_features=feature_size * direction_count,
                                 out_features=nodes_durations_len)
        self.loss_fn = MSELoss()

    def forward(self, X):
        """Return per-node predictions for ``X["x_subgraph_feature"]``."""
        sequence_out, _ = self.gru(X["x_subgraph_feature"])
        return self.project(sequence_out)

    def compute_loss(self, outputs, Y):
        """MSE between ``outputs`` and ``Y["y_nodes_durations"]``."""
        return self.loss_fn(outputs, Y["y_nodes_durations"])


def init_GRU_model() -> MModule | Any:
    """Build a ``GRUModel`` sized from the first training sample.

    Only the keys listed in the defaults below are taken from
    ``conf.model_params``; any other key there is ignored.
    """
    defaults: Dict[str, Any] = {
        "num_layers": 4,
        "bidirectional": True,
    }

    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    feature_width = len(first_features["x_subgraph_feature"][0])
    label_width = len(first_labels["y_nodes_durations"][0])

    overrides = conf.model_params
    final_params = {key: overrides.get(key, fallback) for key, fallback in defaults.items()}

    return GRUModel(
        feature_size=feature_width,
        nodes_durations_len=label_width,
        **final_params
    )


In [19]:


class GCNSubgraphModel(MModule):
    """Stack of ``GCNLayer`` modules applied to a subgraph's adjacency and features.

    Layout: one input layer (ReLU, dropout 0), ``n_layers - 1`` hidden layers
    (ReLU, ``dropout``) and one output layer (no activation, ``dropout``).
    """

    def __init__(self, dim_feats, dim_h, dim_out, n_layers, dropout):
        super().__init__()
        stack = [GCNLayer(dim_feats, dim_h, F.relu, 0)]
        stack.extend(GCNLayer(dim_h, dim_h, F.relu, dropout) for _ in range(n_layers - 1))
        stack.append(GCNLayer(dim_h, dim_out, None, dropout))
        self.layers = nn.ModuleList(stack)
        self.loss_fn = MSELoss()

    def forward(self, X):
        """Pass ``X["x_subgraph_feature"]`` through every layer with ``X["x_adj_matrix"]``."""
        adjacency = X["x_adj_matrix"]
        hidden = X["x_subgraph_feature"]
        for gcn_layer in self.layers:
            hidden = gcn_layer(adjacency, hidden)
        return hidden

    def compute_loss(self, outputs, Y) -> torch.Tensor:
        """MSE between ``outputs`` and ``Y["y_nodes_durations"]``."""
        return self.loss_fn(outputs, Y["y_nodes_durations"])


def init_GCNSubgraph_model() -> MModule | Any:
    """Build a ``GCNSubgraphModel`` sized from the first training sample.

    Only the keys listed in the defaults below are taken from
    ``conf.model_params``. A ``dim_h`` of ``None`` falls back to the node
    feature width.
    """
    defaults: Dict[str, Any] = {
        "dim_h": None,
        "n_layers": 2,
        "dropout": 0.1,
    }

    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    feature_width = len(first_features["x_subgraph_feature"][0])
    label_width = len(first_labels["y_nodes_durations"][0])

    overrides = conf.model_params
    final_params = {key: overrides.get(key, fallback) for key, fallback in defaults.items()}
    if final_params["dim_h"] is None:
        final_params["dim_h"] = feature_width

    return GCNSubgraphModel(
        dim_feats=feature_width,
        dim_out=label_width,
        **final_params
    )

In [None]:
def init_Transformer_model() -> MModule | Any:
    """Build a ``TransformerModel`` sized from the first training sample.

    Only the keys listed in the defaults below are taken from
    ``conf.model_params``. ``nhead`` is decreased until it divides the node
    feature width; when that changes it, the new value is logged and written
    back to ``conf.model_params["nhead"]``.
    """
    defaults: Dict[str, Any] = {
        "nhead": 8,
        "d_hid": 512,
        "nlayers": 6,
        "dropout": 0.5,
    }

    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    feature_width = len(first_features["x_subgraph_feature"][0])
    label_width = len(first_labels["y_nodes_durations"][0])

    overrides = conf.model_params
    final_params = {key: overrides.get(key, fallback) for key, fallback in defaults.items()}

    requested_heads = final_params["nhead"]
    usable_heads = requested_heads
    # Step down to the nearest head count that divides the model width.
    while feature_width % usable_heads != 0:
        usable_heads -= 1
    if usable_heads != requested_heads:
        final_params["nhead"] = usable_heads
        logging.info(f"Transformer nhead set to {usable_heads}.")
        conf.model_params["nhead"] = usable_heads

    return TransformerModel(
        d_model=feature_width,
        output_d=label_width,
        **final_params
    )

In [105]:
class RNNModel(MModule):
    """Vanilla RNN over the node sequence of a subgraph, with a per-step linear head.

    The RNN is built with ``batch_first=True``. The head maps each step's
    output (``hidden_size * num_directions`` wide) to ``nodes_durations_len``
    values.
    """

    def __init__(self, feature_size, nodes_durations_len,hidden_size, num_layers, bidirectional, **kwargs):
        """
        Args:
            feature_size: width of each node feature vector (RNN input size).
            nodes_durations_len: number of values predicted per node.
            hidden_size: RNN hidden state width.
            num_layers: number of stacked RNN layers.
            bidirectional: whether the RNN runs in both directions.
            **kwargs: forwarded to ``MModule.__init__``.
        """
        print(f'feature_size: {feature_size}, nodes_durations_len: {nodes_durations_len}, num_layers: {num_layers}, bidirectional: {bidirectional}')
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.input_size = feature_size
        self.num_layers = num_layers
        self.num_directions = 2 if bidirectional else 1
        # Previously hard-coded to 2; now mirrors the constructor argument so
        # the attribute cannot disagree with the projection head.
        self.node_durations_len = nodes_durations_len
        self.rnn = RNN(input_size=feature_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       batch_first=True,
                       bidirectional=bidirectional)

        self.project = torch.nn.Linear(in_features=self.hidden_size * self.num_directions, out_features=nodes_durations_len)

        self.loss_fn = MSELoss()

    def forward(self, X):
        """Return per-node predictions for ``X["x_subgraph_feature"]``."""
        X = X["x_subgraph_feature"]  # (batch_size, seq_len, input_size) since batch_first=True
        batch_size = X.size(0)
        # Create the initial state on the input's device/dtype. The previous
        # unconditional .cuda() failed on CPU-only runs and whenever the
        # batch lived on a GPU other than the current default.
        hidden = self.init_hidden(batch_size, device=X.device, dtype=X.dtype)
        out, _ = self.rnn(X, hidden)  # (batch_size, seq_len, num_directions * hidden_size)
        Y = self.project(out)  # (batch_size, seq_len, nodes_durations_len)
        return Y

    def init_hidden(self, batch_size, device=None, dtype=None):
        """Return a zero initial hidden state.

        The shape is ``(num_layers * num_directions, batch_size, hidden_size)``.
        ``device`` and ``dtype`` are optional; when omitted the tensor uses
        torch's defaults, as before.
        """
        hidden = torch.zeros(self.num_layers*self.num_directions, batch_size, self.hidden_size,
                             device=device, dtype=dtype)
        return hidden

    def compute_loss(self, outputs, Y):
        """MSE between ``outputs`` and ``Y["y_nodes_durations"]``."""
        node_durations = Y["y_nodes_durations"]
        loss = self.loss_fn(outputs, node_durations)
        return loss

def init_RNN_model() -> MModule | Any:
    """Build an ``RNNModel`` sized from the first preprocessed training sample.

    Hyper-parameters start from the built-in defaults below; any entry of
    ``conf.model_params`` with the same key overrides the default, and config
    keys that are not listed in the defaults are ignored.
    """
    # Probe the first sample (and the first row inside it) for the input and
    # label widths.
    first_features = preprocessed_train_ds.features[0]
    first_labels = preprocessed_train_ds.labels[0]
    feature_width = len(first_features["x_subgraph_feature"][0])
    durations_width = len(first_labels["y_nodes_durations"][0])

    defaults: Dict[str, Any] = {
        "num_layers": 4,
        "hidden_size": 64,
        "bidirectional": True,
    }
    overrides = conf.model_params
    final_params = {
        name: overrides.get(name, fallback)
        for name, fallback in defaults.items()
    }
    print(final_params)

    return RNNModel(
        feature_size=feature_width,
        nodes_durations_len=durations_width,
        **final_params
    )


In [106]:
# Registry mapping each supported ModelType name to the factory that builds it.
init_model_funcs = {
    kind.name: factory
    for kind, factory in (
        (ModelType.Transformer, init_Transformer_model),
        (ModelType.GCNSubgraph, init_GCNSubgraph_model),
        (ModelType.GRU, init_GRU_model),
        (ModelType.LSTM, init_LSTM_model),
        (ModelType.MLPTestSubgraph, init_MLPTestSubgraph_model),
        (ModelType.RNN, init_RNN_model),
    )
}

# Model used for this run. Alternatives tried previously:
# ModelType.MLPTestSubgraph, ModelType.LSTM, ModelType.GRU.
model_type = ModelType.RNN
init_model = init_model_funcs[model_type.name]

# Build the model and move it to the configured device.
model = init_model().to(conf.device)




{'num_layers': 4, 'hidden_size': 64, 'bidirectional': True}
feature_size: 66, nodes_durations_len: 2, num_layers: 4, bidirectional: True


In [107]:
# Train the selected model; progress and evaluation metrics are logged below.
single_train_loop(
    model_type,
    conf,
    preprocessed_train_ds,
    preprocessed_eval_ds,
    model,
    compute_evaluate_metrics,
    to_device,
)

[2023-12-12 19:24:06,781] {executor.py:120} INFO - ModelType.RNN start single training.
[2023-12-12 19:24:06,782] {executor.py:122} INFO - ModelType.RNN training epoch 0


  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:06,792] {executor.py:139} INFO - ModelType.RNN trained for 0.010835199 seconds.
[2023-12-12 19:24:06,793] {executor.py:140} INFO - ModelType.RNN eval at step 0.
[2023-12-12 19:24:06,849] {executor.py:144} INFO - ModelType.RNN train loss: 1.036185622215271, eval metrics: {'eval_loss': 1.1142544792248652, 'MRE': 0.04312431567689222, 'MAE': 0.04312431567689222, 'RMSE': 32.93178508447306}
[2023-12-12 19:24:06,849] {executor.py:174} INFO - Saving model at step 0 with loss 1.036185622215271,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 66%|██████▌   | 82/125 [00:00<00:00, 178.45it/s]

[2023-12-12 19:24:07,393] {executor.py:139} INFO - ModelType.RNN trained for 0.612119896 seconds.
[2023-12-12 19:24:07,394] {executor.py:140} INFO - ModelType.RNN eval at step 100.
[2023-12-12 19:24:07,447] {executor.py:144} INFO - ModelType.RNN train loss: 0.5352503657341003, eval metrics: {'eval_loss': 0.41186931578872293, 'MRE': 0.5172251258817492, 'MAE': 0.5172251258817492, 'RMSE': 394.9777850029598}
[2023-12-12 19:24:07,448] {executor.py:174} INFO - Saving model at step 100 with loss 0.5352503657341003,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 158.71it/s]

[2023-12-12 19:24:07,571] {executor.py:122} INFO - ModelType.RNN training epoch 1



 49%|████▉     | 61/125 [00:00<00:00, 199.47it/s]

[2023-12-12 19:24:07,954] {executor.py:139} INFO - ModelType.RNN trained for 1.17294905 seconds.
[2023-12-12 19:24:07,955] {executor.py:140} INFO - ModelType.RNN eval at step 200.
[2023-12-12 19:24:08,004] {executor.py:144} INFO - ModelType.RNN train loss: 0.07050430029630661, eval metrics: {'eval_loss': 0.3019494588773411, 'MRE': 0.0817657965586424, 'MAE': 0.0817657965586424, 'RMSE': 62.44026362539637}
[2023-12-12 19:24:08,004] {executor.py:174} INFO - Saving model at step 200 with loss 0.07050430029630661,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 186.20it/s]

[2023-12-12 19:24:08,245] {executor.py:122} INFO - ModelType.RNN training epoch 2



 35%|███▌      | 44/125 [00:00<00:00, 214.04it/s]

[2023-12-12 19:24:08,488] {executor.py:139} INFO - ModelType.RNN trained for 1.706553813 seconds.
[2023-12-12 19:24:08,488] {executor.py:140} INFO - ModelType.RNN eval at step 300.
[2023-12-12 19:24:08,536] {executor.py:144} INFO - ModelType.RNN train loss: 0.06021766737103462, eval metrics: {'eval_loss': 0.30464235965449077, 'MRE': 0.38918156485767064, 'MAE': 0.38918156485767064, 'RMSE': 297.19761233450276}
[2023-12-12 19:24:08,536] {executor.py:174} INFO - Saving model at step 300 with loss 0.06021766737103462,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 194.09it/s]

[2023-12-12 19:24:08,891] {executor.py:122} INFO - ModelType.RNN training epoch 3



 18%|█▊        | 22/125 [00:00<00:00, 216.04it/s]

[2023-12-12 19:24:09,014] {executor.py:139} INFO - ModelType.RNN trained for 2.23264166 seconds.
[2023-12-12 19:24:09,014] {executor.py:140} INFO - ModelType.RNN eval at step 400.
[2023-12-12 19:24:09,061] {executor.py:144} INFO - ModelType.RNN train loss: 0.0853407010436058, eval metrics: {'eval_loss': 0.20319747401831242, 'MRE': 0.1573342810993697, 'MAE': 0.1573342810993697, 'RMSE': 120.147964095367}
[2023-12-12 19:24:09,062] {executor.py:174} INFO - Saving model at step 400 with loss 0.0853407010436058,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 193.79it/s]

[2023-12-12 19:24:09,538] {executor.py:122} INFO - ModelType.RNN training epoch 4



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:09,544] {executor.py:139} INFO - ModelType.RNN trained for 2.762686699 seconds.
[2023-12-12 19:24:09,544] {executor.py:140} INFO - ModelType.RNN eval at step 500.
[2023-12-12 19:24:09,592] {executor.py:144} INFO - ModelType.RNN train loss: 0.033612411469221115, eval metrics: {'eval_loss': 0.2249501829680342, 'MRE': 0.8035899989889728, 'MAE': 0.8035899989889728, 'RMSE': 613.659665721191}
[2023-12-12 19:24:09,592] {executor.py:174} INFO - Saving model at step 500 with loss 0.033612411469221115,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 77%|███████▋  | 96/125 [00:00<00:00, 202.06it/s]

[2023-12-12 19:24:10,073] {executor.py:139} INFO - ModelType.RNN trained for 3.291465738 seconds.
[2023-12-12 19:24:10,074] {executor.py:140} INFO - ModelType.RNN eval at step 600.
[2023-12-12 19:24:10,122] {executor.py:144} INFO - ModelType.RNN train loss: 0.26614612340927124, eval metrics: {'eval_loss': 0.10439695550415379, 'MRE': 0.071771933392316, 'MAE': 0.071771933392316, 'RMSE': 54.80847286440235}
[2023-12-12 19:24:10,123] {executor.py:174} INFO - Saving model at step 600 with loss 0.26614612340927124,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 178.42it/s]

[2023-12-12 19:24:10,240] {executor.py:122} INFO - ModelType.RNN training epoch 5



 53%|█████▎    | 66/125 [00:00<00:00, 207.43it/s]

[2023-12-12 19:24:10,610] {executor.py:139} INFO - ModelType.RNN trained for 3.828741709 seconds.
[2023-12-12 19:24:10,611] {executor.py:140} INFO - ModelType.RNN eval at step 700.
[2023-12-12 19:24:10,661] {executor.py:144} INFO - ModelType.RNN train loss: 0.0743778869509697, eval metrics: {'eval_loss': 0.08155186442085184, 'MRE': 0.15699012576276747, 'MAE': 0.15699012576276747, 'RMSE': 119.88515065930983}
[2023-12-12 19:24:10,661] {executor.py:174} INFO - Saving model at step 700 with loss 0.0743778869509697,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 188.42it/s]

[2023-12-12 19:24:10,905] {executor.py:122} INFO - ModelType.RNN training epoch 6



 34%|███▎      | 42/125 [00:00<00:00, 202.37it/s]

[2023-12-12 19:24:11,157] {executor.py:139} INFO - ModelType.RNN trained for 4.375301337 seconds.
[2023-12-12 19:24:11,157] {executor.py:140} INFO - ModelType.RNN eval at step 800.
[2023-12-12 19:24:11,205] {executor.py:144} INFO - ModelType.RNN train loss: 0.05041998624801636, eval metrics: {'eval_loss': 0.07508087172531165, 'MRE': 0.33073670570461955, 'MAE': 0.33073670570461955, 'RMSE': 252.56632924722328}
[2023-12-12 19:24:11,205] {executor.py:174} INFO - Saving model at step 800 with loss 0.05041998624801636,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 188.86it/s]

[2023-12-12 19:24:11,569] {executor.py:122} INFO - ModelType.RNN training epoch 7



 17%|█▋        | 21/125 [00:00<00:00, 203.48it/s]

[2023-12-12 19:24:11,699] {executor.py:139} INFO - ModelType.RNN trained for 4.917285409 seconds.
[2023-12-12 19:24:11,699] {executor.py:140} INFO - ModelType.RNN eval at step 900.
[2023-12-12 19:24:11,748] {executor.py:144} INFO - ModelType.RNN train loss: 0.20802102982997894, eval metrics: {'eval_loss': 0.06245760876649561, 'MRE': 0.08902172637340035, 'MAE': 0.08902172637340035, 'RMSE': 67.98123784138068}
[2023-12-12 19:24:11,749] {executor.py:174} INFO - Saving model at step 900 with loss 0.20802102982997894,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 190.88it/s]

[2023-12-12 19:24:12,226] {executor.py:122} INFO - ModelType.RNN training epoch 8



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:12,232] {executor.py:139} INFO - ModelType.RNN trained for 5.451150468 seconds.
[2023-12-12 19:24:12,233] {executor.py:140} INFO - ModelType.RNN eval at step 1000.
[2023-12-12 19:24:12,281] {executor.py:144} INFO - ModelType.RNN train loss: 0.8168876767158508, eval metrics: {'eval_loss': 0.11250424925954296, 'MRE': 0.2153220562866353, 'MAE': 0.2153220562866353, 'RMSE': 164.43019605706831}
[2023-12-12 19:24:12,282] {executor.py:174} INFO - Saving model at step 1000 with loss 0.8168876767158508,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 77%|███████▋  | 96/125 [00:00<00:00, 159.16it/s]

[2023-12-12 19:24:12,879] {executor.py:139} INFO - ModelType.RNN trained for 6.097295625 seconds.
[2023-12-12 19:24:12,879] {executor.py:140} INFO - ModelType.RNN eval at step 1100.
[2023-12-12 19:24:12,935] {executor.py:144} INFO - ModelType.RNN train loss: 0.2959248721599579, eval metrics: {'eval_loss': 0.06888313115072939, 'MRE': 0.14696954281442046, 'MAE': 0.14696954281442046, 'RMSE': 112.23295539785727}
[2023-12-12 19:24:12,935] {executor.py:174} INFO - Saving model at step 1100 with loss 0.2959248721599579,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 143.26it/s]

[2023-12-12 19:24:13,101] {executor.py:122} INFO - ModelType.RNN training epoch 9



 51%|█████     | 64/125 [00:00<00:00, 152.23it/s]

[2023-12-12 19:24:13,600] {executor.py:139} INFO - ModelType.RNN trained for 6.818387331 seconds.
[2023-12-12 19:24:13,600] {executor.py:140} INFO - ModelType.RNN eval at step 1200.
[2023-12-12 19:24:13,657] {executor.py:144} INFO - ModelType.RNN train loss: 0.020897744223475456, eval metrics: {'eval_loss': 0.040518272849462494, 'MRE': 0.08320308473160291, 'MAE': 0.08320308473160291, 'RMSE': 63.53784545303688}
[2023-12-12 19:24:13,658] {executor.py:174} INFO - Saving model at step 1200 with loss 0.020897744223475456,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 142.76it/s]

[2023-12-12 19:24:13,978] {executor.py:122} INFO - ModelType.RNN training epoch 10



 27%|██▋       | 34/125 [00:00<00:00, 162.35it/s]

[2023-12-12 19:24:14,295] {executor.py:139} INFO - ModelType.RNN trained for 7.513692511 seconds.
[2023-12-12 19:24:14,296] {executor.py:140} INFO - ModelType.RNN eval at step 1300.
[2023-12-12 19:24:14,350] {executor.py:144} INFO - ModelType.RNN train loss: 0.1890338510274887, eval metrics: {'eval_loss': 0.06181566931683427, 'MRE': 0.12023127469803269, 'MAE': 0.12023127469803269, 'RMSE': 91.81433807445865}
[2023-12-12 19:24:14,351] {executor.py:174} INFO - Saving model at step 1300 with loss 0.1890338510274887,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 146.52it/s]

[2023-12-12 19:24:14,833] {executor.py:122} INFO - ModelType.RNN training epoch 11



 13%|█▎        | 16/125 [00:00<00:00, 153.02it/s]

[2023-12-12 19:24:15,006] {executor.py:139} INFO - ModelType.RNN trained for 8.224962448 seconds.
[2023-12-12 19:24:15,007] {executor.py:140} INFO - ModelType.RNN eval at step 1400.
[2023-12-12 19:24:15,066] {executor.py:144} INFO - ModelType.RNN train loss: 0.026102453470230103, eval metrics: {'eval_loss': 0.034427232968692593, 'MRE': 0.011578715149579364, 'MAE': 0.011578715149579364, 'RMSE': 8.842059355033484}
[2023-12-12 19:24:15,066] {executor.py:174} INFO - Saving model at step 1400 with loss 0.026102453470230103,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 140.11it/s]

[2023-12-12 19:24:15,727] {executor.py:122} INFO - ModelType.RNN training epoch 12



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:15,735] {executor.py:139} INFO - ModelType.RNN trained for 8.954039239 seconds.
[2023-12-12 19:24:15,736] {executor.py:140} INFO - ModelType.RNN eval at step 1500.
[2023-12-12 19:24:15,793] {executor.py:144} INFO - ModelType.RNN train loss: 0.0211455337703228, eval metrics: {'eval_loss': 0.03839584345965145, 'MRE': 0.08451720583192675, 'MAE': 0.08451720583192675, 'RMSE': 64.54137102721825}
[2023-12-12 19:24:15,794] {executor.py:174} INFO - Saving model at step 1500 with loss 0.0211455337703228,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 69%|██████▉   | 86/125 [00:00<00:00, 148.55it/s]

[2023-12-12 19:24:16,446] {executor.py:139} INFO - ModelType.RNN trained for 9.664712415 seconds.
[2023-12-12 19:24:16,447] {executor.py:140} INFO - ModelType.RNN eval at step 1600.
[2023-12-12 19:24:16,507] {executor.py:144} INFO - ModelType.RNN train loss: 0.05460864305496216, eval metrics: {'eval_loss': 0.043199134135368064, 'MRE': 0.13967508069791945, 'MAE': 0.13967508069791945, 'RMSE': 106.66255607773167}
[2023-12-12 19:24:16,507] {executor.py:174} INFO - Saving model at step 1600 with loss 0.05460864305496216,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 132.94it/s]

[2023-12-12 19:24:16,669] {executor.py:122} INFO - ModelType.RNN training epoch 13



 50%|█████     | 63/125 [00:00<00:00, 151.37it/s]

[2023-12-12 19:24:17,175] {executor.py:139} INFO - ModelType.RNN trained for 10.393971544 seconds.
[2023-12-12 19:24:17,176] {executor.py:140} INFO - ModelType.RNN eval at step 1700.
[2023-12-12 19:24:17,235] {executor.py:144} INFO - ModelType.RNN train loss: 0.021274134516716003, eval metrics: {'eval_loss': 0.03591581900568249, 'MRE': 0.006217834698872427, 'MAE': 0.006217834698872427, 'RMSE': 4.74823525382385}
[2023-12-12 19:24:17,236] {executor.py:174} INFO - Saving model at step 1700 with loss 0.021274134516716003,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 139.29it/s]

[2023-12-12 19:24:17,569] {executor.py:122} INFO - ModelType.RNN training epoch 14



 38%|███▊      | 47/125 [00:00<00:00, 152.17it/s]

[2023-12-12 19:24:17,907] {executor.py:139} INFO - ModelType.RNN trained for 11.125283818 seconds.
[2023-12-12 19:24:17,907] {executor.py:140} INFO - ModelType.RNN eval at step 1800.
[2023-12-12 19:24:17,965] {executor.py:144} INFO - ModelType.RNN train loss: 0.022166883572936058, eval metrics: {'eval_loss': 0.03082712910747012, 'MRE': 0.1569952697362039, 'MAE': 0.1569952697362039, 'RMSE': 119.88907884286539}
[2023-12-12 19:24:17,965] {executor.py:174} INFO - Saving model at step 1800 with loss 0.022166883572936058,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 140.67it/s]

[2023-12-12 19:24:18,460] {executor.py:122} INFO - ModelType.RNN training epoch 15



 12%|█▏        | 15/125 [00:00<00:00, 147.95it/s]

[2023-12-12 19:24:18,635] {executor.py:139} INFO - ModelType.RNN trained for 11.853411036 seconds.
[2023-12-12 19:24:18,635] {executor.py:140} INFO - ModelType.RNN eval at step 1900.
[2023-12-12 19:24:18,691] {executor.py:144} INFO - ModelType.RNN train loss: 0.04308407008647919, eval metrics: {'eval_loss': 0.02956734667532146, 'MRE': 0.16103565828393002, 'MAE': 0.16103565828393002, 'RMSE': 122.97451232100832}
[2023-12-12 19:24:18,692] {executor.py:174} INFO - Saving model at step 1900 with loss 0.04308407008647919,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 139.50it/s]

[2023-12-12 19:24:19,358] {executor.py:122} INFO - ModelType.RNN training epoch 16



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:19,366] {executor.py:139} INFO - ModelType.RNN trained for 12.5848412 seconds.
[2023-12-12 19:24:19,367] {executor.py:140} INFO - ModelType.RNN eval at step 2000.
[2023-12-12 19:24:19,424] {executor.py:144} INFO - ModelType.RNN train loss: 0.008883873000741005, eval metrics: {'eval_loss': 0.016336176929493938, 'MRE': 0.07486201405874608, 'MAE': 0.07486201405874608, 'RMSE': 57.1682059013973}
[2023-12-12 19:24:19,425] {executor.py:174} INFO - Saving model at step 2000 with loss 0.008883873000741005,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 80%|████████  | 100/125 [00:00<00:00, 150.54it/s]

[2023-12-12 19:24:20,086] {executor.py:139} INFO - ModelType.RNN trained for 13.304869245 seconds.
[2023-12-12 19:24:20,087] {executor.py:140} INFO - ModelType.RNN eval at step 2100.
[2023-12-12 19:24:20,143] {executor.py:144} INFO - ModelType.RNN train loss: 0.025785434991121292, eval metrics: {'eval_loss': 0.01697036365602309, 'MRE': 0.006296937986624426, 'MAE': 0.006296937986624426, 'RMSE': 4.808642298685413}
[2023-12-12 19:24:20,144] {executor.py:174} INFO - Saving model at step 2100 with loss 0.025785434991121292,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 131.92it/s]

[2023-12-12 19:24:20,307] {executor.py:122} INFO - ModelType.RNN training epoch 17



 51%|█████     | 64/125 [00:00<00:00, 153.88it/s]

[2023-12-12 19:24:20,803] {executor.py:139} INFO - ModelType.RNN trained for 14.021793543 seconds.
[2023-12-12 19:24:20,804] {executor.py:140} INFO - ModelType.RNN eval at step 2200.
[2023-12-12 19:24:20,862] {executor.py:144} INFO - ModelType.RNN train loss: 0.023415489122271538, eval metrics: {'eval_loss': 0.01644950877660169, 'MRE': 0.012081716899612632, 'MAE': 0.012081716899612632, 'RMSE': 9.2261754915844}
[2023-12-12 19:24:20,862] {executor.py:174} INFO - Saving model at step 2200 with loss 0.023415489122271538,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 141.75it/s]

[2023-12-12 19:24:21,191] {executor.py:122} INFO - ModelType.RNN training epoch 18



 38%|███▊      | 48/125 [00:00<00:00, 151.20it/s]

[2023-12-12 19:24:21,530] {executor.py:139} INFO - ModelType.RNN trained for 14.748327997 seconds.
[2023-12-12 19:24:21,530] {executor.py:140} INFO - ModelType.RNN eval at step 2300.
[2023-12-12 19:24:21,588] {executor.py:144} INFO - ModelType.RNN train loss: 0.038816776126623154, eval metrics: {'eval_loss': 0.020110181886523675, 'MRE': 0.011691744837691144, 'MAE': 0.011691744837691144, 'RMSE': 8.928374217974124}
[2023-12-12 19:24:21,588] {executor.py:174} INFO - Saving model at step 2300 with loss 0.038816776126623154,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 141.47it/s]

[2023-12-12 19:24:22,077] {executor.py:122} INFO - ModelType.RNN training epoch 19



 13%|█▎        | 16/125 [00:00<00:00, 153.93it/s]

[2023-12-12 19:24:22,246] {executor.py:139} INFO - ModelType.RNN trained for 15.465103344 seconds.
[2023-12-12 19:24:22,247] {executor.py:140} INFO - ModelType.RNN eval at step 2400.
[2023-12-12 19:24:22,306] {executor.py:144} INFO - ModelType.RNN train loss: 0.018490487709641457, eval metrics: {'eval_loss': 0.018986223328213852, 'MRE': 0.10903999687621055, 'MAE': 0.10903999687621055, 'RMSE': 83.26814434908522}
[2023-12-12 19:24:22,307] {executor.py:174} INFO - Saving model at step 2400 with loss 0.018490487709641457,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 139.53it/s]

[2023-12-12 19:24:22,975] {executor.py:122} INFO - ModelType.RNN training epoch 20



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:22,983] {executor.py:139} INFO - ModelType.RNN trained for 16.201762586 seconds.
[2023-12-12 19:24:22,984] {executor.py:140} INFO - ModelType.RNN eval at step 2500.
[2023-12-12 19:24:23,043] {executor.py:144} INFO - ModelType.RNN train loss: 0.01641263999044895, eval metrics: {'eval_loss': 0.04783163617293422, 'MRE': 0.4067792516822489, 'MAE': 0.4067792516822489, 'RMSE': 310.6360456497799}
[2023-12-12 19:24:23,043] {executor.py:174} INFO - Saving model at step 2500 with loss 0.01641263999044895,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 68%|██████▊   | 85/125 [00:00<00:00, 150.60it/s]

[2023-12-12 19:24:23,693] {executor.py:139} INFO - ModelType.RNN trained for 16.911323707 seconds.
[2023-12-12 19:24:23,693] {executor.py:140} INFO - ModelType.RNN eval at step 2600.
[2023-12-12 19:24:23,753] {executor.py:144} INFO - ModelType.RNN train loss: 0.0291918758302927, eval metrics: {'eval_loss': 0.05676813963621568, 'MRE': 0.16720021818974304, 'MAE': 0.16720021818974304, 'RMSE': 127.68206440089841}
[2023-12-12 19:24:23,753] {executor.py:174} INFO - Saving model at step 2600 with loss 0.0291918758302927,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 132.32it/s]

[2023-12-12 19:24:23,921] {executor.py:122} INFO - ModelType.RNN training epoch 21



 54%|█████▎    | 67/125 [00:00<00:00, 164.24it/s]

[2023-12-12 19:24:24,394] {executor.py:139} INFO - ModelType.RNN trained for 17.612744159 seconds.
[2023-12-12 19:24:24,395] {executor.py:140} INFO - ModelType.RNN eval at step 2700.
[2023-12-12 19:24:24,444] {executor.py:144} INFO - ModelType.RNN train loss: 0.010355751030147076, eval metrics: {'eval_loss': 0.024351002084306225, 'MRE': 0.10447791677569629, 'MAE': 0.10447791677569629, 'RMSE': 79.78432230924261}
[2023-12-12 19:24:24,445] {executor.py:174} INFO - Saving model at step 2700 with loss 0.010355751030147076,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 158.50it/s]

[2023-12-12 19:24:24,712] {executor.py:122} INFO - ModelType.RNN training epoch 22



 30%|███       | 38/125 [00:00<00:00, 182.81it/s]

[2023-12-12 19:24:24,991] {executor.py:139} INFO - ModelType.RNN trained for 18.209288636 seconds.
[2023-12-12 19:24:24,991] {executor.py:140} INFO - ModelType.RNN eval at step 2800.
[2023-12-12 19:24:25,044] {executor.py:144} INFO - ModelType.RNN train loss: 0.041051752865314484, eval metrics: {'eval_loss': 0.02674970326300424, 'MRE': 0.0036526414075098643, 'MAE': 0.0036526414075098643, 'RMSE': 2.789331260906579}
[2023-12-12 19:24:25,045] {executor.py:174} INFO - Saving model at step 2800 with loss 0.041051752865314484,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 172.92it/s]

[2023-12-12 19:24:25,437] {executor.py:122} INFO - ModelType.RNN training epoch 23



 16%|█▌        | 20/125 [00:00<00:00, 199.56it/s]

[2023-12-12 19:24:25,570] {executor.py:139} INFO - ModelType.RNN trained for 18.78881708 seconds.
[2023-12-12 19:24:25,571] {executor.py:140} INFO - ModelType.RNN eval at step 2900.
[2023-12-12 19:24:25,619] {executor.py:144} INFO - ModelType.RNN train loss: 0.009934485889971256, eval metrics: {'eval_loss': 0.028147587588486764, 'MRE': 0.13228086589277951, 'MAE': 0.13228086589277951, 'RMSE': 101.01598084496152}
[2023-12-12 19:24:25,620] {executor.py:174} INFO - Saving model at step 2900 with loss 0.009934485889971256,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 178.07it/s]

[2023-12-12 19:24:26,141] {executor.py:122} INFO - ModelType.RNN training epoch 24



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:26,147] {executor.py:139} INFO - ModelType.RNN trained for 19.366092327 seconds.
[2023-12-12 19:24:26,148] {executor.py:140} INFO - ModelType.RNN eval at step 3000.
[2023-12-12 19:24:26,196] {executor.py:144} INFO - ModelType.RNN train loss: 0.07651960104703903, eval metrics: {'eval_loss': 0.16448564361780882, 'MRE': 1.7088306750404652, 'MAE': 1.7088306750404652, 'RMSE': 1304.9446386077275}
[2023-12-12 19:24:26,196] {executor.py:174} INFO - Saving model at step 3000 with loss 0.07651960104703903,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 71%|███████   | 89/125 [00:00<00:00, 184.45it/s]

[2023-12-12 19:24:26,719] {executor.py:139} INFO - ModelType.RNN trained for 19.937859839 seconds.
[2023-12-12 19:24:26,720] {executor.py:140} INFO - ModelType.RNN eval at step 3100.
[2023-12-12 19:24:26,770] {executor.py:144} INFO - ModelType.RNN train loss: 0.062213219702243805, eval metrics: {'eval_loss': 0.34206763103317755, 'MRE': 0.3252637288810934, 'MAE': 0.3252637288810934, 'RMSE': 248.3869030071627}
[2023-12-12 19:24:26,771] {executor.py:174} INFO - Saving model at step 3100 with loss 0.062213219702243805,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 164.72it/s]

[2023-12-12 19:24:26,901] {executor.py:122} INFO - ModelType.RNN training epoch 25



 48%|████▊     | 60/125 [00:00<00:00, 193.20it/s]

[2023-12-12 19:24:27,296] {executor.py:139} INFO - ModelType.RNN trained for 20.514210626 seconds.
[2023-12-12 19:24:27,296] {executor.py:140} INFO - ModelType.RNN eval at step 3200.
[2023-12-12 19:24:27,347] {executor.py:144} INFO - ModelType.RNN train loss: 0.025860777124762535, eval metrics: {'eval_loss': 0.16620173907274596, 'MRE': 0.23011571200594366, 'MAE': 0.23011571200594366, 'RMSE': 175.72733742882122}
[2023-12-12 19:24:27,348] {executor.py:174} INFO - Saving model at step 3200 with loss 0.025860777124762535,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 174.54it/s]

[2023-12-12 19:24:27,619] {executor.py:122} INFO - ModelType.RNN training epoch 26



 30%|███       | 38/125 [00:00<00:00, 180.16it/s]

[2023-12-12 19:24:27,903] {executor.py:139} INFO - ModelType.RNN trained for 21.121690609 seconds.
[2023-12-12 19:24:27,904] {executor.py:140} INFO - ModelType.RNN eval at step 3300.
[2023-12-12 19:24:27,954] {executor.py:144} INFO - ModelType.RNN train loss: 0.030057135969400406, eval metrics: {'eval_loss': 0.02979187298087009, 'MRE': 0.13548944965843562, 'MAE': 0.13548944965843562, 'RMSE': 103.46620850278248}
[2023-12-12 19:24:27,955] {executor.py:174} INFO - Saving model at step 3300 with loss 0.030057135969400406,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 171.41it/s]

[2023-12-12 19:24:28,350] {executor.py:122} INFO - ModelType.RNN training epoch 27



 15%|█▌        | 19/125 [00:00<00:00, 189.71it/s]

[2023-12-12 19:24:28,488] {executor.py:139} INFO - ModelType.RNN trained for 21.707143912 seconds.
[2023-12-12 19:24:28,489] {executor.py:140} INFO - ModelType.RNN eval at step 3400.
[2023-12-12 19:24:28,541] {executor.py:144} INFO - ModelType.RNN train loss: 0.01837744377553463, eval metrics: {'eval_loss': 0.023991533182337522, 'MRE': 0.10942549034915176, 'MAE': 0.10942549034915176, 'RMSE': 83.56252555845867}
[2023-12-12 19:24:28,541] {executor.py:174} INFO - Saving model at step 3400 with loss 0.01837744377553463,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 174.02it/s]

[2023-12-12 19:24:29,071] {executor.py:122} INFO - ModelType.RNN training epoch 28



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:29,078] {executor.py:139} INFO - ModelType.RNN trained for 22.296731063 seconds.
[2023-12-12 19:24:29,078] {executor.py:140} INFO - ModelType.RNN eval at step 3500.
[2023-12-12 19:24:29,129] {executor.py:144} INFO - ModelType.RNN train loss: 0.009653052315115929, eval metrics: {'eval_loss': 0.012837852080477975, 'MRE': 0.07580507332756245, 'MAE': 0.07580507332756245, 'RMSE': 57.888370956195445}
[2023-12-12 19:24:29,129] {executor.py:174} INFO - Saving model at step 3500 with loss 0.009653052315115929,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 69%|██████▉   | 86/125 [00:00<00:00, 179.81it/s]

[2023-12-12 19:24:29,665] {executor.py:139} INFO - ModelType.RNN trained for 22.883368511 seconds.
[2023-12-12 19:24:29,665] {executor.py:140} INFO - ModelType.RNN eval at step 3600.
[2023-12-12 19:24:29,715] {executor.py:144} INFO - ModelType.RNN train loss: 0.015674693509936333, eval metrics: {'eval_loss': 0.014042397108054362, 'MRE': 0.004423965281236484, 'MAE': 0.004423965281236484, 'RMSE': 3.378350973831516}
[2023-12-12 19:24:29,715] {executor.py:174} INFO - Saving model at step 3600 with loss 0.015674693509936333,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 160.67it/s]

[2023-12-12 19:24:29,851] {executor.py:122} INFO - ModelType.RNN training epoch 29



 48%|████▊     | 60/125 [00:00<00:00, 193.82it/s]

[2023-12-12 19:24:30,244] {executor.py:139} INFO - ModelType.RNN trained for 23.462797959 seconds.
[2023-12-12 19:24:30,245] {executor.py:140} INFO - ModelType.RNN eval at step 3700.
[2023-12-12 19:24:30,293] {executor.py:144} INFO - ModelType.RNN train loss: 0.009011234156787395, eval metrics: {'eval_loss': 0.01038947851003076, 'MRE': 0.053610270632564176, 'MAE': 0.053610270632564176, 'RMSE': 40.939360615478904}
[2023-12-12 19:24:30,294] {executor.py:174} INFO - Saving model at step 3700 with loss 0.009011234156787395,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 177.75it/s]

[2023-12-12 19:24:30,556] {executor.py:122} INFO - ModelType.RNN training epoch 30



 30%|███       | 38/125 [00:00<00:00, 187.42it/s]

[2023-12-12 19:24:30,830] {executor.py:139} INFO - ModelType.RNN trained for 24.048500747 seconds.
[2023-12-12 19:24:30,830] {executor.py:140} INFO - ModelType.RNN eval at step 3800.
[2023-12-12 19:24:30,883] {executor.py:144} INFO - ModelType.RNN train loss: 0.021352075040340424, eval metrics: {'eval_loss': 0.010856127626119325, 'MRE': 0.10708002770899178, 'MAE': 0.10708002770899178, 'RMSE': 81.77141837502825}
[2023-12-12 19:24:30,883] {executor.py:174} INFO - Saving model at step 3800 with loss 0.021352075040340424,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 170.98it/s]

[2023-12-12 19:24:31,289] {executor.py:122} INFO - ModelType.RNN training epoch 31



 15%|█▌        | 19/125 [00:00<00:00, 186.71it/s]

[2023-12-12 19:24:31,431] {executor.py:139} INFO - ModelType.RNN trained for 24.649842483 seconds.
[2023-12-12 19:24:31,432] {executor.py:140} INFO - ModelType.RNN eval at step 3900.
[2023-12-12 19:24:31,483] {executor.py:144} INFO - ModelType.RNN train loss: 0.014034437946975231, eval metrics: {'eval_loss': 0.009988448528859478, 'MRE': 0.045290264880132956, 'MAE': 0.045290264880132956, 'RMSE': 34.585807242168016}
[2023-12-12 19:24:31,483] {executor.py:174} INFO - Saving model at step 3900 with loss 0.014034437946975231,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 171.40it/s]

[2023-12-12 19:24:32,020] {executor.py:122} INFO - ModelType.RNN training epoch 32



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:32,027] {executor.py:139} INFO - ModelType.RNN trained for 25.246065521 seconds.
[2023-12-12 19:24:32,028] {executor.py:140} INFO - ModelType.RNN eval at step 4000.
[2023-12-12 19:24:32,078] {executor.py:144} INFO - ModelType.RNN train loss: 0.021416492760181427, eval metrics: {'eval_loss': 0.011022935881136129, 'MRE': 0.18067189317191862, 'MAE': 0.18067189317191862, 'RMSE': 137.96967820478778}
[2023-12-12 19:24:32,078] {executor.py:174} INFO - Saving model at step 4000 with loss 0.021416492760181427,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 67%|██████▋   | 84/125 [00:00<00:00, 177.13it/s]

[2023-12-12 19:24:32,621] {executor.py:139} INFO - ModelType.RNN trained for 25.839409869 seconds.
[2023-12-12 19:24:32,621] {executor.py:140} INFO - ModelType.RNN eval at step 4100.
[2023-12-12 19:24:32,672] {executor.py:144} INFO - ModelType.RNN train loss: 0.01053859293460846, eval metrics: {'eval_loss': 0.018388124581318922, 'MRE': 0.13369241305884144, 'MAE': 0.13369241305884144, 'RMSE': 102.09390561152804}
[2023-12-12 19:24:32,672] {executor.py:174} INFO - Saving model at step 4100 with loss 0.01053859293460846,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 158.28it/s]

[2023-12-12 19:24:32,812] {executor.py:122} INFO - ModelType.RNN training epoch 33



 46%|████▋     | 58/125 [00:00<00:00, 187.40it/s]

[2023-12-12 19:24:33,218] {executor.py:139} INFO - ModelType.RNN trained for 26.436630425 seconds.
[2023-12-12 19:24:33,219] {executor.py:140} INFO - ModelType.RNN eval at step 4200.
[2023-12-12 19:24:33,268] {executor.py:144} INFO - ModelType.RNN train loss: 0.013332781381905079, eval metrics: {'eval_loss': 0.009898045727123436, 'MRE': 0.07360389998775407, 'MAE': 0.07360389998775407, 'RMSE': 56.2074499671329}
[2023-12-12 19:24:33,268] {executor.py:174} INFO - Saving model at step 4200 with loss 0.013332781381905079,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 172.92it/s]

[2023-12-12 19:24:33,537] {executor.py:122} INFO - ModelType.RNN training epoch 34



 32%|███▏      | 40/125 [00:00<00:00, 189.85it/s]

[2023-12-12 19:24:33,806] {executor.py:139} INFO - ModelType.RNN trained for 27.024923069 seconds.
[2023-12-12 19:24:33,807] {executor.py:140} INFO - ModelType.RNN eval at step 4300.
[2023-12-12 19:24:33,856] {executor.py:144} INFO - ModelType.RNN train loss: 0.012743139639496803, eval metrics: {'eval_loss': 0.010394554795661511, 'MRE': 0.04808064170235659, 'MAE': 0.04808064170235659, 'RMSE': 36.716672123657645}
[2023-12-12 19:24:33,856] {executor.py:174} INFO - Saving model at step 4300 with loss 0.012743139639496803,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 175.18it/s]

[2023-12-12 19:24:34,252] {executor.py:122} INFO - ModelType.RNN training epoch 35



 16%|█▌        | 20/125 [00:00<00:00, 192.93it/s]

[2023-12-12 19:24:34,389] {executor.py:139} INFO - ModelType.RNN trained for 27.60804698 seconds.
[2023-12-12 19:24:34,390] {executor.py:140} INFO - ModelType.RNN eval at step 4400.
[2023-12-12 19:24:34,440] {executor.py:144} INFO - ModelType.RNN train loss: 0.005784394219517708, eval metrics: {'eval_loss': 0.007255766469125564, 'MRE': 0.0682652418641873, 'MAE': 0.0682652418641873, 'RMSE': 52.13059589524369}
[2023-12-12 19:24:34,441] {executor.py:174} INFO - Saving model at step 4400 with loss 0.005784394219517708,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 174.22it/s]

[2023-12-12 19:24:34,972] {executor.py:122} INFO - ModelType.RNN training epoch 36



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:34,980] {executor.py:139} INFO - ModelType.RNN trained for 28.198294312 seconds.
[2023-12-12 19:24:34,980] {executor.py:140} INFO - ModelType.RNN eval at step 4500.
[2023-12-12 19:24:35,032] {executor.py:144} INFO - ModelType.RNN train loss: 0.010872420854866505, eval metrics: {'eval_loss': 0.020292443095688492, 'MRE': 0.23336366948688558, 'MAE': 0.23336366948688558, 'RMSE': 178.2076327343117}
[2023-12-12 19:24:35,033] {executor.py:174} INFO - Saving model at step 4500 with loss 0.010872420854866505,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 66%|██████▋   | 83/125 [00:00<00:00, 172.80it/s]

[2023-12-12 19:24:35,579] {executor.py:139} INFO - ModelType.RNN trained for 28.797932166 seconds.
[2023-12-12 19:24:35,580] {executor.py:140} INFO - ModelType.RNN eval at step 4600.
[2023-12-12 19:24:35,628] {executor.py:144} INFO - ModelType.RNN train loss: 0.010402634739875793, eval metrics: {'eval_loss': 0.024899770156480372, 'MRE': 0.14621802204996484, 'MAE': 0.14621802204996484, 'RMSE': 111.65905828405721}
[2023-12-12 19:24:35,628] {executor.py:174} INFO - Saving model at step 4600 with loss 0.010402634739875793,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 157.86it/s]

[2023-12-12 19:24:35,766] {executor.py:122} INFO - ModelType.RNN training epoch 37



 48%|████▊     | 60/125 [00:00<00:00, 193.09it/s]

[2023-12-12 19:24:36,162] {executor.py:139} INFO - ModelType.RNN trained for 29.380327215 seconds.
[2023-12-12 19:24:36,162] {executor.py:140} INFO - ModelType.RNN eval at step 4700.
[2023-12-12 19:24:36,212] {executor.py:144} INFO - ModelType.RNN train loss: 0.00938394945114851, eval metrics: {'eval_loss': 0.015069784104036024, 'MRE': 0.09500243977821844, 'MAE': 0.09500243977821844, 'RMSE': 72.54839596105921}
[2023-12-12 19:24:36,213] {executor.py:174} INFO - Saving model at step 4700 with loss 0.00938394945114851,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 176.39it/s]

[2023-12-12 19:24:36,476] {executor.py:122} INFO - ModelType.RNN training epoch 38



 32%|███▏      | 40/125 [00:00<00:00, 192.74it/s]

[2023-12-12 19:24:36,742] {executor.py:139} INFO - ModelType.RNN trained for 29.960724186 seconds.
[2023-12-12 19:24:36,743] {executor.py:140} INFO - ModelType.RNN eval at step 4800.
[2023-12-12 19:24:36,791] {executor.py:144} INFO - ModelType.RNN train loss: 0.009228833019733429, eval metrics: {'eval_loss': 0.008701923771206146, 'MRE': 0.022709732810597284, 'MAE': 0.022709732810597284, 'RMSE': 17.34223554636344}
[2023-12-12 19:24:36,792] {executor.py:174} INFO - Saving model at step 4800 with loss 0.009228833019733429,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 175.14it/s]

[2023-12-12 19:24:37,192] {executor.py:122} INFO - ModelType.RNN training epoch 39



 15%|█▌        | 19/125 [00:00<00:00, 186.06it/s]

[2023-12-12 19:24:37,333] {executor.py:139} INFO - ModelType.RNN trained for 30.551779225 seconds.
[2023-12-12 19:24:37,334] {executor.py:140} INFO - ModelType.RNN eval at step 4900.
[2023-12-12 19:24:37,384] {executor.py:144} INFO - ModelType.RNN train loss: 0.04123586416244507, eval metrics: {'eval_loss': 0.006765972625894042, 'MRE': 0.04138041804049818, 'MAE': 0.04138041804049818, 'RMSE': 31.600061640990816}
[2023-12-12 19:24:37,384] {executor.py:174} INFO - Saving model at step 4900 with loss 0.04123586416244507,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 172.76it/s]

[2023-12-12 19:24:37,918] {executor.py:122} INFO - ModelType.RNN training epoch 40



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:37,925] {executor.py:139} INFO - ModelType.RNN trained for 31.143380657 seconds.
[2023-12-12 19:24:37,925] {executor.py:140} INFO - ModelType.RNN eval at step 5000.
[2023-12-12 19:24:37,973] {executor.py:144} INFO - ModelType.RNN train loss: 0.022745108231902122, eval metrics: {'eval_loss': 0.0069760868254189305, 'MRE': 0.056051848499696556, 'MAE': 0.056051848499696556, 'RMSE': 42.803865972267545}
[2023-12-12 19:24:37,974] {executor.py:174} INFO - Saving model at step 5000 with loss 0.022745108231902122,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 69%|██████▉   | 86/125 [00:00<00:00, 180.05it/s]

[2023-12-12 19:24:38,508] {executor.py:139} INFO - ModelType.RNN trained for 31.72639456 seconds.
[2023-12-12 19:24:38,508] {executor.py:140} INFO - ModelType.RNN eval at step 5100.
[2023-12-12 19:24:38,558] {executor.py:144} INFO - ModelType.RNN train loss: 0.009283282794058323, eval metrics: {'eval_loss': 0.015936492637802776, 'MRE': 0.09745892041185647, 'MAE': 0.09745892041185647, 'RMSE': 74.424281781422}
[2023-12-12 19:24:38,559] {executor.py:174} INFO - Saving model at step 5100 with loss 0.009283282794058323,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 161.05it/s]

[2023-12-12 19:24:38,696] {executor.py:122} INFO - ModelType.RNN training epoch 41



 48%|████▊     | 60/125 [00:00<00:00, 194.51it/s]

[2023-12-12 19:24:39,090] {executor.py:139} INFO - ModelType.RNN trained for 32.308206156 seconds.
[2023-12-12 19:24:39,090] {executor.py:140} INFO - ModelType.RNN eval at step 5200.
[2023-12-12 19:24:39,140] {executor.py:144} INFO - ModelType.RNN train loss: 0.05799631029367447, eval metrics: {'eval_loss': 0.01611833947334582, 'MRE': 0.08936125986900767, 'MAE': 0.08936125986900767, 'RMSE': 68.2405218191276}
[2023-12-12 19:24:39,141] {executor.py:174} INFO - Saving model at step 5200 with loss 0.05799631029367447,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 171.40it/s]

[2023-12-12 19:24:39,427] {executor.py:122} INFO - ModelType.RNN training epoch 42



 32%|███▏      | 40/125 [00:00<00:00, 193.88it/s]

[2023-12-12 19:24:39,691] {executor.py:139} INFO - ModelType.RNN trained for 32.90993802 seconds.
[2023-12-12 19:24:39,692] {executor.py:140} INFO - ModelType.RNN eval at step 5300.
[2023-12-12 19:24:39,741] {executor.py:144} INFO - ModelType.RNN train loss: 0.01330557931214571, eval metrics: {'eval_loss': 0.011632006028846193, 'MRE': 0.08423220833538553, 'MAE': 0.08423220833538553, 'RMSE': 64.32373334048884}
[2023-12-12 19:24:39,742] {executor.py:174} INFO - Saving model at step 5300 with loss 0.01330557931214571,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 175.21it/s]

[2023-12-12 19:24:40,143] {executor.py:122} INFO - ModelType.RNN training epoch 43



 15%|█▌        | 19/125 [00:00<00:00, 187.97it/s]

[2023-12-12 19:24:40,283] {executor.py:139} INFO - ModelType.RNN trained for 33.502022911 seconds.
[2023-12-12 19:24:40,284] {executor.py:140} INFO - ModelType.RNN eval at step 5400.
[2023-12-12 19:24:40,335] {executor.py:144} INFO - ModelType.RNN train loss: 0.018890077248215675, eval metrics: {'eval_loss': 0.018143953983851064, 'MRE': 0.17437660052036055, 'MAE': 0.17437660052036055, 'RMSE': 133.16229236246454}
[2023-12-12 19:24:40,335] {executor.py:174} INFO - Saving model at step 5400 with loss 0.018890077248215675,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 173.31it/s]

[2023-12-12 19:24:40,867] {executor.py:122} INFO - ModelType.RNN training epoch 44



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:40,874] {executor.py:139} INFO - ModelType.RNN trained for 34.092489113 seconds.
[2023-12-12 19:24:40,874] {executor.py:140} INFO - ModelType.RNN eval at step 5500.
[2023-12-12 19:24:40,923] {executor.py:144} INFO - ModelType.RNN train loss: 0.042662251740694046, eval metrics: {'eval_loss': 0.014081843233165832, 'MRE': 0.1093307770489191, 'MAE': 0.1093307770489191, 'RMSE': 83.4901979632507}
[2023-12-12 19:24:40,923] {executor.py:174} INFO - Saving model at step 5500 with loss 0.042662251740694046,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 70%|██████▉   | 87/125 [00:00<00:00, 181.84it/s]

[2023-12-12 19:24:41,455] {executor.py:139} INFO - ModelType.RNN trained for 34.673750697 seconds.
[2023-12-12 19:24:41,456] {executor.py:140} INFO - ModelType.RNN eval at step 5600.
[2023-12-12 19:24:41,506] {executor.py:144} INFO - ModelType.RNN train loss: 0.029446834698319435, eval metrics: {'eval_loss': 0.023818126543819044, 'MRE': 0.05639106031143321, 'MAE': 0.05639106031143321, 'RMSE': 43.06290429686203}
[2023-12-12 19:24:41,506] {executor.py:174} INFO - Saving model at step 5600 with loss 0.029446834698319435,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 161.68it/s]

[2023-12-12 19:24:41,642] {executor.py:122} INFO - ModelType.RNN training epoch 45



 48%|████▊     | 60/125 [00:00<00:00, 190.11it/s]

[2023-12-12 19:24:42,041] {executor.py:139} INFO - ModelType.RNN trained for 35.259569529 seconds.
[2023-12-12 19:24:42,041] {executor.py:140} INFO - ModelType.RNN eval at step 5700.
[2023-12-12 19:24:42,091] {executor.py:144} INFO - ModelType.RNN train loss: 0.011438802815973759, eval metrics: {'eval_loss': 0.008512282504503114, 'MRE': 0.027447475550103805, 'MAE': 0.027447475550103805, 'RMSE': 20.960201958907646}
[2023-12-12 19:24:42,091] {executor.py:174} INFO - Saving model at step 5700 with loss 0.011438802815973759,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 175.34it/s]

[2023-12-12 19:24:42,357] {executor.py:122} INFO - ModelType.RNN training epoch 46



 32%|███▏      | 40/125 [00:00<00:00, 191.03it/s]

[2023-12-12 19:24:42,625] {executor.py:139} INFO - ModelType.RNN trained for 35.843356797 seconds.
[2023-12-12 19:24:42,625] {executor.py:140} INFO - ModelType.RNN eval at step 5800.
[2023-12-12 19:24:42,675] {executor.py:144} INFO - ModelType.RNN train loss: 0.01053819339722395, eval metrics: {'eval_loss': 0.007295059008846203, 'MRE': 0.041475983880853336, 'MAE': 0.041475983880853336, 'RMSE': 31.673040276514485}
[2023-12-12 19:24:42,676] {executor.py:174} INFO - Saving model at step 5800 with loss 0.01053819339722395,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 174.23it/s]

[2023-12-12 19:24:43,076] {executor.py:122} INFO - ModelType.RNN training epoch 47



 15%|█▌        | 19/125 [00:00<00:00, 185.84it/s]

[2023-12-12 19:24:43,218] {executor.py:139} INFO - ModelType.RNN trained for 36.437082441 seconds.
[2023-12-12 19:24:43,219] {executor.py:140} INFO - ModelType.RNN eval at step 5900.
[2023-12-12 19:24:43,271] {executor.py:144} INFO - ModelType.RNN train loss: 0.011722358874976635, eval metrics: {'eval_loss': 0.007898314005265443, 'MRE': 0.008138209841263223, 'MAE': 0.008138209841263223, 'RMSE': 6.214725341332951}
[2023-12-12 19:24:43,271] {executor.py:174} INFO - Saving model at step 5900 with loss 0.011722358874976635,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 171.88it/s]

[2023-12-12 19:24:43,805] {executor.py:122} INFO - ModelType.RNN training epoch 48



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:43,812] {executor.py:139} INFO - ModelType.RNN trained for 37.030969196 seconds.
[2023-12-12 19:24:43,813] {executor.py:140} INFO - ModelType.RNN eval at step 6000.
[2023-12-12 19:24:43,864] {executor.py:144} INFO - ModelType.RNN train loss: 0.009817834943532944, eval metrics: {'eval_loss': 0.0076510872011287855, 'MRE': 0.04655508419452612, 'MAE': 0.04655508419452612, 'RMSE': 35.551683620227436}
[2023-12-12 19:24:43,865] {executor.py:174} INFO - Saving model at step 6000 with loss 0.009817834943532944,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 70%|██████▉   | 87/125 [00:00<00:00, 186.94it/s]

[2023-12-12 19:24:44,382] {executor.py:139} INFO - ModelType.RNN trained for 37.600250974 seconds.
[2023-12-12 19:24:44,382] {executor.py:140} INFO - ModelType.RNN eval at step 6100.
[2023-12-12 19:24:44,433] {executor.py:144} INFO - ModelType.RNN train loss: 0.014981823973357677, eval metrics: {'eval_loss': 0.0063054436155093405, 'MRE': 0.10856216686689009, 'MAE': 0.10856216686689009, 'RMSE': 82.90325055478706}
[2023-12-12 19:24:44,434] {executor.py:174} INFO - Saving model at step 6100 with loss 0.014981823973357677,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 164.90it/s]

[2023-12-12 19:24:44,566] {executor.py:122} INFO - ModelType.RNN training epoch 49



 50%|█████     | 63/125 [00:00<00:00, 201.53it/s]

[2023-12-12 19:24:44,944] {executor.py:139} INFO - ModelType.RNN trained for 38.162425438 seconds.
[2023-12-12 19:24:44,944] {executor.py:140} INFO - ModelType.RNN eval at step 6200.
[2023-12-12 19:24:44,994] {executor.py:144} INFO - ModelType.RNN train loss: 0.011328165419399738, eval metrics: {'eval_loss': 0.005377068981313362, 'MRE': 0.05978709999644314, 'MAE': 0.05978709999644314, 'RMSE': 45.656282238973176}
[2023-12-12 19:24:44,995] {executor.py:174} INFO - Saving model at step 6200 with loss 0.011328165419399738,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 181.80it/s]

[2023-12-12 19:24:45,255] {executor.py:122} INFO - ModelType.RNN training epoch 50



 34%|███▎      | 42/125 [00:00<00:00, 208.16it/s]

[2023-12-12 19:24:45,503] {executor.py:139} INFO - ModelType.RNN trained for 38.722003637 seconds.
[2023-12-12 19:24:45,504] {executor.py:140} INFO - ModelType.RNN eval at step 6300.
[2023-12-12 19:24:45,553] {executor.py:144} INFO - ModelType.RNN train loss: 0.014445299282670021, eval metrics: {'eval_loss': 0.005715708930018501, 'MRE': 0.027616761434464492, 'MAE': 0.027616761434464492, 'RMSE': 21.0894767375122}
[2023-12-12 19:24:45,553] {executor.py:174} INFO - Saving model at step 6300 with loss 0.014445299282670021,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 187.33it/s]

[2023-12-12 19:24:45,924] {executor.py:122} INFO - ModelType.RNN training epoch 51



 17%|█▋        | 21/125 [00:00<00:00, 203.65it/s]

[2023-12-12 19:24:46,054] {executor.py:139} INFO - ModelType.RNN trained for 39.272213418 seconds.
[2023-12-12 19:24:46,054] {executor.py:140} INFO - ModelType.RNN eval at step 6400.
[2023-12-12 19:24:46,104] {executor.py:144} INFO - ModelType.RNN train loss: 0.011316183023154736, eval metrics: {'eval_loss': 0.007511007133871317, 'MRE': 0.02318800704596613, 'MAE': 0.02318800704596613, 'RMSE': 17.707468572867924}
[2023-12-12 19:24:46,104] {executor.py:174} INFO - Saving model at step 6400 with loss 0.011316183023154736,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 186.92it/s]

[2023-12-12 19:24:46,595] {executor.py:122} INFO - ModelType.RNN training epoch 52



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:46,602] {executor.py:139} INFO - ModelType.RNN trained for 39.820326751 seconds.
[2023-12-12 19:24:46,602] {executor.py:140} INFO - ModelType.RNN eval at step 6500.
[2023-12-12 19:24:46,651] {executor.py:144} INFO - ModelType.RNN train loss: 0.01581946201622486, eval metrics: {'eval_loss': 0.00653463611468816, 'MRE': 0.11281602461467072, 'MAE': 0.11281602461467072, 'RMSE': 86.15169929956096}
[2023-12-12 19:24:46,651] {executor.py:174} INFO - Saving model at step 6500 with loss 0.01581946201622486,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 74%|███████▍  | 93/125 [00:00<00:00, 198.41it/s]

[2023-12-12 19:24:47,142] {executor.py:139} INFO - ModelType.RNN trained for 40.361064385 seconds.
[2023-12-12 19:24:47,143] {executor.py:140} INFO - ModelType.RNN eval at step 6600.
[2023-12-12 19:24:47,193] {executor.py:144} INFO - ModelType.RNN train loss: 0.006760977208614349, eval metrics: {'eval_loss': 0.006216195976146712, 'MRE': 0.04097895237376682, 'MAE': 0.04097895237376682, 'RMSE': 31.293483302341883}
[2023-12-12 19:24:47,194] {executor.py:174} INFO - Saving model at step 6600 with loss 0.006760977208614349,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 171.96it/s]

[2023-12-12 19:24:47,324] {executor.py:122} INFO - ModelType.RNN training epoch 53



 50%|█████     | 63/125 [00:00<00:00, 203.66it/s]

[2023-12-12 19:24:47,699] {executor.py:139} INFO - ModelType.RNN trained for 40.917234029 seconds.
[2023-12-12 19:24:47,699] {executor.py:140} INFO - ModelType.RNN eval at step 6700.
[2023-12-12 19:24:47,748] {executor.py:144} INFO - ModelType.RNN train loss: 0.007840385660529137, eval metrics: {'eval_loss': 0.004711840355482239, 'MRE': 0.02746859686439333, 'MAE': 0.02746859686439333, 'RMSE': 20.976331202281585}
[2023-12-12 19:24:47,749] {executor.py:174} INFO - Saving model at step 6700 with loss 0.007840385660529137,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 185.24it/s]

[2023-12-12 19:24:48,001] {executor.py:122} INFO - ModelType.RNN training epoch 54



 34%|███▎      | 42/125 [00:00<00:00, 203.20it/s]

[2023-12-12 19:24:48,252] {executor.py:139} INFO - ModelType.RNN trained for 41.471076862 seconds.
[2023-12-12 19:24:48,253] {executor.py:140} INFO - ModelType.RNN eval at step 6800.
[2023-12-12 19:24:48,303] {executor.py:144} INFO - ModelType.RNN train loss: 0.018878750503063202, eval metrics: {'eval_loss': 0.005417856978825652, 'MRE': 0.006491278581624516, 'MAE': 0.006491278581624516, 'RMSE': 4.957050049794816}
[2023-12-12 19:24:48,303] {executor.py:174} INFO - Saving model at step 6800 with loss 0.018878750503063202,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 186.20it/s]

[2023-12-12 19:24:48,674] {executor.py:122} INFO - ModelType.RNN training epoch 55



 17%|█▋        | 21/125 [00:00<00:00, 209.86it/s]

[2023-12-12 19:24:48,800] {executor.py:139} INFO - ModelType.RNN trained for 42.018824517 seconds.
[2023-12-12 19:24:48,801] {executor.py:140} INFO - ModelType.RNN eval at step 6900.
[2023-12-12 19:24:48,851] {executor.py:144} INFO - ModelType.RNN train loss: 0.02249760739505291, eval metrics: {'eval_loss': 0.005428938983151546, 'MRE': 0.11236692559752212, 'MAE': 0.11236692559752212, 'RMSE': 85.80874586175571}
[2023-12-12 19:24:48,852] {executor.py:174} INFO - Saving model at step 6900 with loss 0.02249760739505291,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 184.23it/s]

[2023-12-12 19:24:49,355] {executor.py:122} INFO - ModelType.RNN training epoch 56



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:49,361] {executor.py:139} INFO - ModelType.RNN trained for 42.580011137 seconds.
[2023-12-12 19:24:49,362] {executor.py:140} INFO - ModelType.RNN eval at step 7000.
[2023-12-12 19:24:49,412] {executor.py:144} INFO - ModelType.RNN train loss: 0.01249407697468996, eval metrics: {'eval_loss': 0.005081060557411267, 'MRE': 0.02823381662909894, 'MAE': 0.02823381662909894, 'RMSE': 21.560689526306533}
[2023-12-12 19:24:49,412] {executor.py:174} INFO - Saving model at step 7000 with loss 0.01249407697468996,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 74%|███████▎  | 92/125 [00:00<00:00, 191.76it/s]

[2023-12-12 19:24:49,921] {executor.py:139} INFO - ModelType.RNN trained for 43.139574993 seconds.
[2023-12-12 19:24:49,921] {executor.py:140} INFO - ModelType.RNN eval at step 7100.
[2023-12-12 19:24:49,971] {executor.py:144} INFO - ModelType.RNN train loss: 0.008312925696372986, eval metrics: {'eval_loss': 0.005667363777040289, 'MRE': 0.024784303448734793, 'MAE': 0.024784303448734793, 'RMSE': 18.926476671708656}
[2023-12-12 19:24:49,972] {executor.py:174} INFO - Saving model at step 7100 with loss 0.008312925696372986,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 167.50it/s]

[2023-12-12 19:24:50,103] {executor.py:122} INFO - ModelType.RNN training epoch 57



 53%|█████▎    | 66/125 [00:00<00:00, 206.31it/s]

[2023-12-12 19:24:50,472] {executor.py:139} INFO - ModelType.RNN trained for 43.690544687 seconds.
[2023-12-12 19:24:50,472] {executor.py:140} INFO - ModelType.RNN eval at step 7200.
[2023-12-12 19:24:50,522] {executor.py:144} INFO - ModelType.RNN train loss: 0.00944638904184103, eval metrics: {'eval_loss': 0.004984159845312556, 'MRE': 0.013703433164317802, 'MAE': 0.013703433164317802, 'RMSE': 10.464595409883145}
[2023-12-12 19:24:50,523] {executor.py:174} INFO - Saving model at step 7200 with loss 0.00944638904184103,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 187.15it/s]

[2023-12-12 19:24:50,773] {executor.py:122} INFO - ModelType.RNN training epoch 58



 34%|███▎      | 42/125 [00:00<00:00, 202.30it/s]

[2023-12-12 19:24:51,027] {executor.py:139} INFO - ModelType.RNN trained for 44.245365111 seconds.
[2023-12-12 19:24:51,027] {executor.py:140} INFO - ModelType.RNN eval at step 7300.
[2023-12-12 19:24:51,076] {executor.py:144} INFO - ModelType.RNN train loss: 0.010964760556817055, eval metrics: {'eval_loss': 0.005598505489615491, 'MRE': 0.07074734299268243, 'MAE': 0.07074734299268243, 'RMSE': 54.02604674793588}
[2023-12-12 19:24:51,076] {executor.py:174} INFO - Saving model at step 7300 with loss 0.010964760556817055,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 185.43it/s]

[2023-12-12 19:24:51,449] {executor.py:122} INFO - ModelType.RNN training epoch 59



 17%|█▋        | 21/125 [00:00<00:00, 204.36it/s]

[2023-12-12 19:24:51,577] {executor.py:139} INFO - ModelType.RNN trained for 44.796048618 seconds.
[2023-12-12 19:24:51,578] {executor.py:140} INFO - ModelType.RNN eval at step 7400.
[2023-12-12 19:24:51,626] {executor.py:144} INFO - ModelType.RNN train loss: 0.012147700414061546, eval metrics: {'eval_loss': 0.00629847151084015, 'MRE': 0.0640045572168906, 'MAE': 0.0640045572168906, 'RMSE': 48.8769338042606}
[2023-12-12 19:24:51,627] {executor.py:174} INFO - Saving model at step 7400 with loss 0.012147700414061546,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 188.88it/s]

[2023-12-12 19:24:52,112] {executor.py:122} INFO - ModelType.RNN training epoch 60



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:52,119] {executor.py:139} INFO - ModelType.RNN trained for 45.337791454 seconds.
[2023-12-12 19:24:52,120] {executor.py:140} INFO - ModelType.RNN eval at step 7500.
[2023-12-12 19:24:52,168] {executor.py:144} INFO - ModelType.RNN train loss: 0.008993709459900856, eval metrics: {'eval_loss': 0.0065676727726195865, 'MRE': 0.09191548291561226, 'MAE': 0.09191548291561226, 'RMSE': 70.1910484097134}
[2023-12-12 19:24:52,168] {executor.py:174} INFO - Saving model at step 7500 with loss 0.008993709459900856,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 75%|███████▌  | 94/125 [00:00<00:00, 199.05it/s]

[2023-12-12 19:24:52,663] {executor.py:139} INFO - ModelType.RNN trained for 45.88127641 seconds.
[2023-12-12 19:24:52,663] {executor.py:140} INFO - ModelType.RNN eval at step 7600.
[2023-12-12 19:24:52,714] {executor.py:144} INFO - ModelType.RNN train loss: 0.011454924941062927, eval metrics: {'eval_loss': 0.005685152335970018, 'MRE': 0.06340518557074512, 'MAE': 0.06340518557074512, 'RMSE': 48.4192250168453}
[2023-12-12 19:24:52,714] {executor.py:174} INFO - Saving model at step 7600 with loss 0.011454924941062927,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 170.84it/s]

[2023-12-12 19:24:52,846] {executor.py:122} INFO - ModelType.RNN training epoch 61



 50%|█████     | 63/125 [00:00<00:00, 205.53it/s]

[2023-12-12 19:24:53,222] {executor.py:139} INFO - ModelType.RNN trained for 46.440339738 seconds.
[2023-12-12 19:24:53,222] {executor.py:140} INFO - ModelType.RNN eval at step 7700.
[2023-12-12 19:24:53,274] {executor.py:144} INFO - ModelType.RNN train loss: 0.038399577140808105, eval metrics: {'eval_loss': 0.0049740569128726535, 'MRE': 0.04091915314294695, 'MAE': 0.04091915314294695, 'RMSE': 31.247817756427366}
[2023-12-12 19:24:53,274] {executor.py:174} INFO - Saving model at step 7700 with loss 0.038399577140808105,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 180.99it/s]

[2023-12-12 19:24:53,539] {executor.py:122} INFO - ModelType.RNN training epoch 62



 35%|███▌      | 44/125 [00:00<00:00, 213.78it/s]

[2023-12-12 19:24:53,779] {executor.py:139} INFO - ModelType.RNN trained for 46.997803875 seconds.
[2023-12-12 19:24:53,780] {executor.py:140} INFO - ModelType.RNN eval at step 7800.
[2023-12-12 19:24:53,828] {executor.py:144} INFO - ModelType.RNN train loss: 0.01589599810540676, eval metrics: {'eval_loss': 0.004053439289586654, 'MRE': 0.06586771485059517, 'MAE': 0.06586771485059517, 'RMSE': 50.29972987206088}
[2023-12-12 19:24:53,828] {executor.py:174} INFO - Saving model at step 7800 with loss 0.01589599810540676,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 192.00it/s]

[2023-12-12 19:24:54,192] {executor.py:122} INFO - ModelType.RNN training epoch 63



 17%|█▋        | 21/125 [00:00<00:00, 206.23it/s]

[2023-12-12 19:24:54,319] {executor.py:139} INFO - ModelType.RNN trained for 47.537948555 seconds.
[2023-12-12 19:24:54,320] {executor.py:140} INFO - ModelType.RNN eval at step 7900.
[2023-12-12 19:24:54,368] {executor.py:144} INFO - ModelType.RNN train loss: 0.005472735967487097, eval metrics: {'eval_loss': 0.007885478365306672, 'MRE': 0.03761689724925502, 'MAE': 0.03761689724925502, 'RMSE': 28.726057592166512}
[2023-12-12 19:24:54,368] {executor.py:174} INFO - Saving model at step 7900 with loss 0.005472735967487097,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 189.76it/s]

[2023-12-12 19:24:54,852] {executor.py:122} INFO - ModelType.RNN training epoch 64



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:54,859] {executor.py:139} INFO - ModelType.RNN trained for 48.077584307 seconds.
[2023-12-12 19:24:54,859] {executor.py:140} INFO - ModelType.RNN eval at step 8000.
[2023-12-12 19:24:54,907] {executor.py:144} INFO - ModelType.RNN train loss: 0.006033834535628557, eval metrics: {'eval_loss': 0.005199550719295915, 'MRE': 0.05983734360808103, 'MAE': 0.05983734360808103, 'RMSE': 45.69465065814359}
[2023-12-12 19:24:54,908] {executor.py:174} INFO - Saving model at step 8000 with loss 0.006033834535628557,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 77%|███████▋  | 96/125 [00:00<00:00, 203.02it/s]

[2023-12-12 19:24:55,389] {executor.py:139} INFO - ModelType.RNN trained for 48.607548443 seconds.
[2023-12-12 19:24:55,390] {executor.py:140} INFO - ModelType.RNN eval at step 8100.
[2023-12-12 19:24:55,440] {executor.py:144} INFO - ModelType.RNN train loss: 0.036672454327344894, eval metrics: {'eval_loss': 0.00584823044818432, 'MRE': 0.05313145259639785, 'MAE': 0.05313145259639785, 'RMSE': 40.573712316738465}
[2023-12-12 19:24:55,440] {executor.py:174} INFO - Saving model at step 8100 with loss 0.036672454327344894,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 175.51it/s]

[2023-12-12 19:24:55,566] {executor.py:122} INFO - ModelType.RNN training epoch 65



 53%|█████▎    | 66/125 [00:00<00:00, 211.82it/s]

[2023-12-12 19:24:55,927] {executor.py:139} INFO - ModelType.RNN trained for 49.145546514 seconds.
[2023-12-12 19:24:55,927] {executor.py:140} INFO - ModelType.RNN eval at step 8200.
[2023-12-12 19:24:55,977] {executor.py:144} INFO - ModelType.RNN train loss: 0.043379705399274826, eval metrics: {'eval_loss': 0.007331395069531237, 'MRE': 0.03640664119518297, 'MAE': 0.03640664119518297, 'RMSE': 27.80184832311977}
[2023-12-12 19:24:55,977] {executor.py:174} INFO - Saving model at step 8200 with loss 0.043379705399274826,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 190.50it/s]

[2023-12-12 19:24:56,225] {executor.py:122} INFO - ModelType.RNN training epoch 66



 35%|███▌      | 44/125 [00:00<00:00, 215.62it/s]

[2023-12-12 19:24:56,464] {executor.py:139} INFO - ModelType.RNN trained for 49.682230874 seconds.
[2023-12-12 19:24:56,464] {executor.py:140} INFO - ModelType.RNN eval at step 8300.
[2023-12-12 19:24:56,514] {executor.py:144} INFO - ModelType.RNN train loss: 0.007540219463407993, eval metrics: {'eval_loss': 0.00931645899366301, 'MRE': 0.008485284050649453, 'MAE': 0.008485284050649453, 'RMSE': 6.4797677679191565}
[2023-12-12 19:24:56,514] {executor.py:174} INFO - Saving model at step 8300 with loss 0.007540219463407993,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 190.40it/s]

[2023-12-12 19:24:56,883] {executor.py:122} INFO - ModelType.RNN training epoch 67



 17%|█▋        | 21/125 [00:00<00:00, 209.19it/s]

[2023-12-12 19:24:57,009] {executor.py:139} INFO - ModelType.RNN trained for 50.227896002 seconds.
[2023-12-12 19:24:57,010] {executor.py:140} INFO - ModelType.RNN eval at step 8400.
[2023-12-12 19:24:57,058] {executor.py:144} INFO - ModelType.RNN train loss: 0.010466369800269604, eval metrics: {'eval_loss': 0.012502668891102076, 'MRE': 0.18992943981046462, 'MAE': 0.18992943981046462, 'RMSE': 145.03918253256188}
[2023-12-12 19:24:57,059] {executor.py:174} INFO - Saving model at step 8400 with loss 0.010466369800269604,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 174.46it/s]

[2023-12-12 19:24:57,602] {executor.py:122} INFO - ModelType.RNN training epoch 68



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:24:57,609] {executor.py:139} INFO - ModelType.RNN trained for 50.82770039 seconds.
[2023-12-12 19:24:57,609] {executor.py:140} INFO - ModelType.RNN eval at step 8500.
[2023-12-12 19:24:57,660] {executor.py:144} INFO - ModelType.RNN train loss: 0.017447855323553085, eval metrics: {'eval_loss': 0.014547356144668391, 'MRE': 0.05933874220334639, 'MAE': 0.05933874220334639, 'RMSE': 45.31389483522071}
[2023-12-12 19:24:57,660] {executor.py:174} INFO - Saving model at step 8500 with loss 0.017447855323553085,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 79%|███████▉  | 99/125 [00:00<00:00, 146.76it/s]

[2023-12-12 19:24:58,332] {executor.py:139} INFO - ModelType.RNN trained for 51.551143432 seconds.
[2023-12-12 19:24:58,333] {executor.py:140} INFO - ModelType.RNN eval at step 8600.
[2023-12-12 19:24:58,391] {executor.py:144} INFO - ModelType.RNN train loss: 0.020318077877163887, eval metrics: {'eval_loss': 0.015671386674512178, 'MRE': 0.15871886538023977, 'MAE': 0.15871886538023977, 'RMSE': 121.20529871629367}
[2023-12-12 19:24:58,391] {executor.py:174} INFO - Saving model at step 8600 with loss 0.020318077877163887,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 129.92it/s]

[2023-12-12 19:24:58,566] {executor.py:122} INFO - ModelType.RNN training epoch 69



 51%|█████     | 64/125 [00:00<00:00, 153.80it/s]

[2023-12-12 19:24:59,062] {executor.py:139} INFO - ModelType.RNN trained for 52.280318079 seconds.
[2023-12-12 19:24:59,062] {executor.py:140} INFO - ModelType.RNN eval at step 8700.
[2023-12-12 19:24:59,117] {executor.py:144} INFO - ModelType.RNN train loss: 0.02989289164543152, eval metrics: {'eval_loss': 0.02027257719936852, 'MRE': 0.13451263293551247, 'MAE': 0.13451263293551247, 'RMSE': 102.72026464532507}
[2023-12-12 19:24:59,118] {executor.py:174} INFO - Saving model at step 8700 with loss 0.02989289164543152,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 140.84it/s]

[2023-12-12 19:24:59,456] {executor.py:122} INFO - ModelType.RNN training epoch 70



 38%|███▊      | 47/125 [00:00<00:00, 152.41it/s]

[2023-12-12 19:24:59,793] {executor.py:139} INFO - ModelType.RNN trained for 53.011339353 seconds.
[2023-12-12 19:24:59,793] {executor.py:140} INFO - ModelType.RNN eval at step 8800.
[2023-12-12 19:25:00,083] {executor.py:144} INFO - ModelType.RNN train loss: 0.009965198114514351, eval metrics: {'eval_loss': 0.011630096266834209, 'MRE': 0.0373503391061276, 'MAE': 0.0373503391061276, 'RMSE': 28.52250107551913}
[2023-12-12 19:25:00,083] {executor.py:174} INFO - Saving model at step 8800 with loss 0.009965198114514351,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:01<00:00, 110.58it/s]

[2023-12-12 19:25:00,588] {executor.py:122} INFO - ModelType.RNN training epoch 71



 12%|█▏        | 15/125 [00:00<00:00, 149.49it/s]

[2023-12-12 19:25:00,764] {executor.py:139} INFO - ModelType.RNN trained for 53.98232134 seconds.
[2023-12-12 19:25:00,764] {executor.py:140} INFO - ModelType.RNN eval at step 8900.
[2023-12-12 19:25:00,822] {executor.py:144} INFO - ModelType.RNN train loss: 0.0322004072368145, eval metrics: {'eval_loss': 0.006199456688661415, 'MRE': 0.0014628398443140987, 'MAE': 0.0014628398443140987, 'RMSE': 1.1170943030585931}
[2023-12-12 19:25:00,822] {executor.py:174} INFO - Saving model at step 8900 with loss 0.0322004072368145,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 137.18it/s]

[2023-12-12 19:25:01,501] {executor.py:122} INFO - ModelType.RNN training epoch 72



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:25:01,510] {executor.py:139} INFO - ModelType.RNN trained for 54.7284869 seconds.
[2023-12-12 19:25:01,510] {executor.py:140} INFO - ModelType.RNN eval at step 9000.
[2023-12-12 19:25:01,568] {executor.py:144} INFO - ModelType.RNN train loss: 0.008446699008345604, eval metrics: {'eval_loss': 0.019826896864217885, 'MRE': 0.02954929072252665, 'MAE': 0.02954929072252665, 'RMSE': 22.56524831057891}
[2023-12-12 19:25:01,568] {executor.py:174} INFO - Saving model at step 9000 with loss 0.008446699008345604,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 80%|████████  | 100/125 [00:00<00:00, 151.63it/s]

[2023-12-12 19:25:02,231] {executor.py:139} INFO - ModelType.RNN trained for 55.44935934 seconds.
[2023-12-12 19:25:02,231] {executor.py:140} INFO - ModelType.RNN eval at step 9100.
[2023-12-12 19:25:02,287] {executor.py:144} INFO - ModelType.RNN train loss: 0.020065460354089737, eval metrics: {'eval_loss': 0.00805234566080169, 'MRE': 0.11301177562596167, 'MAE': 0.11301177562596167, 'RMSE': 86.30118411184651}
[2023-12-12 19:25:02,288] {executor.py:174} INFO - Saving model at step 9100 with loss 0.020065460354089737,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 130.53it/s]

[2023-12-12 19:25:02,461] {executor.py:122} INFO - ModelType.RNN training epoch 73



 60%|██████    | 75/125 [00:00<00:00, 190.52it/s]

[2023-12-12 19:25:02,879] {executor.py:139} INFO - ModelType.RNN trained for 56.097805053 seconds.
[2023-12-12 19:25:02,880] {executor.py:140} INFO - ModelType.RNN eval at step 9200.
[2023-12-12 19:25:02,929] {executor.py:144} INFO - ModelType.RNN train loss: 0.030813787132501602, eval metrics: {'eval_loss': 0.19765428271001348, 'MRE': 0.7065008015249724, 'MAE': 0.7065008015249724, 'RMSE': 539.5177220237125}
[2023-12-12 19:25:02,929] {executor.py:174} INFO - Saving model at step 9200 with loss 0.030813787132501602,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 173.81it/s]

[2023-12-12 19:25:03,182] {executor.py:122} INFO - ModelType.RNN training epoch 74



 34%|███▎      | 42/125 [00:00<00:00, 200.42it/s]

[2023-12-12 19:25:03,438] {executor.py:139} INFO - ModelType.RNN trained for 56.65664131 seconds.
[2023-12-12 19:25:03,439] {executor.py:140} INFO - ModelType.RNN eval at step 9300.
[2023-12-12 19:25:03,492] {executor.py:144} INFO - ModelType.RNN train loss: 0.02047334611415863, eval metrics: {'eval_loss': 0.044165525800333574, 'MRE': 0.11010061493057675, 'MAE': 0.11010061493057675, 'RMSE': 84.0780829017292}
[2023-12-12 19:25:03,493] {executor.py:174} INFO - Saving model at step 9300 with loss 0.02047334611415863,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 180.10it/s]

[2023-12-12 19:25:03,878] {executor.py:122} INFO - ModelType.RNN training epoch 75



 17%|█▋        | 21/125 [00:00<00:00, 203.69it/s]

[2023-12-12 19:25:04,008] {executor.py:139} INFO - ModelType.RNN trained for 57.226231089 seconds.
[2023-12-12 19:25:04,008] {executor.py:140} INFO - ModelType.RNN eval at step 9400.
[2023-12-12 19:25:04,057] {executor.py:144} INFO - ModelType.RNN train loss: 0.04370561242103577, eval metrics: {'eval_loss': 0.39685538253532004, 'MRE': 0.20158571494582156, 'MAE': 0.20158571494582156, 'RMSE': 153.94047039343218}
[2023-12-12 19:25:04,058] {executor.py:174} INFO - Saving model at step 9400 with loss 0.04370561242103577,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 185.91it/s]

[2023-12-12 19:25:04,553] {executor.py:122} INFO - ModelType.RNN training epoch 76



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:25:04,559] {executor.py:139} INFO - ModelType.RNN trained for 57.777975296 seconds.
[2023-12-12 19:25:04,560] {executor.py:140} INFO - ModelType.RNN eval at step 9500.
[2023-12-12 19:25:04,608] {executor.py:144} INFO - ModelType.RNN train loss: 0.02603026106953621, eval metrics: {'eval_loss': 0.03661861944083984, 'MRE': 0.02462150072335215, 'MAE': 0.02462150072335215, 'RMSE': 18.80215274263719}
[2023-12-12 19:25:04,609] {executor.py:174} INFO - Saving model at step 9500 with loss 0.02603026106953621,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 74%|███████▎  | 92/125 [00:00<00:00, 196.28it/s]

[2023-12-12 19:25:05,109] {executor.py:139} INFO - ModelType.RNN trained for 58.327633673 seconds.
[2023-12-12 19:25:05,110] {executor.py:140} INFO - ModelType.RNN eval at step 9600.
[2023-12-12 19:25:05,159] {executor.py:144} INFO - ModelType.RNN train loss: 0.0118233822286129, eval metrics: {'eval_loss': 0.01374828854862314, 'MRE': 0.15454543883312635, 'MAE': 0.15454543883312635, 'RMSE': 118.0182710740437}
[2023-12-12 19:25:05,160] {executor.py:174} INFO - Saving model at step 9600 with loss 0.0118233822286129,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 167.94it/s]

[2023-12-12 19:25:05,299] {executor.py:122} INFO - ModelType.RNN training epoch 77



 50%|████▉     | 62/125 [00:00<00:00, 203.36it/s]

[2023-12-12 19:25:05,674] {executor.py:139} INFO - ModelType.RNN trained for 58.892906302 seconds.
[2023-12-12 19:25:05,675] {executor.py:140} INFO - ModelType.RNN eval at step 9700.
[2023-12-12 19:25:05,724] {executor.py:144} INFO - ModelType.RNN train loss: 0.014573642984032631, eval metrics: {'eval_loss': 0.009183730527113836, 'MRE': 0.050750828780250556, 'MAE': 0.050750828780250556, 'RMSE': 38.75575438164367}
[2023-12-12 19:25:05,724] {executor.py:174} INFO - Saving model at step 9700 with loss 0.014573642984032631,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 183.71it/s]

[2023-12-12 19:25:05,981] {executor.py:122} INFO - ModelType.RNN training epoch 78



 34%|███▎      | 42/125 [00:00<00:00, 208.21it/s]

[2023-12-12 19:25:06,229] {executor.py:139} INFO - ModelType.RNN trained for 59.447762435 seconds.
[2023-12-12 19:25:06,230] {executor.py:140} INFO - ModelType.RNN eval at step 9800.
[2023-12-12 19:25:06,278] {executor.py:144} INFO - ModelType.RNN train loss: 0.007731372956186533, eval metrics: {'eval_loss': 0.007499349070712924, 'MRE': 0.04615845130578266, 'MAE': 0.04615845130578266, 'RMSE': 35.24879582144126}
[2023-12-12 19:25:06,278] {executor.py:174} INFO - Saving model at step 9800 with loss 0.007731372956186533,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 184.01it/s]

[2023-12-12 19:25:06,662] {executor.py:122} INFO - ModelType.RNN training epoch 79



 16%|█▌        | 20/125 [00:00<00:00, 199.92it/s]

[2023-12-12 19:25:06,794] {executor.py:139} INFO - ModelType.RNN trained for 60.012947725 seconds.
[2023-12-12 19:25:06,795] {executor.py:140} INFO - ModelType.RNN eval at step 9900.
[2023-12-12 19:25:06,845] {executor.py:144} INFO - ModelType.RNN train loss: 0.01595800556242466, eval metrics: {'eval_loss': 0.00643996939358588, 'MRE': 0.04350098026006799, 'MAE': 0.04350098026006799, 'RMSE': 33.21942413236002}
[2023-12-12 19:25:06,846] {executor.py:174} INFO - Saving model at step 9900 with loss 0.01595800556242466,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 182.04it/s]

[2023-12-12 19:25:07,351] {executor.py:122} INFO - ModelType.RNN training epoch 80



  0%|          | 0/125 [00:00<?, ?it/s]

[2023-12-12 19:25:07,358] {executor.py:139} INFO - ModelType.RNN trained for 60.576676734 seconds.
[2023-12-12 19:25:07,358] {executor.py:140} INFO - ModelType.RNN eval at step 10000.
[2023-12-12 19:25:07,410] {executor.py:144} INFO - ModelType.RNN train loss: 0.012444185093045235, eval metrics: {'eval_loss': 0.006341345487341572, 'MRE': 0.06738492872778815, 'MAE': 0.06738492872778815, 'RMSE': 51.458346781028354}
[2023-12-12 19:25:07,410] {executor.py:174} INFO - Saving model at step 10000 with loss 0.012444185093045235,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


 71%|███████   | 89/125 [00:00<00:00, 193.80it/s]

[2023-12-12 19:25:07,915] {executor.py:139} INFO - ModelType.RNN trained for 61.134159753 seconds.
[2023-12-12 19:25:07,916] {executor.py:140} INFO - ModelType.RNN eval at step 10100.
[2023-12-12 19:25:07,966] {executor.py:144} INFO - ModelType.RNN train loss: 0.006706655025482178, eval metrics: {'eval_loss': 0.004908942801053994, 'MRE': 0.011556083583242182, 'MAE': 0.011556083583242182, 'RMSE': 8.824776811135848}
[2023-12-12 19:25:07,966] {executor.py:174} INFO - Saving model at step 10100 with loss 0.006706655025482178,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RNN/single_train2023-12-12_19-24-06


100%|██████████| 125/125 [00:00<00:00, 167.18it/s]

[2023-12-12 19:25:08,101] {executor.py:122} INFO - ModelType.RNN training epoch 81



 47%|████▋     | 59/125 [00:00<00:00, 193.03it/s]