### lib

In [None]:
from dataclasses import dataclass, field
from functools import cache
from typing import Literal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from numpy.typing import ArrayLike
from sklearn.base import clone
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from tqdm import tqdm

sns.set_theme()

LAGS = 14
N_STEPS = 7
VALIDATION_FOLDS = 5


def convert_timestamps(df: pd.DataFrame):
    """
    Parse the two time columns of `df` into pandas datetimes.

    `searchDate` is parsed as-is; `segmentsDepartureTimeEpochSeconds` is
    interpreted as seconds since the Unix epoch. The frame is modified in
    place and also returned so the function can be used with `DataFrame.pipe`.
    """
    search_date = pd.to_datetime(df["searchDate"])
    departure = pd.to_datetime(df["segmentsDepartureTimeEpochSeconds"], unit="s")
    df["searchDate"] = search_date
    df["segmentsDepartureTimeEpochSeconds"] = departure
    return df


def filter_and_sort(df: pd.DataFrame):
    """
    Keep only legs observed long enough and order the rows leg by leg.

    A leg's duration is the span between its first and last `searchDate`
    (inclusive). Legs shorter than `LAGS + N_STEPS` days are dropped. The
    remaining legs are ordered by first search date, ties broken by last
    search date, and the rows are returned grouped in that leg order with
    `legId` as the first column.
    """
    dates_by_leg = df.groupby("legId")["searchDate"]
    first_seen = dates_by_leg.min()
    last_seen = dates_by_leg.max()

    span = last_seen - first_seen + pd.Timedelta(days=1)
    kept_legs = span[span >= pd.Timedelta(days=LAGS + N_STEPS)].index
    kept_rows = df[df["legId"].isin(kept_legs)]

    # np.lexsort treats the LAST key as the primary one: first date, then last date
    order = np.lexsort((last_seen[kept_legs], first_seen[kept_legs]))
    ordered_legs = kept_legs[order]

    indexed = kept_rows.set_index("legId")
    return indexed.loc[ordered_legs].reset_index()


def leg_slice_generator(df: pd.DataFrame):
    """
    Yield one positional `slice` per leg, in order of first appearance.

    The slice lengths are the `legId` value counts, so the slices only line up
    with the rows when the rows of each leg are stored contiguously.
    """
    counts = df["legId"].value_counts(sort=False)
    start = 0
    for stop in counts.cumsum():
        yield slice(start, stop)
        start = stop


def ffill_bfill(arr: np.ndarray):
    """
    Forward fill, then backward fill, the NaN entries of a 2-D float array.

    Filling is done independently for each column along axis 0. For the
    forward pass every NaN position is mapped to the row index of the most
    recent non-NaN value (cumulative maximum of the valid row indexes). The
    backward pass then maps the NaNs that are still left (leading NaNs) to the
    row index of the next non-NaN value (reversed cumulative minimum).

    Parameters
    ----------
    arr : np.ndarray of shape (n_rows, n_cols)
        Float array that may contain NaN.

    Returns
    -------
    np.ndarray
        A new array of the same shape. Columns that are entirely NaN stay NaN.
    """
    n_rows, n_cols = arr.shape
    rows = np.arange(n_rows)[:, None]
    cols = np.arange(n_cols)

    # forward fill: index of the last valid row at or before each position
    source = np.where(np.isnan(arr), 0, rows)
    np.maximum.accumulate(source, axis=0, out=source)
    filled = arr[source, cols]

    # backward fill: index of the first valid row at or after each position;
    # only leading NaNs are still missing at this point
    source = np.where(np.isnan(filled), n_rows - 1, rows)
    source = np.minimum.accumulate(source[::-1], axis=0)[::-1]
    return filled[source, cols]


def impute_null_data(df: pd.DataFrame):
    """
    Fill missing equipment descriptions and travel distances within each leg.

    Both columns are ordinal-encoded so they can be handled as one float
    array, each leg's block is passed through `ffill_bfill` when it is
    partially (but not entirely) missing, and the codes are decoded back.
    The frame is modified in place and returned.
    """
    columns = ["segmentsEquipmentDescription", "totalTravelDistance"]
    encoder = OrdinalEncoder()
    codes = encoder.fit_transform(df[columns].to_numpy())

    for leg_slice in leg_slice_generator(df):
        leg_codes = codes[leg_slice]  # a view, so writes land in `codes`
        missing = np.isnan(leg_codes)
        if not missing.any():
            continue  # nothing to fill
        if missing.all():
            continue  # nothing to fill from
        leg_codes[:] = ffill_bfill(leg_codes)

    decoded = encoder.inverse_transform(codes)
    df["segmentsEquipmentDescription"] = decoded[:, 0]
    df["totalTravelDistance"] = decoded[:, 1].astype(np.float32)

    return df


def fill_distance(df: pd.DataFrame):
    """
    Fill missing `totalTravelDistance` values from the arrival airport.

    Rows whose distance is still missing get a fixed distance looked up by
    `segmentsArrivalAirportCode`; airports not in the table stay missing.
    The frame is modified in place and returned.
    """
    distance_airport_map = {"ONT": 1897, "LAX": 1943}
    fallback = df["segmentsArrivalAirportCode"].map(distance_airport_map)
    # Assign the result back instead of `df[col].fillna(..., inplace=True)`:
    # the chained in-place form acts on a temporary under pandas Copy-on-Write
    # and would leave `df` unchanged.
    df["totalTravelDistance"] = df["totalTravelDistance"].fillna(fallback)
    return df


def fill_equipment(df: pd.DataFrame):
    """
    Fill missing `segmentsEquipmentDescription` with a per-airline default.

    Only rows whose description is missing are touched, and only for the
    airlines listed below. The frame is modified in place and returned.
    """
    airline_defaults = {
        "Spirit Airlines": "AIRBUS INDUSTRIE A320 SHARKLETS",
        "Delta": "Airbus A321",
    }
    missing = df["segmentsEquipmentDescription"].isna()
    airline = df["segmentsAirlineName"]
    for airline_name, equipment in airline_defaults.items():
        target_rows = missing & (airline == airline_name)
        df.loc[target_rows, "segmentsEquipmentDescription"] = equipment

    return df



def impute_lost_days(df: pd.DataFrame) -> dict[str, np.ndarray]:
    """
    Insert a forward-filled row for every search day missing inside a leg.

    Consecutive rows whose `searchDate` differ by something other than one day
    are treated as a gap; the row before the gap is repeated for each missing
    day, with `searchDate` advanced one day at a time.

    NOTE(review): assumes the rows of each leg are contiguous and ordered by
    `searchDate` (as produced by `filter_and_sort`) -- confirm for other callers.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns `searchDate` and `legId`.

    Returns
    -------
    dict[str, np.ndarray]
        One array per column of `df`, cast back to that column's dtype, plus
        an `"imputed"` float32 array that is 1 for inserted rows and 0 otherwise.
    """
    # date_features = [
    #     "searchDate",
    #     "segmentsDepartureTimeEpochSeconds",
    # ]
    # str_features = [
    #     "legId",
    #     "fareBasisCode",
    #     "segmentsArrivalAirportCode",
    #     "segmentsAirlineName",
    #     "segmentsEquipmentDescription",
    # ]
    # num_features = [
    #     "totalFare",
    #     "seatsRemaining",
    #     "isBasicEconomy",
    #     "totalTravelDistance",
    #     "segmentsDurationInSeconds",
    # ]

    df_arr = df.to_numpy()
    search_date_index = df.columns.get_loc("searchDate")
    search_date = df_arr[:, search_date_index].astype("datetime64[D]")

    # day difference between consecutive rows; it differs from 1 either
    # because days are missing or because the next row belongs to another leg
    date_diff = np.diff(search_date) // np.timedelta64(1, "D")
    # neutralise the differences at leg boundaries (last row of every leg but
    # the final one) so that only real gaps remain
    leg_counts = df["legId"].value_counts(sort=False)
    leg_change_indexes = (leg_counts.to_numpy().cumsum() - 1)[:-1]
    date_diff[leg_change_indexes] = 1
    
    # position of every existing row in the enlarged array: the running sum of
    # the day differences, starting at 0
    new_indexes = np.zeros(search_date.shape[0], dtype=int)
    date_diff.cumsum(out=new_indexes[1:])

    arr = np.zeros([new_indexes[-1] + 1, df.shape[1]], dtype=object)
    arr[new_indexes] = df_arr # copy the existing data to the new array

    # fill the rows that belong to the gaps
    mark_arr = np.zeros(arr.shape[0], dtype=np.float32)
    DAY = np.timedelta64(1, "D")
    for i in np.nonzero(date_diff != 1)[0]:
        lost_days = date_diff[i] - 1
        start =  new_indexes[i] + 1
        stop = start + lost_days
        arr[start:stop] = df_arr[i] # forward fill with the row before the gap
        mark_arr[start:stop] = 1 # mark the imputed data
        ref_day = np.datetime64(df_arr[i, search_date_index], "D")
        # the inserted rows get the consecutive days following `ref_day`
        arr[start:stop, search_date_index] = np.arange(
            ref_day + DAY, ref_day + DAY * date_diff[i], DAY
        )

    # split the object array back into one typed array per original column
    array_dict = {
        key: arr[:, i].astype(type_)
        for i, (key, type_) in enumerate(df.dtypes.items())
    }
    array_dict.update({"imputed": mark_arr})
    return array_dict


@cache
def month_start(year, month):
    """Return the (memoised) timestamp of midnight on the first day of the month."""
    first_day = pd.Timestamp(year=year, month=month, day=1)
    return first_day


def cyclic_encode(
    timestamps: ArrayLike, period: Literal["day", "week", "month", "year"]
):
    """
    Encode timestamps as a sine/cosine pair with the given period.

    The phase is the fraction of the current day, week, month or year that
    has elapsed at each timestamp, so the start of every period maps to
    `(sin, cos) == (0, 1)`. Weeks start on Monday. Months and years use their
    actual length (28-31 days, 365 or 366 days).

    Parameters
    ----------
    timestamps : ArrayLike object of timestamps
        Anything that can be converted by `pd.to_datetime`.
    period : "day", "week", "month" or "year"
        The period of the sine and cosine wave.

    Returns
    -------
    tuple(x_sin, x_cos)
        Two float32 arrays with one element per timestamp.

    Raises
    ------
    ValueError
        If `period` is not one of the supported values.
    """
    if period not in ("day", "week", "month", "year"):
        raise ValueError("The parameter period only support day, week, month and year")

    if not isinstance(timestamps, pd.Series):
        timestamps = pd.Series(timestamps)
    # honour the documented contract: strings etc. are converted here
    timestamps = pd.to_datetime(timestamps)

    day_seconds = 86400
    # seconds elapsed since midnight of the same day
    time_of_day = (timestamps - timestamps.dt.normalize()).dt.total_seconds()

    if period == "day":
        offset = time_of_day
        _period = day_seconds
    elif period == "week":
        # whole days passed this week (Monday == 0) + time of day
        offset = timestamps.dt.day_of_week * day_seconds + time_of_day
        _period = day_seconds * 7
    elif period == "month":
        # `day` is 1-based, so subtract 1 to count the days already completed
        offset = (timestamps.dt.day - 1) * day_seconds + time_of_day
        _period = timestamps.dt.days_in_month * day_seconds
    else:  # period == "year"
        # `day_of_year` is 1-based as well
        offset = (timestamps.dt.day_of_year - 1) * day_seconds + time_of_day
        _period = np.where(timestamps.dt.is_leap_year, 366, 365) * day_seconds

    basis = 2 * np.pi * offset / _period
    basis = basis.to_numpy(np.float32)
    return np.sin(basis), np.cos(basis)



class EncodeData:
    """
    Clean a raw fare DataFrame and encode it into model-ready float arrays.

    After construction the instance exposes:

    * ``data``     -- dict of the remaining (numeric) feature arrays,
    * ``metadata`` -- ``legId``, ``searchDate`` and the ``imputed`` marker,
    * ``columns``  -- feature names of the two input groups,
    * ``_x``       -- tuple of two float32 matrices (plain features, features
      meant for an embedding layer),
    * ``_y``       -- matrix with one column per entry of ``target_names``.
    """

    target_names = ("totalFare", "seatsRemaining")
    encoding_spec = {
        "Ordinal": ["segmentsAirlineName", "fareBasisCode"],
        "OneHot": ["segmentsArrivalAirportCode", "segmentsEquipmentDescription"],
    }
    # Class-level on purpose: encoders are fitted by the first instance that
    # needs them and are then reused by every later instance.
    encoders = {}

    def __init__(self, df: pd.DataFrame):
        """Clean, impute and encode `df`; passing ``None`` creates an empty shell."""
        if df is None:
            return
        self.data = self.clean_impute_df(df)
        self.metadata = {
            "legId": self.data.pop("legId"),
            # searchDate stays in `data` because it is cyclic-encoded later
            "searchDate": self.data["searchDate"],
            "imputed" : self.data.pop("imputed"),
        }
        self.columns, self._x, self._y = self.encode()

    @classmethod
    def read_csv(cls, filepath: str, **kwargs):
        """Build an instance from a CSV file; `kwargs` go to `pd.read_csv`."""
        return cls(pd.read_csv(filepath, **kwargs))

    @staticmethod
    def clean_impute_df(df: pd.DataFrame) -> dict[str, np.ndarray]:
        """Select the used columns and run the cleaning / imputation pipeline."""
        selected_features = [
            "searchDate", "segmentsDepartureTimeEpochSeconds", "legId",
            "fareBasisCode", "segmentsArrivalAirportCode", "segmentsAirlineName",
            "segmentsEquipmentDescription", "totalFare", "seatsRemaining",
            "isBasicEconomy", "totalTravelDistance", "segmentsDurationInSeconds",
        ]
        return (
            df.loc[:, selected_features]
            .pipe(convert_timestamps)
            .pipe(filter_and_sort)
            .pipe(impute_null_data)
            .pipe(fill_distance)
            .pipe(fill_equipment)
            .pipe(impute_lost_days)
        )

    def fit_encoders(self):
        """
        Fit the ordinal and one-hot encoders on the training period only.

        Rows searched within the last ``N_STEPS * VALIDATION_FOLDS`` days are
        excluded from fitting so the validation folds do not leak categories.
        """
        search_date = self.data["searchDate"]
        last_day = search_date.max() - pd.Timedelta(days=N_STEPS * VALIDATION_FOLDS)
        train_mask = search_date <= last_day

        for feature in self.encoding_spec["Ordinal"]:
            self.encoders[feature] = OrdinalEncoder(
                dtype=np.float32,
                handle_unknown="use_encoded_value",
                unknown_value=-1,
            )
            self.encoders[feature].fit(self.data[feature][train_mask].reshape(-1, 1))
        for feature in self.encoding_spec["OneHot"]:
            self.encoders[feature] = OneHotEncoder(
                dtype=np.float32,
                handle_unknown="infrequent_if_exist",
                min_frequency=0.01,
                sparse_output=False,
                # Bind the current feature name as a default argument. A plain
                # closure would look `feature` up when the names are generated
                # (after the loop), giving every encoder the LAST feature's
                # prefix.
                feature_name_combiner=(
                    lambda _, category, prefix=feature: f"{prefix}_{category}"
                ),
            )
            self.encoders[feature].fit(self.data[feature][train_mask].reshape(-1, 1))

    def encode(self):
        """
        Encode the categorical and time features held in ``self.data``.

        Ordinal-encoded features are moved to a separate group, one-hot and
        cyclic (sin/cos) features replace their source column in ``self.data``.

        Returns
        -------
        tuple
            ``(columns, x, y)`` where ``columns`` and ``x`` are 2-tuples
            (plain features, ordinal features) and ``y`` stacks the targets.
        """
        if not self.encoders:
            self.fit_encoders()

        # Ordinal encode
        embed_data = {}
        for feature in self.encoding_spec["Ordinal"]:
            arr = self.data.pop(feature)
            enc = self.encoders[feature]
            embed_data[feature] = enc.transform(arr.reshape(-1, 1))
        # One Hot encode
        for feature in self.encoding_spec["OneHot"]:
            arr = self.data.pop(feature)
            enc = self.encoders[feature]
            arr = enc.transform(arr.reshape(-1, 1))
            self.data.update(
                {name: arr[:, i] for i, name in enumerate(enc.get_feature_names_out())}
            )
        # Cyclic encode
        period_args = {
            "searchDate": ["week", "month", "year"],
            "segmentsDepartureTimeEpochSeconds": ["day", "week", "month", "year"],
        }
        for feature, period_list in period_args.items():
            arr = self.data.pop(feature)
            for period in period_list:
                waves = cyclic_encode(arr, period=period)
                descr = f"{feature}_{period}"
                self.data.update({f"{descr}_sin": waves[0], f"{descr}_cos": waves[1]})

        # return (columns, x, y)
        return (
            tuple(tuple(data.keys()) for data in (self.data, embed_data)),
            tuple(
                np.column_stack(tuple(data.values())).astype(np.float32)
                for data in (self.data, embed_data)
            ),
            np.column_stack(tuple(self.data[name] for name in self.target_names)),
        )


@dataclass
class WindowData:
    """
    Sliding-window samples built from an `EncodeData` instance.

    Each sample pairs `LAGS` consecutive input rows with the following
    `N_STEPS` target rows of the same leg. `imputed_x` / `imputed_y` hold the
    share of imputed rows in each sample and `date` the last target date.
    """

    x: tuple[list | np.ndarray] = field(default_factory=list)
    y: list | np.ndarray = field(default_factory=list)
    imputed_x: list | np.ndarray = field(default_factory=list)
    imputed_y: list | np.ndarray = field(default_factory=list)
    date: list | np.ndarray = field(default_factory=list)

    @classmethod
    def sliding_window(cls, encoded: EncodeData):
        """
        Window every leg of `encoded` and return `(window, causal)`.

        Both datasets share inputs, imputation rate of the inputs and dates;
        the causal one differs only in its targets (`y`, `imputed_y`).
        Samples are ordered by the last date of their target window.
        """
        window = cls(x=tuple([] for _ in encoded._x))
        causal = cls()

        meta = encoded.metadata
        dates = meta["searchDate"].reshape(-1, 1)
        flags = meta["imputed"].reshape(-1, 1)
        for leg in window._leg_slice_generator(meta["legId"]):
            leg_targets = encoded._y[leg]
            leg_flags = flags[leg]
            # regular windowed samples
            for bucket, features in zip(window.x, encoded._x):
                bucket.append(cls._window_x(features[leg]))
            window.y.append(cls._window_y(leg_targets))
            window.imputed_x.append(cls._window_x(leg_flags))
            window.imputed_y.append(cls._window_y(leg_flags))
            window.date.append(cls._window_y(dates[leg]))
            # the causal dataset only differs in what relates to the output
            causal.y.append(cls._window_y(leg_targets, causal=True))
            causal.imputed_y.append(cls._window_y(leg_flags, causal=True))

        for dataset in (window, causal):
            dataset._vstack_samples()
        # imputed counts -> imputed rate per sample
        window.imputed_x = cls._compute_imputed_rate(window.imputed_x)
        window.imputed_y = cls._compute_imputed_rate(window.imputed_y)
        causal.imputed_y = cls._compute_imputed_rate(causal.imputed_y)
        # the last date of each sample drives the train/validation/test split
        window.date = window.date[:, -1].flatten()
        order = np.argsort(window.date)
        for dataset in (window, causal):
            dataset.apply_index(order)
        # identical in both datasets, so the causal one just references them
        causal.x = window.x
        causal.imputed_x = window.imputed_x
        causal.date = window.date

        return window, causal

    def _vstack_samples(self):
        """Stack the per-leg fragments of every field into single arrays."""
        for name in self.__dataclass_fields__:
            value = getattr(self, name)
            if isinstance(value, tuple):
                setattr(self, name, tuple(np.vstack(part) for part in value))
            elif isinstance(value, list) and value:
                setattr(self, name, np.vstack(value))
            # empty lists and already stacked arrays are left untouched

    @staticmethod
    def _leg_slice_generator(leg_id_array: np.ndarray):
        """Yield one positional slice per leg, in order of first appearance."""
        counts = pd.Series(leg_id_array).value_counts(sort=False)
        start = 0
        for stop in counts.cumsum():
            yield slice(start, stop)
            start = stop

    @staticmethod
    def _window_x(array: np.ndarray):
        """Input windows: `LAGS` rows each, never reaching the last `N_STEPS` rows."""
        history = array[:-N_STEPS]
        views = np.lib.stride_tricks.sliding_window_view(
            history, window_shape=LAGS, axis=0
        )
        # move the window axis in front of the feature axis
        return views.swapaxes(1, 2)

    @staticmethod
    def _window_y(array: np.ndarray, causal: bool = False):
        """
        Target windows of `N_STEPS` rows.

        With `causal=True` every sample instead holds, for each of its `LAGS`
        input positions, the `N_STEPS` rows that follow that position.
        """
        make_windows = np.lib.stride_tricks.sliding_window_view
        if not causal:
            views = make_windows(array[LAGS:], window_shape=N_STEPS, axis=0)
            return views.swapaxes(1, 2)
        views = make_windows(array[1:], window_shape=(N_STEPS, LAGS), axis=(0, 0))
        return views.swapaxes(1, 3)

    @staticmethod
    def _compute_imputed_rate(imputed: np.ndarray):
        """Share of imputed entries per sample (mean over all non-sample axes)."""
        per_sample_axes = tuple(range(1, imputed.ndim))
        entries_per_sample = np.prod(imputed.shape[1:])
        return imputed.sum(axis=per_sample_axes) / entries_per_sample

    def apply_index(self, ind: np.ndarray):
        """Reorder / select the samples of every array field with `ind`."""
        for name in self.__dataclass_fields__:
            value = getattr(self, name)
            if isinstance(value, tuple):
                setattr(self, name, tuple(part[ind] for part in value))
            elif isinstance(value, np.ndarray):
                setattr(self, name, value[ind])
            # fields that are still (empty) lists are skipped


class SplitData:
    """
    Time-ordered train/validation folds over a `WindowData` instance.

    The normalisation parameters of every generated fold are kept so that
    predictions can be mapped back to fares with `denormalize_fare`.
    """

    def __init__(self, window: WindowData) -> None:
        self.window = window
        # one dict {"x": ((mean, std), ...), "y": (mean, std)} per fold
        self._normalization_params = []

    @property
    def sample_weight(self):
        """Per-sample weight: the less imputed data in x and y, the higher."""
        return (1 - self.window.imputed_x) * (1 - self.window.imputed_y)

    def train_valid_split(self, n_folds: int = VALIDATION_FOLDS):
        """
        Yield `n_folds` expanding-window folds as keyword-argument dicts.

        The last `N_STEPS * n_folds` days are cut into validation blocks of
        `N_STEPS` days; each fold trains on everything before its block.
        Every yielded dict has the keys ``x``, ``y``, ``validation_data`` and
        ``sample_weight`` and is already normalised with the statistics of
        its own training part. Starting the generator resets the stored
        normalisation parameters.
        """
        self._normalization_params.clear()
        x, y = self.window.x, self.window.y
        train_boundary = self.window.date.max() - np.timedelta64(N_STEPS * n_folds, "D")
        for _ in range(n_folds):
            valid_boundary = train_boundary + np.timedelta64(N_STEPS, "D")
            train_mask = self.window.date <= train_boundary
            # XOR keeps the samples up to valid_boundary that are not training
            valid_mask = (self.window.date <= valid_boundary) ^ train_mask
            kwargs = {
                "x": [arr[train_mask] for arr in x],
                # one array per target (last axis of y)
                "y": [arr[train_mask] for arr in np.rollaxis(y, axis=-1)],
                "validation_data": (
                    [arr[valid_mask] for arr in x],
                    [arr[valid_mask] for arr in np.rollaxis(y, axis=-1)],
                ),
                "sample_weight": self.sample_weight[train_mask],
            }
            yield self._normalize_split(kwargs)
            train_boundary = valid_boundary

    def _normalize_split(self, split_dict):
        """Normalise one fold in place and record the parameters used."""
        params = {} # parameters are mean and std
        dest = []
        x_train, x_valid = split_dict["x"], split_dict["validation_data"][0]
        for i, arr in enumerate(x_train):
            norm_train, norm_valid, mean, std = self.normalize(arr, x_valid[i])
            x_train[i], x_valid[i] = norm_train, norm_valid
            dest.append((mean, std))
        params["x"] = tuple(dest)
        # only normalize the regression target: totalFare
        y_train, y_valid = split_dict["y"], split_dict["validation_data"][1]
        norm_train, norm_valid, mean, std = self.normalize(y_train[0], y_valid[0])
        y_train[0], y_valid[0] = norm_train, norm_valid
        params["y"] = (mean, std)
        self._normalization_params.append(params) # save params in object
        return split_dict

    @staticmethod
    def normalize(train: np.ndarray, valid: np.ndarray):
        """
        Standardise `train` and `valid` with the statistics of `train`.

        Statistics are taken over every axis but the last for arrays with
        three or more dimensions, and over the whole array for 2-D input.
        Features that are constant in `train` (std == 0) are only centred;
        their reported std is 1 so that no division by zero occurs and the
        transformation stays invertible.

        Returns
        -------
        tuple
            ``(normalized_train, normalized_valid, mean, std)``
        """
        # if the array only contains 1 feature, the shape would be (samples, timesteps)
        ndim = max(train.ndim, 3) # ensure calling mean/std like 3d array
        axes = tuple(range(ndim - 1))
        mean = train.mean(axis=axes)
        std = train.std(axis=axes)
        # guard against constant features, which would otherwise yield NaN/inf
        std = np.where(std == 0, np.ones_like(std), std)
        return (train - mean) / std, (valid - mean) / std, mean, std

    def denormalize_fare(self, array: np.ndarray, nth_fold: int = -1):
        """Map normalised fare values back using the parameters of fold `nth_fold`."""
        mean, std = self._normalization_params[nth_fold]["y"]
        return array * std + mean


def collapse_2d(x: np.ndarray):
    """
    Flatten windowed samples (samples, timesteps, features) to 2-D rows.

    Each row holds the first two features of every timestep but the last,
    followed by all features of the last timestep.
    """
    n_samples = x.shape[0]
    # [0, 1] are indexes of totalFare and seatsRemaining
    earlier_targets = x[:, :-1, [0, 1]]
    flat_targets = earlier_targets.reshape(n_samples, -1)
    newest_step = x[:, -1]
    return np.hstack((flat_targets, newest_step))


def sklearn_model_kwargs(fold: dict, regression_only: bool = False):
    """
    Turn one fold from `SplitData.train_valid_split` into sklearn call specs.

    A spec is ``{"fit": ((X, y), {"sample_weight": ...}), "predict": {"X": X_valid}}``.
    With `regression_only` a single spec is returned whose target is all
    targets concatenated along the last axis; otherwise a pair of specs, one
    per target, is returned.
    """
    x = collapse_2d(np.concatenate(fold["x"], axis=-1))
    x_valid = collapse_2d(np.concatenate(fold["validation_data"][0], axis=-1))
    targets = fold["y"]

    def make_spec(target):
        # fresh dict objects for every spec, sharing the underlying arrays
        return {
            "fit": ((x, target), {"sample_weight": fold["sample_weight"]}),
            "predict": {"X": x_valid},
        }

    if regression_only:
        return make_spec(np.concatenate(targets, axis=-1))
    return make_spec(targets[0]), make_spec(targets[1])


def _predict_reg_only(fold: dict, reg) -> np.ndarray:
    """Fit `reg` on all targets of the fold at once and predict its validation part."""
    spec = sklearn_model_kwargs(fold, regression_only=True)
    fit_args, fit_kwargs = spec["fit"]
    fitted = reg.fit(*fit_args, **fit_kwargs)
    return fitted.predict(**spec["predict"])


def _predict_reg_clf(fold: dict, reg, clf) -> tuple[np.ndarray]:
    """
    Fit a clone of `reg` on the first target and a clone of `clf` on the
    second, returning their validation predictions in that order.
    """
    specs = sklearn_model_kwargs(fold)
    predictions = []
    for spec, model in zip(specs, (reg, clf)):
        fit_args, fit_kwargs = spec["fit"]
        # the fitted clone is never bound to a name, so it can be freed as
        # soon as its prediction has been computed
        predictions.append(
            clone(model).fit(*fit_args, **fit_kwargs).predict(**spec["predict"])
        )
    return tuple(predictions)


def sk_model_predict(window: WindowData, name: str, reg=None, clf=None):
    """
    Cross-validate sklearn model(s) on `window` and save the predictions.

    Without `clf`, `reg` predicts both targets at once: the first `N_STEPS`
    output columns are taken as fares and the rest, rounded, as seats. With
    `clf`, fares come from `reg` and seats from `clf`. Fares are de-normalised
    per fold. The stacked predictions of all folds are written to
    ``models/<name>.npz`` under the keys ``fare`` and ``seat``.
    """
    splits = SplitData(window)
    fares, seats = [], []
    folds = enumerate(splits.train_valid_split())
    for i, fold in tqdm(folds, total=VALIDATION_FOLDS, desc=name):
        if clf is not None:
            y_pred = _predict_reg_clf(fold, reg, clf)  # tuple[np.ndarray]
            fare_pred, seat_pred = y_pred[0], y_pred[1]
        else:
            y_pred = _predict_reg_only(fold, reg)  # np.ndarray
            fare_pred = y_pred[:, :N_STEPS]
            seat_pred = y_pred[:, N_STEPS:].round()
        fares.append(splits.denormalize_fare(fare_pred, i))
        seats.append(seat_pred)

    stacked = {"fare": np.vstack(fares), "seat": np.vstack(seats)}
    np.savez_compressed(f"models/{name}.npz", **stacked)
    print(f"y_pred saved at 'models/{name}.npz'")


def load_y_pred(path: str) -> dict[str, np.ndarray]:
    """Load every array stored in the ``.npz`` file at `path` into a plain dict."""
    with np.load(path) as archive:
        # materialise the arrays before the archive is closed
        return {key: archive[key] for key in archive.keys()}


def _melt_relplot(fare_mae: pd.DataFrame, seat_mae: pd.DataFrame):
    """
    Plot the per-day MAE of every model and display summary statistics.

    Parameters
    ----------
    fare_mae, seat_mae : pd.DataFrame
        One column per model and one row per forecast day (`N_STEPS` rows).
        The frames are not modified.

    Models whose name contains ``"_reg_only"`` are drawn in a separate line
    style (`reg_only == "Yes"`) and shown under their base name, so both
    variants of a model share one colour.
    """
    days = np.arange(N_STEPS) + 1
    mae = pd.concat([
        fare_mae.assign(day=days, target="totalFare"),
        seat_mae.assign(day=days, target="seatsRemaining"),
    ])
    melt = pd.melt(mae, ["day", "target"], var_name="model", value_name="MAE")

    by = ["target", "model"]
    style = style_order = None
    is_reg_only = melt["model"].str.contains("_reg_only", regex=False)
    if is_reg_only.any():
        style = "reg_only"
        by.append(style)
        style_order = ["Yes", "No"]
        # "Yes" marks the regression-only variants (the flag used to be inverted)
        melt["reg_only"] = np.where(is_reg_only, "Yes", "No")
        melt["model"] = melt["model"].str.replace("_reg_only", "", regex=False)

    sns.relplot(
        melt, x="day", y="MAE", hue="model", style=style, kind="line",
        style_order=style_order, col="target", facet_kws={"sharey": False}
    )
    display(melt.drop(columns="day").groupby(by).agg(["mean", "max", "min"]))


def plot_mae(name_and_path: dict):
    """
    Compare saved predictions against ``models/y_true.npz`` by per-day MAE.

    Parameters
    ----------
    name_and_path : dict
        Maps a model name to the path of its saved ``.npz`` predictions.
    """
    y_true = load_y_pred("models/y_true.npz")
    mae_by_target = {"fare": {}, "seat": {}}
    for name, path in name_and_path.items():
        y_pred = load_y_pred(path)
        y_pred["seat"] = y_pred["seat"].round() # TODO: make sure rounded while saving
        for target, scores in mae_by_target.items():
            # multioutput="raw_values" keeps one MAE per forecast day
            scores[name] = mean_absolute_error(
                y_true[target], y_pred[target], multioutput="raw_values"
            )
    fare_frame = pd.DataFrame(mae_by_target["fare"])
    seat_frame = pd.DataFrame(mae_by_target["seat"])
    _melt_relplot(fare_frame, seat_frame)


def error_dist(pred_path: str, days_avg: bool = True):
    """
    Plot the distribution (KDE) of prediction errors for both targets.

    Errors are ``prediction - truth`` with the truth read from
    ``models/y_true.npz``.

    Parameters
    ----------
    pred_path : str
        Path of the saved ``.npz`` predictions (keys ``fare`` and ``seat``).
    days_avg : bool, default True
        If True, the errors of each sample are averaged over its forecast
        days. If False, one curve per forecast day is drawn.
    """
    y_true = load_y_pred("models/y_true.npz")
    y_pred = load_y_pred(pred_path)
    target_spec = {"fare": "totalFare", "seat": "seatsRemaining"}
    raw_errors = {key: y_pred[key] - y_true[key] for key in target_spec}
    hue = None
    palette = None
    if days_avg:
        errors = {
            name: raw_errors[key].mean(axis=-1)
            for key, name in target_spec.items()
        }
        errors = pd.DataFrame(errors).melt(value_name="error", var_name="target")
    else:
        hue = "days"
        palette = "crest"
        # label the columns 1..n from the data itself instead of assuming 7 days
        errors = pd.concat(
            pd.DataFrame(
                raw_errors[key], columns=np.arange(raw_errors[key].shape[1]) + 1
            )
            .melt(value_name="error", var_name=hue).assign(target=name)
            for key, name in target_spec.items()
        )
    sns.displot(
        errors, x="error", col="target", kind="kde", hue=hue, palette=palette,
        facet_kws={"sharex": False, "sharey": False}
    )


### Quick Start

In [1]:
# Quick-start cell: bring the library into the session and load cached datasets.
# NOTE(review): `%run snapshot` presumably executes the exported `lib` cell -- confirm.
%run snapshot
import joblib

# Window sizes used by the cells below.
LAGS = 14
N_STEPS = 7
# encoded = joblib.load("EncodeData_encoded.gz")
# test_run_data = joblib.load("test_run_data.gz")

# NOTE: joblib.load unpickles arbitrary objects -- only load files you created.
window = joblib.load("WindowData_window.gz")
# splits = SplitData(window)
causal = joblib.load("WindowData_causal.gz")
# splits = SplitData(causal)
# NOTE: later cells use `fold_1`; uncomment the two lines around it first.
# fold_1 = next(splits.train_valid_split())

In [35]:
import hashlib

# Fingerprint of the loaded dataset, computed from its textual representation.
causal_repr = repr(causal)
hashlib.sha256(causal_repr.encode()).hexdigest()

'3bbdfd7d8d107824bf9970146417b842bdb83554d9d690c7452f93a78926ffc8'

In [38]:
from time import sleep
from joblib import Parallel, delayed

def slow(n):
    """Return `n` unchanged after sleeping for one second."""
    sleep(1)
    return n

# Smoke test: run five one-second tasks on five workers.
tasks = (delayed(slow)(i) for i in range(5))
Parallel(n_jobs=5)(tasks)

[0, 1, 2, 3, 4]

### machine learning performance

#### extra tree

In [53]:
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor

# Extra-trees baseline: a regressor for the first target and a classifier for
# the second, both without bootstrapping and with 14 parallel jobs.
reg = ExtraTreesRegressor(bootstrap=False, n_jobs=14)
clf = ExtraTreesClassifier(bootstrap=False, n_jobs=14)
# `fold_1` must already exist in the session (its assignment is commented out
# in the Quick Start cell).
y_pred = _predict_reg_clf(fold_1, reg, clf)

In [54]:
# MAE of each prediction against the validation targets of the fold; the fold
# is used as yielded, i.e. the fare target is still in normalised units.
for name, _true, _pred in zip(("fare", "seat"), fold_1["validation_data"][1], y_pred):
    print(name, mean_absolute_error(_true, _pred))

fare 0.23375716055806559
seat 0.4416006282572999


### Embedding layer

In [14]:
from keras.backend import clear_session
from keras.layers import Input, Embedding, Concatenate, Reshape

# Start from a clean Keras session before rebuilding the graph.
clear_session()

# Embedding width (earlier trials kept for reference).
# embed_dim = 10
# embed_dim = 4
embed_dim = 16

# Two inputs per sample: LAGS steps of 34 encoded features, and LAGS steps of
# one integer code that is fed to the embedding layer.
encoded = Input(shape=(LAGS, 34), name="encoded")
fare_basis = Input(shape=(LAGS, 1), name="fare_basis")

# Embed the code (vocabulary size 250), drop the singleton axis added by the
# embedding, and append the result to the encoded features.
embedded = Embedding(input_dim=250, output_dim=embed_dim)(fare_basis)
embedded = Reshape((LAGS, embed_dim))(embedded)
concatenated_tensor = Concatenate(axis=-1)([encoded, embedded])

In [26]:
from keras import layers

# Shape probe: a Conv1D with 7 filters and kernel size 1 applied to the
# concatenated features (the output below shows (None, 14, 7)).
conv = layers.Conv1D(7, 1)
conv(concatenated_tensor)

<KerasTensor: shape=(None, 14, 7) dtype=float32 (created by layer 'conv1d_1')>

In [16]:
from keras.layers import TimeDistributed, Dense, EinsumDense

# A Dense layer with N_STEPS units applied independently to every time step.
lay = TimeDistributed(Dense(N_STEPS))

In [32]:
# Shape probe: 11 independent time-distributed Dense heads, each given a new
# trailing axis and concatenated along it (output shape (None, 14, 7, 11)).
time_dists = [
    TimeDistributed(Dense(N_STEPS)) for _ in range(11)
]

layers.concatenate([layer(concatenated_tensor)[..., None] for layer in time_dists])

<KerasTensor: shape=(None, 14, 7, 11) dtype=float32 (created by layer 'concatenate_3')>

In [22]:
# Shape probe of the single time-distributed Dense layer defined earlier.
lay(concatenated_tensor)

<KerasTensor: shape=(None, 14, 7) dtype=float32 (created by layer 'time_distributed')>

### Simple LSTM

In [36]:
from keras.layers import LSTM, Dense, Reshape, LayerNormalization
from keras.models import Model

model_comment = "simple_LSTM"

# Normalise the concatenated inputs. NOTE: this rebinds `concatenated_tensor`,
# so re-running the cell stacks another normalisation on top.
concatenated_tensor = LayerNormalization()(concatenated_tensor)
# Shared LSTM layer for both regression and classification
_lstm_output = LSTM(64, kernel_regularizer="l1", dropout=0.1, return_sequences=True)(concatenated_tensor)
_lstm_output = LayerNormalization()(_lstm_output)
_lstm_output = LSTM(32, kernel_regularizer="l1", dropout=0.1, return_sequences=True)(_lstm_output)
_lstm_output = LayerNormalization()(_lstm_output)
# The last LSTM returns a single vector of 16 * N_STEPS values, which is then
# reshaped into N_STEPS output steps of 16 features each.
_lstm_output = LSTM(16 * N_STEPS, kernel_regularizer="l1", dropout=0.1)(_lstm_output)
_lstm_output = LayerNormalization()(_lstm_output)
lstm_output = Reshape((N_STEPS, 16))(_lstm_output)

# Regression output
reg_output = Dense(1, activation='linear', name='reg')(lstm_output)

# Classification output
clf_output = Dense(11, activation='softmax', name="clf")(lstm_output)

# Define the model with two outputs
model = Model(inputs=[encoded, fare_basis], outputs=[reg_output, clf_output])

# Compile the model with appropriate loss functions and metrics
model.compile(
    optimizer='adam', 
    loss={'reg': 'mse', 'clf': 'sparse_categorical_crossentropy'},
    metrics={'reg': 'mae', 'clf': 'accuracy'}
)

# Print model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 embed (InputLayer)             [(None, 14, 2)]      0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 2, 4)     848         ['embed[0][0]']                  
                                                                                                  
 encoded (InputLayer)           [(None, 14, 30)]     0           []                               
                                                                                                  
 reshape (Reshape)              (None, 14, 8)        0           ['embedding[0][0]']              
                                                                                              

### LayerNormLSTM

In [16]:
from keras.layers import LSTMCell, LayerNormalization, RNN
from keras import backend
import tensorflow as tf

class LayerNormLSTMCell(LSTMCell):
    """
    LSTM cell with layer normalisation inside the cell.

    Separate `LayerNormalization` layers are applied to the input projection,
    to the recurrent projection and to the new carry state.
    """
    def __init__(self, units, activation="tanh", **kwargs):
        super().__init__(units, activation, **kwargs)
        self.kernel_norm = LayerNormalization(name="kernel_norm")
        self.recurrent_norm = LayerNormalization(name="recurrent_norm")
        self.state_norm = LayerNormalization(name="state_norm")
    
    def call(self, inputs, states, training=None):
        """Run one time step and return ``(h, [h, c])``."""
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        # Four masks are requested but only the first of each is applied,
        # because the four gates are computed from one fused projection.
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1, training, count=4)
        if 0.0 < self.dropout < 1.0:
            inputs *= dp_mask[0]
        # normalised input projection
        z = self.kernel_norm(backend.dot(inputs, self.kernel))

        if 0.0 < self.recurrent_dropout < 1.0:
            h_tm1 *= rec_dp_mask[0]
        # add the normalised recurrent projection; the bias is added after
        # both normalisations
        z += self.recurrent_norm(backend.dot(h_tm1, self.recurrent_kernel))
        if self.use_bias:
            z = backend.bias_add(z, self.bias)

        # split the fused pre-activations into the four gate blocks
        z = tf.split(z, num_or_size_splits=4, axis=1)
        c, o = self._compute_carry_and_output_fused(z, c_tm1)
        # normalise the carry before it is used for the output and passed on
        c = self.state_norm(c)
        h = o * self.activation(c)
        return h, [h, c]

    def build(self, input_shape):
        """Build the LSTM weights, then the three normalisation sub-layers."""
        super().build(input_shape)

        def maybe_build_sublayer(sublayer, build_shape):
            # build each sub-layer once, under its own name scope
            if not sublayer.built:
                with tf.keras.backend.name_scope(sublayer.name):
                    sublayer.build(build_shape)
                    sublayer.built = True

        # the projections hold 4 * units values (one block per gate), the
        # carry state holds `units` values
        maybe_build_sublayer(self.kernel_norm, [input_shape[0], self.units * 4])
        maybe_build_sublayer(self.recurrent_norm, [input_shape[0], self.units * 4])
        maybe_build_sublayer(self.state_norm, [input_shape[0], self.units])

In [26]:
from keras.layers import Input, RepeatVector, Dense, TimeDistributed
from keras.models import Model
import tensorflow as tf

model_comment = "LayerNormLSTM"

# Encoder: a layer-normalised LSTM that reduces the input sequence to one vector.
LNLSTM_tensor = RNN(LayerNormLSTMCell(32, dropout=0.1, recurrent_dropout=0.1), name="LN_LSTM")(concatenated_tensor)
# Decoder: repeat that vector N_STEPS times and run a second layer-normalised
# LSTM that returns one output per forecast step.
_lstm_output = RepeatVector(N_STEPS)(LNLSTM_tensor)
lstm_output = RNN(LayerNormLSTMCell(32, dropout=0.1, recurrent_dropout=0.1), name="LN_LSTM_2", return_sequences=True)(_lstm_output)

# Per-step heads: a non-negative (ReLU) regression value and an 11-class softmax.
reg_output = TimeDistributed(Dense(1, activation='relu'), name='reg')(lstm_output)
clf_output = TimeDistributed(Dense(11, activation='softmax'), name="clf")(lstm_output)

model = Model(inputs=[encoded, fare_basis], outputs=[reg_output, clf_output])

model.compile(
    optimizer='adam',
    loss={'reg': 'mse', 'clf': 'sparse_categorical_crossentropy'},
    metrics={'reg': 'mae'}
)

model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 fare_basis (InputLayer)        [(None, 14, 1)]      0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 1, 4)     1000        ['fare_basis[0][0]']             
                                                                                                  
 encoded (InputLayer)           [(None, 14, 34)]     0           []                               
                                                                                                  
 reshape (Reshape)              (None, 14, 4)        0           ['embedding[0][0]']              
                                                                                              

#### old

In [16]:
from keras.layers import LSTMCell, LayerNormalization, RNN

class LayerNormLSTMCell(LSTMCell):
    """
    LSTM cell that layer-normalizes the output and the carry state it returns.

    Each step runs the stock ``LSTMCell`` computation, then passes both the
    step output and the second returned state through one shared
    ``LayerNormalization`` layer followed by the cell's activation. The
    normalized output is returned as the step output and as the first state.

    Args:
        units: Number of units, forwarded to ``LSTMCell``.
        activation: Activation forwarded to ``LSTMCell`` and re-applied after
            the normalization.
        **kwargs: Any other ``LSTMCell`` keyword arguments.
    """
    def __init__(self, units, activation="tanh", **kwargs):
        super().__init__(units, activation, **kwargs)
        # A single LayerNormalization instance: `call` applies it to both the
        # output and the state, so its scale/offset are shared between them.
        self.__layer_norm = LayerNormalization()

    def call(self, inputs, states, training=None):
        # Accept and forward `training` so the wrapping RNN can tell the parent
        # cell whether dropout / recurrent_dropout should be active. Without
        # this parameter the flag is never passed down explicitly.
        outputs, new_states = super().call(inputs, states, training=training)
        norm_outputs = self.activation(self.__layer_norm(outputs))
        norm_states = self.activation(self.__layer_norm(new_states[1]))
        return norm_outputs, [norm_outputs, norm_states]

In [21]:
from keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Reshape, BatchNormalization, LayerNormalization
from keras.models import Model

# Experiment tag for this (older) stacked-LSTM variant.
model_comment = "LayerNormLSTM"

# Two inputs over the same LAGS-step window: 30 pre-encoded features per step,
# plus `embed_n_feat` columns that are fed to the Embedding layer below.
# `embed_n_feat`, `embed_n_classes` and `embed_dim` are defined elsewhere.
encoded = Input(shape=(LAGS, 30), name="encoded")
fare_basis = Input(shape=(LAGS, embed_n_feat), name="embed")

# Embedding layer for the second input
embedded_tensor2 = Embedding(input_dim=embed_n_classes, output_dim=embed_dim)(fare_basis)
# Flatten the per-column embeddings into one feature vector per time step.
embedded_tensor2 = Reshape((LAGS, embed_n_feat * embed_dim))(embedded_tensor2)

# Concatenate the embedded tensor with the first input tensor
concatenated_tensor = Concatenate(axis=-1)([encoded, embedded_tensor2])
concatenated_tensor = LayerNormalization()(concatenated_tensor)

# Shared LSTM layer for both regression and classification
# (each recurrent layer is followed by a LayerNormalization)
_lstm_output = LSTM(64, kernel_regularizer="l1", dropout=0.1, return_sequences=True)(concatenated_tensor)
_lstm_output = LayerNormalization()(_lstm_output)
# _lstm_output = LSTM(32, kernel_regularizer="l1", dropout=0.1, return_sequences=True)(_lstm_output)
_lstm_output = RNN(LayerNormLSTMCell(32, kernel_regularizer="l1", dropout=0.1), return_sequences=True, name="LN_LSTM")(_lstm_output)
_lstm_output = LayerNormalization()(_lstm_output)
# The last LSTM returns a single vector of 16 * N_STEPS values ...
_lstm_output = LSTM(16 * N_STEPS, kernel_regularizer="l1", dropout=0.1)(_lstm_output)
_lstm_output = LayerNormalization()(_lstm_output)
# ... which is reshaped into N_STEPS forecast steps of 16 features each.
lstm_output = Reshape((N_STEPS, 16))(_lstm_output)

# Regression output
reg_output = Dense(1, activation='linear', name='reg')(lstm_output)

# Classification output
clf_output = Dense(11, activation='softmax', name="clf")(lstm_output)

# Define the model with two outputs
model = Model(inputs=[encoded, fare_basis], outputs=[reg_output, clf_output])

# Compile the model with appropriate loss functions and metrics
model.compile(
    optimizer='adam', 
    loss={'reg': 'mse', 'clf': 'sparse_categorical_crossentropy'},
    metrics={'reg': 'mae', 'clf': 'accuracy'}
)

# Print model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 embed (InputLayer)             [(None, 14, 2)]      0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 2, 4)     236         ['embed[0][0]']                  
                                                                                                  
 encoded (InputLayer)           [(None, 14, 30)]     0           []                               
                                                                                                  
 reshape (Reshape)              (None, 14, 8)        0           ['embedding[0][0]']              
                                                                                              

### LSTM encoder-decoder

In [34]:
from keras.layers import LSTM, Dense, Concatenate, Reshape, RepeatVector, LayerNormalization, TimeDistributed, add
from keras.callbacks import TensorBoard
from keras.models import Model

# model_comment = "LSTM_encoder_decoder"
model_comment = "embed_dim_test"

# Use the input feature width as the LSTM width so that each LSTM output can be
# added element-wise to its own input (residual connection).
# `concatenated_tensor`, `encoded` and `fare_basis` are defined in another cell.
unit = concatenated_tensor.shape[-1]

# Encoder: two residual LSTM blocks, each LayerNorm(LSTM(x) + x) ...
_lstm_output = LayerNormalization()(
    add([LSTM(unit, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(concatenated_tensor), concatenated_tensor])
)
_lstm_output = LayerNormalization()(
    add([LSTM(unit, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(_lstm_output), _lstm_output])
)
# ... followed by an LSTM without return_sequences, i.e. one vector per sample.
_lstm_output = LayerNormalization()(
    LSTM(unit, kernel_regularizer="l2", dropout=0.1)(_lstm_output)
)
# Repeat the encoded vector once per forecast step.
lstm_encode = RepeatVector(N_STEPS)(_lstm_output)

# Decoder: two more residual LSTM blocks over the repeated vector.
_lstm_decode = LayerNormalization()(
    add([LSTM(unit, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(lstm_encode), lstm_encode])
)
lstm_output = LayerNormalization()(
    add([LSTM(unit, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(_lstm_decode), _lstm_decode])
)

# Per-step hidden layer shared by both heads (its result replaces `lstm_output`)
lstm_output = TimeDistributed(
    Dense(16, activation="relu")
)(lstm_output)
# Regression output
reg_output = TimeDistributed(
    Dense(1, activation='linear'), name='reg'
)(lstm_output)

# Classification output
clf_output = TimeDistributed(
    Dense(11, activation='softmax'), name="clf"
)(lstm_output)

model = Model(inputs=[encoded, fare_basis], outputs=[reg_output, clf_output])

# One loss per named output; MAE is tracked for the regression head only.
model.compile(
    optimizer='adam',
    loss={'reg': 'mse', 'clf': 'sparse_categorical_crossentropy'},
    metrics={'reg': 'mae'}
)

model.summary()

# TensorBoard callback for this experiment; logs go under log/<model_comment>/lstm_16/.
tensor_board = TensorBoard(log_dir=f"log/{model_comment}/lstm_16/", histogram_freq=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 fare_basis (InputLayer)        [(None, 14, 1)]      0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 1, 16)    4000        ['fare_basis[0][0]']             
                                                                                                  
 encoded (InputLayer)           [(None, 14, 34)]     0           []                               
                                                                                                  
 reshape (Reshape)              (None, 14, 16)       0           ['embedding[0][0]']              
                                                                                              

### GRU

In [8]:
import joblib
from keras.backend import clear_session
from keras.layers import (GRU, Concatenate, Dense, Embedding, Input,
                          LayerNormalization, RepeatVector, Reshape)
from keras.models import Model

from snapshot import *

# window = joblib.load("WindowData_window.gz")


# Reset Keras' global state before building a fresh model in this cell.
clear_session()

# Size of each embedding vector produced for the `fare_basis` input.
embed_dim = 4

# Two inputs over the same LAGS-step window: 34 pre-encoded features per step
# and one column that is looked up in the Embedding layer.
encoded = Input(shape=(LAGS, 34), name="encoded")
fare_basis = Input(shape=(LAGS, 1), name="fare_basis")

# Embedding table with 250 rows; the Reshape drops the size-1 column axis.
embedded = Embedding(input_dim=250, output_dim=embed_dim)(fare_basis)
embedded = Reshape((LAGS, embed_dim))(embedded)
concatenated_tensor = Concatenate(axis=-1)([encoded, embedded])


model_comment = "GRU_embed_4"

concatenated_tensor = LayerNormalization()(concatenated_tensor)
# Shared GRU stack for both regression and classification
# (the `_lstm_output` / `lstm_output` names are kept although the layers are GRUs)
_lstm_output = GRU(64, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(concatenated_tensor)
_lstm_output = LayerNormalization()(_lstm_output)
_lstm_output = GRU(32, kernel_regularizer="l2", dropout=0.1, return_sequences=True)(_lstm_output)
_lstm_output = LayerNormalization()(_lstm_output)
# Last GRU returns one 16-dim vector per sample, repeated once per forecast step.
_lstm_output = GRU(16, kernel_regularizer="l2", dropout=0.1)(_lstm_output)
lstm_output = RepeatVector(N_STEPS)(_lstm_output)
# NOTE(review): every one of the N_STEPS rows of `lstm_output` is the same vector and
# the Dense heads below apply the same weights to each row, so all forecast steps
# appear to receive identical predictions; confirm this is intended.

# Regression output
_reg_output = Dense(8, activation="relu", name="hidden_reg")(lstm_output)
reg_output = Dense(1, activation='linear', name='reg')(_reg_output)

# Classification output
clf_output = Dense(11, activation='softmax', name="clf")(lstm_output)

# Define the model with two outputs
model = Model(inputs=[encoded, fare_basis], outputs=[reg_output, clf_output])

# Compile the model with appropriate loss functions and metrics
model.compile(
    optimizer='adam', 
    loss={'reg': 'mse', 'clf': 'sparse_categorical_crossentropy'},
    metrics={'reg': 'mae'}
)

# Print model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 fare_basis (InputLayer)        [(None, 14, 1)]      0           []                               
                                                                                                  
 embedding (Embedding)          (None, 14, 1, 4)     1000        ['fare_basis[0][0]']             
                                                                                                  
 encoded (InputLayer)           [(None, 14, 34)]     0           []                               
                                                                                                  
 reshape (Reshape)              (None, 14, 4)        0           ['embedding[0][0]']              
                                                                                              

In [5]:
# Display the symbolic regression output tensor to inspect its shape.
reg_output

<KerasTensor: shape=(None, 7, 1) dtype=float32 (created by layer 'reg')>

### CNN

In [97]:
import tensorflow as tf

class CausalModel(tf.keras.Model):
    """Subclassed model with one time-distributed dense layer.

    ``call`` applies ``Dense(n_steps)`` independently to every entry along the
    time axis of the input. ``predict`` keeps only the last entry along axis 1
    of that result.

    Args:
        lags: Accepted for interface compatibility; not used by the model.
        n_steps: Number of units of the dense layer.
    """

    def __init__(self, lags=14, n_steps=7):
        super().__init__()
        self.time_dist_dense = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(n_steps)
        )

    def call(self, inputs):
        """Apply the time-distributed dense layer to ``inputs``."""
        return self.time_dist_dense(inputs)

    def predict(self, x, *args, **kwargs):
        """Run ``Model.predict`` and return only the last entry along axis 1.

        Extra positional and keyword arguments (``batch_size``, ``verbose``,
        ``steps``, ...) are forwarded unchanged, so the override stays
        call-compatible with ``tf.keras.Model.predict``.
        """
        pred = super().predict(x, *args, **kwargs)
        return pred[:, -1]

# Instantiate the model with its default arguments.
model = CausalModel()

# Disabled: call the model on a symbolic (14, 20) input and save it under "custom".
# input_tensor = tf.keras.layers.Input([14, 20])
# model(input_tensor)
# model.save("custom")


In [41]:
# Reload the model stored at the path "custom" and print its layer summary.
_model = tf.keras.models.load_model("custom")
_model.summary()

Model: "causal_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_34 (TimeDi  multiple                 147       
 stributed)                                                      
                                                                 
Total params: 147
Trainable params: 147
Non-trainable params: 0
_________________________________________________________________


In [48]:
import numpy as np

# All-ones float array of shape (1, 14, 2), used as the Conv1D test input below.
arr = np.full((1, 14, 2), 1.0)

In [74]:
from keras.layers import Conv1D


# Conv1D with 2 filters and kernel size 3; "causal" padding keeps the output
# length equal to the input length.
conv = Conv1D(2, 3, padding="causal")

In [None]:
def last_time_step_mse(Y_true, Y_pred):
    """Return the mean squared error computed on the last time step only.

    Both arguments are indexed with ``[:, -1]``, so only the final entry along
    axis 1 of the targets and of the predictions enters the error.

    Args:
        Y_true: Target tensor with at least two axes.
        Y_pred: Prediction tensor with the same layout as ``Y_true``.

    Returns:
        The result of ``keras.metrics.mean_squared_error`` on the two slices.
    """
    # Imported locally so the function does not rely on a module-level `keras`
    # name being bound by some other cell.
    import keras

    return keras.metrics.mean_squared_error(Y_true[:, -1], Y_pred[:, -1])

In [76]:
# Apply the layer to `arr`; the notebook displays the returned tensor.
conv(arr)

<tf.Tensor: shape=(1, 14, 2), dtype=float32, numpy=
array([[[-1.0523348 , -0.9404941 ],
        [-0.6090739 , -1.1133223 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ],
        [-0.24222544, -1.2826777 ]]], dtype=float32)>

In [49]:
# Manually pad `arr` with one all-zero step before and one after along axis 1.
padded = np.concatenate([np.zeros((1, 1, 2)), arr, np.zeros((1, 1, 2))], axis=1)

In [72]:
# Apply `conv` to `arr`.
# NOTE(review): this cell's execution count is lower than that of the cell defining
# the causal `conv`, so the recorded output probably comes from an earlier layer
# definition; rerun to confirm.
conv(arr)

<tf.Tensor: shape=(1, 6, 2), dtype=float32, numpy=
array([[[-0.3246593,  0.4348855],
        [-0.3246593,  0.4348855],
        [-0.3246593,  0.4348855],
        [-0.3246593,  0.4348855],
        [-0.3246593,  0.4348855],
        [-0.3246593,  0.4348855]]], dtype=float32)>

In [59]:
# Conv1D with 4 filters, kernel size 3 and the default padding.
conv_ = Conv1D(4, 3)
# Call the layer once so that its weights get created; the trailing `;`
# suppresses the notebook's output display.
conv_(padded);

In [60]:
# Copy the weights of `conv` into `conv_` so both layers compute with the same kernel.
# NOTE(review): set_weights needs matching weight shapes; make sure `conv` was
# created with the same number of filters as `conv_` when this cell runs.
conv_.set_weights(conv.get_weights())

In [61]:
# Apply the unpadded layer to the manually padded input and display the result.
conv_(padded)

<tf.Tensor: shape=(1, 14, 4), dtype=float32, numpy=
array([[[ 0.34678614, -0.05552459,  0.84444267,  0.25828576],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.3067615 ,  0.24568313,  1.3583382 , -0.1781897 ],
        [ 0.43274322,  0.5942487 ,  0.95666677,  0.34926495]]],
      dtype=float32)>

### training

In [10]:
# Load the cached data for a test run; it is later unpacked as keyword arguments
# of `model.fit`.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
test_run_data = joblib.load("test_run_data.gz")

In [35]:
from datetime import datetime

from keras.callbacks import EarlyStopping, TensorBoard

# Month-day_hour-minute stamp used to name this run's artefacts.
TIMESTAMP = datetime.now().strftime("%m%d_%H%M")
# tensor_board = TensorBoard(log_dir=f"log/{model_comment}/{TIMESTAMP}/", histogram_freq=1)
# tensor_board = TensorBoard(log_dir=f"log/{model_comment}/test_run_{TIMESTAMP}_lstm/", histogram_freq=1)
# Stop once the validation MAE of the 'reg' output has not improved for 30 epochs
# and restore the weights of the best epoch.
early_stopping = EarlyStopping(patience=30, monitor="val_reg_mae", mode="min", restore_best_weights=True)

# NOTE(review): `tensor_board` is not assigned in this cell (both assignments above
# are commented out), so it must already exist from a previously executed cell.
history = model.fit(
    **test_run_data,
    # **fold_1,
    # Upper bound only; early stopping is expected to end training much sooner.
    epochs=3000,
    batch_size=32,
    callbacks=[early_stopping, tensor_board],
    # Keep the samples in their given order instead of shuffling each epoch.
    shuffle=False,
    verbose=1,
)

# model.save(f"models/{model_comment}_{TIMESTAMP}.keras")

Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

In [118]:
from datetime import datetime

from keras.callbacks import EarlyStopping, TensorBoard

# Month-day_hour-minute stamp used to name this run's logs and saved models.
TIMESTAMP = datetime.now().strftime("%m%d_%H%M")
tensor_board = TensorBoard(log_dir=f"log/{model_comment}_{TIMESTAMP}/", histogram_freq=1)
# Stop once the validation MAE of the 'reg' output has not improved for 30 epochs
# and restore the weights of the best epoch.
early_stopping = EarlyStopping(patience=30, monitor="val_reg_mae", mode="min", restore_best_weights=True)

# Train the same model on each train/validation fold in turn, saving it after every fold.
splits_ = SplitData(window)
last_epoch = 0
for i, fold in enumerate(splits_.train_valid_split()):
    print(last_epoch)
    history = model.fit(
        **fold,
        # With `initial_epoch` set, Keras treats `epochs` as the index of the
        # final epoch, not as a count. Offsetting it by `last_epoch` gives every
        # fold the same budget of up to 300 epochs; a fixed value would leave
        # later folds only the remainder of one shared budget.
        epochs=last_epoch + 300,
        batch_size=32,
        # Continue the epoch numbering across folds so the logs form one timeline.
        initial_epoch=last_epoch,
        callbacks=[early_stopping, tensor_board],
        shuffle=False,
        verbose=0,
    )
    # Number of epochs actually run for this fold (early stopping may cut it short).
    last_epoch += len(history.epoch)

    model.save(f"models/{model_comment}_{TIMESTAMP}_{i}.keras")

0
76
121
200
279


### evaluation

In [79]:
# Predict on the first element of fold_1's "validation_data"
# (presumably the validation inputs; verify against how the folds are built).
y_pred = model.predict(fold_1["validation_data"][0])



In [57]:
# Load the stored ground truth with `load_y_pred` (defined outside this section).
y_true = load_y_pred("models/y_true.npz")

In [80]:
# TODO: complete the function
def save_model_pred(): pass  # placeholder; the statements below run at cell level, not inside it
# Map the regression output back through `splits.denormalize_fare` and drop size-1 axes.
y_pred[0] = splits.denormalize_fare(y_pred[0]).squeeze()
# Replace the class probabilities by the index of the most probable class.
y_pred[1] = y_pred[1].argmax(axis=-1)
# Store both arrays in one compressed file under the keys "fare" and "seat".
np.savez_compressed("simple_LSTM_1115_0002.npz", fare=y_pred[0], seat=y_pred[1])

In [88]:
# Turn the list of model outputs into a dict keyed "fare" / "seat", in that order.
y_pred = dict(zip(["fare", "seat"], y_pred))

In [104]:
# TODO: read pred result and draw picture
# Number of predicted samples; the ground truth is truncated to this length.
_len = y_pred["fare"].shape[0]
# Displayed as (fare MAE, seat MAE).
tuple(
    mean_absolute_error(y_true[target][:_len], y_pred[target])
    for target in ("fare", "seat")
)

(33.45066234794296, 0.44249303919468835)

#### keep training

In [None]:
# Reset the running epoch counter used as `initial_epoch` when training is resumed.
last_epoch = 0

In [30]:
# Add the number of epochs run by the most recent `fit` call.
last_epoch += len(history.epoch)

In [31]:
# Resume training where the previous run stopped.
history = model.fit(
    **test_run_data,
    # With `initial_epoch` set, `epochs` is the index of the final epoch, so at
    # most 1000 - last_epoch further epochs are run.
    epochs=1000,
    batch_size=32,
    initial_epoch=last_epoch,
    callbacks=[early_stopping, tensor_board],
    shuffle=False
)

Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/1000
Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/1000
Epoch 386/1000
Epoch 387/1000
Epoch 388/1000
Epoch 389/1000
Epoch 390/1000
Epoch 391/1000
Epoch 392/1000
Epoch 393/1000
Epoch 394/1000
Epoch 395/1000
Epoch 396/1000
Epoch 397/1000
Epoch 398/1000
Epoch 399/1000
Epoch 400/1000
Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/

# Show allocated memory

In [61]:
from psutil import Process

# Resident set size of the current process, converted from bytes by dividing by 2**20.
print(f"{Process().memory_info().rss / 2 ** 20:.2f} MB")
# Process().memory_percent()

1392.50 MB
