In [8]:
import numpy as np
import pandas as pd
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from typing import *

In [6]:
# The debug training table lives in a "cache" directory that sits next to the
# directory the notebook is executed from.
train_csv_path = Path().resolve().parent / "cache" / "train-10fold-debugTrue.csv"
train = pd.read_csv(train_csv_path)
train.head()

Unnamed: 0,time_step,u_in,u_out,pressure,kfold,corss,cross2,cross3,time_delta,time_step_cumsum,...,C_10,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.0833,0,5.836,4,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0.0,-28.23,-27.64,-22.6,-10.06,-19.66,-18.64
1,0.03366,18.39,0,5.906,4,0.0,0.03366,0.0,0.03366,0.03366,...,0,0,1,0.01251,-9.93,-27.02,-21.97,8.234,-19.03,-18.03
2,0.0675,22.52,0,7.875,4,0.0,0.0675,0.0,0.03387,0.1012,...,0,0,1,0.0251,-5.805,-26.25,-21.22,12.36,-18.27,-17.27
3,0.10156,22.81,0,11.74,4,0.0,0.10156,0.0,0.03403,0.2028,...,0,0,1,0.03775,-5.504,-25.48,-20.44,12.664,-17.48,-16.48
4,0.1357,25.36,0,12.234,4,0.0,0.1357,0.0,0.0342,0.3384,...,0,0,1,0.05048,-2.957,-24.61,-19.56,15.21,-16.62,-15.625


In [21]:
class RCNoiseGenerator(keras.utils.Sequence):
    """Batch generator that randomly re-draws the one-hot R and C feature groups.

    For every sample of a batch, the R group and the C group are each
    (independently, with probability ``noise_p``) replaced by a uniformly drawn
    one-hot vector. The RC group is then recomputed from the resulting (R, C)
    pair, and all three groups are written back on every step of axis 1, so
    they are constant within a sample. Every other feature, and ``y``, is
    returned unchanged.

    Notes:
        * The re-drawn category may coincide with the original one, so the
          share of samples whose R (or C) actually changes is below ``noise_p``.
        * The original categories are read from step 0 of axis 1 only.
        * All randomness comes from the global ``np.random`` state; seed it
          with ``np.random.seed`` for reproducible batches.
        * ``X`` and ``y`` are never modified or copied as a whole; shuffling is
          done through an internal index permutation.

    Args:
        X: Feature array indexed as ``(sample, step, feature)``.
        y: Target array whose first axis is aligned with the first axis of ``X``.
        noise_p: Per-sample probability of re-drawing R, and (independently) C.
        batch_size: Number of samples per batch; the last batch may be smaller.
        rc_idxs: Positions (last axis of ``X``) of the 9 RC one-hot columns.
        r_idxs: Positions (last axis of ``X``) of the 3 R one-hot columns.
        c_idxs: Positions (last axis of ``X``) of the 3 C one-hot columns.

    Raises:
        ValueError: If ``batch_size`` is not positive, if ``X`` and ``y`` hold a
            different number of samples, or if an index list does not have the
            size implied by the (R, C) -> RC lookup table.
    """

    def __init__(
        self,
        X: np.ndarray,
        y: np.ndarray,
        noise_p: float,
        batch_size: int,
        rc_idxs: List[int],
        r_idxs: List[int],
        c_idxs: List[int],
    ) -> None:
        super().__init__()

        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y hold different numbers of samples: {X.shape[0]} != {y.shape[0]}"
            )

        self.X = X
        self.y = y
        self.noise_p = noise_p
        self.batch_size = batch_size
        self.rc_idxs = rc_idxs
        self.r_idxs = r_idxs
        self.c_idxs = c_idxs

        # Entry [i, j] is the position, inside ``rc_idxs``, of the RC column that
        # belongs to R category i and C category j.
        # NOTE(review): rows are not in ascending order; presumably this mirrors
        # the column order of the one-hot RC features in the training frame --
        # confirm against the feature-engineering code.
        self._rc_matrix = np.array([[0, 1, 2], [6, 7, 8], [3, 4, 5]])

        # Fail fast when the index lists cannot match the lookup table; otherwise
        # the mismatch only surfaces as a shape error inside ``__getitem__``.
        n_r, n_c = self._rc_matrix.shape
        for name, idxs, expected in (
            ("rc_idxs", rc_idxs, n_r * n_c),
            ("r_idxs", r_idxs, n_r),
            ("c_idxs", c_idxs, n_c),
        ):
            if len(idxs) != expected:
                raise ValueError(
                    f"{name} must contain {expected} column indices, got {len(idxs)}"
                )

        # Current sample order. Shuffling permutes this index vector instead of
        # re-materialising X and y after every epoch.
        self._order = np.arange(X.shape[0])

    def __getitem__(self, idx: int):
        """Return batch ``idx`` as ``(x, y)`` with noise applied to the R/C/RC groups.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
        """
        # Explicit bounds check: an out-of-range index must raise IndexError so
        # that sequence-protocol iteration terminates instead of yielding empty
        # batches forever.
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")

        start = idx * self.batch_size
        sample_ids = self._order[start : start + self.batch_size]
        # Integer-array indexing returns copies, so the stored arrays stay intact.
        x, y = self.X[sample_ids], self.y[sample_ids]

        n_r, n_c = self._rc_matrix.shape

        # One-hot groups as found on the first step of each sample (copies).
        r = x[:, 0, self.r_idxs]
        c = x[:, 0, self.c_idxs]

        # Samples selected for re-drawing; R and C are selected independently.
        r_change = np.random.rand(x.shape[0]) <= self.noise_p
        c_change = np.random.rand(x.shape[0]) <= self.noise_p

        new_r = np.random.choice(np.arange(n_r), size=np.sum(r_change), replace=True)
        new_c = np.random.choice(np.arange(n_c), size=np.sum(c_change), replace=True)
        r[r_change, :] = np.eye(n_r)[new_r]
        c[c_change, :] = np.eye(n_c)[new_c]

        # Vectorised (R, C) -> RC lookup for the whole batch at once.
        rc_pos = self._rc_matrix[np.argmax(r, axis=1), np.argmax(c, axis=1)]
        rc = np.eye(self._rc_matrix.size)[rc_pos]

        # The inserted length-1 axis broadcasts each per-sample vector over all steps.
        x[:, :, self.rc_idxs] = rc[:, np.newaxis, :]
        x[:, :, self.r_idxs] = r[:, np.newaxis, :]
        x[:, :, self.c_idxs] = c[:, np.newaxis, :]

        return x, y

    def _find_rc(self, r: int, c: int):
        """Return the RC column position for R category ``r`` and C category ``c``."""
        return self._rc_matrix[r][c]

    def __len__(self):
        """Number of batches per epoch; a trailing partial batch counts as one."""
        return int(np.ceil(self.X.shape[0] / self.batch_size))

    def on_epoch_end(self):
        """Reshuffle the sample order used by the next epoch."""
        idxs = np.arange(self.X.shape[0])
        np.random.shuffle(idxs)

        # Compose with the previous order: this yields the same sample sequence
        # as reordering X and y themselves, without copying the data.
        self._order = self._order[idxs]

In [12]:
# Columns that must not be fed to a model as inputs: "pressure" is the target
# (it is what `y` is built from), so keeping it in the feature list leaks the
# label into X. "Unnamed: 0" and "kfold" are presumably the saved row index of
# the CSV and the fold assignment -- bookkeeping, not signal.
NON_FEATURE_COLUMNS = ("Unnamed: 0", "pressure", "kfold")

features = np.array(
    [col for col in train.columns if col not in NON_FEATURE_COLUMNS], dtype=object
)

# Positions of the one-hot groups inside `features` (i.e. on the last axis of X).
rc_idxs = [i for i, f in enumerate(features) if "RC_" in f]
r_idxs = [i for i, f in enumerate(features) if "R_" in f]
# C is matched by exact name rather than by substring.
c_idxs = [i for i, f in enumerate(features) if f in ["C_10", "C_20", "C_50"]]

# The noise generator expects 9 RC columns and 3 columns each for R and C.
assert (len(rc_idxs), len(r_idxs), len(c_idxs)) == (9, 3, 3), (
    f"unexpected one-hot group sizes: RC={len(rc_idxs)}, R={len(r_idxs)}, C={len(c_idxs)}"
)

In [17]:
# Every run of 80 consecutive rows becomes one sample:
# X -> (sample, 80, feature), y -> (sample, 80).
seq_len = 80
n_features = features.shape[0]

X = train[features].values.reshape(-1, seq_len, n_features)
y = train["pressure"].values.reshape(-1, seq_len)

In [31]:
# Small generator instance for inspecting individual batches below.
rcng = RCNoiseGenerator(
    X=X,
    y=y,
    noise_p=0.1,
    batch_size=2,
    rc_idxs=rc_idxs,
    r_idxs=r_idxs,
    c_idxs=c_idxs,
)

In [42]:
# Sanity check: the C index list should resolve to the three C column names.
c_feature_names = features[c_idxs]
c_feature_names

array(['C_10', 'C_20', 'C_50'], dtype=object)

In [39]:
# RC one-hot block on the first step of every sample in batch 0.
first_batch_x, first_batch_y = rcng[0]
first_batch_x[:, 0, rc_idxs]

array([[0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0.]])