In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import gc
import sys
import json
import warnings

import tensorflow as tf
from tensorflow import keras

from sklearn.preprocessing import RobustScaler

sys.path.append(str(Path().resolve().parent))

from src.utils import (
    seed_every_thing,
    fetch_data,
    Config,
    plot_metric,
    reduce_tf_gpu_memory,
    reduce_mem_usage,
    fetch_custom_data,
    CustomL1Loss
)
# Session-wide display/config tweaks:
# - let pandas show up to 200 columns when rendering a frame
# - ignore every warning raised from here on
pd.set_option("display.max_columns", 200)
warnings.simplefilter("ignore")

In [2]:
# All project directories hang off the parent of the current working directory.
basedir = Path().resolve().parent
logdir, datadir, cachedir = (
    basedir / folder for folder in ("logs", "data", "cache")
)

In [3]:
# Read the validation and test prediction tables saved by the
# "cnn-classify-rc-reliable_3" run under the log directory.
rc_run_dir = logdir / "cnn-classify-rc-reliable_3"
rc_preds = pd.read_csv(rc_run_dir / "valid_preds.csv")
rc_test_preds = pd.read_csv(rc_run_dir / "test_preds.csv")

In [4]:
# Load the cached 10-fold training table and preview its first five rows.
train_csv = cachedir / "train-10fold-debugFalse.csv"
train = pd.read_csv(train_csv)
train.head(n=5)

Unnamed: 0,time_step,u_in,u_out,pressure,kfold,corss,cross2,cross3,time_delta,time_step_cumsum,u_in_cumsum,count,u_in_cummean,u_in_lag_b1,u_out_lag_b1,u_in_lag_b2,u_out_lag_b2,u_in_lag_b3,u_out_lag_b3,u_in_lag_b4,u_out_lag_b4,u_in_lag_b5,u_out_lag_b5,u_in_lag_f1,u_out_lag_f1,u_in_lag_f2,u_out_lag_f2,u_in_lag_f3,u_out_lag_f3,u_in_lag_f4,u_out_lag_f4,u_in_lag_f5,u_out_lag_f5,u_in_diff_b1,u_out_diff_b1,u_in_diff_b2,u_out_diff_b2,u_in_diff_b3,u_out_diff_b3,u_in_diff_b4,u_out_diff_b4,u_in_diff_b5,u_out_diff_b5,u_in_diff_f1,u_out_diff_f1,u_in_diff_f2,u_out_diff_f2,u_in_diff_f3,u_out_diff_f3,u_in_diff_f4,u_out_diff_f4,u_in_diff_f5,u_out_diff_f5,u_in_bwindow_mean,u_in_bwindow_max,u_in_bwindow_min,u_in_bwindow_std,u_in_fwindow_mean,u_in_fwindow_max,u_in_fwindow_min,u_in_fwindow_std,u_in_cwindow_mean,u_in_cwindow_max,u_in_cwindow_min,u_in_cwindow_std,u_in_bwindow_ewm,u_in_fwindow_ewm,u_in_cwindow_ewm,u_in_bwindow_mean_diff,u_in_bwindow_max_diff,u_in_bwindow_min_diff,u_in_fwindow_mean_diff,u_in_fwindow_max_diff,u_in_fwindow_min_diff,u_in_cwindow_mean_diff,u_in_cwindow_max_diff,u_in_cwindow_min_diff,area,area_insp,area_insp_last,u_in_max,u_in_mean,u_in_std,area_max,area_insp_max,area_mean,area_insp_mean,vibs,RC_20_10,RC_20_20,RC_20_50,RC_50_10,RC_50_20,RC_50_50,RC_5_10,RC_5_20,RC_5_50,R_20,R_5,R_50,C_10,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.0833,0,5.836,4,0.0,0.0,0.0,0.0,0.0,0.0833,1,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.39,0.0,22.52,0.0,22.81,0.0,25.36,0.0,27.27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-18.3,0.0,-22.42,0.0,-22.72,0.0,-25.27,0.0,-27.17,0.0,0.013885,0.0833,0.0,0.03105,19.4,27.27,0.0833,9.06,5.12,22.52,0.0,8.91,0.05957,26.45,20.69,0.06946,0.0,0.0833,-19.31,-27.17,0.0,-5.04,-22.42,0.0833,0.0,0.0,22.6,28.31,10.15,10.125,27.64,22.6,19.66,18.64,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.0,-28.23,-27.64,-22.6,-10.06,-19.66,-18.64
1,0.03366,18.39,0,5.906,4,0.0,0.03366,0.0,0.03366,0.03366,18.47,2,9.234,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.52,0.0,22.81,0.0,25.36,0.0,27.27,0.0,27.12,0.0,18.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.125,0.0,-4.426,0.0,-6.973,0.0,-8.875,0.0,-8.74,0.0,3.078,18.39,0.0,6.844,23.9,27.27,18.39,3.092,7.973,22.81,0.0,10.34,13.16,26.94,22.34,15.305,0.0,18.39,-5.523,-8.875,0.0,10.41,-4.426,18.39,0.6187,0.6187,22.6,28.31,10.15,10.125,27.64,22.6,19.66,18.64,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.01251,-9.93,-27.02,-21.97,8.234,-19.03,-18.03
2,0.0675,22.52,0,7.875,4,0.0,0.0675,0.0,0.03387,0.1012,40.97,3,13.66,18.39,0.0,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.81,0.0,25.36,0.0,27.27,0.0,27.12,0.0,26.81,0.0,4.125,0.0,22.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.2996,0.0,-2.846,0.0,-4.75,0.0,-4.617,0.0,-4.297,0.0,6.83,22.52,0.0,9.7,25.31,27.27,22.52,1.977,11.14,25.36,0.0,11.26,19.84,26.84,24.69,15.68,0.0,22.52,-2.803,-4.75,0.0,11.37,-2.846,22.52,1.381,1.381,22.6,28.31,10.15,10.125,27.64,22.6,19.66,18.64,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.0251,-5.805,-26.25,-21.22,12.36,-18.27,-17.27
3,0.10156,22.81,0,11.74,4,0.0,0.10156,0.0,0.03403,0.2028,63.78,4,15.945,22.52,0.0,18.39,0.0,0.0833,0.0,0.0,0.0,0.0,0.0,25.36,0.0,27.27,0.0,27.12,0.0,26.81,0.0,27.86,0.0,0.2996,0.0,4.426,0.0,22.72,0.0,0.0,0.0,0.0,0.0,-2.547,0.0,-4.45,0.0,-4.32,0.0,-3.998,0.0,-5.055,0.0,10.63,22.81,0.0,10.695,26.2,27.86,22.81,1.7,14.55,27.27,0.0,11.49,21.97,27.58,26.69,12.18,0.0,22.81,-3.395,-5.055,0.0,8.26,-4.45,22.81,2.156,2.156,22.6,28.31,10.15,10.125,27.64,22.6,19.66,18.64,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.03775,-5.504,-25.48,-20.44,12.664,-17.48,-16.48
4,0.1357,25.36,0,12.234,4,0.0,0.1357,0.0,0.0342,0.3384,89.1,5,17.83,22.81,0.0,22.52,0.0,18.39,0.0,0.0833,0.0,0.0,0.0,27.27,0.0,27.12,0.0,26.81,0.0,27.86,0.0,28.31,0.0,2.547,0.0,2.846,0.0,6.973,0.0,25.27,0.0,0.0,0.0,-1.904,0.0,-1.771,0.0,-1.452,0.0,-2.51,0.0,-2.957,0.0,14.86,25.36,0.0,10.67,27.12,28.31,25.36,0.9316,17.94,27.27,0.0,10.67,24.4,28.11,27.03,10.5,0.0,25.36,-1.766,-2.957,0.0,7.414,-1.904,25.36,3.025,3.025,22.6,28.31,10.15,10.125,27.64,22.6,19.66,18.64,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.05048,-2.957,-24.61,-19.56,15.21,-16.62,-15.625


In [7]:
# Load the cached test table and preview its first five rows.
test_csv = cachedir / "test-10fold-debugFalse.csv"
test = pd.read_csv(test_csv)
test.head(n=5)

Unnamed: 0,time_step,u_in,u_out,corss,cross2,cross3,time_delta,time_step_cumsum,u_in_cumsum,count,u_in_cummean,u_in_lag_b1,u_out_lag_b1,u_in_lag_b2,u_out_lag_b2,u_in_lag_b3,u_out_lag_b3,u_in_lag_b4,u_out_lag_b4,u_in_lag_b5,u_out_lag_b5,u_in_lag_f1,u_out_lag_f1,u_in_lag_f2,u_out_lag_f2,u_in_lag_f3,u_out_lag_f3,u_in_lag_f4,u_out_lag_f4,u_in_lag_f5,u_out_lag_f5,u_in_diff_b1,u_out_diff_b1,u_in_diff_b2,u_out_diff_b2,u_in_diff_b3,u_out_diff_b3,u_in_diff_b4,u_out_diff_b4,u_in_diff_b5,u_out_diff_b5,u_in_diff_f1,u_out_diff_f1,u_in_diff_f2,u_out_diff_f2,u_in_diff_f3,u_out_diff_f3,u_in_diff_f4,u_out_diff_f4,u_in_diff_f5,u_out_diff_f5,u_in_bwindow_mean,u_in_bwindow_max,u_in_bwindow_min,u_in_bwindow_std,u_in_fwindow_mean,u_in_fwindow_max,u_in_fwindow_min,u_in_fwindow_std,u_in_cwindow_mean,u_in_cwindow_max,u_in_cwindow_min,u_in_cwindow_std,u_in_bwindow_ewm,u_in_fwindow_ewm,u_in_cwindow_ewm,u_in_bwindow_mean_diff,u_in_bwindow_max_diff,u_in_bwindow_min_diff,u_in_fwindow_mean_diff,u_in_fwindow_max_diff,u_in_fwindow_min_diff,u_in_cwindow_mean_diff,u_in_cwindow_max_diff,u_in_cwindow_min_diff,area,area_insp,area_insp_last,u_in_max,u_in_mean,u_in_std,area_max,area_insp_max,area_mean,area_insp_mean,vibs,RC_20_10,RC_20_20,RC_20_50,RC_50_10,RC_50_20,RC_50_50,RC_5_10,RC_5_20,RC_5_50,R_20,R_5,R_50,C_10,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.516,0.0,14.65,0.0,21.23,0.0,26.33,0.0,30.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.516,0.0,-14.65,0.0,-21.23,0.0,-26.33,0.0,-30.48,0.0,0.0,0.0,0.0,0.0,16.7,30.48,0.0,10.57,2.771,14.65,0.0,5.12,0.0,28.69,12.695,0.0,0.0,0.0,-16.7,-30.48,0.0,-2.771,-14.65,0.0,0.0,0.0,19.48,37.53,9.33,12.18,23.81,19.48,17.48,16.69,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.0,-37.53,-23.81,-19.48,-9.33,-17.48,-16.69
1,0.0319,7.516,0,0.0,0.0319,0.0,0.0319,0.0319,7.516,2,3.758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.65,0.0,21.23,0.0,26.33,0.0,30.48,0.0,33.53,0.0,7.516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.137,0.0,-13.72,0.0,-18.81,0.0,-22.97,0.0,-26.03,0.0,1.253,7.516,0.0,2.8,22.3,33.53,7.516,9.016,5.426,21.23,0.0,7.797,5.37,32.16,19.33,6.26,0.0,7.516,-14.77,-26.03,0.0,2.09,-13.72,7.516,0.2397,0.2397,19.48,37.53,9.33,12.18,23.81,19.48,17.48,16.69,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.012665,-30.03,-23.58,-19.25,-1.8125,-17.25,-16.45
2,0.06384,14.65,0,0.0,0.06384,0.0,0.03192,0.0957,22.17,3,7.39,7.516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.23,0.0,26.33,0.0,30.48,0.0,33.53,0.0,35.72,0.0,7.137,0.0,14.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-6.58,0.0,-11.67,0.0,-15.836,0.0,-18.89,0.0,-21.06,0.0,3.695,14.65,0.0,5.617,27.0,35.72,14.65,7.27,8.71,26.33,0.0,10.05,12.01,34.72,24.77,10.96,0.0,14.65,-12.34,-21.06,0.0,5.938,-11.67,14.65,0.7075,0.7075,19.48,37.53,9.33,12.18,23.81,19.48,17.48,16.69,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.02533,-22.89,-23.11,-18.78,5.324,-16.78,-15.984
3,0.09576,21.23,0,0.0,0.09576,0.0,0.03192,0.1915,43.4,4,10.85,14.65,0.0,7.516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.33,0.0,30.48,0.0,33.53,0.0,35.72,0.0,36.97,0.0,6.58,0.0,13.72,0.0,21.23,0.0,0.0,0.0,0.0,0.0,-5.09,0.0,-9.26,0.0,-12.31,0.0,-14.484,0.0,-15.74,0.0,7.234,21.23,0.0,8.25,30.72,36.97,21.23,5.5,12.52,30.48,0.0,11.664,18.61,36.34,29.22,14.0,0.0,21.23,-9.484,-15.74,0.0,8.7,-9.26,21.23,1.385,1.385,19.48,37.53,9.33,12.18,23.81,19.48,17.48,16.69,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.038,-16.31,-22.42,-18.11,11.91,-16.11,-15.305
4,0.1277,26.33,0,0.0,0.1277,0.0,0.0319,0.319,69.7,5,13.945,21.23,0.0,14.65,0.0,7.516,0.0,0.0,0.0,0.0,0.0,30.48,0.0,33.53,0.0,35.72,0.0,36.97,0.0,37.53,0.0,5.09,0.0,11.67,0.0,18.81,0.0,26.33,0.0,0.0,0.0,-4.164,0.0,-7.227,0.0,-9.4,0.0,-10.65,0.0,-11.22,0.0,11.62,26.33,0.0,10.04,33.44,37.53,26.33,3.955,16.72,33.53,0.0,12.414,24.12,37.22,32.6,14.7,0.0,26.33,-7.11,-11.22,0.0,9.6,-7.227,26.33,2.225,2.225,19.48,37.53,9.33,12.18,23.81,19.48,17.48,16.69,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.05066,-11.22,-21.58,-17.27,17.0,-15.266,-14.47


In [18]:
# Keep only the rows whose `count` value is at most 35 in both frames and
# renumber the index from zero.
# NOTE(review): `count` looks like the 1-based row position within a
# sequence (1, 2, 3, ... in the preview) -- confirm against the feature code.
max_count = 35
train = train.loc[train["count"].le(max_count)].reset_index(drop=True)
test = test.loc[test["count"].le(max_count)].reset_index(drop=True)

In [13]:
# Column groups used below:
# - rc_cols: every training column whose name contains "RC_"
# - r_cols / c_cols: the individual R_* and C_* indicator columns
rc_cols = list(filter(lambda col_name: "RC_" in col_name, train.columns))
r_cols = [
    "R_20",
    "R_5",
    "R_50",
]
c_cols = [
    "C_10",
    "C_20",
    "C_50",
]

In [28]:
def add_soft_rc(frame, preds, cols, seq_len=35):
    """Add per-sequence predictions onto ``cols`` of ``frame`` in place.

    Every row of ``preds`` is repeated ``seq_len`` times consecutively and
    the resulting array is added element-wise to ``frame[cols]``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that is modified in place. It must contain ``cols`` and have
        exactly ``len(preds) * seq_len`` rows.
    preds : pandas.DataFrame or array-like
        2-D predictions with one column per entry of ``cols``.
        NOTE(review): assumes the rows follow the same order as the
        sequences in ``frame`` and the columns follow the order of ``cols``
        -- confirm against the run that wrote the prediction CSVs.
    cols : list of str
        Names of the columns in ``frame`` that receive the predictions.
    seq_len : int, default 35
        Number of consecutive rows in ``frame`` that share one prediction
        row. The default matches the ``count <= 35`` cut applied earlier in
        this notebook.

    Raises
    ------
    ValueError
        If ``preds`` is not 2-D, its column count differs from ``len(cols)``,
        or its row count times ``seq_len`` differs from ``len(frame)``.
    """
    pred_values = np.asarray(preds)

    # Check the shape explicitly: pandas would silently broadcast a
    # single-column array across every column in `cols`, and other
    # mismatches only surface as an opaque coercion error.
    if pred_values.ndim != 2 or pred_values.shape[1] != len(cols):
        raise ValueError(
            f"predictions must be 2-D with {len(cols)} columns, "
            f"got shape {pred_values.shape}"
        )
    if pred_values.shape[0] * seq_len != len(frame):
        raise ValueError(
            f"{pred_values.shape[0]} prediction rows x {seq_len} steps "
            f"does not match the {len(frame)} rows of the frame"
        )

    # Row i of the predictions fills rows [i * seq_len, (i + 1) * seq_len).
    frame[cols] += np.repeat(pred_values, seq_len, axis=0)


# WARNING: this cell is not idempotent -- running it a second time adds the
# predictions again. Reload `train` / `test` before re-running it.
add_soft_rc(train, rc_preds, rc_cols)
add_soft_rc(test, rc_test_preds, rc_cols)

In [30]:
# Write the training frame to the cache twice: the complete table, and a
# debug file holding only its first 1000 * 35 rows.
debug_rows = 1000 * 35
train.to_csv(
    cachedir / "train-10fold-cut-soft-rc-debugFalse.csv",
    index=False,
)
train.head(debug_rows).to_csv(
    cachedir / "train-10fold-cut-soft-rc-debugTrue.csv",
    index=False,
)

In [31]:
# Write the test frame to the cache twice: the complete table, and a debug
# file holding only its first 1000 * 35 rows.
debug_rows = 1000 * 35
test.to_csv(
    cachedir / "test-10fold-cut-soft-rc-debugFalse.csv",
    index=False,
)
test.head(debug_rows).to_csv(
    cachedir / "test-10fold-cut-soft-rc-debugTrue.csv",
    index=False,
)

In [36]:
# Same export as the soft-RC training files, but without the R_* / C_*
# columns. The reduced frame is built once and reused for both the full
# file and the debug file (first 1000 * 35 rows).
train_only_soft_rc = train.drop(columns=r_cols + c_cols)
train_only_soft_rc.to_csv(
    cachedir / "train-10fold-cut-only-soft-rc-debugFalse.csv",
    index=False,
)
train_only_soft_rc.head(1000 * 35).to_csv(
    cachedir / "train-10fold-cut-only-soft-rc-debugTrue.csv",
    index=False,
)
del train_only_soft_rc  # release the temporary copy

In [37]:
# Same export as the soft-RC test files, but without the R_* / C_* columns.
# The reduced frame is built once and reused for both the full file and the
# debug file (first 1000 * 35 rows).
test_only_soft_rc = test.drop(columns=r_cols + c_cols)
test_only_soft_rc.to_csv(
    cachedir / "test-10fold-cut-only-soft-rc-debugFalse.csv",
    index=False,
)
test_only_soft_rc.head(1000 * 35).to_csv(
    cachedir / "test-10fold-cut-only-soft-rc-debugTrue.csv",
    index=False,
)
del test_only_soft_rc  # release the temporary copy