In [1]:
from random import seed
import numpy as np
import pandas as pd
import json
import os
import sys
import gc
import shutil
from pprint import pprint
from pathlib import Path

# NOTE(review): `sys` is already imported a few lines above; this repeat is redundant.
import sys

# Make the parent of the current working directory importable; presumably that
# is the repository root containing the `src` package used below.
print(str(Path().resolve().parent))
sys.path.append(str(Path().resolve().parent))

# Project-local helpers. This import must stay after the sys.path tweak above.
from src.utils import (
    seed_every_thing,
    fetch_data,
    Config,
    plot_metric,
    reduce_tf_gpu_memory,
    reduce_mem_usage,
    fetch_custom_data
)

# Let pandas display up to 100 rows / 100 columns before truncating.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

/home/yamaguchi-milkcocholate/GoogleBrain-VentilatorPressurePrediction


In [3]:
# Resolve the project root once; the data and cache directories both live under it.
project_root = Path().resolve().parent
datadir = project_root / 'data'
cachedir = project_root / "cache"
# Later cells write CSV files into cachedir, so make sure it exists up front
# instead of failing after the expensive feature computation.
cachedir.mkdir(parents=True, exist_ok=True)
train_df, test_df, submission_df = fetch_custom_data(datadir=datadir, n_splits=10)

fetching data ...


  mask |= (ar1 == a)


done.


In [4]:
def _add_features(df_):
    df = df_.copy()
    df["R"] = df["R"].astype(str)
    df["C"] = df["C"].astype(str)
    df["RC"] = df["R"] + "_" + df["C"]
    df["tmp0"] = 1
    
    df["corss"] = df["u_in"] * df["u_out"]
    df["cross2"] = df["time_step"] * (1 - df["u_out"])
    df["cross3"] = df["time_step"] * df["u_out"]

    grp_by = df.groupby("breath_id")
    df["time_delta"] = grp_by["time_step"].diff(1).fillna(0.)
    df["time_step_cumsum"] = grp_by["time_step"].cumsum()
    df["u_in_cumsum"] = grp_by["u_in"].cumsum()
    df["count"] = grp_by["tmp0"].cumsum()
    
    df["u_in_cummean"] = df["u_in_cumsum"] / df["count"]

    # lag
    for n_lag in range(1, 6):
        df[f"u_in_lag_b{n_lag}"] = grp_by["u_in"].shift(n_lag).fillna(0.)
        df[f"u_out_lag_b{n_lag}"] = grp_by["u_out"].shift(n_lag).fillna(0.)
    for n_lag in range(1, 6):
        df[f"u_in_lag_f{n_lag}"] = grp_by["u_in"].shift(-n_lag).fillna(0.)
        df[f"u_out_lag_f{n_lag}"] = grp_by["u_out"].shift(-n_lag).fillna(0.)
    
    # diff
    for n_diff in range(1, 6):
        df[f"u_in_diff_b{n_diff}"] = grp_by["u_in"].diff(n_diff).fillna(0.)
        df[f"u_out_diff_b{n_diff}"] = grp_by["u_out"].diff(n_diff).fillna(0.)
    for n_diff in range(1, 6):
        df[f"u_in_diff_f{n_diff}"] = grp_by["u_in"].diff(-n_diff).fillna(0.)
        df[f"u_out_diff_f{n_diff}"] = grp_by["u_out"].diff(-n_diff).fillna(0.)
    
    # window
    cols_list = (
        ["u_in"] + [f"u_in_lag_b{n_lag}" for n_lag in range(1, 6)],  # back
        list(reversed([f"u_in_lag_f{n_lag}" for n_lag in range(1, 6)]))
        + ["u_in"],  # front
        list(reversed([f"u_in_lag_f{n_lag}" for n_lag in range(1, 3)]))
        + ["u_in"]
        + [f"u_in_lag_b{n_lag}" for n_lag in range(1, 6)],  # center
    )
    for cols, prefix in zip(cols_list, ("b", "f", "c")):
        for lam in ["mean", "max", "min", "std"]:
            df[f"u_in_{prefix}window_{lam}"] = getattr(np, lam)(df[cols].values, axis=1)
            
    weights1 = np.array([(2 / (len(cols_list[0]) + 1)) ** (i + 1) for i in range(len(cols_list[0]))])
    weights1 /= np.sum(weights1)
    weights2 = np.array([(2 / (len(cols_list[-1]) + 1)) ** (i + 1) for i in range(len(cols_list[-1]))])
    weights2 /= np.sum(weights2)
    for cols, weights, prefix in zip(cols_list, (weights1, weights1, weights2), ("b", "f", "c")):
        df[f"u_in_{prefix}window_ewm"] = np.dot(df[cols].values, weights)

    # window x u_in
    for prefix in ("b", "f", "c"):
        for lam in ["mean", "max", "min"]:
            df[f"u_in_{prefix}window_{lam}_diff"] = (
                df["u_in"] - df[f"u_in_{prefix}window_{lam}"]
            )

    df["u_in_diff_sign"] = np.sign(df["u_in_diff_b1"])

    df["tmp1"] = df["time_delta"] * df["u_in"]
    df["tmp2"] = df["time_delta"] * ((1 - df["u_out"]) * df["u_in"])

    grp_by = df.groupby("breath_id")
    df["u_in_diff_change"] = (
        np.sign(grp_by["u_in_diff_sign"].diff(1).fillna(0)) != 0
    ).astype(int)
    df["area"] = grp_by["tmp1"].cumsum()
    df["area_insp"] = grp_by["tmp2"].cumsum()

    df.drop(["tmp0", "tmp1", "tmp2"], axis=1, inplace=True)
    return df


def calc_stats(df_):
    """Compute one row of summary statistics per breath.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Row-level frame with the columns ``breath_id``, ``RC``, ``R``, ``C``,
        ``time_step``, ``u_in``, ``area``, ``area_insp`` and
        ``u_in_diff_change``.  Rows of a breath are expected to be in time
        order, because the "last" values are read from each breath's final row.

    Returns
    -------
    pandas.DataFrame
        One row per breath, indexed by the label of that breath's first row.
        Contains ``breath_id``, ``area_insp_last``, ``total_time``,
        ``u_in_{max,mean,std}``, ``area_{max,mean}``,
        ``area_insp_{max,mean}``, ``vibs`` and the one-hot columns that
        ``pd.get_dummies`` derives from ``RC``, ``R`` and ``C``.

    Notes
    -----
    The first/last row of each breath is located through the ``breath_id``
    grouping rather than through a fixed stride of 80 rows, so the function
    no longer depends on a zero-based index or on every breath having exactly
    80 rows.

    ``pd.get_dummies`` only emits columns for category values that are present
    in ``df_``, so the set of one-hot columns can differ between subsets.
    """
    grp_by = df_.groupby("breath_id")

    # First row of every breath (original index labels are kept) ...
    first_df = grp_by.head(1)
    # ... and last row of every breath, keyed by breath_id for lookups.
    last_df = grp_by.tail(1).set_index("breath_id")

    df = pd.DataFrame(
        {
            "breath_id": first_df["breath_id"].values,
            "RC": first_df["RC"].values,
            "R": first_df["R"].values,
            "C": first_df["C"].values,
        },
        index=first_df.index,
    )
    df["area_insp_last"] = df["breath_id"].map(last_df["area_insp"])
    df["total_time"] = df["breath_id"].map(last_df["time_step"])

    for lam in ["max", "mean", "std"]:
        df[f"u_in_{lam}"] = df["breath_id"].map(getattr(grp_by["u_in"], lam)())

    for lam in ["max", "mean"]:
        df[f"area_{lam}"] = df["breath_id"].map(getattr(grp_by["area"], lam)())
        df[f"area_insp_{lam}"] = df["breath_id"].map(
            getattr(grp_by["area_insp"], lam)()
        )

    # Number of rows per breath flagged in u_in_diff_change.
    df["vibs"] = df["breath_id"].map(grp_by["u_in_diff_change"].sum())

    # One-hot encode the string columns (RC, R, C); numeric columns pass through.
    return pd.get_dummies(df)


def add_features(df_):
    """Build the full feature table for ``df_``.

    Combines the row-level features from ``_add_features`` with the per-breath
    statistics from ``calc_stats`` (broadcast back to every row of the
    breath), derives features relative to those statistics, drops the
    identifier / helper columns and returns the result of
    ``reduce_mem_usage``.

    ``df_`` is not modified.  It must contain an ``id`` column in addition to
    the columns required by ``_add_features``.
    """
    # _add_features already works on its own copy of df_, so a second
    # defensive copy here would only duplicate a frame that can be several GB.
    df = _add_features(df_)

    df_stats = calc_stats(df).set_index("breath_id")
    # Broadcast every per-breath statistic to all rows of that breath.
    for c in df_stats.columns:
        df[c] = df["breath_id"].map(df_stats[c])

    # Time within the breath, scaled by the time_step of the breath's last row.
    df["norm_time_step"] = df["time_step"] / df["total_time"]
    df.drop(["total_time"], axis=1, inplace=True)

    # Offsets of the current row from the breath-level max / mean.
    for lam in ["max", "mean"]:
        df[f"u_in_{lam}_diff"] = df["u_in"] - df[f"u_in_{lam}"]
        df[f"area_{lam}_diff"] = df["area"] - df[f"area_{lam}"]
        df[f"area_insp_{lam}_diff"] = df["area_insp"] - df[f"area_insp_{lam}"]

    # Identifiers, raw categoricals (their one-hot columns remain) and
    # intermediate helper columns are not part of the final feature table.
    df.drop(
        ["id", "RC", "R", "C", "breath_id", "u_in_diff_sign", "u_in_diff_change"],
        axis=1,
        inplace=True,
    )

    return reduce_mem_usage(df)

In [5]:
# Quick smoke test on the first 80 * 10 rows before the expensive full run
# (10 breaths if every breath has 80 rows, as calc_stats assumes — TODO confirm).
# The column count can be lower than on the full data because pd.get_dummies in
# calc_stats only emits one-hot columns for R/C values present in the slice.
train_ = add_features(train_df.iloc[:80 * 10])
print(train_.shape)
train_.head()

Memory usage of dataframe is 0.68 MB
Memory usage after optimization is: 0.19 MB
Decreased by 72.4%
(800, 105)


Unnamed: 0,time_step,u_in,u_out,pressure,kfold,corss,cross2,cross3,time_delta,time_step_cumsum,u_in_cumsum,count,u_in_cummean,u_in_lag_b1,u_out_lag_b1,u_in_lag_b2,u_out_lag_b2,u_in_lag_b3,u_out_lag_b3,u_in_lag_b4,u_out_lag_b4,u_in_lag_b5,u_out_lag_b5,u_in_lag_f1,u_out_lag_f1,u_in_lag_f2,u_out_lag_f2,u_in_lag_f3,u_out_lag_f3,u_in_lag_f4,u_out_lag_f4,u_in_lag_f5,u_out_lag_f5,u_in_diff_b1,u_out_diff_b1,u_in_diff_b2,u_out_diff_b2,u_in_diff_b3,u_out_diff_b3,u_in_diff_b4,u_out_diff_b4,u_in_diff_b5,u_out_diff_b5,u_in_diff_f1,u_out_diff_f1,u_in_diff_f2,u_out_diff_f2,u_in_diff_f3,u_out_diff_f3,u_in_diff_f4,...,u_in_bwindow_min,u_in_bwindow_std,u_in_fwindow_mean,u_in_fwindow_max,u_in_fwindow_min,u_in_fwindow_std,u_in_cwindow_mean,u_in_cwindow_max,u_in_cwindow_min,u_in_cwindow_std,u_in_bwindow_ewm,u_in_fwindow_ewm,u_in_cwindow_ewm,u_in_bwindow_mean_diff,u_in_bwindow_max_diff,u_in_bwindow_min_diff,u_in_fwindow_mean_diff,u_in_fwindow_max_diff,u_in_fwindow_min_diff,u_in_cwindow_mean_diff,u_in_cwindow_max_diff,u_in_cwindow_min_diff,area,area_insp,area_insp_last,u_in_max,u_in_mean,u_in_std,area_max,area_insp_max,area_mean,area_insp_mean,vibs,RC_20_20,RC_20_50,RC_50_20,RC_50_50,RC_5_50,R_20,R_5,R_50,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.083313,0,5.835938,4,0.0,0.0,0.0,0.0,0.0,0.083313,1,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.390625,0.0,22.515625,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-18.296875,0.0,-22.421875,0.0,-22.71875,0.0,-25.265625,...,0.0,0.031052,19.40625,27.265625,0.083313,9.0625,5.121094,22.515625,0.0,8.90625,0.05957,26.453125,20.6875,0.069458,0.0,0.083313,-19.3125,-27.171875,0.0,-5.039062,-22.421875,0.083313,0.0,0.0,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,1,0,0,0,1,0,0,0,1,0.0,-28.234375,-27.640625,-22.59375,-10.0625,-19.65625,-18.640625
1,0.033661,18.390625,0,5.90625,4,0.0,0.033661,0.0,0.033661,0.033661,18.46875,2,9.234375,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.515625,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,18.296875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.125,0.0,-4.425781,0.0,-6.972656,0.0,-8.875,...,0.0,6.84375,23.90625,27.265625,18.390625,3.091797,7.972656,22.8125,0.0,10.34375,13.15625,26.9375,22.34375,15.304688,0.0,18.390625,-5.523438,-8.875,0.0,10.40625,-4.425781,18.390625,0.618652,0.618652,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,1,0,0,0,1,0,0,0,1,0.012512,-9.929688,-27.015625,-21.96875,8.234375,-19.03125,-18.03125
2,0.067505,22.515625,0,7.875,4,0.0,0.067505,0.0,0.033875,0.101196,40.96875,3,13.65625,18.390625,0.0,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,4.125,0.0,22.421875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.299561,0.0,-2.845703,0.0,-4.75,0.0,-4.617188,...,0.0,9.703125,25.3125,27.265625,22.515625,1.976562,11.140625,25.359375,0.0,11.257812,19.84375,26.84375,24.6875,15.679688,0.0,22.515625,-2.802734,-4.75,0.0,11.367188,-2.845703,22.515625,1.380859,1.380859,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,1,0,0,0,1,0,0,0,1,0.025101,-5.804688,-26.25,-21.21875,12.359375,-18.265625,-17.265625
3,0.101562,22.8125,0,11.742188,4,0.0,0.101562,0.0,0.034027,0.202759,63.78125,4,15.945312,22.515625,0.0,18.390625,0.0,0.083313,0.0,0.0,0.0,0.0,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,27.859375,0.0,0.299561,0.0,4.425781,0.0,22.71875,0.0,0.0,0.0,0.0,0.0,-2.546875,0.0,-4.449219,0.0,-4.320312,0.0,-3.998047,...,0.0,10.695312,26.203125,27.859375,22.8125,1.700195,14.546875,27.265625,0.0,11.492188,21.96875,27.578125,26.6875,12.179688,0.0,22.8125,-3.394531,-5.054688,0.0,8.257812,-4.449219,22.8125,2.15625,2.15625,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,1,0,0,0,1,0,0,0,1,0.03775,-5.503906,-25.484375,-20.4375,12.664062,-17.484375,-16.484375
4,0.135742,25.359375,0,12.234375,4,0.0,0.135742,0.0,0.03421,0.338379,89.125,5,17.828125,22.8125,0.0,22.515625,0.0,18.390625,0.0,0.083313,0.0,0.0,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,27.859375,0.0,28.3125,0.0,2.546875,0.0,2.845703,0.0,6.972656,0.0,25.265625,0.0,0.0,0.0,-1.904297,0.0,-1.771484,0.0,-1.452148,0.0,-2.509766,...,0.0,10.671875,27.125,28.3125,25.359375,0.931641,17.9375,27.265625,0.0,10.671875,24.40625,28.109375,27.03125,10.5,0.0,25.359375,-1.765625,-2.957031,0.0,7.414062,-1.904297,25.359375,3.025391,3.025391,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,1,0,0,0,1,0,0,0,1,0.050476,-2.957031,-24.609375,-19.5625,15.210938,-16.625,-15.625


In [6]:
# Build the feature table for the complete training set.
train = add_features(train_df)
print(train.shape)
train.head()

Memory usage of dataframe is 5111.66 MB
Memory usage after optimization is: 1203.08 MB
Decreased by 76.5%
(6036000, 110)


Unnamed: 0,time_step,u_in,u_out,pressure,kfold,corss,cross2,cross3,time_delta,time_step_cumsum,u_in_cumsum,count,u_in_cummean,u_in_lag_b1,u_out_lag_b1,u_in_lag_b2,u_out_lag_b2,u_in_lag_b3,u_out_lag_b3,u_in_lag_b4,u_out_lag_b4,u_in_lag_b5,u_out_lag_b5,u_in_lag_f1,u_out_lag_f1,u_in_lag_f2,u_out_lag_f2,u_in_lag_f3,u_out_lag_f3,u_in_lag_f4,u_out_lag_f4,u_in_lag_f5,u_out_lag_f5,u_in_diff_b1,u_out_diff_b1,u_in_diff_b2,u_out_diff_b2,u_in_diff_b3,u_out_diff_b3,u_in_diff_b4,u_out_diff_b4,u_in_diff_b5,u_out_diff_b5,u_in_diff_f1,u_out_diff_f1,u_in_diff_f2,u_out_diff_f2,u_in_diff_f3,u_out_diff_f3,u_in_diff_f4,...,u_in_fwindow_std,u_in_cwindow_mean,u_in_cwindow_max,u_in_cwindow_min,u_in_cwindow_std,u_in_bwindow_ewm,u_in_fwindow_ewm,u_in_cwindow_ewm,u_in_bwindow_mean_diff,u_in_bwindow_max_diff,u_in_bwindow_min_diff,u_in_fwindow_mean_diff,u_in_fwindow_max_diff,u_in_fwindow_min_diff,u_in_cwindow_mean_diff,u_in_cwindow_max_diff,u_in_cwindow_min_diff,area,area_insp,area_insp_last,u_in_max,u_in_mean,u_in_std,area_max,area_insp_max,area_mean,area_insp_mean,vibs,RC_20_10,RC_20_20,RC_20_50,RC_50_10,RC_50_20,RC_50_50,RC_5_10,RC_5_20,RC_5_50,R_20,R_5,R_50,C_10,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.083313,0,5.835938,4,0.0,0.0,0.0,0.0,0.0,0.083313,1,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.390625,0.0,22.515625,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-18.296875,0.0,-22.421875,0.0,-22.71875,0.0,-25.265625,...,9.0625,5.121094,22.515625,0.0,8.90625,0.05957,26.453125,20.6875,0.069458,0.0,0.083313,-19.3125,-27.171875,0.0,-5.039062,-22.421875,0.083313,0.0,0.0,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.0,-28.234375,-27.640625,-22.59375,-10.0625,-19.65625,-18.640625
1,0.033661,18.390625,0,5.90625,4,0.0,0.033661,0.0,0.033661,0.033661,18.46875,2,9.234375,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.515625,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,18.296875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.125,0.0,-4.425781,0.0,-6.972656,0.0,-8.875,...,3.091797,7.972656,22.8125,0.0,10.34375,13.15625,26.9375,22.34375,15.304688,0.0,18.390625,-5.523438,-8.875,0.0,10.40625,-4.425781,18.390625,0.618652,0.618652,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.012512,-9.929688,-27.015625,-21.96875,8.234375,-19.03125,-18.03125
2,0.067505,22.515625,0,7.875,4,0.0,0.067505,0.0,0.033875,0.101196,40.96875,3,13.65625,18.390625,0.0,0.083313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.8125,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,4.125,0.0,22.421875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.299561,0.0,-2.845703,0.0,-4.75,0.0,-4.617188,...,1.976562,11.140625,25.359375,0.0,11.257812,19.84375,26.84375,24.6875,15.679688,0.0,22.515625,-2.802734,-4.75,0.0,11.367188,-2.845703,22.515625,1.380859,1.380859,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.025101,-5.804688,-26.25,-21.21875,12.359375,-18.265625,-17.265625
3,0.101562,22.8125,0,11.742188,4,0.0,0.101562,0.0,0.034027,0.202759,63.78125,4,15.945312,22.515625,0.0,18.390625,0.0,0.083313,0.0,0.0,0.0,0.0,0.0,25.359375,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,27.859375,0.0,0.299561,0.0,4.425781,0.0,22.71875,0.0,0.0,0.0,0.0,0.0,-2.546875,0.0,-4.449219,0.0,-4.320312,0.0,-3.998047,...,1.700195,14.546875,27.265625,0.0,11.492188,21.96875,27.578125,26.6875,12.179688,0.0,22.8125,-3.394531,-5.054688,0.0,8.257812,-4.449219,22.8125,2.15625,2.15625,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.03775,-5.503906,-25.484375,-20.4375,12.664062,-17.484375,-16.484375
4,0.135742,25.359375,0,12.234375,4,0.0,0.135742,0.0,0.03421,0.338379,89.125,5,17.828125,22.8125,0.0,22.515625,0.0,18.390625,0.0,0.083313,0.0,0.0,0.0,27.265625,0.0,27.125,0.0,26.8125,0.0,27.859375,0.0,28.3125,0.0,2.546875,0.0,2.845703,0.0,6.972656,0.0,25.265625,0.0,0.0,0.0,-1.904297,0.0,-1.771484,0.0,-1.452148,0.0,-2.509766,...,0.931641,17.9375,27.265625,0.0,10.671875,24.40625,28.109375,27.03125,10.5,0.0,25.359375,-1.765625,-2.957031,0.0,7.414062,-1.904297,25.359375,3.025391,3.025391,22.59375,28.3125,10.148438,10.125,27.640625,22.59375,19.65625,18.640625,14,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0.050476,-2.957031,-24.609375,-19.5625,15.210938,-16.625,-15.625


In [7]:
# Cache the full training features, plus a small debug file holding only the
# first 1000 * 80 rows.
train.to_csv(cachedir / "train-10fold-debugFalse.csv", index=False)
train.iloc[: 1000 * 80].to_csv(cachedir / "train-10fold-debugTrue.csv", index=False)

In [8]:
# Release the training feature table before the test set is processed.
del train
gc.collect()

18

In [9]:
# Build the feature table for the complete test set.
test = add_features(test_df)
print(test.shape)
test.head()

Memory usage of dataframe is 3315.67 MB
Memory usage after optimization is: 759.84 MB
Decreased by 77.1%
(4024000, 108)


Unnamed: 0,time_step,u_in,u_out,corss,cross2,cross3,time_delta,time_step_cumsum,u_in_cumsum,count,u_in_cummean,u_in_lag_b1,u_out_lag_b1,u_in_lag_b2,u_out_lag_b2,u_in_lag_b3,u_out_lag_b3,u_in_lag_b4,u_out_lag_b4,u_in_lag_b5,u_out_lag_b5,u_in_lag_f1,u_out_lag_f1,u_in_lag_f2,u_out_lag_f2,u_in_lag_f3,u_out_lag_f3,u_in_lag_f4,u_out_lag_f4,u_in_lag_f5,u_out_lag_f5,u_in_diff_b1,u_out_diff_b1,u_in_diff_b2,u_out_diff_b2,u_in_diff_b3,u_out_diff_b3,u_in_diff_b4,u_out_diff_b4,u_in_diff_b5,u_out_diff_b5,u_in_diff_f1,u_out_diff_f1,u_in_diff_f2,u_out_diff_f2,u_in_diff_f3,u_out_diff_f3,u_in_diff_f4,u_out_diff_f4,u_in_diff_f5,...,u_in_fwindow_std,u_in_cwindow_mean,u_in_cwindow_max,u_in_cwindow_min,u_in_cwindow_std,u_in_bwindow_ewm,u_in_fwindow_ewm,u_in_cwindow_ewm,u_in_bwindow_mean_diff,u_in_bwindow_max_diff,u_in_bwindow_min_diff,u_in_fwindow_mean_diff,u_in_fwindow_max_diff,u_in_fwindow_min_diff,u_in_cwindow_mean_diff,u_in_cwindow_max_diff,u_in_cwindow_min_diff,area,area_insp,area_insp_last,u_in_max,u_in_mean,u_in_std,area_max,area_insp_max,area_mean,area_insp_mean,vibs,RC_20_10,RC_20_20,RC_20_50,RC_50_10,RC_50_20,RC_50_50,RC_5_10,RC_5_20,RC_5_50,R_20,R_5,R_50,C_10,C_20,C_50,norm_time_step,u_in_max_diff,area_max_diff,area_insp_max_diff,u_in_mean_diff,area_mean_diff,area_insp_mean_diff
0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.515625,0.0,14.648438,0.0,21.234375,0.0,26.328125,0.0,30.484375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.515625,0.0,-14.648438,0.0,-21.234375,0.0,-26.328125,0.0,-30.484375,...,10.570312,2.771484,14.648438,0.0,5.121094,0.0,28.6875,12.695312,0.0,0.0,0.0,-16.703125,-30.484375,0.0,-2.771484,-14.648438,0.0,0.0,0.0,19.484375,37.53125,9.328125,12.179688,23.8125,19.484375,17.484375,16.6875,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.0,-37.53125,-23.8125,-19.484375,-9.328125,-17.484375,-16.6875
1,0.031891,7.515625,0,0.0,0.031891,0.0,0.031891,0.031891,7.515625,2,3.757812,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.648438,0.0,21.234375,0.0,26.328125,0.0,30.484375,0.0,33.53125,0.0,7.515625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.136719,0.0,-13.71875,0.0,-18.8125,0.0,-22.96875,0.0,-26.03125,...,9.015625,5.425781,21.234375,0.0,7.796875,5.371094,32.15625,19.328125,6.261719,0.0,7.515625,-14.773438,-26.03125,0.0,2.089844,-13.71875,7.515625,0.239746,0.239746,19.484375,37.53125,9.328125,12.179688,23.8125,19.484375,17.484375,16.6875,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.012665,-30.03125,-23.578125,-19.25,-1.8125,-17.25,-16.453125
2,0.063843,14.648438,0,0.0,0.063843,0.0,0.031921,0.095703,22.171875,3,7.390625,7.515625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.234375,0.0,26.328125,0.0,30.484375,0.0,33.53125,0.0,35.71875,0.0,7.136719,0.0,14.648438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-6.578125,0.0,-11.671875,0.0,-15.835938,0.0,-18.890625,0.0,-21.0625,...,7.269531,8.710938,26.328125,0.0,10.046875,12.007812,34.71875,24.765625,10.960938,0.0,14.648438,-12.34375,-21.0625,0.0,5.9375,-11.671875,14.648438,0.70752,0.70752,19.484375,37.53125,9.328125,12.179688,23.8125,19.484375,17.484375,16.6875,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.02533,-22.890625,-23.109375,-18.78125,5.324219,-16.78125,-15.984375
3,0.095764,21.234375,0,0.0,0.095764,0.0,0.031921,0.191528,43.40625,4,10.851562,14.648438,0.0,7.515625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.328125,0.0,30.484375,0.0,33.53125,0.0,35.71875,0.0,36.96875,0.0,6.578125,0.0,13.71875,0.0,21.234375,0.0,0.0,0.0,0.0,0.0,-5.089844,0.0,-9.257812,0.0,-12.3125,0.0,-14.484375,0.0,-15.742188,...,5.5,12.523438,30.484375,0.0,11.664062,18.609375,36.34375,29.21875,14.0,0.0,21.234375,-9.484375,-15.742188,0.0,8.703125,-9.257812,21.234375,1.384766,1.384766,19.484375,37.53125,9.328125,12.179688,23.8125,19.484375,17.484375,16.6875,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.037994,-16.3125,-22.421875,-18.109375,11.90625,-16.109375,-15.304688
4,0.127686,26.328125,0,0.0,0.127686,0.0,0.031891,0.319092,69.6875,5,13.945312,21.234375,0.0,14.648438,0.0,7.515625,0.0,0.0,0.0,0.0,0.0,30.484375,0.0,33.53125,0.0,35.71875,0.0,36.96875,0.0,37.53125,0.0,5.089844,0.0,11.671875,0.0,18.8125,0.0,26.328125,0.0,0.0,0.0,-4.164062,0.0,-7.226562,0.0,-9.398438,0.0,-10.648438,0.0,-11.21875,...,3.955078,16.71875,33.53125,0.0,12.414062,24.125,37.21875,32.59375,14.703125,0.0,26.328125,-7.109375,-11.21875,0.0,9.601562,-7.226562,26.328125,2.224609,2.224609,19.484375,37.53125,9.328125,12.179688,23.8125,19.484375,17.484375,16.6875,4,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0.050659,-11.21875,-21.578125,-17.265625,17.0,-15.265625,-14.46875


In [10]:
# Cache the full test features, plus a small debug file holding only the
# first 1000 * 80 rows.
test.to_csv(cachedir / "test-10fold-debugFalse.csv", index=False)
test.iloc[: 1000 * 80].to_csv(cachedir / "test-10fold-debugTrue.csv", index=False)