In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from pathlib import Path
from datetime import datetime
import datetime

# from utils.eda import plot_corr_heatmap, desc_correlation

# Disable pandas' display truncation: with both limits set to None, every row
# and every column of a displayed DataFrame is rendered.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

## *Load Data*

In [2]:
RAW_DATA_PATH = Path("../data/raw/")

# Raw input tables. Only the two tables read with `parse_dates` have their
# `date` column converted to datetimes.
df_tr = pd.read_csv(RAW_DATA_PATH / "anomaly_train1.csv", parse_dates=["date"])
df_acch = pd.read_csv(RAW_DATA_PATH / "accumulation_hour1.csv", parse_dates=["date"])
df_cooler = pd.read_csv(RAW_DATA_PATH / "cooler.csv")
df_power = pd.read_csv(RAW_DATA_PATH / "power.csv")

# generate primary key "<oven_id>_<layer_id>"
# Vectorized string concatenation instead of a row-wise apply(axis=1): same
# values, far fewer Python-level calls, and it also works on an empty frame.
df_tr["oven_layer_id"] = df_tr["oven_id"] + "_" + df_tr["layer_id"].astype(str)
df_acch["oven_layer_id"] = df_acch["oven_id"] + "_" + df_acch["layer_id"].astype(str)

# split lamp ids: "26_49" -> ["26", "49"] (the elements stay strings here)
df_tr["lamp_ids"] = df_tr["lamp_id"].str.split("_")

# tables & names
dfs = [df_tr, df_acch, df_cooler, df_power]
names = ["df_tr", "df_acch", "df_cooler", "df_power"]

# Distinct oven ids, taken from the accumulation-hour table.
oven_ids = df_acch["oven_id"].unique().tolist()


In [4]:
# Preview the anomaly table, including the derived `oven_layer_id` and `lamp_ids` columns.
df_tr.head()

Unnamed: 0,date,oven_id,layer_id,lamp_id,anomaly_accumulation_hour,anomaly_total_number,oven_layer_id,lamp_ids
0,2021-12-27,1B0,5,26_49,5116,2,1B0_5,"[26, 49]"
1,2021-12-27,1C0,3,45_91,4699,2,1C0_3,"[45, 91]"
2,2021-12-27,1D0,14,64,3241,1,1D0_14,[64]
3,2021-12-27,1E0,1,96,4138,1,1E0_1,[96]
4,2021-12-27,1E0,8,51,3818,1,1E0_8,[51]


## *Extend DataFrame*

In [None]:
# Expand every anomaly record into one row per lamp.
# `lamp_ids` holds the list of lamp-id strings for a record; copying it into
# `lamp_id` and exploding that column yields one row per list element while
# every other column (including the original `lamp_ids` list) is repeated.
# The source row's index label is repeated as well, so exploded rows can be
# traced back to the record they came from.
# This replaces a Python-level iterrows loop whose loop variable `id`
# shadowed the builtin `id()` for the rest of the kernel session.
df_ex = (
    df_tr
    .assign(lamp_id=df_tr["lamp_ids"])
    .explode("lamp_id")
    .astype({"lamp_id": "int64"})  # list elements are strings; store integer lamp ids
)
df_ex.head()

Unnamed: 0,date,oven_id,layer_id,lamp_id,anomaly_accumulation_hour,anomaly_total_number,oven_layer_id,lamp_ids
0,2021-12-27,1B0,5,26,5116,2,1B0_5,"[26, 49]"
0,2021-12-27,1B0,5,49,5116,2,1B0_5,"[26, 49]"
1,2021-12-27,1C0,3,45,4699,2,1C0_3,"[45, 91]"
1,2021-12-27,1C0,3,91,4699,2,1C0_3,"[45, 91]"
2,2021-12-27,1D0,14,64,3241,1,1D0_14,[64]


In [6]:

# Candidate grid: one row for every (date, oven, layer, lamp) combination,
# with layer ids 1..19 and lamp ids 1..122, in date-major order.
new = [
    [date, oid, lid, lpid]
    for date in df_tr.date.unique()
    for oid in oven_ids
    for lid in range(1, 20)
    for lpid in range(1, 123)
]

grid_columns = ["date", "oven_id", "layer_id", "lamp_id"]
df_train = pd.DataFrame(new, columns=grid_columns)
display(df_train.head())
df_train.shape


Unnamed: 0,date,oven_id,layer_id,lamp_id
0,2021-12-27,1B0,1,1
1,2021-12-27,1B0,1,2
2,2021-12-27,1B0,1,3
3,2021-12-27,1B0,1,4
4,2021-12-27,1B0,1,5


(880840, 4)

In [7]:
# Show the column dtypes of the candidate grid and of the exploded anomaly
# table side by side, before they are merged on date/oven_id/layer_id/lamp_id.
df_train.dtypes, df_ex.dtypes

(date        datetime64[ns]
 oven_id             object
 layer_id             int64
 lamp_id              int64
 dtype: object,
 date                         datetime64[ns]
 oven_id                              object
 layer_id                              int64
 lamp_id                               int64
 anomaly_accumulation_hour             int64
 anomaly_total_number                  int64
 oven_layer_id                        object
 lamp_ids                             object
 dtype: object)

In [8]:
# Attach the anomaly records to the candidate grid on the full lamp key.
lamp_key = ["date", "oven_id", "layer_id", "lamp_id"]
anomaly_rows = df_ex[lamp_key + ["anomaly_accumulation_hour"]]
df_train = df_train.merge(anomaly_rows, how="outer", on=lamp_key)

# label = 1 where an anomaly record matched the row, 0 otherwise.
df_train["label"] = (~df_train["anomaly_accumulation_hour"].isna()).astype(int)
df_train.head(10)


Unnamed: 0,date,oven_id,layer_id,lamp_id,anomaly_accumulation_hour,label
0,2021-12-27,1B0,1,1,,0
1,2021-12-27,1B0,1,2,,0
2,2021-12-27,1B0,1,3,,0
3,2021-12-27,1B0,1,4,,0
4,2021-12-27,1B0,1,5,,0
5,2021-12-27,1B0,1,6,,0
6,2021-12-27,1B0,1,7,,0
7,2021-12-27,1B0,1,8,,0
8,2021-12-27,1B0,1,9,,0
9,2021-12-27,1B0,1,10,,0


In [9]:
# Peek at the two column selections that the following merge joins.
# Only the last expression of a cell is rendered, so just the df_tr preview is
# shown; the df_train preview on the first line is evaluated and discarded.
df_train[["date", "oven_id", "layer_id", "lamp_id", "label"]].head()
df_tr[["date", "oven_id", "layer_id", "anomaly_accumulation_hour"]].head()

Unnamed: 0,date,oven_id,layer_id,anomaly_accumulation_hour
0,2021-12-27,1B0,5,5116
1,2021-12-27,1C0,3,4699
2,2021-12-27,1D0,14,3241
3,2021-12-27,1E0,1,4138
4,2021-12-27,1E0,8,3818


In [10]:
# Extend anomaly_accumulation_hour to oven layer:
# the per-lamp value is dropped and the hour recorded in df_tr is re-attached
# by (date, oven_id, layer_id), so every lamp row of a matching layer gets it.
layer_key = ["date", "oven_id", "layer_id"]
lamp_labels = df_train[layer_key + ["lamp_id", "label"]]
layer_hours = df_tr[layer_key + ["anomaly_accumulation_hour"]]
df_train = lamp_labels.merge(layer_hours, how="outer", on=layer_key)
df_train.head()

Unnamed: 0,date,oven_id,layer_id,lamp_id,label,anomaly_accumulation_hour
0,2021-12-27,1B0,1,1,0,
1,2021-12-27,1B0,1,2,0,
2,2021-12-27,1B0,1,3,0,
3,2021-12-27,1B0,1,4,0,
4,2021-12-27,1B0,1,5,0,


In [11]:
# Parse each "lo-hi" bracket string once and store both ends as integers.
bracket_parts = [bracket.split("-") for bracket in df_power["accumulation_hour"]]
df_power["lower_bound"] = [int(parts[0]) for parts in bracket_parts]
df_power["upper_bound"] = [int(parts[1]) for parts in bracket_parts]


df_power.head()

Unnamed: 0,item,accumulation_hour,power_setup(other_lamp),power_setup(lamp_1_2_60_61_62_63_121_122),lower_bound,upper_bound
0,1,0-50,35.0,39.0,0,50
1,2,51-100,36.0,40.0,51,100
2,3,101-150,37.0,41.0,101,150
3,4,151-200,38.0,42.0,151,200
4,5,201-300,39.0,43.0,201,300


In [12]:
# Add feature: Power
s1 = set([1, 2, 60, 61, 62, 63, 121, 122])
s2 = set([i for i in range(1, 123)]).difference(s1)


In [13]:
# Lookup tables keyed by the "lo-hi" bracket string: one for the lamps in s1,
# one for all other lamps.
power_by_bracket = df_power.set_index("accumulation_hour")
hr2power_s1 = power_by_bracket["power_setup(lamp_1_2_60_61_62_63_121_122)"].to_dict()
hr2power_s2 = power_by_bracket["power_setup(other_lamp)"].to_dict()


In [14]:
# drop rows with NaN `anomaly_accumulation_hour`
# The explicit .copy() makes the filtered frame independent of the frame it
# was sliced from, so adding columns to it afterwards (e.g. `power`) does not
# raise SettingWithCopyWarning.
df_train = df_train[df_train["anomaly_accumulation_hour"].notna()].copy()
df_train.shape

(43066, 6)

In [15]:

# Look up the power setup of every row from its accumulated-hour bracket.
# Each bracket is (lower_bound, upper_bound, original "lo-hi" key). The
# original key string is reused for the dict lookup instead of being rebuilt
# from the parsed integers, so the lookup cannot miss because of formatting.
power_brackets = list(
    zip(df_power["lower_bound"], df_power["upper_bound"], df_power["accumulation_hour"])
)

power = []
for i, acch, lamp_id in zip(
    df_train.index, df_train["anomaly_accumulation_hour"], df_train["lamp_id"]
):
    # Default for a missing hour or an hour that falls in no bracket. Always
    # appending exactly one value per row keeps `power` aligned with df_train;
    # previously an unmatched hour appended nothing and the column assignment
    # below failed with a length mismatch.
    value = np.nan
    if pd.notna(acch):
        for lb, ub, xrange in power_brackets:
            if lb <= acch <= ub:
                # Lamps in s1 have their own power column; all others share one.
                lookup = hr2power_s1 if lamp_id in s1 else hr2power_s2
                value = lookup[xrange]
                break
        else:
            # No bracket contains this hour: report the row label and value.
            print(f"{i}: {acch}")
    power.append(value)

df_train["power"] = power

In [16]:
# Row count after the power column has been added.
len(df_train)

43066

In [17]:
# Count rows whose anomaly_accumulation_hour exceeds 9999.
# NOTE(review): 9999 is presumably the top of the last bracket in df_power — confirm.
(df_train["anomaly_accumulation_hour"] >9999).sum()

0

In [19]:

# Preview the first rows with the newly added `power` column.
df_train.head(10)

Unnamed: 0,date,oven_id,layer_id,lamp_id,label,anomaly_accumulation_hour,power
488,2021-12-27,1B0,5,1,0,5116.0,63.5
489,2021-12-27,1B0,5,2,0,5116.0,63.5
490,2021-12-27,1B0,5,3,0,5116.0,59.5
491,2021-12-27,1B0,5,4,0,5116.0,59.5
492,2021-12-27,1B0,5,5,0,5116.0,59.5
493,2021-12-27,1B0,5,6,0,5116.0,59.5
494,2021-12-27,1B0,5,7,0,5116.0,59.5
495,2021-12-27,1B0,5,8,0,5116.0,59.5
496,2021-12-27,1B0,5,9,0,5116.0,59.5
497,2021-12-27,1B0,5,10,0,5116.0,59.5


In [20]:
# Non-null count per column; a count below len(df_train) would reveal missing values.
df_train.notna().sum()

date                         43066
oven_id                      43066
layer_id                     43066
lamp_id                      43066
label                        43066
anomaly_accumulation_hour    43066
power                        43066
dtype: int64

In [21]:
# Reshape the cooler table from one row per feature to one row per oven.
# The first column of df_cooler holds the feature names and the remaining
# column headers become the `oven_id` values. Moving the feature-name column
# into the index *before* transposing keeps the measurements numeric;
# transposing with the string column still among the data columns would turn
# every feature column into `object` dtype.
feature_name_col = df_cooler.columns[0]
df_cooler_T = (
    df_cooler
    .set_index(feature_name_col)
    .T
    .reset_index()
    .rename(columns={"index": "oven_id"})  # oven2feat
)
df_cooler_T

cooler_id,oven_id,Slot1_water_volume,Slot2_water_volume,Slot3_water_volume,Slot4_water_volume,Slot5_water_volume,Slot6_water_volume,Slot7_water_volume,Slot8_water_volume,Slot9_water_volume,Slot10_water_volume,Slot11_water_volume,Slot12_water_volume,Slot13_water_volume,Slot14_water_volume,Slot15_water_volume,Slot16_water_volume,Slot17_water_volume,Slot18_water_volume,Slot19_water_volume,Slot20_water_volume,Slot1-10_in_temperature,Slot1-10_out_temperature,Slot11-20_in_temperature,Slot11-20_out_temperature,S01_A_temperature,S01_B_temperature,S02_A_temperature,S02_B_temperature,S03_A_temperature,S03_B_temperature,S04_A_temperature,S04_B_temperature,S05_A_temperature,S05_B_temperature,S06_A_temperature,S06_B_temperature,S07_A_temperature,S07_B_temperature,S08_A_temperature,S08_B_temperature,S09_A_temperature,S09_B_temperature,S10_A_temperature,S10_B_temperature,S11_A_temperature,S11_B_temperature,S12_A_temperature,S12_B_temperature,S13_A_temperature,S13_B_temperature,S14_A_temperature,S14_B_temperature,S15_A_temperature,S15_B_temperature,S16_A_temperature,S16_B_temperature,S17_A_temperature,S17_B_temperature,S18_A_temperature,S18_B_temperature,S19_A_temperature,S19_B_temperature,S20_A_temperature,S20_B_temperature
0,1B0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
1,1C0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,10.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,22.8,25.5,23.4,24.1,24.2,23.0,23.1,24.0,24.9,24.3,24.0,24.9,24.5,24.3,25.3,25.4,24.3,25.2,24.7,25.2,24.7,25.2,25.6,25.4,25.2,25.9,25.2,25.0,29.2,27.4,28.1,28.1,30.5,27.8,28.0,27.4,26.6,25.3,27.1,26.0,27.2,25.0,,
2,1D0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,22.6,23.5,19.5,23.6,23.8,23.8,22.9,23.6,24.0,24.4,24.1,24.8,25.6,24.4,25.0,25.0,25.2,25.6,25.7,25.0,24.5,24.4,27.4,26.2,26.3,26.6,27.4,26.4,27.5,26.7,26.5,26.7,26.6,27.3,25.4,26.0,27.1,26.9,25.9,26.9,27.5,26.8,,
3,1E0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,19.7,24.0,19.8,23.8,24.9,24.0,24.0,26.0,24.9,25.2,25.4,24.6,26.1,25.4,25.4,25.5,25.0,26.5,26.0,25.7,25.6,26.1,25.1,24.9,25.5,26.2,25.6,25.5,25.6,27.0,25.5,25.2,26.6,25.8,26.0,26.4,25.7,25.2,25.8,25.5,25.2,23.8,,
4,1G0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,21.0,27.1,22.0,30.1,26.6,25.9,25.4,25.7,24.8,25.7,25.7,25.0,25.6,26.0,26.3,25.9,26.1,27.7,27.1,28.2,28.0,28.7,29.5,28.8,29.3,28.4,29.4,28.2,28.9,29.9,29.3,27.2,28.5,27.5,27.9,26.7,28.7,29.3,29.5,26.9,29.5,27.0,,
5,2B0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.1,23.3,21.0,24.5,22.2,22.6,22.6,22.4,23.4,23.5,23.1,22.4,24.0,23.2,23.0,23.0,24.1,24.4,23.1,24.3,23.6,24.2,22.7,23.8,23.5,24.0,22.9,23.9,23.3,23.1,22.7,23.5,22.7,22.7,22.9,22.6,22.7,22.7,22.6,22.7,22.9,25.9,,
6,2C0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,19.5,24.4,19.5,25.1,24.3,25.3,23.5,24.6,24.4,24.1,23.6,24.6,23.9,24.5,24.2,24.6,24.3,24.6,24.1,24.7,24.9,24.4,25.0,25.0,25.0,24.9,25.1,25.2,25.2,25.3,25.2,24.5,25.0,24.9,24.7,24.6,25.0,24.9,24.9,24.5,23.6,24.5,,
7,2D0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,19.6,24.6,19.9,25.3,22.8,24.3,23.0,23.5,23.5,23.6,23.5,23.7,23.5,23.8,23.8,24.5,23.4,24.0,23.7,23.6,23.7,23.7,23.9,23.2,24.4,25.1,24.0,24.6,25.3,24.9,25.0,25.9,25.0,25.9,24.3,25.3,25.0,26.0,24.2,26.0,25.1,25.2,,
8,2E0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.9,23.7,21.1,25.1,22.3,24.2,21.6,22.9,23.2,23.3,22.5,24.0,24.1,23.3,22.9,23.8,24.2,23.4,23.6,24.2,23.7,23.3,23.9,23.8,24.1,23.1,24.5,23.3,24.2,23.3,24.3,23.8,24.1,24.3,24.4,24.6,23.8,24.9,23.7,24.5,24.4,24.3,,
9,2G0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,


In [22]:
# Attach the per-oven cooler features to every training row (outer join on oven_id).
df_train = df_train.merge(df_cooler_T, on="oven_id", how="outer")
df_train.head()

Unnamed: 0,date,oven_id,layer_id,lamp_id,label,anomaly_accumulation_hour,power,Slot1_water_volume,Slot2_water_volume,Slot3_water_volume,Slot4_water_volume,Slot5_water_volume,Slot6_water_volume,Slot7_water_volume,Slot8_water_volume,Slot9_water_volume,Slot10_water_volume,Slot11_water_volume,Slot12_water_volume,Slot13_water_volume,Slot14_water_volume,Slot15_water_volume,Slot16_water_volume,Slot17_water_volume,Slot18_water_volume,Slot19_water_volume,Slot20_water_volume,Slot1-10_in_temperature,Slot1-10_out_temperature,Slot11-20_in_temperature,Slot11-20_out_temperature,S01_A_temperature,S01_B_temperature,S02_A_temperature,S02_B_temperature,S03_A_temperature,S03_B_temperature,S04_A_temperature,S04_B_temperature,S05_A_temperature,S05_B_temperature,S06_A_temperature,S06_B_temperature,S07_A_temperature,S07_B_temperature,S08_A_temperature,S08_B_temperature,S09_A_temperature,S09_B_temperature,S10_A_temperature,S10_B_temperature,S11_A_temperature,S11_B_temperature,S12_A_temperature,S12_B_temperature,S13_A_temperature,S13_B_temperature,S14_A_temperature,S14_B_temperature,S15_A_temperature,S15_B_temperature,S16_A_temperature,S16_B_temperature,S17_A_temperature,S17_B_temperature,S18_A_temperature,S18_B_temperature,S19_A_temperature,S19_B_temperature,S20_A_temperature,S20_B_temperature
0,2021-12-27,1B0,5,1,0,5116.0,63.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
1,2021-12-27,1B0,5,2,0,5116.0,63.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
2,2021-12-27,1B0,5,3,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
3,2021-12-27,1B0,5,4,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
4,2021-12-27,1B0,5,5,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,


In [23]:
# Preview the training table with the cooler features attached.
df_train.head()

Unnamed: 0,date,oven_id,layer_id,lamp_id,label,anomaly_accumulation_hour,power,Slot1_water_volume,Slot2_water_volume,Slot3_water_volume,Slot4_water_volume,Slot5_water_volume,Slot6_water_volume,Slot7_water_volume,Slot8_water_volume,Slot9_water_volume,Slot10_water_volume,Slot11_water_volume,Slot12_water_volume,Slot13_water_volume,Slot14_water_volume,Slot15_water_volume,Slot16_water_volume,Slot17_water_volume,Slot18_water_volume,Slot19_water_volume,Slot20_water_volume,Slot1-10_in_temperature,Slot1-10_out_temperature,Slot11-20_in_temperature,Slot11-20_out_temperature,S01_A_temperature,S01_B_temperature,S02_A_temperature,S02_B_temperature,S03_A_temperature,S03_B_temperature,S04_A_temperature,S04_B_temperature,S05_A_temperature,S05_B_temperature,S06_A_temperature,S06_B_temperature,S07_A_temperature,S07_B_temperature,S08_A_temperature,S08_B_temperature,S09_A_temperature,S09_B_temperature,S10_A_temperature,S10_B_temperature,S11_A_temperature,S11_B_temperature,S12_A_temperature,S12_B_temperature,S13_A_temperature,S13_B_temperature,S14_A_temperature,S14_B_temperature,S15_A_temperature,S15_B_temperature,S16_A_temperature,S16_B_temperature,S17_A_temperature,S17_B_temperature,S18_A_temperature,S18_B_temperature,S19_A_temperature,S19_B_temperature,S20_A_temperature,S20_B_temperature
0,2021-12-27,1B0,5,1,0,5116.0,63.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
1,2021-12-27,1B0,5,2,0,5116.0,63.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
2,2021-12-27,1B0,5,3,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
3,2021-12-27,1B0,5,4,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,
4,2021-12-27,1B0,5,5,0,5116.0,59.5,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,20.3,24.8,20.0,24.6,23.3,25.2,24.2,22.4,23.5,23.1,24.8,25.7,25.1,24.5,24.8,23.8,25.1,25.1,25.0,24.6,27.9,26.9,26.1,25.9,29.6,25.9,26.9,25.9,27.7,27.0,27.7,25.8,26.2,27.0,26.9,26.6,30.9,26.8,25.6,27.2,26.9,27.2,,


In [None]:
# Training data
# df_select[["oven_id", "layer_id", "lamp_id", "anomaly_accumulation_hour", "power", "label"]]

In [24]:
# Persist the training table as train_v1.csv, a path relative to the current
# working directory. With pandas' default index=True the row index is written
# as the first, unnamed column.
df_train.to_csv("train_v1.csv")

In [25]:
# Inspect the last rows of the saved table.
df_train.tail()

Unnamed: 0,date,oven_id,layer_id,lamp_id,label,anomaly_accumulation_hour,power,Slot1_water_volume,Slot2_water_volume,Slot3_water_volume,Slot4_water_volume,Slot5_water_volume,Slot6_water_volume,Slot7_water_volume,Slot8_water_volume,Slot9_water_volume,Slot10_water_volume,Slot11_water_volume,Slot12_water_volume,Slot13_water_volume,Slot14_water_volume,Slot15_water_volume,Slot16_water_volume,Slot17_water_volume,Slot18_water_volume,Slot19_water_volume,Slot20_water_volume,Slot1-10_in_temperature,Slot1-10_out_temperature,Slot11-20_in_temperature,Slot11-20_out_temperature,S01_A_temperature,S01_B_temperature,S02_A_temperature,S02_B_temperature,S03_A_temperature,S03_B_temperature,S04_A_temperature,S04_B_temperature,S05_A_temperature,S05_B_temperature,S06_A_temperature,S06_B_temperature,S07_A_temperature,S07_B_temperature,S08_A_temperature,S08_B_temperature,S09_A_temperature,S09_B_temperature,S10_A_temperature,S10_B_temperature,S11_A_temperature,S11_B_temperature,S12_A_temperature,S12_B_temperature,S13_A_temperature,S13_B_temperature,S14_A_temperature,S14_B_temperature,S15_A_temperature,S15_B_temperature,S16_A_temperature,S16_B_temperature,S17_A_temperature,S17_B_temperature,S18_A_temperature,S18_B_temperature,S19_A_temperature,S19_B_temperature,S20_A_temperature,S20_B_temperature
43061,2022-04-22,2G0,10,118,0,4123.0,55.5,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,
43062,2022-04-22,2G0,10,119,0,4123.0,55.5,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,
43063,2022-04-22,2G0,10,120,0,4123.0,55.5,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,
43064,2022-04-22,2G0,10,121,0,4123.0,59.5,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,
43065,2022-04-22,2G0,10,122,0,4123.0,59.5,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,21.2,25.2,21.1,26.3,22.7,24.4,22.6,22.8,23.4,23.0,23.5,23.2,23.6,23.2,23.3,23.9,23.8,24.0,23.6,24.3,23.8,23.9,24.7,24.2,24.7,24.2,24.8,24.4,24.2,25.2,24.6,25.5,23.3,24.6,24.0,24.6,23.5,24.4,22.7,24.0,24.9,24.8,,


In [26]:
# Missing-value count per column of the final table.
df_train.isna().sum()

date                             0
oven_id                          0
layer_id                         0
lamp_id                          0
label                            0
anomaly_accumulation_hour        0
power                            0
Slot1_water_volume               0
Slot2_water_volume               0
Slot3_water_volume               0
Slot4_water_volume               0
Slot5_water_volume               0
Slot6_water_volume               0
Slot7_water_volume               0
Slot8_water_volume               0
Slot9_water_volume               0
Slot10_water_volume              0
Slot11_water_volume              0
Slot12_water_volume              0
Slot13_water_volume              0
Slot14_water_volume              0
Slot15_water_volume              0
Slot16_water_volume              0
Slot17_water_volume              0
Slot18_water_volume              0
Slot19_water_volume              0
Slot20_water_volume              0
Slot1-10_in_temperature          0
Slot1-10_out_tempera

In [28]:
# Final (rows, columns) of the training table.
df_train.shape

(43066, 71)

# Notes
1. `lamp_id` 數量與 `anomaly_total_number` 不同的sample修正為以`lamp_id` 數量為準 <br>
2. 修正5/4異常紀錄中 `anomaly_accumulation_hour` > `accumulation_hour`數值的sample(共3筆), 兩欄位皆填入平均 
3. 刪除 `S20_A_temperature` 和 `S20_B_temperature` 兩項特徵


## Additional Preprocess for train2
- `accumulation_hour` == 0 的層（未運行過的爐層）不會出現異常燈管