# TPS-Aug-2022

In [1]:
class Config:
    NB = '211'
    dataset_NB = '109'

    raw_data_dir = '../data/raw/'
    processed_data_dir = '../data/processed/'
    interim_dir = '../data/interim/'
    submission_dir = '../data/submission/'

    random_seed = 42
    n_folds = 5

    row_id = 'id'
    target = 'failure'

## Import libralies

In [2]:
import os
import gc
import warnings
warnings.filterwarnings('ignore')

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
import itertools

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
sns.set(style='white', context='notebook', palette='deep')

In [3]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

plotly_template = dict(
    layout=go.Layout(
        template='plotly_dark',
        font=dict(
            family="Franklin Gothic",
            size=12
        ),
        height=500,
        width=1000,
    )
)


color_palette = {
    'Bin': ['#016CC9','#E876A3'],
    'Cat5': ['#E876A3', '#E0A224', '#63B70D', '#6BCFF6', '#13399E'],
}

In [4]:
import random
import joblib
import itertools
from itertools import combinations

from sklearn.model_selection import StratifiedKFold, GroupKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score, roc_curve, auc
from lightgbm import LGBMClassifier, early_stopping

## Load and check data

In [5]:
df_train = pd.read_pickle(Config.processed_data_dir + f'nb{Config.dataset_NB}_train.pkl', compression='zip')
df_test = pd.read_pickle(Config.processed_data_dir + f'nb{Config.dataset_NB}_test.pkl', compression='zip')

submission = pd.read_csv(Config.raw_data_dir + 'sample_submission.csv', header=None)

df_train.shape

(26570, 341)

In [6]:
df_train.head()

Unnamed: 0,id,product_code,loading,attribute_2,attribute_3,measurement_0,measurement_1,measurement_2,measurement_3,measurement_4,measurement_5,measurement_6,measurement_7,measurement_8,measurement_9,measurement_10,measurement_11,measurement_12,measurement_13,measurement_14,measurement_15,measurement_16,measurement_17,failure,missing_loading,missing_measurement_3,missing_measurement_4,missing_measurement_5,missing_measurement_9,ohe0_5,ohe0_7,ohe1_5,ohe1_6,ohe1_7,ohe1_8,mes_0x1,mes_0-1,mes_0x2,mes_0-2,mes_0x3,mes_0-3,mes_0x4,mes_0-4,mes_0x5,mes_0-5,mes_0x6,mes_0-6,mes_0x7,mes_0-7,mes_0x8,mes_0-8,mes_0x9,mes_0-9,mes_0x10,mes_0-10,mes_0x11,mes_0-11,mes_0x12,mes_0-12,mes_0x13,mes_0-13,mes_0x14,mes_0-14,mes_0x15,mes_0-15,mes_0x16,mes_0-16,mes_0x17,mes_0-17,mes_1x2,mes_1-2,mes_1x3,mes_1-3,mes_1x4,mes_1-4,mes_1x5,mes_1-5,mes_1x6,mes_1-6,mes_1x7,mes_1-7,mes_1x8,mes_1-8,mes_1x9,mes_1-9,mes_1x10,mes_1-10,mes_1x11,mes_1-11,mes_1x12,mes_1-12,mes_1x13,mes_1-13,mes_1x14,mes_1-14,mes_1x15,mes_1-15,mes_1x16,mes_1-16,mes_1x17,mes_1-17,mes_2x3,mes_2-3,mes_2x4,mes_2-4,mes_2x5,mes_2-5,mes_2x6,mes_2-6,mes_2x7,mes_2-7,mes_2x8,mes_2-8,mes_2x9,mes_2-9,mes_2x10,mes_2-10,mes_2x11,mes_2-11,mes_2x12,mes_2-12,mes_2x13,mes_2-13,mes_2x14,mes_2-14,mes_2x15,mes_2-15,mes_2x16,mes_2-16,mes_2x17,mes_2-17,mes_3x4,mes_3-4,mes_3x5,mes_3-5,mes_3x6,mes_3-6,mes_3x7,mes_3-7,mes_3x8,mes_3-8,mes_3x9,mes_3-9,mes_3x10,mes_3-10,mes_3x11,mes_3-11,mes_3x12,mes_3-12,mes_3x13,mes_3-13,mes_3x14,mes_3-14,mes_3x15,mes_3-15,mes_3x16,mes_3-16,mes_3x17,mes_3-17,mes_4x5,mes_4-5,mes_4x6,mes_4-6,mes_4x7,mes_4-7,mes_4x8,mes_4-8,mes_4x9,mes_4-9,mes_4x10,mes_4-10,mes_4x11,mes_4-11,mes_4x12,mes_4-12,mes_4x13,mes_4-13,mes_4x14,mes_4-14,mes_4x15,mes_4-15,mes_4x16,mes_4-16,mes_4x17,mes_4-17,mes_5x6,mes_5-6,mes_5x7,mes_5-7,mes_5x8,mes_5-8,mes_5x9,mes_5-9,mes_5x10,mes_5-10,mes_5x11,mes_5-11,mes_5x12,mes_5-12,mes_5x13,mes_5-13,mes_5x14,mes_5-14,mes_5x15,mes_5-15,mes_5x16,mes_5-16,mes_5x17,mes_5-17,mes_6x7,mes_6-7,mes_6x8,mes_6-8,mes_6x9,mes_6-9,mes_6x10,mes_6-10,mes_6x11,mes_6-11,mes_6x12,mes_6-12,mes_6x13,mes_6-13,mes_6x14,mes_6-14,mes_6x15,mes_6-15,mes_6x16,mes_6-16,mes_6x17,mes_6-17,mes_7x8,mes_7-8,mes_7x9,mes_7-9,mes_7x10,mes_7-10,mes_7x11,mes_7-11,mes_7x12,mes_7-12,mes_7x13,mes_7-13,mes_7x14,mes_7-14,mes_7x15,mes_7-15,mes_7x16,mes_7-16,mes_7x17,mes_7-17,mes_8x9,mes_8-9,mes_8x10,mes_8-10,mes_8x11,mes_8-11,mes_8x12,mes_8-12,mes_8x13,mes_8-13,mes_8x14,mes_8-14,mes_8x15,mes_8-15,mes_8x16,mes_8-16,mes_8x17,mes_8-17,mes_9x10,mes_9-10,mes_9x11,mes_9-11,mes_9x12,mes_9-12,mes_9x13,mes_9-13,mes_9x14,mes_9-14,mes_9x15,mes_9-15,mes_9x16,mes_9-16,mes_9x17,mes_9-17,mes_10x11,mes_10-11,mes_10x12,mes_10-12,mes_10x13,mes_10-13,mes_10x14,mes_10-14,mes_10x15,mes_10-15,mes_10x16,mes_10-16,mes_10x17,mes_10-17,mes_11x12,mes_11-12,mes_11x13,mes_11-13,mes_11x14,mes_11-14,mes_11x15,mes_11-15,mes_11x16,mes_11-16,mes_11x17,mes_11-17,mes_12x13,mes_12-13,mes_12x14,mes_12-14,mes_12x15,mes_12-15,mes_12x16,mes_12-16,mes_12x17,mes_12-17,mes_13x14,mes_13-14,mes_13x15,mes_13-15,mes_13x16,mes_13-16,mes_13x17,mes_13-17,mes_14x15,mes_14-15,mes_14x16,mes_14-16,mes_14x17,mes_14-17,mes_15x16,mes_15-16,mes_15x17,mes_15-17,mes_16x17,mes_16-17
0,0,A,80.1,9,5,7.0,8.0,11.0,18.04,12.518,15.748,19.292,11.739,20.155,10.672,15.859,17.594,15.193,15.029,14.537333,13.034,14.684,764.1,0.0,False,False,False,False,False,0.0,1.0,0.0,0.0,0.0,1.0,56.0,1.0,77.0,4.0,126.28,11.04,87.626,5.518,110.236,8.748,135.044,12.292,82.173,4.739,141.085,13.155,74.704,3.672,111.013,8.859,123.158,10.594,106.351,8.193,105.203,8.029,101.761333,7.537333,91.238,6.034,102.788,7.684,5348.7,757.1,88.0,3.0,144.32,10.04,100.144,4.518,125.984,7.748,154.336,11.292,93.912,3.739,161.24,12.155,85.376,2.672,126.872,7.859,140.752,9.594,121.544,7.193,120.232,7.029,116.298667,6.537333,104.272,5.034,117.472,6.684,6112.8,756.1,198.44,7.04,137.698,1.518,173.228,4.748,212.212,8.292,129.129,0.739,221.705,9.155,117.392,0.328,174.449,4.859,193.534,6.594,167.123,4.193,165.319,4.029,159.910667,3.537333,143.374,2.034,161.524,3.684,8405.1,753.1,225.82472,5.522,284.09392,2.292,348.02768,1.252,211.77156,6.301,363.5962,2.115,192.52288,7.368,286.09636,2.181,317.39576,0.446,274.08172,2.847,271.12316,3.011,262.253493,3.502667,235.13336,5.006,264.89936,3.356,13784.364,746.06,197.133464,3.23,241.497256,6.774,146.948802,0.779,252.30029,7.637,133.592096,1.846,198.522962,3.341,220.241692,5.076,190.185974,2.675,188.133022,2.511,181.978339,2.019333,163.159612,0.516,183.814312,2.166,9565.0038,751.582,303.810416,3.544,184.865772,4.009,317.40094,4.407,168.062656,5.076,249.747532,0.111,277.070312,1.846,239.259364,0.555,236.676692,0.719,228.933925,1.210667,205.259432,2.714,231.243632,1.064,12033.0468,748.352,226.468788,7.553,388.83026,0.863,205.884224,8.62,305.951828,3.433,339.423448,1.698,293.103356,4.099,289.939468,4.263,280.454235,4.754667,251.451928,6.258,283.283728,4.608,14741.0172,744.808,236.599545,8.416,125.278608,1.067,186.168801,4.12,206.535966,5.855,178.350627,3.454,176.425431,3.29,170.653756,2.798333,153.006126,1.295,172.375476,2.945,8969.7699,752.361,215.09416,9.483,319.638145,4.296,354.60707,2.561,306.214915,4.962,302.909495,5.126,292.999953,5.617667,262.70027,7.121,295.95602,5.471,15400.4355,743.945,169.247248,5.187,187.763168,6.922,162.139696,4.521,160.389488,4.357,155.142421,3.865333,139.098848,2.362,156.707648,4.012,8154.4752,753.428,279.023246,1.735,240.945787,0.666,238.344911,0.83,230.547569,1.321667,206.706206,2.825,232.873556,1.175,12117.8619,748.241,267.305642,2.401,264.420226,2.565,255.769843,3.056667,229.320196,4.56,258.350296,2.91,13443.5754,746.506,228.335597,0.164,220.865705,0.655667,198.025562,2.159,223.094012,0.509,11608.9713,748.907,218.481583,0.491667,195.887986,1.995,220.685836,0.345,11483.6589,749.071,189.479603,1.503333,213.466203,0.146667,11107.9764,749.562667,191.391256,1.65,9959.2794,751.066,11220.0444,749.416
1,1,A,84.89,9,5,14.0,3.0,11.0,18.213,11.54,17.717,17.893,12.748,17.889,12.448,17.947,17.915,11.755,14.732,15.425,14.395,15.631,682.057,0.0,False,False,False,False,False,0.0,1.0,0.0,0.0,0.0,1.0,42.0,11.0,154.0,3.0,254.982,4.213,161.56,2.46,248.038,3.717,250.502,3.893,178.472,1.252,250.446,3.889,174.272,1.552,251.258,3.947,250.81,3.915,164.57,2.245,206.248,0.732,215.95,1.425,201.53,0.395,218.834,1.631,9548.798,668.057,33.0,8.0,54.639,15.213,34.62,8.54,53.151,14.717,53.679,14.893,38.244,9.748,53.667,14.889,37.344,9.448,53.841,14.947,53.745,14.915,35.265,8.755,44.196,11.732,46.275,12.425,43.185,11.395,46.893,12.631,2046.171,679.057,200.343,7.213,126.94,0.54,194.887,6.717,196.823,6.893,140.228,1.748,196.779,6.889,136.928,1.448,197.417,6.947,197.065,6.915,129.305,0.755,162.052,3.732,169.675,4.425,158.345,3.395,171.941,4.631,7502.627,671.057,210.17802,6.673,322.679721,0.496,325.885209,0.32,232.179324,5.465,325.812357,0.324,226.715424,5.765,326.868711,0.266,326.285895,0.298,214.093815,6.458,268.313916,3.481,280.935525,2.788,262.176135,3.818,284.687403,2.582,12422.304141,663.844,204.45418,6.177,206.48522,6.353,147.11192,1.208,206.43906,6.349,143.64992,0.908,207.10838,6.407,206.7391,6.375,135.6527,0.215,170.00728,3.192,178.0045,3.885,166.1183,2.855,180.38174,4.091,7870.93778,670.517,317.010281,0.176,225.856316,4.969,316.939413,0.172,220.541216,5.269,317.966999,0.23,317.400055,0.198,208.263335,5.962,261.006844,2.985,273.284725,2.292,255.036215,3.322,276.934427,2.086,12084.003869,664.34,228.099964,5.145,320.087877,0.004,222.732064,5.445,321.125671,0.054,320.553095,0.022,210.332215,6.138,263.599676,3.161,275.999525,2.468,257.569735,3.498,279.685483,2.262,12204.045901,664.164,228.048972,5.141,158.687104,0.3,228.788356,5.199,228.38042,5.167,149.85274,0.993,187.803536,1.984,196.6379,2.677,183.50746,1.647,199.263988,2.883,8694.862636,669.309,222.682272,5.441,321.053883,0.058,320.481435,0.026,210.285195,6.134,263.540748,3.157,275.937825,2.464,257.512155,3.494,279.622959,2.258,12201.317673,664.168,223.404256,5.499,223.00592,5.467,146.32624,0.693,183.383936,2.284,192.0104,2.977,179.18896,1.947,194.574688,3.183,8490.245536,669.609,321.520505,0.032,210.966985,6.192,264.395204,3.215,276.832475,2.522,258.347065,3.552,280.529557,2.316,12240.876979,664.11,210.590825,6.16,263.92378,3.183,276.338875,2.49,257.886425,3.52,280.029365,2.284,12219.051155,664.142,173.17466,2.977,181.320875,3.67,169.213225,2.64,183.742405,3.876,8017.580035,670.302,227.2411,0.693,212.06714,0.337,230.275892,0.899,10048.063724,667.325,222.042875,1.03,241.108175,0.206,10520.729225,666.632,225.008245,1.236,9818.210515,667.662,10661.232967,666.426
2,2,A,82.43,9,5,12.0,1.0,11.0,18.057,11.652,16.738,18.24,12.718,18.288,12.715,15.607,19.391,13.798,16.711,18.631,14.094,17.946,663.376,0.0,False,False,False,False,False,0.0,1.0,0.0,0.0,0.0,1.0,12.0,11.0,132.0,1.0,216.684,6.057,139.824,0.348,200.856,4.738,218.88,6.24,152.616,0.718,219.456,6.288,152.58,0.715,187.284,3.607,232.692,7.391,165.576,1.798,200.532,4.711,223.572,6.631,169.128,2.094,215.352,5.946,7960.512,651.376,11.0,10.0,18.057,17.057,11.652,10.652,16.738,15.738,18.24,17.24,12.718,11.718,18.288,17.288,12.715,11.715,15.607,14.607,19.391,18.391,13.798,12.798,16.711,15.711,18.631,17.631,14.094,13.094,17.946,16.946,663.376,662.376,198.627,7.057,128.172,0.652,184.118,5.738,200.64,7.24,139.898,1.718,201.168,7.288,139.865,1.715,171.677,4.607,213.301,8.391,151.778,2.798,183.821,5.711,204.941,7.631,155.034,3.094,197.406,6.946,7297.136,652.376,210.400164,6.405,302.238066,1.319,329.35968,0.183,229.648926,5.339,330.226416,0.231,229.594755,5.342,281.815599,2.45,350.143287,1.334,249.150486,4.259,301.750527,1.346,336.419967,0.574,254.495358,3.963,324.050922,0.111,11978.580432,645.319,195.031176,5.086,212.53248,6.588,148.190136,1.066,213.091776,6.636,148.15518,1.063,181.852764,3.955,225.943932,7.739,160.774296,2.146,194.716572,5.059,217.088412,6.979,164.223288,2.442,209.106792,6.294,7729.657152,651.724,305.30112,1.502,212.873884,4.02,306.104544,1.55,212.82367,4.023,261.229966,1.131,324.566558,2.653,230.950924,2.94,279.708718,0.027,311.845678,1.893,235.905372,2.644,300.380148,1.208,11103.587488,646.638,231.97632,5.522,333.57312,0.048,231.9216,5.525,284.67168,2.633,353.69184,1.151,251.67552,4.442,304.80864,1.529,339.82944,0.391,257.07456,4.146,327.33504,0.294,12099.97824,645.136,232.586784,5.57,161.70937,0.003,198.489826,2.889,246.614738,6.673,175.482964,1.08,212.530498,3.993,236.949058,5.913,179.247492,1.376,228.237228,5.228,8436.815968,650.658,232.53192,5.573,285.420816,2.681,354.622608,1.103,252.337824,4.49,305.610768,1.577,340.723728,0.343,257.751072,4.194,328.196448,0.342,12131.820288,645.088,198.443005,2.892,246.556565,6.676,175.44157,1.083,212.480365,3.996,236.893165,5.916,179.20521,1.379,228.18339,5.231,8434.82584,650.661,302.635337,3.784,215.345386,1.809,260.808577,1.104,290.774017,3.024,219.965058,1.513,280.083222,2.339,10353.309232,647.769,267.557018,5.593,324.043001,2.68,361.273721,0.76,273.296754,5.297,347.990886,1.445,12863.524016,643.985,230.578378,2.913,257.070538,4.833,194.469012,0.296,247.618908,4.148,9153.262048,649.578,311.342641,1.92,235.524834,2.617,299.895606,1.235,11085.676336,646.665,262.585314,4.537,334.351926,0.685,12359.358256,644.745,252.930924,3.852,9349.621344,649.282,11904.945696,645.43
3,3,A,101.07,9,5,13.0,2.0,11.0,17.295,11.188,18.576,18.339,12.583,19.06,12.471,16.346,18.377,10.02,15.25,15.562,16.154,17.172,826.282,0.0,False,False,False,False,False,0.0,1.0,0.0,0.0,0.0,1.0,26.0,11.0,143.0,2.0,224.835,4.295,145.444,1.812,241.488,5.576,238.407,5.339,163.579,0.417,247.78,6.06,162.123,0.529,212.498,3.346,238.901,5.377,130.26,2.98,198.25,2.25,202.306,2.562,210.002,3.154,223.236,4.172,10741.666,813.282,22.0,9.0,34.59,15.295,22.376,9.188,37.152,16.576,36.678,16.339,25.166,10.583,38.12,17.06,24.942,10.471,32.692,14.346,36.754,16.377,20.04,8.02,30.5,13.25,31.124,13.562,32.308,14.154,34.344,15.172,1652.564,824.282,190.245,6.295,123.068,0.188,204.336,7.576,201.729,7.339,138.413,1.583,209.66,8.06,137.181,1.471,179.806,5.346,202.147,7.377,110.22,0.98,167.75,4.25,171.182,4.562,177.694,5.154,188.892,6.172,9089.102,815.282,193.49646,6.107,321.27192,1.281,317.173005,1.044,217.622985,4.712,329.6427,1.765,215.685945,4.824,282.70407,0.949,317.830215,1.082,173.2959,7.275,263.74875,2.045,269.14479,1.733,279.38343,1.141,296.98974,0.123,14290.54719,808.987,207.828288,7.388,205.176732,7.151,140.778604,1.395,213.24328,7.872,139.525548,1.283,182.879048,5.158,205.601876,7.189,112.10376,1.168,170.617,4.062,174.107656,4.374,180.730952,4.966,192.120336,5.984,9244.443016,815.094,340.665264,0.237,233.741808,5.993,354.05856,0.484,231.661296,6.105,303.643296,2.23,341.371152,0.199,186.13152,8.556,283.284,3.326,289.079712,3.014,300.076704,2.422,318.987072,1.404,15349.014432,807.706,230.759637,5.756,349.54134,0.721,228.705669,5.868,299.769294,1.993,337.015803,0.038,183.75678,8.319,279.66975,3.089,285.391518,2.777,296.248206,2.185,314.917308,1.167,15153.185598,807.943,239.83198,6.477,156.922593,0.112,205.681718,3.763,231.237791,5.794,126.08166,2.563,191.89075,2.667,195.816646,2.979,203.265782,3.571,216.075276,4.589,10397.106406,813.699,237.69726,6.589,311.55476,2.714,350.26562,0.683,190.9812,9.04,290.665,3.81,296.61172,3.498,307.89524,2.906,327.29832,1.888,15748.93492,807.222,203.850966,3.875,229.179567,5.906,124.95942,2.451,190.18275,2.779,194.073702,3.091,201.456534,3.683,214.152012,4.701,10304.562822,813.811,300.390442,2.031,163.78692,6.326,249.2765,1.096,254.376452,0.784,264.053284,0.192,280.693512,0.826,13506.405572,809.936,184.13754,8.357,280.24925,3.127,285.982874,2.815,296.862058,2.223,315.569844,1.205,15184.584314,807.905,152.805,5.23,155.93124,5.542,161.86308,6.134,172.06344,7.152,8279.34564,816.262,237.3205,0.312,246.3485,0.904,261.873,1.922,12600.8005,811.032,251.388548,0.592,267.230664,1.61,12858.600484,810.72,277.396488,1.018,13347.759428,810.128,14188.914504,809.11
4,4,A,188.06,9,5,9.0,2.0,11.0,19.346,12.95,16.99,15.746,11.306,18.093,10.337,17.082,19.932,12.428,16.182,12.76,13.153,16.412,579.885,0.0,False,False,False,False,False,0.0,1.0,0.0,0.0,0.0,1.0,18.0,7.0,99.0,2.0,174.114,10.346,116.55,3.95,152.91,7.99,141.714,6.746,101.754,2.306,162.837,9.093,93.033,1.337,153.738,8.082,179.388,10.932,111.852,3.428,145.638,7.182,114.84,3.76,118.377,4.153,147.708,7.412,5218.965,570.885,22.0,9.0,38.692,17.346,25.9,10.95,33.98,14.99,31.492,13.746,22.612,9.306,36.186,16.093,20.674,8.337,34.164,15.082,39.864,17.932,24.856,10.428,32.364,14.182,25.52,10.76,26.306,11.153,32.824,14.412,1159.77,577.885,212.806,8.346,142.45,1.95,186.89,5.99,173.206,4.746,124.366,0.306,199.023,7.093,113.707,0.663,187.902,6.082,219.252,8.932,136.708,1.428,178.002,5.182,140.36,1.76,144.683,2.153,180.532,5.412,6378.735,568.885,250.5307,6.396,328.68854,2.356,304.622116,3.6,218.725876,8.04,350.027178,1.253,199.979602,9.009,330.468372,2.264,385.604472,0.586,240.432088,6.918,313.056972,3.164,246.85496,6.586,254.457938,6.193,317.506552,2.934,11218.45521,560.539,220.0205,4.04,203.9107,2.796,146.4127,1.644,234.30435,5.143,133.86415,2.613,221.2119,4.132,258.1194,6.982,160.9426,0.522,209.5569,3.232,165.242,0.19,170.33135,0.203,212.5354,3.462,7509.51075,566.935,267.52454,1.244,192.08894,5.684,307.40007,1.103,175.62563,6.653,290.22318,0.092,338.64468,2.942,211.15172,4.562,274.93218,0.808,216.7924,4.23,223.46947,3.837,278.83988,0.578,9852.24615,562.895,178.024276,4.44,284.892378,2.347,162.766402,5.409,268.973172,1.336,313.849272,4.186,195.691288,3.318,254.801772,0.436,200.91896,2.986,207.107138,2.593,258.423352,0.666,9130.86921,564.139,204.559458,6.787,116.870122,0.969,193.129092,5.776,225.351192,8.626,140.510968,1.122,182.953692,4.876,144.26456,1.454,148.707818,1.847,185.554072,5.106,6556.17981,568.579,187.027341,7.756,309.064626,1.011,360.629676,1.839,224.859804,5.665,292.780926,1.911,230.86668,5.333,237.977229,4.94,296.942316,1.681,10491.859305,561.792,176.576634,6.745,206.037084,9.595,128.468236,2.091,167.273334,5.845,131.90012,2.423,135.962561,2.816,169.650844,6.075,5994.271245,569.548,340.478424,2.85,212.295096,4.654,276.420924,0.9,217.96632,4.322,224.679546,3.929,280.349784,0.67,9905.59557,562.803,247.714896,7.504,322.539624,3.75,254.33232,7.172,262.165596,6.779,327.123984,3.52,11558.26782,559.953,201.109896,3.754,158.58128,0.332,163.465484,0.725,203.968336,3.984,7206.81078,567.457,206.48232,3.422,212.841846,3.029,265.578984,0.23,9383.69907,563.703,167.83228,0.393,209.41712,3.652,7399.3326,567.125,215.867036,3.259,7627.227405,566.732,9517.07262,563.473


In [7]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26570 entries, 0 to 26569
Columns: 341 entries, id to mes_16-17
dtypes: bool(5), float64(332), int64(3), object(1)
memory usage: 68.2+ MB


In [8]:
def seed_everything(seed):

    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [9]:
# Get feature list
features = [col for col in df_train.columns if col not in [Config.row_id, Config.target, 'product_code']]

In [10]:
# 約40分

# Create a numpy array to store test predictions
test_predictions = np.zeros(len(df_test))

# Create a numpy array to store out of folds predictions
oof_predictions = np.zeros(len(df_train))

feature_importance_df = pd.DataFrame(index=features)
y_valids, val_preds =[],[]
amex_scores = []

kfold = GroupKFold(n_splits=Config.n_folds) # must be 5 because of the 5 product codes
for fold, (train_idx, valid_idx) in enumerate(kfold.split(df_train, df_train[Config.target], df_train['product_code'])):

    print(' ')
    print('-'*50)
    print(f'Training fold {fold+1} with {len(features)} features...')

    x_train, x_val = df_train[features].iloc[train_idx], df_train[features].iloc[valid_idx]
    y_train, y_val = df_train[Config.target].iloc[train_idx], df_train[Config.target].iloc[valid_idx]

    model = LogisticRegression()
    model.fit(x_train, y_train)

    print(f'================================== training {fold+1} fin. ==================================')

    # Predict validation data
    print(f'================================== validation-data predicting ... ==================================')
    val_pred = model.predict_proba(x_val)[:, 1]
    oof_predictions[valid_idx] = val_pred

    # Predict test data
    print(f'================================== test-data predicting ... ==================================')
    test_pred = model.predict_proba(df_test[features])[:, 1]
    test_predictions += test_pred / Config.n_folds

    # save results
    y_valids.append(y_val)
    val_preds.append(val_pred)
    feature_importance_df["Importance_Fold"+str(fold+1)]=model.coef_.ravel()

    # Compute fold metric
    val_pred = pd.DataFrame(data={'prediction': val_pred})
    y_val = pd.DataFrame(data={'target': y_val.reset_index(drop=True)})
    auc_score = roc_auc_score(y_val, val_pred)

    print(f'Fold {fold+1} CV result')
    print(f' ROC metric : {auc_score}')

    del x_train, x_val, y_train, y_val
    _ = gc.collect()

# Compute out of folds metric
oof_predictions = pd.DataFrame(data={'prediction': oof_predictions})
y_true = pd.DataFrame(data={Config.target: df_train[Config.target]})

print(' ')
print('-'*50)
print(f'TOTAL AUC socre : {roc_auc_score(df_train[Config.target], oof_predictions["prediction"])}')
print('-'*50)

# Create a dataframe to store out of folds predictions
oof_df = pd.DataFrame({Config.row_id: df_train[Config.row_id], Config.target: df_train[Config.target], 'prediction': oof_predictions['prediction']})

# Create a dataframe to store test prediction
test_df = pd.DataFrame({Config.row_id: df_test[Config.row_id], Config.target: test_predictions})

 
--------------------------------------------------
Training fold 1 with 338 features...
Fold 1 CV result
 ROC metric : 0.5715776659633176
 
--------------------------------------------------
Training fold 2 with 338 features...
Fold 2 CV result
 ROC metric : 0.5551959594735263
 
--------------------------------------------------
Training fold 3 with 338 features...
Fold 3 CV result
 ROC metric : 0.5621217418397679
 
--------------------------------------------------
Training fold 4 with 338 features...
Fold 4 CV result
 ROC metric : 0.5686857014388489
 
--------------------------------------------------
Training fold 5 with 338 features...
Fold 5 CV result
 ROC metric : 0.5900566575276965
 
--------------------------------------------------
TOTAL AUC socre : 0.5682556374205914
--------------------------------------------------


In [None]:
# Save results
# oof_df.to_csv(f'/content/drive/MyDrive/Amex/OOF/oof_lgbm_baseline_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)
# test_df.to_csv(f'/content/drive/MyDrive/Amex/Predictions/test_lgbm_baseline_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)

In [11]:
oof_df.head()

Unnamed: 0,id,failure,prediction
0,0,0.0,0.157203
1,1,0.0,0.162999
2,2,0.0,0.166988
3,3,0.0,0.213539
4,4,0.0,0.2103


In [12]:
def plot_roc(y_val, y_prob):
    #colors=px.colors.qualitative.Prism
    fig = go.Figure(layout=plotly_template['layout'])
    fig.add_trace(go.Scatter(x=np.linspace(0,1,11), y=np.linspace(0,1,11), name='Random Chance', mode='lines', showlegend=False, line=dict(color="Black", width=1, dash="dot")))

    for i in range(len(y_val)):
        y=y_val[i]
        prob=y_prob[i]
        fpr, tpr, _ = roc_curve(y, prob)
        roc_auc = auc(fpr,tpr)
        fig.add_trace(go.Scatter(x=fpr, y=tpr, line=dict(color=color_palette['Cat5'][i], width=3),
                                 hovertemplate = 'True positive rate = %{y:.3f}<br>False positive rate = %{x:.3f}',
                                 name='Fold {}: AUC = {:.3f}'.format(i+1, roc_auc)))

    fig.update_layout(template=plotly_template, title="Cross-Validation ROC Curves",
                      hovermode="x unified", width=700, height=600,
                      xaxis_title='False Positive Rate (1 - Specificity)',
                      yaxis_title='True Positive Rate (Sensitivity)',
                      legend=dict(orientation='v', y=.07, x=1, xanchor="right",
                                  bordercolor="black", borderwidth=.5))
    fig.show()

plot_roc(y_valids, val_preds)

In [13]:
top = 50

feature_importance_df['avg'] = feature_importance_df.mean(axis=1)
feature_importance_top = feature_importance_df.avg.nlargest(top).sort_values(ascending=True)

pal=sns.color_palette("YlGnBu", 65).as_hex()
fig=go.Figure()
for i in range(len(feature_importance_top.index)):
    fig.add_shape(dict(type="line", y0=i, y1=i, x0=0, x1=feature_importance_top[i],
                       line_color=pal[::-1][i],opacity=0.8,line_width=4))

fig.add_trace(go.Scatter(x=feature_importance_top, y=feature_importance_top.index, mode='markers',
                         marker_color=pal[::-1], marker_size=8,
                         hovertemplate='%{y} Importance = %{x:.0f}<extra></extra>'))

fig.update_layout(template=plotly_template,title=f'LGBM Feature Importance<br>Top {top}',
                  margin=dict(l=150,t=80),
                  xaxis=dict(title='Importance', zeroline=False),
                  yaxis_showgrid=False, height=1000, width=800)
fig.show()

In [14]:
# test_df = pd.DataFrame({Config.row_id: test[Config.row_id], 'prediction': test_predictions})

df = pd.DataFrame(data={'Target':test_df[Config.target].apply(lambda x: 1 if x>0.25 else 0)})
df = df.Target.value_counts(normalize=True)
df.rename(index={1:'Positive', 0:'Negative'}, inplace=True)

#pal, color=['#016CC9','#DEB078'], ['#8DBAE2','#EDD3B3']
fig=go.Figure()

fig.add_trace(go.Pie(labels=df.index, values=df*100, hole=.45,
                     showlegend=True,sort=False,
                     marker=dict(colors=color_palette['Bin'],line=dict(color=pal,width=2.5)),
                     hovertemplate = "%{label}: %{value:.2f}%<extra></extra>"))

fig.update_layout(template=plotly_template, title='Predicted Target Distribution',
                  legend=dict(traceorder='reversed',y=1.05,x=0),
                  uniformtext_minsize=15, uniformtext_mode='hide',width=700)
fig.show()

In [15]:
test_df

Unnamed: 0,id,failure
26570,26570,0.221225
26571,26571,0.174204
26572,26572,0.206362
26573,26573,0.199302
26574,26574,0.286500
...,...,...
47340,47340,0.204051
47341,47341,0.186489
47342,47342,0.164894
47343,47343,0.200885


In [16]:
test_df[Config.target].describe()

count    20775.000000
mean         0.216753
std          0.041743
min          0.119649
25%          0.188707
50%          0.209180
75%          0.235830
max          0.614860
Name: failure, dtype: float64

In [17]:
Config.NB

'211'

In [18]:
test_df.to_csv(Config.submission_dir + f'nb{Config.NB}.csv', index=False)

## 検証メモ