In [1]:
%load_ext autoreload
%autoreload 2
is_debug = 1
import os
import re
import gc
import sys
import glob
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

#========================================================================
# original library 
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"{HOME}/kaggle/data_analysis/model/")
sys.path.append(f"../py")
import MS_utils
from params_MS import params_lgb
import utils, ml_utils
from utils import logger_func
try:
    if not logger:
        logger=logger_func()
except NameError:
    logger=logger_func()
#========================================================================

"""
argv[1]: comment
argv[2]: feature_key
"""
# Basic Args
seed = 1208
set_type = 'all'
fold_n = 4
key, raw_target, ignore_list = MS_utils.get_basic_var()
comment = sys.argv[1]

# Base
vi_col = 'f000_AvSigVersion'
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
#  base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)

#========================================================================
# Make Validation
cv = KFold(n_splits=fold_n, shuffle=False, random_state=seed)
if is_debug:
    base_train = base[base[raw_target].isnull()].head(10000).sort_values(by=vi_col)
else:
    base_train = base[base[raw_target].isnull()].sort_values(by=vi_col)

kfold = list(cv.split(base_train[[key, 'country_group']], base_train[vi_col]))
del base_train
gc.collect()
base = base[[key, raw_target]]
#========================================================================

#  train, test = MS_utils.get_dataset(base=base)
test, train = MS_utils.get_dataset(base=base)

if is_debug:
    train = train.head(10000)
    test = test.head(500)
print(train.shape, test.shape)

#========================================================================
# Categorical Encode
ignore_list = [key, raw_target]
cat_cols = utils.get_categorical_features(df=train, ignore_list=ignore_list)
print(f"Categorical: {cat_cols}")

#Fit LabelEncoder
for col in cat_cols:
    # 最も頻度の多いカテゴリでimpute
    max_freq = train[col].mode()
    train[col].fillna(max_freq, inplace=True)
    test[col].fillna(max_freq, inplace=True)
    le = LabelEncoder().fit(pd.concat([train[col], test[col]], axis=0).value_counts().index.tolist())
    train[col] = le.transform(train[col])
    test[col]  = le.transform(test[col])
#========================================================================

2019-03-08 23:12:57,337 utils 346 [INFO]    [logger_func] start 
100%|██████████| 3/3 [00:01<00:00,  2.84it/s]


(16774736, 2)
(8921483, 83) (7853253, 83)
Categorical: ['f000_EngineVersion', 'f000_Census_OSBranch', 'f000_Census_GenuineStateName', 'f000_Census_OSArchitecture', 'f000_Census_OSVersion', 'f000_Platform', 'f000_Census_PrimaryDiskTypeName', 'f000_ProductName', 'f000_SkuEdition', 'f000_SmartScreen', 'f000_PuaMode', 'f000_Census_ActivationChannel', 'f000_Census_DeviceFamily', 'f000_Census_OSSkuName', 'f000_OsVer', 'f000_Census_ChassisTypeName', 'f000_Census_InternalBatteryType', 'f000_OsPlatformSubRelease', 'f000_Census_ProcessorClass', 'f000_Census_PowerPlatformRoleName', 'f000_Census_OSInstallTypeName', 'f000_Census_OSWUAutoUpdateOptionsName', 'f000_Census_OSEdition', 'f000_Census_FlightRing', 'f000_AppVersion', 'f000_Census_MDC2FormFactor', 'f000_Processor', 'f000_OsBuildLab']
(8921483, 83) (7853253, 83)
(10000, 83) (500, 83)
Categorical: ['f000_AvSigVersion']


In [5]:
model_type_list = ['lgb', 'rmf', 'lgr']
model_type_list = ['lgb']
model_type = 'lgb'
metric = 'auc'
metric = 'rmse'

target_list = [
    'f000_SmartScreen'
    ,'f000_AvSigVersion'
    ,'f000_EngineVersion'
    ,'f000_AppVersion'
    ,'f000_Census_OSVersion'
    ,'f000_AVProductStatesIdentifier'
    ,'f000_CountryIdentifier'
    ,'f000_AVProductsInstalled'
    ]

for target in target_list:

    feim_list = []
    score_list = []
    oof_pred = np.zeros(len(train))
    y_test = np.zeros(len(test))

    Y = train[target]
    ignore_list = [key, raw_target, target, 'country_group']
    use_cols = [col for col in train.columns if col not in ignore_list]
    x_test = test[use_cols]
    start_time = "{0:%Y%m%d_%H%M%S}".format(datetime.datetime.now())

    if model_type=='lgb':
        params = params_lgb()
        #  params['num_leaves'] = 4
        params['num_threads'] = 36
        try:
            #  params['learning_rate'] = float(sys.argv[1])
            params['learning_rate'] = 0.05
        except IndexError:
            pass
    else:
        params = {}

    logger.info(f"{model_type} Train Start!!")

    for num_fold, (trn_idx, val_idx) in enumerate(kfold):
        params['objective'] = 'regression'
        x_train, y_train = train[use_cols].iloc[trn_idx, :], Y.iloc[trn_idx]
        x_val, y_val = train[use_cols].iloc[val_idx, :], Y.iloc[val_idx]

        logger.info(f"Fold{num_fold} | Train:{x_train.shape} | Valid:{x_val.shape}")

        #  score, tmp_oof, tmp_pred, feim = ml_utils.Classifier(
        score, tmp_oof, tmp_pred, feim = ml_utils.Regressor(
            model_type=model_type
            , x_train=x_train
            , y_train=y_train
            , x_val=x_val
            , y_val=y_val
            , x_test=x_test
            , params=params
            , seed=seed
            , get_score=metric
        )
        feim_list.append(feim.set_index('feature').rename(columns={'importance':f'imp_{num_fold}'}))

        logger.info(f"Fold{num_fold} CV: {score}")
        score_list.append(score)
        oof_pred[val_idx] = tmp_oof
        y_test += tmp_pred

    feim = pd.concat(feim_list, axis=1)
    feim_cols = [col for col in feim.columns if col.count('imp_')]
    feim['importance'] = feim[feim_cols].mean(axis=1)
    feim.drop(feim_cols, axis=1, inplace=True)
    feim.sort_values(by='importance', ascending=False, inplace=True)

    cv_score = np.mean(score_list)
    logger.info(f'''
    #========================================================================
    # Model: {model_type}
    # CV   : {cv_score}
    #========================================================================''')

    y_test /= (num_fold+1)

    pred_col = 'prediction'
    train[pred_col] = oof_pred
    test[pred_col] = y_test
    stack_cols = [key, target, pred_col]

    df_stack = pd.concat([train[stack_cols], test[stack_cols]], ignore_index=True, axis=0)

    #========================================================================
    # Saving
    feim.to_csv(f'../valid/{start_time[4:12]}_{model_type}_SET-{set_type}_feat{len(x_train.columns)}_{comment}_CV{str(cv_score)[:7]}_LB.csv', index=True)
    utils.to_pkl_gzip(obj=df_stack, path=f'../stack/{start_time[4:12]}_{model_type}_SET-{set_type}_feat{len(x_train.columns)}_{comment}_CV{str(cv_score)[:7]}_LB')
    #========================================================================

2019-03-08 23:17:48,747 utils 43 [INFO]    [<module>] lgb Train Start!! 
2019-03-08 23:17:48,757 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 80) | Valid:(2500, 80) 


Training until validation scores don't improve for 150 rounds.


2019-03-08 23:17:49,610 utils 66 [INFO]    [<module>] Fold0 CV: 2.224527614451031 
2019-03-08 23:17:49,624 utils 50 [INFO]    [<module>] Fold1 | Train:(7500, 80) | Valid:(2500, 80) 


[200]	valid_0's l2: 5.03831
Early stopping, best iteration is:
[69]	valid_0's l2: 4.94852

        # R2 Score: 0.03273590550133032
        
Training until validation scores don't improve for 150 rounds.


2019-03-08 23:17:50,403 utils 66 [INFO]    [<module>] Fold1 CV: 2.235360261322957 
2019-03-08 23:17:50,414 utils 50 [INFO]    [<module>] Fold2 | Train:(7500, 80) | Valid:(2500, 80) 


[200]	valid_0's l2: 5.05305
Early stopping, best iteration is:
[87]	valid_0's l2: 4.99684

        # R2 Score: 0.031527183273216575
        
Training until validation scores don't improve for 150 rounds.


2019-03-08 23:17:51,542 utils 66 [INFO]    [<module>] Fold2 CV: 2.299812497080278 
2019-03-08 23:17:51,553 utils 50 [INFO]    [<module>] Fold3 | Train:(7500, 80) | Valid:(2500, 80) 


[200]	valid_0's l2: 5.36604
Early stopping, best iteration is:
[80]	valid_0's l2: 5.28914

        # R2 Score: 0.027981910449799807
        
Training until validation scores don't improve for 150 rounds.


2019-03-08 23:17:52,339 utils 66 [INFO]    [<module>] Fold3 CV: 2.3105774484273303 
2019-03-08 23:17:52,348 utils 82 [INFO]    [<module>] 
    # Model: lgb
    # CV   : 2.267569455320399


[200]	valid_0's l2: 5.37073
Early stopping, best iteration is:
[86]	valid_0's l2: 5.33877

        # R2 Score: 0.04627956469155836
        


2019-03-08 23:17:52,435 utils 43 [INFO]    [<module>] lgb Train Start!! 
2019-03-08 23:17:52,447 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 81) | Valid:(2500, 81) 


Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 30525.1
[400]	valid_0's l2: 28546.9
[600]	valid_0's l2: 28182.4
[800]	valid_0's l2: 28042.5


2019-03-08 23:17:55,315 utils 66 [INFO]    [<module>] Fold0 CV: 167.39651438775647 
2019-03-08 23:17:55,327 utils 50 [INFO]    [<module>] Fold1 | Train:(7500, 81) | Valid:(2500, 81) 


Early stopping, best iteration is:
[815]	valid_0's l2: 28021.6

        # R2 Score: 0.8566814877500699
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 33912.1
[400]	valid_0's l2: 31814.9
[600]	valid_0's l2: 31471.3


2019-03-08 23:17:57,851 utils 66 [INFO]    [<module>] Fold1 CV: 177.22513141697624 
2019-03-08 23:17:57,863 utils 50 [INFO]    [<module>] Fold2 | Train:(7500, 81) | Valid:(2500, 81) 


[800]	valid_0's l2: 31441.4
Early stopping, best iteration is:
[650]	valid_0's l2: 31408.7

        # R2 Score: 0.8433596801190059
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 27929.2
[400]	valid_0's l2: 25727.6
[600]	valid_0's l2: 25411.8


2019-03-08 23:18:00,081 utils 66 [INFO]    [<module>] Fold2 CV: 159.26607670806857 
2019-03-08 23:18:00,097 utils 50 [INFO]    [<module>] Fold3 | Train:(7500, 81) | Valid:(2500, 81) 


Early stopping, best iteration is:
[537]	valid_0's l2: 25365.7

        # R2 Score: 0.8747183110438908
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 31763.1
[400]	valid_0's l2: 29582
[600]	valid_0's l2: 29328.1


2019-03-08 23:18:02,430 utils 66 [INFO]    [<module>] Fold3 CV: 171.07853749497895 
2019-03-08 23:18:02,440 utils 82 [INFO]    [<module>] 
    # Model: lgb
    # CV   : 168.74156500194505
2019-03-08 23:18:02,522 utils 43 [INFO]    [<module>] lgb Train Start!! 


Early stopping, best iteration is:
[573]	valid_0's l2: 29267.9

        # R2 Score: 0.8538383399113123
        


2019-03-08 23:18:02,563 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 81) | Valid:(2500, 81) 


Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 0.927369
[400]	valid_0's l2: 0.751901
[600]	valid_0's l2: 0.71189
[800]	valid_0's l2: 0.694475
[1000]	valid_0's l2: 0.686066
[1200]	valid_0's l2: 0.682709
[1400]	valid_0's l2: 0.679968
[1600]	valid_0's l2: 0.67875
[1800]	valid_0's l2: 0.678258
[2000]	valid_0's l2: 0.678202
[2200]	valid_0's l2: 0.677879
[2400]	valid_0's l2: 0.677275
[2600]	valid_0's l2: 0.677166
[2800]	valid_0's l2: 0.677161
[3000]	valid_0's l2: 0.677152
[3200]	valid_0's l2: 0.677151
[3400]	valid_0's l2: 0.677151
[3600]	valid_0's l2: 0.677151
[3800]	valid_0's l2: 0.677151
[4000]	valid_0's l2: 0.677151
[4200]	valid_0's l2: 0.677151
[4400]	valid_0's l2: 0.677151
Early stopping, best iteration is:
[4365]	valid_0's l2: 0.677151


2019-03-08 23:18:08,310 utils 66 [INFO]    [<module>] Fold0 CV: 0.8228920603191071 
2019-03-08 23:18:08,323 utils 50 [INFO]    [<module>] Fold1 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9776507713849846
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 0.783941
[400]	valid_0's l2: 0.623599
[600]	valid_0's l2: 0.591378
[800]	valid_0's l2: 0.575248
[1000]	valid_0's l2: 0.570383
[1200]	valid_0's l2: 0.567386
[1400]	valid_0's l2: 0.566045
[1600]	valid_0's l2: 0.565786
[1800]	valid_0's l2: 0.56551
[2000]	valid_0's l2: 0.565413
Early stopping, best iteration is:
[1869]	valid_0's l2: 0.56538

        # R2 Score: 0.983057035154226
        


2019-03-08 23:18:13,609 utils 66 [INFO]    [<module>] Fold1 CV: 0.7519391730915874 
2019-03-08 23:18:13,630 utils 50 [INFO]    [<module>] Fold2 | Train:(7500, 81) | Valid:(2500, 81) 


Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 1.0685
[400]	valid_0's l2: 0.894721
[600]	valid_0's l2: 0.856326
[800]	valid_0's l2: 0.837633
[1000]	valid_0's l2: 0.829654
[1200]	valid_0's l2: 0.826586
[1400]	valid_0's l2: 0.824843
[1600]	valid_0's l2: 0.824393
Early stopping, best iteration is:
[1451]	valid_0's l2: 0.82437


2019-03-08 23:18:18,314 utils 66 [INFO]    [<module>] Fold2 CV: 0.9080635897598495 
2019-03-08 23:18:18,327 utils 50 [INFO]    [<module>] Fold3 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9749546103116329
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 0.668516
[400]	valid_0's l2: 0.52741
[600]	valid_0's l2: 0.495605
[800]	valid_0's l2: 0.478384
[1000]	valid_0's l2: 0.472878
[1200]	valid_0's l2: 0.470935
[1400]	valid_0's l2: 0.469507
[1600]	valid_0's l2: 0.468312
[1800]	valid_0's l2: 0.467855
[2000]	valid_0's l2: 0.46775
[2200]	valid_0's l2: 0.467692
Early stopping, best iteration is:
[2196]	valid_0's l2: 0.467676


2019-03-08 23:18:23,519 utils 66 [INFO]    [<module>] Fold3 CV: 0.6838694283240511 
2019-03-08 23:18:23,525 utils 82 [INFO]    [<module>] 
    # Model: lgb
    # CV   : 0.7916910628736488
2019-03-08 23:18:23,601 utils 43 [INFO]    [<module>] lgb Train Start!! 
2019-03-08 23:18:23,615 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.982572269698898
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 131.459
[400]	valid_0's l2: 130.259
Early stopping, best iteration is:
[332]	valid_0's l2: 129.963


2019-03-08 23:18:24,949 utils 66 [INFO]    [<module>] Fold0 CV: 11.400114897660757 
2019-03-08 23:18:24,960 utils 50 [INFO]    [<module>] Fold1 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.7165222771004875
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 139.963


2019-03-08 23:18:26,109 utils 66 [INFO]    [<module>] Fold1 CV: 11.810554842580085 
2019-03-08 23:18:26,121 utils 50 [INFO]    [<module>] Fold2 | Train:(7500, 81) | Valid:(2500, 81) 


Early stopping, best iteration is:
[249]	valid_0's l2: 139.489

        # R2 Score: 0.6988690595214785
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 129.643
[400]	valid_0's l2: 129.249
Early stopping, best iteration is:
[322]	valid_0's l2: 129.014

        # R2 Score: 0.72180067890592
        


2019-03-08 23:18:27,440 utils 66 [INFO]    [<module>] Fold2 CV: 11.358435445978182 
2019-03-08 23:18:27,455 utils 50 [INFO]    [<module>] Fold3 | Train:(7500, 81) | Valid:(2500, 81) 


Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 132.263
[400]	valid_0's l2: 132.194
Early stopping, best iteration is:
[317]	valid_0's l2: 131.434

        # R2 Score: 0.7262418587949495
        

2019-03-08 23:18:28,769 utils 66 [INFO]    [<module>] Fold3 CV: 11.464456495895103 
2019-03-08 23:18:28,776 utils 82 [INFO]    [<module>] 
    # Model: lgb
    # CV   : 11.508390420528531
2019-03-08 23:18:28,856 utils 43 [INFO]    [<module>] lgb Train Start!! 
2019-03-08 23:18:28,872 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 81) | Valid:(2500, 81) 



Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 39.2996
[400]	valid_0's l2: 30.9919
[600]	valid_0's l2: 28.6634
[800]	valid_0's l2: 27.9749
[1000]	valid_0's l2: 27.5217
[1200]	valid_0's l2: 27.2821
[1400]	valid_0's l2: 27.1459
[1600]	valid_0's l2: 27.0366
[1800]	valid_0's l2: 26.9846
[2000]	valid_0's l2: 26.9625
[2200]	valid_0's l2: 26.9426
[2400]	valid_0's l2: 26.9331
[2600]	valid_0's l2: 26.9241
[2800]	valid_0's l2: 26.9172
[3000]	valid_0's l2: 26.9131
[3200]	valid_0's l2: 26.9094
[3400]	valid_0's l2: 26.9068
[3600]	valid_0's l2: 26.9043
[3800]	valid_0's l2: 26.9031
[4000]	valid_0's l2: 26.9031
[4200]	valid_0's l2: 26.9031
[4400]	valid_0's l2: 26.9031
[4600]	valid_0's l2: 26.9031
[4800]	valid_0's l2: 26.9031
[5000]	valid_0's l2: 26.9031
[5200]	valid_0's l2: 26.9031
[5400]	valid_0's l2: 26.9031
[5600]	valid_0's l2: 26.9031
[5800]	valid_0's l2: 26.9031
[6000]	valid_0's l2: 26.9031
Early stopping, best iteration is:
[5850]	valid_0's l2: 26.9031


2019-03-08 23:18:39,641 utils 66 [INFO]    [<module>] Fold0 CV: 5.186816930272006 
2019-03-08 23:18:39,657 utils 50 [INFO]    [<module>] Fold1 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9980133018629879
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 26.9156
[400]	valid_0's l2: 19.8108
[600]	valid_0's l2: 18.0133
[800]	valid_0's l2: 17.3633
[1000]	valid_0's l2: 16.9904
[1200]	valid_0's l2: 16.7566
[1400]	valid_0's l2: 16.675
[1600]	valid_0's l2: 16.593
[1800]	valid_0's l2: 16.5512
[2000]	valid_0's l2: 16.5188
[2200]	valid_0's l2: 16.5063
[2400]	valid_0's l2: 16.4994
[2600]	valid_0's l2: 16.4941
[2800]	valid_0's l2: 16.4919
[3000]	valid_0's l2: 16.4876
[3200]	valid_0's l2: 16.4851
[3400]	valid_0's l2: 16.4833
[3600]	valid_0's l2: 16.481
[3800]	valid_0's l2: 16.4804
[4000]	valid_0's l2: 16.4796
[4200]	valid_0's l2: 16.4794
[4400]	valid_0's l2: 16.4791
Early stopping, best iteration is:
[4402]	valid_0's l2: 16.479


2019-03-08 23:18:50,768 utils 66 [INFO]    [<module>] Fold1 CV: 4.059444693845272 
2019-03-08 23:18:50,778 utils 50 [INFO]    [<module>] Fold2 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9987155284196295
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 21.9804
[400]	valid_0's l2: 16.5143
[600]	valid_0's l2: 14.9992
[800]	valid_0's l2: 14.5038
[1000]	valid_0's l2: 14.2105
[1200]	valid_0's l2: 14.0752
[1400]	valid_0's l2: 13.9932
[1600]	valid_0's l2: 13.9427
[1800]	valid_0's l2: 13.9111
[2000]	valid_0's l2: 13.8921
[2200]	valid_0's l2: 13.8807
[2400]	valid_0's l2: 13.8744
[2600]	valid_0's l2: 13.8669
[2800]	valid_0's l2: 13.8639
[3000]	valid_0's l2: 13.8602
[3200]	valid_0's l2: 13.8569
[3400]	valid_0's l2: 13.8555
[3600]	valid_0's l2: 13.8541
[3800]	valid_0's l2: 13.8533
[4000]	valid_0's l2: 13.8522
[4200]	valid_0's l2: 13.8503
[4400]	valid_0's l2: 13.8495
[4600]	valid_0's l2: 13.8489
[4800]	valid_0's l2: 13.8484
[5000]	valid_0's l2: 13.8481
[5200]	valid_0's l2: 13.8478
[5400]	valid_0's l2: 13.8474
[5600]	valid_0's l2: 13.8472
[5800]	valid_0's l2: 13.847
[6000]	valid_0's l2: 13.8469
[6200]	valid_0's l2: 13

2019-03-08 23:19:03,575 utils 66 [INFO]    [<module>] Fold2 CV: 3.7208004751422803 
2019-03-08 23:19:03,588 utils 50 [INFO]    [<module>] Fold3 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9989983520544132
        
Training until validation scores don't improve for 150 rounds.
[200]	valid_0's l2: 16.862
[400]	valid_0's l2: 12.4169
[600]	valid_0's l2: 11.1936
[800]	valid_0's l2: 10.8596
[1000]	valid_0's l2: 10.6584
[1200]	valid_0's l2: 10.5709
[1400]	valid_0's l2: 10.5014
[1600]	valid_0's l2: 10.4631
[1800]	valid_0's l2: 10.4415
[2000]	valid_0's l2: 10.43
[2200]	valid_0's l2: 10.4242
[2400]	valid_0's l2: 10.4184
[2600]	valid_0's l2: 10.4157
[2800]	valid_0's l2: 10.4137
[3000]	valid_0's l2: 10.4112
[3200]	valid_0's l2: 10.4095
[3400]	valid_0's l2: 10.4084
[3600]	valid_0's l2: 10.4075
[3800]	valid_0's l2: 10.4073
[4000]	valid_0's l2: 10.4068
[4200]	valid_0's l2: 10.4066
[4400]	valid_0's l2: 10.4064
[4600]	valid_0's l2: 10.4064
[4800]	valid_0's l2: 10.4063
[5000]	valid_0's l2: 10.4063
[5200]	valid_0's l2: 10.4062
[5400]	valid_0's l2: 10.4062
[5600]	valid_0's l2: 10.4062
[5800]	valid_0's l2: 10.4061
[6000]	valid_0's l2: 10.4061
[6200]	valid_0's l2: 10.4

2019-03-08 23:19:15,105 utils 66 [INFO]    [<module>] Fold3 CV: 3.2258521813080154 
2019-03-08 23:19:15,112 utils 82 [INFO]    [<module>] 
    # Model: lgb
    # CV   : 4.048228570141893
2019-03-08 23:19:15,186 utils 43 [INFO]    [<module>] lgb Train Start!! 
2019-03-08 23:19:15,202 utils 50 [INFO]    [<module>] Fold0 | Train:(7500, 81) | Valid:(2500, 81) 



        # R2 Score: 0.9992326156457059
        
Training until validation scores don't improve for 150 rounds.


TypeError: 'NoneType' object is not iterable

In [16]:
x_train.head()

Unnamed: 0,f000_EngineVersion,f000_Census_TotalPhysicalRAM,f000_IsBeta,f000_RtpStateBitfield,f000_Census_OSBranch,f000_Census_IsSecureBootEnabled,f000_Census_GenuineStateName,f000_Census_ProcessorCoreCount,f000_Census_OSArchitecture,f000_AVProductsEnabled,...,f000_Census_ThresholdOptIn,f000_UacLuaenable,f000_Census_MDC2FormFactor,f000_Census_SystemVolumeTotalCapacity,f000_Wdft_RegionIdentifier,f000_Census_IsTouchEnabled,f000_Processor,f000_Census_IsVirtualDevice,f000_OsBuildLab,prediction
2500,69,8192.0,0,7.0,30,0,1,4.0,0,1.0,...,0.0,1.0,2,476438.0,10.0,0,1,0.0,46,40.042642
2501,72,8192.0,0,7.0,12,1,1,4.0,0,1.0,...,,1.0,8,119434.0,3.0,0,1,0.0,306,430.783805
2502,71,16384.0,0,7.0,12,0,1,4.0,0,1.0,...,0.0,1.0,2,2861021.0,15.0,0,1,0.0,306,428.36098
2503,69,4096.0,0,7.0,10,1,1,4.0,0,1.0,...,,1.0,8,459239.0,11.0,0,1,0.0,292,363.105245
2504,69,12288.0,0,7.0,10,1,1,4.0,0,1.0,...,,1.0,8,468133.0,11.0,0,1,0.0,292,363.587898
