In [3]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from scipy import sparse
import warnings
import time
import sys
import os
import re
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# import plotly.offline as py
# py.init_notebook_mode(connected=True)
# import plotly.graph_objs as go
# import plotly.tools as tls
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss

import logging

logging.basicConfig(level=logging.DEBUG, filename="baseline_logfile_1_15",
                    filemode="a+", format="%(asctime)-15s %(levelname)-8s %(message)s")

train = pd.read_csv('../../datasets/jinnan/jinnan_round1_train_20181227-1.csv', encoding = 'gb18030')
test  = pd.read_csv('../../datasets/jinnan/jinnan_round1_testA_20181227.csv', encoding = 'gb18030')

target_col = "收率"

# 删除异常值
print(train[train['收率'] < 0.87])

train = train[train['收率'] > 0.87]
train.loc[train['B14'] == 40, 'B14'] = 400
train = train[train['B14']>=400]

# 合并数据集, 顺便处理异常数据
target = train['收率']
train.loc[train['A25'] == '1900/3/10 0:00', 'A25'] = train['A25'].value_counts().values[0]
train['A25'] = train['A25'].astype(int)
train.loc[train['B14'] == 40, 'B14'] = 400
# test.loc[test['B14'] == 385, 'B14'] = 385

test_select = {}
for v in [280, 385, 390, 785]:
    print(v)
    print(test[test['B14'] == v]['样本id'])
    test_select[v] = test[test['B14'] == v]['样本id'].index
    print(test[test['B14'] == v]['样本id'].index)
    print(test_select[v])

del train['收率']
data = pd.concat([train,test],axis=0,ignore_index=True)
data = data.fillna(-1)

def timeTranSecond(t):
    try:
        t, m, s = t.split(":")
    except:
        if t == '1900/1/9 7:00':
            return 7 * 3600 / 3600
        elif t == '1900/1/1 2:30':
            return (2 * 3600 + 30 * 60) / 3600
        elif t == -1:
            return -1
        else:
            return 0

    try:
        tm = (int(t) * 3600 + int(m) * 60 + int(s)) / 3600
    except:
        return (30 * 60) / 3600

    return tm


for f in ['A5', 'A7', 'A9', 'A11', 'A14', 'A16', 'A24', 'A26', 'B5', 'B7']:
    try:
        data[f] = data[f].apply(timeTranSecond)
    except:
        print(f, '应该在前面被删除了！')


def getDuration(se):
    try:
        sh, sm, eh, em = re.findall(r"\d+\.?\d*", se)
    except:
        if se == -1:
            return -1

    try:
        if int(sh) > int(eh):
            tm = (int(eh) * 3600 + int(em) * 60 - int(sm) * 60 - int(sh) * 3600) / 3600 + 24
        else:
            tm = (int(eh) * 3600 + int(em) * 60 - int(sm) * 60 - int(sh) * 3600) / 3600
    except:
        if se == '19:-20:05':
            return 1
        elif se == '15:00-1600':
            return 1

    return tm


for f in ['A20', 'A28', 'B4', 'B9', 'B10', 'B11']:
    data[f] = data.apply(lambda df: getDuration(df[f]), axis=1)

data['样本id'] = data['样本id'].apply(lambda x: x.split('_')[1])
data['样本id'] = data['样本id'].astype(int)

# 基本数据处理完毕, 开始拼接数据
train = data[:train.shape[0]]
test  = data[train.shape[0]:]

train['target'] = list(target)

new_train = train.copy()
new_train = new_train.sort_values(['样本id'], ascending=True)
train_copy = train.copy()
train_copy = train_copy.sort_values(['样本id'], ascending=True)

# 把train加长两倍
train_len = len(new_train)
new_train = pd.concat([new_train, train_copy])

# 把加长两倍的train拼接到test后面
new_test = test.copy()
new_test = pd.concat([new_test, new_train])

import sys
# 开始向后做差
diff_train = pd.DataFrame()
ids = list(train_copy['样本id'].values)
print(ids)


from tqdm import tqdm
import os
# 构造新的训练集
if os.path.exists('./input/diff_train.csv'):
    diff_train = pd.read_csv('./input/diff_train.csv')
else:
    for i in tqdm(range(1, train_len)):
        # 分别间隔 -1, -2, ... -len行 进行差值,得到实验的所有对比实验
        diff_tmp = new_train.diff(-i)
        diff_tmp = diff_tmp[:train_len]
        diff_tmp.columns = [col_ + '_difference' for col_ in
                            diff_tmp.columns.values]
        # 求完差值后加上样本id
        diff_tmp['样本id'] = ids
        diff_train = pd.concat([diff_train, diff_tmp])

    # diff_train.to_csv('../input/diff_train.csv', index=False)

# 构造新的测试集
diff_test = pd.DataFrame()
ids_test = list(test['样本id'].values)
test_len = len(test)


if os.path.exists('./input/diff_test.csv'):
    diff_test = pd.read_csv('./input/diff_test.csv')
else:
    for i in tqdm(range(test_len, test_len+train_len)):
        # 分别间隔 - test_len , -test_len -1 ,.... - test_len - train_len +1 进行差值, 得到实验的所有对比实验
        diff_tmp = new_test.diff(-i)
        diff_tmp = diff_tmp[:test_len]
        diff_tmp.columns = [col_ + '_difference' for col_ in
                            diff_tmp.columns.values]
        # 求完差值后加上样本id
        diff_tmp['样本id'] = ids_test
        diff_test = pd.concat([diff_test, diff_tmp])

    diff_test = diff_test[diff_train.columns]
    # diff_test.to_csv('../input/diff_test.csv', index=False)


print(train.columns.values)
# 和train顺序一致的target
train_target = train['target']
train.drop(['target'], axis=1, inplace=True)
# 拼接原始特征
diff_train = pd.merge(diff_train, train, how='left', on='样本id')
diff_test = pd.merge(diff_test, test, how='left', on='样本id')

target = diff_train['target_difference']
diff_train.drop(['target_difference'], axis=1, inplace=True)
diff_test.drop(['target_difference'], axis=1, inplace=True)

X_train = diff_train
y_train = target
X_test = diff_test

print(X_train.columns.values)

param = {'num_leaves': 31, #31
         'min_data_in_leaf': 20,
         'objective': 'regression',
         'max_depth': -1,
         'learning_rate': 0.01,
         # "min_child_samples": 30,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "bagging_seed": 11,
         "metric": 'mse',
         "lambda_l2": 0.1,
         # "lambda_l1": 0.1,
         'num_thread': 4,
         "verbosity": -1}
groups = X_train['样本id'].values

folds = KFold(n_splits=5, shuffle=True, random_state=2018)
oof_lgb = np.zeros(len(diff_train))
predictions_lgb = np.zeros(len(diff_test))

feature_importance = pd.DataFrame()
feature_importance['feature_name'] = X_train.columns.values


for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
    print("fold n°{}".format(fold_ + 1))
    dev = X_train.iloc[trn_idx]
    val = X_train.iloc[val_idx]

    trn_data = lgb.Dataset(dev, y_train.iloc[trn_idx])
    val_data = lgb.Dataset(val, y_train.iloc[val_idx])

    num_round = 3000
    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data], verbose_eval=5,
                    early_stopping_rounds=100)
    oof_lgb[val_idx] = clf.predict(val, num_iteration=clf.best_iteration)

    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits

    importance = clf.feature_importance(importance_type='gain')
    feature_name = clf.feature_name()
    tmp_df = pd.DataFrame({'feature_name':feature_name, 'importance':importance})

    feature_importance = pd.merge(feature_importance, tmp_df, how='left',
                                  on='feature_name')
    print(len(feature_importance['feature_name']))

print(len(diff_train))
feature_importance.to_csv('./eda/feature_importance.csv', index=False)
# 还原train target
diff_train['compare_id'] = diff_train['样本id'] - diff_train['样本id_difference']
train['compare_id'] = train['样本id']
train['compare_target'] = list(train_target)
# 把做差的target拼接回去
diff_train = pd.merge(diff_train, train[['compare_id', 'compare_target']], how='left', on='compare_id')
print(diff_train.columns.values)
diff_train['pre_target_diff'] = oof_lgb
diff_train['pre_target'] = diff_train['pre_target_diff'] + diff_train['compare_target']

mean_result = diff_train.groupby('样本id')['pre_target'].mean().reset_index(name='pre_target_mean')
true_result = train[['样本id', 'compare_target']]
mean_result = pd.merge(mean_result, true_result, how='left', on='样本id')
print(mean_result)
print("CV score: {:<8.8f}".format(mean_squared_error(oof_lgb, target)))
logging.info("Lgb CV score: {:<8.8f}".format(mean_squared_error(oof_lgb, target)))

print("CV score: {:<8.8f}".format(mean_squared_error(mean_result['pre_target_mean'].values,  mean_result['compare_target'].values)))
logging.info("Lgb CV score: {:<8.8f}".format(mean_squared_error(mean_result['pre_target_mean'].values,  mean_result['compare_target'].values)))

# pre_target = mean_result['pre_target_mean'].values
# true_target = mean_result['']

# 还原test target
diff_test['compare_id'] = diff_test['样本id'] - diff_test['样本id_difference']
diff_test = pd.merge(diff_test, train[['compare_id', 'compare_target']], how='left', on='compare_id')
diff_test['pre_target_diff'] = predictions_lgb
diff_test['pre_target'] = diff_test['pre_target_diff'] + diff_test['compare_target']

mean_result_test = diff_test.groupby(diff_test['样本id'], sort=False)['pre_target'].mean().reset_index(name='pre_target_mean')
print(mean_result_test)
test = pd.merge(test, mean_result_test, how='left', on='样本id')
sub_df = pd.read_csv('./input/jinnan_round1_submit_20181227.csv', header=None)
sub_df[1] = test['pre_target_mean']
sub_df[1] = sub_df[1].apply(lambda x:round(x, 3))

for v in test_select.keys():
    if v == 280:
        x = 0.947
    elif v == 385 or v == 785:
        x = 0.879
    elif v == 390:
        x = 0.89

    print(v)
    print(test_select[v])
    # sub_df.iloc[test_select[v]][1] = x
    sub_df.loc[test_select[v], 1] = x

sub_df.to_csv('./output/jinnan_round_submit_diff.csv', index=False, header=False)

print(len(diff_train))

             样本id   A1     A2     A3   A4        A5    A6   A7  A8        A9  \
120    sample_842  300  125.0    NaN  980  14:30:00  24.0  NaN NaN  16:00:00   
138   sample_1001  300    NaN  405.0  700   1:00:00  21.0  NaN NaN   2:30:00   
248   sample_1040  200    NaN  270.0  470   9:30:00  21.0  NaN NaN  11:00:00   
322    sample_996  300    NaN  405.0  700   4:00:00  30.0  NaN NaN   5:00:00   
447    sample_994  300    NaN  405.0  700  10:40:00  28.0  NaN NaN  11:50:00   
484    sample_752  300    NaN  405.0  700  20:00:00  28.0  NaN NaN  22:00:00   
687    sample_748  300    NaN  405.0  700  15:00:00  30.0  NaN NaN  17:00:00   
762    sample_849  300  125.0    NaN  980   5:30:00  21.0  NaN NaN   7:00:00   
869    sample_293  300  125.0    NaN  980  15:00:00  21.0  NaN NaN  16:00:00   
956    sample_450  300    NaN  405.0  700  23:00:00  21.0  NaN NaN   0:30:00   
1012   sample_751  300    NaN  405.0  700   8:30:00  28.0  NaN NaN  10:30:00   
1195   sample_750  300    NaN  405.0  70

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1, 4, 8, 9, 11, 12, 13, 14, 16, 17, 21, 22, 24, 25, 26, 27, 29, 30, 33, 34, 36, 37, 39, 41, 42, 43, 44, 45, 47, 50, 51, 52, 57, 58, 60, 61, 63, 64, 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 79, 80, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 126, 127, 128, 129, 130, 131, 132, 133, 135, 137, 138, 139, 140, 141, 144, 147, 148, 150, 151, 152, 154, 156, 158, 159, 161, 163, 164, 165, 166, 167, 168, 169, 170, 173, 174, 175, 177, 179, 180, 181, 182, 184, 186, 187, 189, 190, 194, 195, 196, 198, 200, 202, 204, 206, 208, 209, 210, 212, 213, 214, 217, 219, 222, 223, 225, 226, 227, 229, 231, 232, 233, 235, 236, 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 251, 253, 256, 257, 258, 259, 262, 264, 266, 267, 268, 270, 273, 274, 275, 276, 278, 281, 282, 283, 284, 285, 286, 287, 288, 289, 291, 292, 294, 296, 297, 298, 301, 305, 306, 307, 308, 313, 314, 315, 317, 321, 322, 324

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1329/1329 [03:47<00:00,  3.45it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1330/1330 [01:21<00:00, 13.83it/s]


['样本id' 'A1' 'A2' 'A3' 'A4' 'A5' 'A6' 'A7' 'A8' 'A9' 'A10' 'A11' 'A12'
 'A13' 'A14' 'A15' 'A16' 'A17' 'A18' 'A19' 'A20' 'A21' 'A22' 'A23' 'A24'
 'A25' 'A26' 'A27' 'A28' 'B1' 'B2' 'B3' 'B4' 'B5' 'B6' 'B7' 'B8' 'B9'
 'B10' 'B11' 'B12' 'B13' 'B14' 'target']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['样本id_difference' 'A1_difference' 'A2_difference' 'A3_difference'
 'A4_difference' 'A5_difference' 'A6_difference' 'A7_difference'
 'A8_difference' 'A9_difference' 'A10_difference' 'A11_difference'
 'A12_difference' 'A13_difference' 'A14_difference' 'A15_difference'
 'A16_difference' 'A17_difference' 'A18_difference' 'A19_difference'
 'A20_difference' 'A21_difference' 'A22_difference' 'A23_difference'
 'A24_difference' 'A25_difference' 'A26_difference' 'A27_difference'
 'A28_difference' 'B1_difference' 'B2_difference' 'B3_difference'
 'B4_difference' 'B5_difference' 'B6_difference' 'B7_difference'
 'B8_difference' 'B9_difference' 'B10_difference' 'B11_difference'
 'B12_difference' 'B13_difference' 'B14_difference' '样本id' 'A1' 'A2' 'A3'
 'A4' 'A5' 'A6' 'A7' 'A8' 'A9' 'A10' 'A11' 'A12' 'A13' 'A14' 'A15' 'A16'
 'A17' 'A18' 'A19' 'A20' 'A21' 'A22' 'A23' 'A24' 'A25' 'A26' 'A27' 'A28'
 'B1' 'B2' 'B3' 'B4' 'B5' 'B6' 'B7' 'B8' 'B9' 'B10' 'B11' 'B12' 'B13'
 'B14']
fold n°1
Training until valid

[1305]	training's l2: 0.000201897	valid_1's l2: 0.00020354
[1310]	training's l2: 0.00020163	valid_1's l2: 0.000203273
[1315]	training's l2: 0.000201391	valid_1's l2: 0.000203036
[1320]	training's l2: 0.000201103	valid_1's l2: 0.000202746
[1325]	training's l2: 0.000200794	valid_1's l2: 0.000202436
[1330]	training's l2: 0.000200488	valid_1's l2: 0.000202129
[1335]	training's l2: 0.0002002	valid_1's l2: 0.000201839
[1340]	training's l2: 0.000199905	valid_1's l2: 0.000201543
[1345]	training's l2: 0.000199628	valid_1's l2: 0.00020126
[1350]	training's l2: 0.000199311	valid_1's l2: 0.000200946
[1355]	training's l2: 0.000199039	valid_1's l2: 0.000200673
[1360]	training's l2: 0.000198748	valid_1's l2: 0.000200378
[1365]	training's l2: 0.000198495	valid_1's l2: 0.000200124
[1370]	training's l2: 0.000198255	valid_1's l2: 0.000199884
[1375]	training's l2: 0.000197891	valid_1's l2: 0.00019952
[1380]	training's l2: 0.00019764	valid_1's l2: 0.000199268
[1385]	training's l2: 0.000197418	valid_1's l2:

[2670]	training's l2: 0.000155425	valid_1's l2: 0.000156841
[2675]	training's l2: 0.000155297	valid_1's l2: 0.000156713
[2680]	training's l2: 0.00015517	valid_1's l2: 0.000156585
[2685]	training's l2: 0.000155068	valid_1's l2: 0.000156484
[2690]	training's l2: 0.000154936	valid_1's l2: 0.000156352
[2695]	training's l2: 0.000154833	valid_1's l2: 0.000156245
[2700]	training's l2: 0.00015471	valid_1's l2: 0.000156124
[2705]	training's l2: 0.0001546	valid_1's l2: 0.000156014
[2710]	training's l2: 0.000154456	valid_1's l2: 0.000155869
[2715]	training's l2: 0.000154362	valid_1's l2: 0.000155772
[2720]	training's l2: 0.000154259	valid_1's l2: 0.000155671
[2725]	training's l2: 0.000154077	valid_1's l2: 0.000155485
[2730]	training's l2: 0.000153968	valid_1's l2: 0.000155375
[2735]	training's l2: 0.000153866	valid_1's l2: 0.000155276
[2740]	training's l2: 0.000153776	valid_1's l2: 0.000155187
[2745]	training's l2: 0.000153684	valid_1's l2: 0.000155096
[2750]	training's l2: 0.000153557	valid_1's 

[1040]	training's l2: 0.000217686	valid_1's l2: 0.000216593
[1045]	training's l2: 0.000217295	valid_1's l2: 0.000216204
[1050]	training's l2: 0.000216946	valid_1's l2: 0.000215855
[1055]	training's l2: 0.000216508	valid_1's l2: 0.00021542
[1060]	training's l2: 0.000216193	valid_1's l2: 0.00021511
[1065]	training's l2: 0.000215836	valid_1's l2: 0.000214759
[1070]	training's l2: 0.000215477	valid_1's l2: 0.000214409
[1075]	training's l2: 0.000215141	valid_1's l2: 0.000214074
[1080]	training's l2: 0.000214791	valid_1's l2: 0.00021373
[1085]	training's l2: 0.000214421	valid_1's l2: 0.000213364
[1090]	training's l2: 0.000214137	valid_1's l2: 0.000213086
[1095]	training's l2: 0.000213798	valid_1's l2: 0.000212754
[1100]	training's l2: 0.000213438	valid_1's l2: 0.0002124
[1105]	training's l2: 0.000213101	valid_1's l2: 0.00021207
[1110]	training's l2: 0.000212777	valid_1's l2: 0.000211752
[1115]	training's l2: 0.000212475	valid_1's l2: 0.000211461
[1120]	training's l2: 0.000212177	valid_1's l2

[2405]	training's l2: 0.000160905	valid_1's l2: 0.000160457
[2410]	training's l2: 0.000160808	valid_1's l2: 0.000160361
[2415]	training's l2: 0.000160674	valid_1's l2: 0.000160227
[2420]	training's l2: 0.000160531	valid_1's l2: 0.000160088
[2425]	training's l2: 0.000160391	valid_1's l2: 0.000159951
[2430]	training's l2: 0.000160244	valid_1's l2: 0.000159804
[2435]	training's l2: 0.000160101	valid_1's l2: 0.000159667
[2440]	training's l2: 0.00015998	valid_1's l2: 0.000159548
[2445]	training's l2: 0.000159837	valid_1's l2: 0.000159404
[2450]	training's l2: 0.000159652	valid_1's l2: 0.000159223
[2455]	training's l2: 0.000159548	valid_1's l2: 0.000159123
[2460]	training's l2: 0.000159423	valid_1's l2: 0.000159
[2465]	training's l2: 0.000159246	valid_1's l2: 0.000158827
[2470]	training's l2: 0.000159119	valid_1's l2: 0.000158702
[2475]	training's l2: 0.000159017	valid_1's l2: 0.000158603
[2480]	training's l2: 0.000158891	valid_1's l2: 0.000158479
[2485]	training's l2: 0.000158759	valid_1's 

[770]	training's l2: 0.000240467	valid_1's l2: 0.000240628
[775]	training's l2: 0.000239962	valid_1's l2: 0.00024013
[780]	training's l2: 0.000239466	valid_1's l2: 0.000239635
[785]	training's l2: 0.00023899	valid_1's l2: 0.000239162
[790]	training's l2: 0.000238489	valid_1's l2: 0.000238671
[795]	training's l2: 0.000237989	valid_1's l2: 0.00023816
[800]	training's l2: 0.000237498	valid_1's l2: 0.000237672
[805]	training's l2: 0.000237036	valid_1's l2: 0.000237213
[810]	training's l2: 0.000236575	valid_1's l2: 0.000236755
[815]	training's l2: 0.000236137	valid_1's l2: 0.000236316
[820]	training's l2: 0.000235644	valid_1's l2: 0.000235823
[825]	training's l2: 0.0002352	valid_1's l2: 0.000235389
[830]	training's l2: 0.000234723	valid_1's l2: 0.000234917
[835]	training's l2: 0.000234246	valid_1's l2: 0.000234435
[840]	training's l2: 0.000233794	valid_1's l2: 0.000233986
[845]	training's l2: 0.000233296	valid_1's l2: 0.000233493
[850]	training's l2: 0.000232851	valid_1's l2: 0.000233045
[8

[2140]	training's l2: 0.000169565	valid_1's l2: 0.00017007
[2145]	training's l2: 0.000169456	valid_1's l2: 0.000169964
[2150]	training's l2: 0.000169294	valid_1's l2: 0.000169808
[2155]	training's l2: 0.000169117	valid_1's l2: 0.000169636
[2160]	training's l2: 0.00016897	valid_1's l2: 0.000169493
[2165]	training's l2: 0.000168837	valid_1's l2: 0.000169361
[2170]	training's l2: 0.00016871	valid_1's l2: 0.000169237
[2175]	training's l2: 0.000168603	valid_1's l2: 0.000169131
[2180]	training's l2: 0.000168465	valid_1's l2: 0.000168995
[2185]	training's l2: 0.00016834	valid_1's l2: 0.000168872
[2190]	training's l2: 0.000168216	valid_1's l2: 0.000168748
[2195]	training's l2: 0.00016811	valid_1's l2: 0.000168643
[2200]	training's l2: 0.000167951	valid_1's l2: 0.00016849
[2205]	training's l2: 0.00016786	valid_1's l2: 0.000168399
[2210]	training's l2: 0.000167728	valid_1's l2: 0.000168269
[2215]	training's l2: 0.000167492	valid_1's l2: 0.000168037
[2220]	training's l2: 0.000167315	valid_1's l2:

[505]	training's l2: 0.000278065	valid_1's l2: 0.000278862
[510]	training's l2: 0.000277087	valid_1's l2: 0.000277892
[515]	training's l2: 0.000276125	valid_1's l2: 0.000276935
[520]	training's l2: 0.000275278	valid_1's l2: 0.000276085
[525]	training's l2: 0.000274454	valid_1's l2: 0.000275262
[530]	training's l2: 0.000273507	valid_1's l2: 0.000274304
[535]	training's l2: 0.00027249	valid_1's l2: 0.000273286
[540]	training's l2: 0.000271604	valid_1's l2: 0.000272402
[545]	training's l2: 0.000270651	valid_1's l2: 0.000271451
[550]	training's l2: 0.000269794	valid_1's l2: 0.000270595
[555]	training's l2: 0.000268916	valid_1's l2: 0.000269709
[560]	training's l2: 0.000268025	valid_1's l2: 0.000268816
[565]	training's l2: 0.000267186	valid_1's l2: 0.00026798
[570]	training's l2: 0.000266368	valid_1's l2: 0.000267163
[575]	training's l2: 0.000265563	valid_1's l2: 0.000266361
[580]	training's l2: 0.000264812	valid_1's l2: 0.000265609
[585]	training's l2: 0.000264089	valid_1's l2: 0.000264887

[1875]	training's l2: 0.00017722	valid_1's l2: 0.000178204
[1880]	training's l2: 0.000177069	valid_1's l2: 0.000178052
[1885]	training's l2: 0.000176879	valid_1's l2: 0.000177862
[1890]	training's l2: 0.000176703	valid_1's l2: 0.000177688
[1895]	training's l2: 0.000176528	valid_1's l2: 0.000177511
[1900]	training's l2: 0.000176372	valid_1's l2: 0.000177356
[1905]	training's l2: 0.000176251	valid_1's l2: 0.000177235
[1910]	training's l2: 0.000176059	valid_1's l2: 0.000177044
[1915]	training's l2: 0.000175906	valid_1's l2: 0.000176892
[1920]	training's l2: 0.000175749	valid_1's l2: 0.000176738
[1925]	training's l2: 0.000175569	valid_1's l2: 0.000176556
[1930]	training's l2: 0.000175365	valid_1's l2: 0.000176354
[1935]	training's l2: 0.000175182	valid_1's l2: 0.000176171
[1940]	training's l2: 0.000175021	valid_1's l2: 0.000176011
[1945]	training's l2: 0.000174896	valid_1's l2: 0.000175885
[1950]	training's l2: 0.000174761	valid_1's l2: 0.000175747
[1955]	training's l2: 0.000174599	valid_1

[235]	training's l2: 0.000375684	valid_1's l2: 0.000376682
[240]	training's l2: 0.00037226	valid_1's l2: 0.000373266
[245]	training's l2: 0.000369207	valid_1's l2: 0.000370239
[250]	training's l2: 0.000366304	valid_1's l2: 0.000367337
[255]	training's l2: 0.0003636	valid_1's l2: 0.000364632
[260]	training's l2: 0.000360709	valid_1's l2: 0.00036174
[265]	training's l2: 0.000358192	valid_1's l2: 0.000359219
[270]	training's l2: 0.000355231	valid_1's l2: 0.000356244
[275]	training's l2: 0.000352101	valid_1's l2: 0.0003531
[280]	training's l2: 0.000349667	valid_1's l2: 0.000350655
[285]	training's l2: 0.000347333	valid_1's l2: 0.000348308
[290]	training's l2: 0.000344784	valid_1's l2: 0.000345751
[295]	training's l2: 0.000342652	valid_1's l2: 0.000343605
[300]	training's l2: 0.000340132	valid_1's l2: 0.000341078
[305]	training's l2: 0.000337561	valid_1's l2: 0.00033851
[310]	training's l2: 0.00033518	valid_1's l2: 0.000336123
[315]	training's l2: 0.000332889	valid_1's l2: 0.000333835
[320]

[1615]	training's l2: 0.0001877	valid_1's l2: 0.000188356
[1620]	training's l2: 0.000187529	valid_1's l2: 0.000188188
[1625]	training's l2: 0.00018735	valid_1's l2: 0.000188011
[1630]	training's l2: 0.000187139	valid_1's l2: 0.000187801
[1635]	training's l2: 0.000186918	valid_1's l2: 0.000187579
[1640]	training's l2: 0.000186671	valid_1's l2: 0.000187335
[1645]	training's l2: 0.000186435	valid_1's l2: 0.000187102
[1650]	training's l2: 0.000186234	valid_1's l2: 0.000186901
[1655]	training's l2: 0.000186054	valid_1's l2: 0.000186718
[1660]	training's l2: 0.000185874	valid_1's l2: 0.000186538
[1665]	training's l2: 0.000185639	valid_1's l2: 0.000186304
[1670]	training's l2: 0.00018545	valid_1's l2: 0.000186117
[1675]	training's l2: 0.000185255	valid_1's l2: 0.000185925
[1680]	training's l2: 0.000185057	valid_1's l2: 0.000185729
[1685]	training's l2: 0.00018488	valid_1's l2: 0.000185553
[1690]	training's l2: 0.000184692	valid_1's l2: 0.000185367
[1695]	training's l2: 0.0001845	valid_1's l2:

[2980]	training's l2: 0.000148454	valid_1's l2: 0.000149399
[2985]	training's l2: 0.000148376	valid_1's l2: 0.000149321
[2990]	training's l2: 0.000148284	valid_1's l2: 0.000149229
[2995]	training's l2: 0.000148213	valid_1's l2: 0.000149159
[3000]	training's l2: 0.000148151	valid_1's l2: 0.000149099
Did not meet early stopping. Best iteration is:
[3000]	training's l2: 0.000148151	valid_1's l2: 0.000149099
86
1767570


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['样本id_difference' 'A1_difference' 'A2_difference' 'A3_difference'
 'A4_difference' 'A5_difference' 'A6_difference' 'A7_difference'
 'A8_difference' 'A9_difference' 'A10_difference' 'A11_difference'
 'A12_difference' 'A13_difference' 'A14_difference' 'A15_difference'
 'A16_difference' 'A17_difference' 'A18_difference' 'A19_difference'
 'A20_difference' 'A21_difference' 'A22_difference' 'A23_difference'
 'A24_difference' 'A25_difference' 'A26_difference' 'A27_difference'
 'A28_difference' 'B1_difference' 'B2_difference' 'B3_difference'
 'B4_difference' 'B5_difference' 'B6_difference' 'B7_difference'
 'B8_difference' 'B9_difference' 'B10_difference' 'B11_difference'
 'B12_difference' 'B13_difference' 'B14_difference' '样本id' 'A1' 'A2' 'A3'
 'A4' 'A5' 'A6' 'A7' 'A8' 'A9' 'A10' 'A11' 'A12' 'A13' 'A14' 'A15' 'A16'
 'A17' 'A18' 'A19' 'A20' 'A21' 'A22' 'A23' 'A24' 'A25' 'A26' 'A27' 'A28'
 'B1' 'B2' 'B3' 'B4' 'B5' 'B6' 'B7' 'B8' 'B9' 'B10' 'B11' 'B12' 'B13'
 'B14' 'compare_id' 'compare_target']

FileNotFoundError: [Errno 2] No such file or directory: './output/jinnan_round_submit_diff.csv'