In [1]:
import pandas as pd
import numpy as np
import os
import lightgbm
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error 
from borax.calendars.lunardate import LunarDate
import xgboost as xgb


In [1]:
def yang2nong(date):
    
    year = int(date[:4])
    month = int(date[4:6])
    day = int(date[6:8])
    
    nong =  LunarDate.from_solar_date(year, month, day)
    
    res = nong.__format__('%y-%m-%d')
    return res

def create_features(dataframe):
    dataframe['date'] = pd.to_datetime(dataframe['date'])
    dataframe['month'] = dataframe.date.dt.month
    dataframe['day_of_month'] = dataframe.date.dt.day
    dataframe['day_of_year'] = dataframe.date.dt.dayofyear
    dataframe['week_of_year'] = dataframe.date.dt.weekofyear
    dataframe['day_of_week'] = dataframe.date.dt.dayofweek + 1
    dataframe['year'] = dataframe.date.dt.year
    dataframe['is_wknd'] = dataframe.date.dt.weekday // 4
    dataframe['is_month_start'] = dataframe.date.dt.is_month_start.astype(int)
    dataframe['is_month_end'] = dataframe.date.dt.is_month_end.astype(int)
    dataframe['quarter'] = dataframe.date.dt.quarter
    dataframe['week_block_num'] = [int(x) for x in np.floor((dataframe.date - pd.to_datetime('2017-12-31')).dt.days / 7) + 1]
    dataframe['quarter_block_num'] = (dataframe['year'] - 2018) * 4 + dataframe['quarter']
    dataframe['week_of_month'] = dataframe['week_of_year'].values // 4.35

    #新增星期几时间变量
    dataframe['is_Mon'] = np.where(dataframe['day_of_week'] == 1, 1, 0)                                                                                       
    dataframe['is_Tue'] = np.where(dataframe['day_of_week'] == 2, 1, 0)                                                                                         
    dataframe['is_Wed'] = np.where(dataframe['day_of_week'] == 3, 1, 0)                                                                                         
    dataframe['is_Thu'] = np.where(dataframe['day_of_week'] == 4, 1, 0)                                                                                         
    dataframe['is_Fri'] = np.where(dataframe['day_of_week'] == 5, 1, 0)                                                                                         
    dataframe['is_Sat'] = np.where(dataframe['day_of_week'] == 6, 1, 0)                                                                                         
    dataframe['is_Sun'] = np.where(dataframe['day_of_week'] == 7, 1, 0)
    #新增每月上中下旬
    dataframe['day_of_month_10days']=np.where((dataframe['day_of_month']<=10) == 1, 1, 0)
    dataframe['day_of_month_20days']=np.where(((dataframe['day_of_month']>10)&(dataframe['day_of_month']<=20)) == 1, 1, 0)
    dataframe['day_of_month_30days']=np.where((dataframe['day_of_month']>20) == 1, 1, 0)
    dataframe['day_of_year_>180days']=np.where((dataframe['day_of_year']>180) == 1, 1, 0)
    dataframe['day_of_year_<180days']=np.where((dataframe['day_of_year']<=180) == 1, 1, 0)

    dataframe['abs_month']=dataframe['day_of_month'].apply(lambda x: abs(x-16)+1)
    
    
    dataframe['CH_month'] = dataframe.China_date.dt.month  #农历月
    dataframe['CH_day_of_month'] = dataframe.China_date.dt.day #农历日
    dataframe['CH_day_of_year'] = dataframe.China_date.dt.dayofyear #农历的哪一天
    dataframe['abs_year']=dataframe['CH_day_of_year'].apply(lambda x: abs(x-182)+1)    
    
    #dataframe.drop(['date','post_id'],axis=1,inplace=True)
    return dataframe

In [3]:
#数据读取与wkd表连接
train_df=pd.read_csv('../data/train.csv')
train_df.columns=['date','A','B']
train_df.date=pd.to_datetime(train_df.date)
test_day=pd.read_csv('../data/test.csv')#按天计算
wkd=pd.read_csv('../data/wkd_v1.csv')
wkd=wkd.rename(columns={'ORIG_DT':'date'})
wkd.date=pd.to_datetime(wkd.date)
train_df=train_df.merge(wkd,on='date',how='left')
test_day.columns = ['date','A','B']
test_day.date = pd.to_datetime(test_day.date)
test_day = test_day.merge(wkd,on='date',how='left')

#进行农历转换
test_day['date1'] = test_day.date.dt.strftime('%Y%m%d')
test_day['China_date']=test_day['date1'].apply(lambda x: yang2nong(x))
#农历2月不同与公历2月，
#因此采用前向填充方式以构建pandas时间戳，难题在与20年存在的闰四月现象    
test_day.replace(['2018-2-29','2018-2-30','2019-2-29',
            '2020-2-29','2020-2-30'],np.nan,inplace=True)
    
test_day['China_date']=test_day['China_date'].fillna(method='ffill')
test_day['China_date'] = pd.to_datetime(test_day['China_date'])

#进行农历转换
train_df['date1'] = train_df.date.dt.strftime('%Y%m%d')
train_df['China_date']=train_df['date1'].apply(lambda x: yang2nong(x))
#农历2月不同与公历2月，
#因此采用前向填充方式以构建pandas时间戳，难题在与20年存在的闰四月现象    
train_df.replace(['2018-2-29','2018-2-30','2019-2-29',
            '2020-2-29','2020-2-30'],np.nan,inplace=True)
    
train_df['China_date']=train_df['China_date'].fillna(method='ffill')
train_df['China_date'] = pd.to_datetime(train_df['China_date'])

In [5]:
train_day_df_A = train_df[['date','A','WKD_TYP_CD','China_date']]
train_day_df_B = train_df[['date','B','WKD_TYP_CD','China_date']]

train_day_df_A=create_features(train_day_df_A)
train_day_df_B=create_features(train_day_df_B)

train_day_df_A['A']=train_day_df_A['A']
train_day_df_B['B']=train_day_df_B['B']

test_day_df_A = test_day[['date','A','WKD_TYP_CD','China_date']]
test_day_df_B = test_day[['date','B','WKD_TYP_CD','China_date']]
test_day_df_A=create_features(test_day_df_A)
test_day_df_B=create_features(test_day_df_B)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pa

In [31]:
def my_mape(real_value, pre_value): 
    real_value, pre_value = np.array(real_value), np.array(pre_value)
    return np.mean(np.abs((real_value - pre_value) /( real_value+1)))
def eval_score(pre, train_set):
    real = train_set.get_label()
    score = my_mape(real, pre)
    return 'eval_score', score
def xgb_model2(train_x, train_y, test_x):
    predictors = list(train_x.columns)
    train_x = train_x.values
    test_x = test_x.values
    folds = 10
    seed = 2021
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    train = np.zeros((train_x.shape[0]))
    test = np.zeros((test_x.shape[0]))
    test_pre = np.zeros((folds, test_x.shape[0]))
    total_prevalue_test = np.zeros((folds, test_x.shape[0]))
    cv_scores = []
    cv_rounds = []

    for i, (train_index, test_index) in enumerate(kf.split(train_x, train_y)):
        print("Fold", i)
        X = train_x[train_index]
        Y = train_y[train_index]
        fol_x = train_x[test_index]
        fol_y = train_y[test_index]
        train_matrix = xgb.DMatrix(X, label=Y)
        test_matrix = xgb.DMatrix(fol_x, label=fol_y)
        evals = [(train_matrix, 'train'), (test_matrix, 'val')]
        params = {
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'min_child_weight':10,
            'max_depth': 10,
            'colsample_bylevel':0.8,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'learning_rate': 0.1,
            'seed': 2021,
            'nthread': 8,
        }
        num_round = 4000
        early_stopping_rounds = 200
        if test_matrix:
            model = xgb.train(params, train_matrix, num_round, evals=evals, verbose_eval=200,feval=eval_score,
                              early_stopping_rounds=early_stopping_rounds
                              )

            pre = model.predict(xgb.DMatrix(fol_x),ntree_limit = model.best_iteration)
            pred = model.predict(xgb.DMatrix(test_x),ntree_limit = model.best_iteration)
            train[test_index] = pre
            test_pre[i, :] = pred
            cv_scores.append(mean_squared_error (fol_y, pre))
            cv_rounds.append(model.best_iteration)
            total_prevalue_test[i, :] = pred
            
    val_pre = model.predict(xgb.DMatrix(train_x[-30:]))
    print(f"A厂的验证集mape为{mape(val_pre, train_y[-30:])}")
    
    print("error_score is:", cv_scores)
    test[:] = test_pre.mean(axis=0)
    #-----------------------------------------
    print("val_mean:" , np.mean(cv_scores))
    print("val_std:", np.std(cv_scores))
    return model,train, test, total_prevalue_test, np.mean(cv_scores)

def mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / (y_true))) 

In [32]:
train_day_df_A.columns

Index(['date', 'A', 'WKD_TYP_CD', 'China_date', 'month', 'day_of_month',
       'day_of_year', 'week_of_year', 'day_of_week', 'year', 'is_wknd',
       'is_month_start', 'is_month_end', 'quarter', 'week_block_num',
       'quarter_block_num', 'week_of_month', 'is_Mon', 'is_Tue', 'is_Wed',
       'is_Thu', 'is_Fri', 'is_Sat', 'is_Sun', 'day_of_month_10days',
       'day_of_month_20days', 'day_of_month_30days', 'day_of_year_>180days',
       'day_of_year_<180days', 'month_WKD_TYP_CD', 'month_length',
       'week_WKD_TYP_CD', 'm10days_WKD_TYP_CD', 'm20days_WKD_TYP_CD',
       'm30days_WKD_TYP_CD', 'abs_month', 'CH_month', 'CH_day_of_month',
       'CH_day_of_year', 'abs_year'],
      dtype='object')

In [34]:
if __name__=="__main__":
    #-----------------------树模型-----------------------
    feature=['day_of_year', 'day_of_month', 'week_of_year',
            'week_of_year', 'week_block_num', 'CH_day_of_month',
            'CH_day_of_year', 'abs_year',]
    #-------筛选数据月份---------
    month_num=3
    #----------------------------
    train_input=train_day_df_A#训练集
    train_input=train_input[(train_input['date']>='2019-07-01')].reset_index(drop=True)
    test_input=test_day_df_A#测试集
    train_x = train_input[feature].copy()
    train_y = train_input['A']
    test_x = test_input[feature].copy()
    print('特征维度A：',train_x.shape)
    #model,xgb_train, xgb_test, ol, cv_scores = xgb_model2(train_x, train_y, test_x)
    model,xgb_train, xgb_test, ol, cv_scores = xgb_model2(train_x, train_y, test_x)
    xgb_test_A=[i if i>0 else 0 for i in xgb_test]




    train_input=train_day_df_B#训练集
    train_input=train_input[(train_input['date']>='2019-06-01') ].reset_index(drop=True)
    test_input=test_day_df_B#测试集
    train_x = train_input[feature].copy()
    train_y = train_input['B']
    test_x = test_input[feature].copy()
    print('特征维度B：',train_x.shape)
    model,xgb_train, xgb_test, ol, cv_scores = xgb_model2(train_x, train_y, test_x)
    xgb_test_B=[i if i>0 else 0 for i in xgb_test]

    #
    #------------------拼接文件------------------
    pre_period=[]
    pre_hour_A=xgb_test_A
    pre_hour_B=xgb_test_B


特征维度A： (489, 8)
Fold 0
[0]	train-rmse:203734.15625	train-eval_score:0.89868	val-rmse:206782.93750	val-eval_score:0.89876
[200]	train-rmse:3619.38013	train-eval_score:0.01267	val-rmse:13582.57617	val-eval_score:0.04145
[400]	train-rmse:2041.80798	train-eval_score:0.00716	val-rmse:13560.96484	val-eval_score:0.04040
[509]	train-rmse:1568.26355	train-eval_score:0.00555	val-rmse:13575.98731	val-eval_score:0.04027
Fold 1
[0]	train-rmse:203852.01562	train-eval_score:0.89861	val-rmse:205201.37500	val-eval_score:0.89880




[200]	train-rmse:3831.87524	train-eval_score:0.01336	val-rmse:12387.78711	val-eval_score:0.04288
[335]	train-rmse:2541.49268	train-eval_score:0.00890	val-rmse:12256.57324	val-eval_score:0.04250
Fold 2
[0]	train-rmse:204092.92188	train-eval_score:0.89874	val-rmse:203367.00000	val-eval_score:0.89748




[200]	train-rmse:3834.43799	train-eval_score:0.01332	val-rmse:8401.84766	val-eval_score:0.03030
[400]	train-rmse:2150.81177	train-eval_score:0.00746	val-rmse:8003.97900	val-eval_score:0.02907
[600]	train-rmse:1371.20850	train-eval_score:0.00482	val-rmse:7965.81055	val-eval_score:0.02921
[692]	train-rmse:1131.75366	train-eval_score:0.00399	val-rmse:8011.36133	val-eval_score:0.02927
Fold 3
[0]	train-rmse:204113.10938	train-eval_score:0.89908	val-rmse:202943.37500	val-eval_score:0.89816




[200]	train-rmse:3768.34082	train-eval_score:0.01312	val-rmse:11109.76172	val-eval_score:0.03489
[400]	train-rmse:2111.01147	train-eval_score:0.00751	val-rmse:11384.75293	val-eval_score:0.03458
[600]	train-rmse:1337.99280	train-eval_score:0.00483	val-rmse:11556.85840	val-eval_score:0.03529
[609]	train-rmse:1312.53149	train-eval_score:0.00474	val-rmse:11567.55078	val-eval_score:0.03527
Fold 4
[0]	train-rmse:204619.40625	train-eval_score:0.89866	val-rmse:198138.39062	val-eval_score:0.89747




[200]	train-rmse:3720.15454	train-eval_score:0.01315	val-rmse:10440.31055	val-eval_score:0.03423
[381]	train-rmse:2140.20947	train-eval_score:0.00758	val-rmse:10580.64648	val-eval_score:0.03515
Fold 5
[0]	train-rmse:203403.84375	train-eval_score:0.89857	val-rmse:210043.54688	val-eval_score:0.90135




[200]	train-rmse:3747.14893	train-eval_score:0.01331	val-rmse:12109.47656	val-eval_score:0.04533
[400]	train-rmse:2093.92944	train-eval_score:0.00757	val-rmse:11902.20996	val-eval_score:0.04376
[600]	train-rmse:1370.89380	train-eval_score:0.00501	val-rmse:11869.74414	val-eval_score:0.04321
[800]	train-rmse:938.68854	train-eval_score:0.00345	val-rmse:11819.48926	val-eval_score:0.04287
[1000]	train-rmse:666.24213	train-eval_score:0.00246	val-rmse:11780.93848	val-eval_score:0.04269
[1200]	train-rmse:477.83301	train-eval_score:0.00179	val-rmse:11780.74512	val-eval_score:0.04261
[1395]	train-rmse:351.10223	train-eval_score:0.00131	val-rmse:11801.47949	val-eval_score:0.04267
Fold 6
[0]	train-rmse:204160.01562	train-eval_score:0.89867	val-rmse:202405.20312	val-eval_score:0.89810




[200]	train-rmse:3763.95215	train-eval_score:0.01304	val-rmse:10607.45898	val-eval_score:0.03602
[400]	train-rmse:2073.89282	train-eval_score:0.00735	val-rmse:10669.75195	val-eval_score:0.03548
[600]	train-rmse:1341.49206	train-eval_score:0.00482	val-rmse:10740.30566	val-eval_score:0.03544
[679]	train-rmse:1145.33460	train-eval_score:0.00414	val-rmse:10796.63281	val-eval_score:0.03570
Fold 7
[0]	train-rmse:204267.42188	train-eval_score:0.89846	val-rmse:201224.15625	val-eval_score:0.89749




[200]	train-rmse:3725.09937	train-eval_score:0.01293	val-rmse:11277.42969	val-eval_score:0.04093
[335]	train-rmse:2490.71606	train-eval_score:0.00876	val-rmse:11178.09277	val-eval_score:0.04111
Fold 8
[0]	train-rmse:203882.45312	train-eval_score:0.89886	val-rmse:205442.37500	val-eval_score:0.89856




[200]	train-rmse:3814.46777	train-eval_score:0.01324	val-rmse:9915.38086	val-eval_score:0.03401
[400]	train-rmse:2191.24829	train-eval_score:0.00775	val-rmse:10012.91797	val-eval_score:0.03444
[455]	train-rmse:1939.63818	train-eval_score:0.00690	val-rmse:9986.69629	val-eval_score:0.03429
Fold 9
[0]	train-rmse:203928.73438	train-eval_score:0.89834	val-rmse:204031.18750	val-eval_score:0.89751




[200]	train-rmse:3816.46436	train-eval_score:0.01304	val-rmse:10203.63086	val-eval_score:0.03937
[400]	train-rmse:2107.81006	train-eval_score:0.00730	val-rmse:9907.56934	val-eval_score:0.03827
[600]	train-rmse:1324.91419	train-eval_score:0.00470	val-rmse:9758.48535	val-eval_score:0.03757
[800]	train-rmse:878.24890	train-eval_score:0.00318	val-rmse:9793.95508	val-eval_score:0.03757
[921]	train-rmse:702.96668	train-eval_score:0.00255	val-rmse:9823.77539	val-eval_score:0.03770
A厂的验证集mape为0.008533877773645235
error_score is: [184095659.23677656, 147418552.14947385, 62813950.4684822, 129199789.31174766, 108431327.41506398, 138766666.37423396, 113880654.95063376, 125530480.7482711, 97821097.68815768, 95492101.07125854]
val_mean: 120345027.94140992
val_std: 31401139.8639408
特征维度B： (519, 8)
Fold 0
[0]	train-rmse:186954.89062	train-eval_score:0.89919	val-rmse:181943.29688	val-eval_score:0.89802




[200]	train-rmse:2698.83716	train-eval_score:0.01000	val-rmse:10100.61621	val-eval_score:0.03225
[400]	train-rmse:1394.92346	train-eval_score:0.00547	val-rmse:10123.62207	val-eval_score:0.03170
[526]	train-rmse:1002.79718	train-eval_score:0.00403	val-rmse:10155.75977	val-eval_score:0.03150
Fold 1
[0]	train-rmse:185903.35938	train-eval_score:0.89946	val-rmse:191660.59375	val-eval_score:0.90116




[200]	train-rmse:2536.68237	train-eval_score:0.00905	val-rmse:11737.42090	val-eval_score:0.03879
[400]	train-rmse:1294.93982	train-eval_score:0.00503	val-rmse:11187.39356	val-eval_score:0.03606
[600]	train-rmse:775.36554	train-eval_score:0.00313	val-rmse:11006.73438	val-eval_score:0.03500
[800]	train-rmse:506.47592	train-eval_score:0.00212	val-rmse:10931.25586	val-eval_score:0.03478
[1000]	train-rmse:343.39182	train-eval_score:0.00144	val-rmse:10904.73926	val-eval_score:0.03471
[1078]	train-rmse:296.59699	train-eval_score:0.00126	val-rmse:10900.94043	val-eval_score:0.03471
Fold 2
[0]	train-rmse:185954.73438	train-eval_score:0.89959	val-rmse:190977.68750	val-eval_score:0.90074




[200]	train-rmse:2642.89014	train-eval_score:0.00967	val-rmse:5160.33838	val-eval_score:0.02000
[400]	train-rmse:1378.57117	train-eval_score:0.00526	val-rmse:5211.40723	val-eval_score:0.01986
[578]	train-rmse:875.93787	train-eval_score:0.00344	val-rmse:5296.90137	val-eval_score:0.02017
Fold 3
[0]	train-rmse:186051.20312	train-eval_score:0.89931	val-rmse:190063.37500	val-eval_score:0.90004




[200]	train-rmse:2519.51294	train-eval_score:0.00929	val-rmse:7820.83594	val-eval_score:0.02807
[400]	train-rmse:1284.24463	train-eval_score:0.00507	val-rmse:8051.57373	val-eval_score:0.02806
[600]	train-rmse:759.69037	train-eval_score:0.00316	val-rmse:8061.21582	val-eval_score:0.02779
[691]	train-rmse:618.79327	train-eval_score:0.00262	val-rmse:8064.85986	val-eval_score:0.02779
Fold 4
[0]	train-rmse:187425.59375	train-eval_score:0.89972	val-rmse:177323.56250	val-eval_score:0.89611




[200]	train-rmse:2729.21704	train-eval_score:0.00985	val-rmse:6133.07129	val-eval_score:0.02549
[400]	train-rmse:1410.68848	train-eval_score:0.00541	val-rmse:5770.90430	val-eval_score:0.02364
[600]	train-rmse:845.47455	train-eval_score:0.00333	val-rmse:5656.52930	val-eval_score:0.02314
[800]	train-rmse:546.92328	train-eval_score:0.00219	val-rmse:5619.10596	val-eval_score:0.02283
[1000]	train-rmse:365.20410	train-eval_score:0.00150	val-rmse:5602.24121	val-eval_score:0.02277
[1200]	train-rmse:253.56630	train-eval_score:0.00104	val-rmse:5589.68652	val-eval_score:0.02266
[1400]	train-rmse:177.88223	train-eval_score:0.00072	val-rmse:5566.87939	val-eval_score:0.02257
[1600]	train-rmse:127.66305	train-eval_score:0.00052	val-rmse:5562.77441	val-eval_score:0.02257
[1687]	train-rmse:111.21701	train-eval_score:0.00045	val-rmse:5565.08936	val-eval_score:0.02259
Fold 5
[0]	train-rmse:187058.37500	train-eval_score:0.89926	val-rmse:180560.28125	val-eval_score:0.89904




[200]	train-rmse:2627.93604	train-eval_score:0.00974	val-rmse:5647.12305	val-eval_score:0.01962
[400]	train-rmse:1334.87317	train-eval_score:0.00522	val-rmse:5454.14307	val-eval_score:0.01816
[567]	train-rmse:853.80292	train-eval_score:0.00341	val-rmse:5400.87207	val-eval_score:0.01862
Fold 6
[0]	train-rmse:186565.46875	train-eval_score:0.89928	val-rmse:184246.76562	val-eval_score:0.89696




[200]	train-rmse:2524.94336	train-eval_score:0.00915	val-rmse:9183.11231	val-eval_score:0.03121
[400]	train-rmse:1357.09436	train-eval_score:0.00520	val-rmse:8832.81445	val-eval_score:0.02991
[600]	train-rmse:810.78345	train-eval_score:0.00323	val-rmse:8647.44043	val-eval_score:0.02923
[800]	train-rmse:528.73724	train-eval_score:0.00216	val-rmse:8563.47754	val-eval_score:0.02902
[1000]	train-rmse:355.00967	train-eval_score:0.00147	val-rmse:8535.03320	val-eval_score:0.02894
[1200]	train-rmse:247.73035	train-eval_score:0.00103	val-rmse:8508.23047	val-eval_score:0.02883
[1400]	train-rmse:176.78198	train-eval_score:0.00075	val-rmse:8494.35938	val-eval_score:0.02881
[1600]	train-rmse:126.48299	train-eval_score:0.00053	val-rmse:8484.50293	val-eval_score:0.02878
[1800]	train-rmse:91.12573	train-eval_score:0.00038	val-rmse:8482.68555	val-eval_score:0.02879
[1824]	train-rmse:87.76920	train-eval_score:0.00037	val-rmse:8480.04199	val-eval_score:0.02878
Fold 7
[0]	train-rmse:186224.15625	train-eva



[200]	train-rmse:2709.80127	train-eval_score:0.00987	val-rmse:7606.75879	val-eval_score:0.02346
[400]	train-rmse:1355.98059	train-eval_score:0.00535	val-rmse:7000.34180	val-eval_score:0.02193
[600]	train-rmse:820.75745	train-eval_score:0.00338	val-rmse:6910.53369	val-eval_score:0.02186
[786]	train-rmse:542.10529	train-eval_score:0.00230	val-rmse:6897.01416	val-eval_score:0.02193
Fold 8
[0]	train-rmse:186369.42188	train-eval_score:0.89977	val-rmse:187320.32812	val-eval_score:0.89867




[200]	train-rmse:2527.95264	train-eval_score:0.00927	val-rmse:10525.94043	val-eval_score:0.02848
[400]	train-rmse:1304.11023	train-eval_score:0.00505	val-rmse:10317.87207	val-eval_score:0.02828
[480]	train-rmse:1050.57654	train-eval_score:0.00413	val-rmse:10314.51465	val-eval_score:0.02832
Fold 9
[0]	train-rmse:185972.85938	train-eval_score:0.89958	val-rmse:190685.87500	val-eval_score:0.89989




[200]	train-rmse:2527.45459	train-eval_score:0.00928	val-rmse:6374.47607	val-eval_score:0.02301
[400]	train-rmse:1279.19568	train-eval_score:0.00504	val-rmse:6367.82178	val-eval_score:0.02180
[600]	train-rmse:773.60675	train-eval_score:0.00313	val-rmse:6448.36475	val-eval_score:0.02202
[675]	train-rmse:658.31433	train-eval_score:0.00267	val-rmse:6469.88281	val-eval_score:0.02214
A厂的验证集mape为0.004919269247579271
error_score is: [102224891.28655185, 118854778.68600228, 26770257.71840844, 65084974.40166649, 30902313.915728055, 29552442.37129446, 71925437.45011666, 47800408.018996015, 108634680.92295016, 40989282.95833453]
val_mean: 64273946.773004904
val_std: 33144754.788244374




In [35]:
test_v11 = pd.read_csv('../data/test.csv')
test_v11['A厂'] = xgb_test_A
test_v11['B厂'] = xgb_test_B
test_v11['A厂']=test_v11['A厂'].astype(int)
test_v11['B厂']=test_v11['B厂'].astype(int)
test_v11

Unnamed: 0,日期,A厂,B厂
0,2020/11/01,263459,230627
1,2020/11/02,265694,231646
2,2020/11/03,265895,233853
3,2020/11/04,261536,238313
4,2020/11/05,259148,239645
...,...,...,...
146,2021/03/27,231128,156483
147,2021/03/28,229129,154746
148,2021/03/29,228212,155870
149,2021/03/30,228051,155829


In [36]:
test_v11.to_csv('./data/xgb_fold_play.csv',index=False)

In [50]:
df1 = pd.read_csv('./data/xgb_fold_play___guonian_999.csv')
df1.columns=['date','A','B']
df1

Unnamed: 0,date,A,B
0,2020/11/1,272186.000,236361.000
1,2020/11/2,266976.000,243226.000
2,2020/11/3,268541.000,240503.000
3,2020/11/4,266992.000,236450.000
4,2020/11/5,267706.000,238724.000
...,...,...,...
146,2021/3/27,275920.803,259501.239
147,2021/3/28,274450.275,261258.480
148,2021/3/29,280419.300,263156.580
149,2021/3/30,280221.498,263087.649


In [51]:
test_v11 = pd.read_csv('../data/test.csv')
test_v11['A厂'] = df1.A.astype(int)
test_v11['B厂'] = df1.B.astype(int)
test_v11

Unnamed: 0,日期,A厂,B厂
0,2020/11/01,272186,236361
1,2020/11/02,266976,243226
2,2020/11/03,268541,240503
3,2020/11/04,266992,236450
4,2020/11/05,267706,238724
...,...,...,...
146,2021/03/27,275920,259501
147,2021/03/28,274450,261258
148,2021/03/29,280419,263156
149,2021/03/30,280221,263087


In [52]:
test_v11.to_csv('./data/xgb_fold_play___guonian_999.csv',index=False)

In [40]:
df1 = pd.read_csv('./data/xgb_fold_play___.csv')
df1.columns=['date','A','B']

df2 = pd.read_csv('./data/new_prophet_A_B_.csv')
df2.columns=['date','A','B']

In [41]:
test_v11 = pd.read_csv('../data/test.csv')
test_v11['A厂'] = ((df1['A']+df2['A'])/2).astype(int)
test_v11['B厂'] = ((df1['B']+df2['B'])/2).astype(int)

test_v11

Unnamed: 0,日期,A厂,B厂
0,2020/11/01,272186,236361
1,2020/11/02,266976,243226
2,2020/11/03,268541,240503
3,2020/11/04,266992,236450
4,2020/11/05,267706,238724
...,...,...,...
146,2021/03/27,276335,259488
147,2021/03/28,274520,261528
148,2021/03/29,280436,263557
149,2021/03/30,280506,263447


In [42]:
test_v11.to_csv('./data/935mean_best.csv',index=False,encoding = 'utf-8')

In [None]:
A厂的验证集mape为0.008533877773645235
A厂的验证集mape为0.004919269247579271
        params = {
            'booster': 'gbtree',
            'objective': 'reg:squarederror',
            'min_child_weight':10,
            'max_depth': 10,
            'colsample_bylevel':0.8,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'learning_rate': 0.1,
            'seed': 2021,
            'nthread': 8,
        }