In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from icecream import ic
from sklearn.preprocessing import LabelEncoder
import time
from itertools import product
from icecream import ic


plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

sales_train = pd.read_csv('./data/sales_train.csv')
test = pd.read_csv('./data/test.csv')   # (214200, 3)

# 计算每个商品每个月的销售量，假如某个商品在某个月没有数据，则填充0（即这个月的销售量为0）
sales_by_item_id = sales_train.pivot_table(index=['item_id'], values=['item_cnt_day'], columns='date_block_num', aggfunc=np.sum, fill_value=0).reset_index()
sales_by_item_id.columns = sales_by_item_id.columns.droplevel().map(str)
sales_by_item_id.columns.values[0] = 'item_id'
sales_by_item_id = sales_by_item_id.rename_axis(None, axis=1)

# 获取最近6个月销售量为0的数据
# six_zero = sales_by_item_id[(sales_by_item_id['28'] == 0) & (sales_by_item_id['29'] == 0) & (sales_by_item_id['30'] == 0) & (sales_by_item_id['31'] == 0) & (sales_by_item_id['32'] == 0) & (sales_by_item_id['33'] == 0)]
# six_zero_item_id = list(six_zero['item_id'].values)   # item_id列表
# test.loc[test.item_id.isin(six_zero_item_id), 'item_cnt_month'] = 0  # 将test数据中（最近六个月销量为0）的数据月销量设为0，有7812个

# 计算每个商店每个月的销量
sales_by_shop_id = sales_train.pivot_table(index=['shop_id'], values=['item_cnt_day'], aggfunc=np.sum, fill_value=0, columns='date_block_num').reset_index()
sales_by_shop_id.columns = sales_by_shop_id.columns.droplevel().map(str)    # 将两层column转化为一层column,保留下层column
sales_by_shop_id.columns.values[0] = 'shop_id'
sales_by_shop_id = sales_by_shop_id.rename_axis(None, axis=1)   # 将列方向的轴重命名为none

# zero = sales_train[sales_train.date_block_num==0]
# ic(zero.shop_id.unique(), len(zero.item_id.unique()), len(zero.shop_id.unique()), len(zero.shop_id.unique()) * len(zero.item_id.unique()))
# ic(sales_train.shop_id.unique(), len(sales_train.item_id.unique()), len(sales_train.shop_id.unique()), len(sales_train.shop_id.unique()) * len(sales_train.item_id.unique()))

"""组合date_block_num,shop_id,item_id(部分) 总量：10913850"""
matrix = []
cols = ['date_block_num','shop_id','item_id']
for i in range(34):
    sales = sales_train[sales_train.date_block_num==i]
    matrix.append(np.array(list(product([i], sales.shop_id.unique(), sales.item_id.unique())), dtype='int16'))
matrix = pd.DataFrame(np.vstack(matrix), columns=cols)
matrix['date_block_num'] = matrix['date_block_num'].astype(np.int8)
matrix['shop_id'] = matrix['shop_id'].astype(np.int8)
matrix.sort_values(cols, inplace=True)  # 排序
sales_train['revenue'] = sales_train['item_price'] * sales_train['item_cnt_day']    # 某一天的销售额

# 分组
groupby = sales_train.groupby(['shop_id','item_id','date_block_num']).agg({'item_cnt_day': 'sum'}).reset_index()
groupby = groupby.rename(columns={'item_cnt_day': 'item_cnt_month'})
matrix = matrix.merge(groupby, on=['date_block_num','shop_id','item_id'], how='left')
matrix['item_cnt_month'] = matrix['item_cnt_month'].fillna(0).clip(0, 20)
matrix['item_cnt_month'] = matrix['item_cnt_month'].astype(np.float16)

# test数据
test['date_block_num'] = 34
test['date_block_num'] = test['date_block_num'].astype(np.int8)
test['shop_id'] = test['shop_id'].astype(np.int8)
test['item_id'] = test['item_id'].astype(np.int16)

# 合并matrix,test
matrix = pd.concat([matrix, test[cols]], ignore_index=True, axis=0)
matrix['item_cnt_month'].fillna(0, inplace=True)

# 商品信息
items = pd.read_csv('./data/items.csv')
items = items[['item_id', 'item_category_id']]
matrix = pd.merge(left=matrix, right=items, on='item_id', how='left')  # 合并

# 商品类别
le = LabelEncoder()
categories = pd.read_csv('./data/item_categories.csv')
categories['split'] = categories['item_category_name'].str.split('-')
categories['type'] = categories['split'].map(lambda x:x[0].strip())
categories['subtype'] = categories['split'].map(lambda x:x[1].strip() if len(x)>1 else x[0].strip())
categories = categories[['item_category_id','type','subtype']]
categories['cat_type_code'] = le.fit_transform(categories['type'])
categories['cat_subtype_code'] = le.fit_transform(categories['subtype'])
matrix = pd.merge(left=matrix, right=categories[['item_category_id','cat_type_code','cat_subtype_code']], on='item_category_id', how='left')    # 合并

# 商店信息
shops = pd.read_csv('./data/shops.csv')
shops['split']=shops.shop_name.str.split(' ')
shops['shop_city'] = shops['split'].map(lambda x:x[0])
shops['shop_city_code'] = le.fit_transform(shops['shop_city'])

def st(name):
    ic(name)
    if 'ТЦ' in name or 'ТРЦ' in name:
        shopt = 'ТЦ'
    elif 'ТК' in name:
        shopt = 'ТК'
    elif 'ТРК' in name:
        shopt = 'ТРК'
    elif 'МТРЦ' in name:
        shopt = 'МТРЦ'
    else:
        shopt = 'UNKNOWN'
    return shopt
shops['shop_type'] = shops['shop_name'].apply(st)

shops.loc[shops.shop_id == 21, 'shop_type'] = 'МТРЦ'   # 修正
shops['shop_type_code'] = le.fit_transform(shops['shop_type'])
matrix = pd.merge(left=matrix, right=shops[['shop_id','shop_city_code','shop_type_code']], on='shop_id', how='left')    # 合并
matrix['item_category_id'] = matrix['item_category_id'].astype(np.int8)
matrix['cat_type_code'] = matrix['cat_type_code'].astype(np.int8)
matrix['cat_subtype_code'] = matrix['cat_subtype_code'].astype(np.int8)
matrix['shop_city_code'] = matrix['shop_city_code'].astype(np.int8)
matrix['shop_type_code'] = matrix['shop_type_code'].astype(np.int8)


"""历史信息"""

def lag_features(df, lags, col):
    tmp = df[['date_block_num','shop_id','item_id',col]]
    for i in lags:
        shifted = tmp.copy()
        shifted.columns = ['date_block_num','shop_id','item_id',col+'_lag_'+str(i)]
        shifted['date_block_num'] = shifted['date_block_num'] + i
        df = pd.merge(left=df, right=shifted, on=['date_block_num','shop_id','item_id'], how='left')
    return df

# 月销量（所有商品）
group = matrix.groupby('date_block_num').agg({'item_cnt_month': 'mean'}).reset_index()
group.columns = ['date_block_num', 'date_avg_item_cnt']
matrix = pd.merge(left=matrix, right=group, on='date_block_num', how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_avg_item_cnt')
matrix.drop('date_avg_item_cnt', axis=1, inplace=True)

# 月销量（每一件商品）
group = matrix.groupby(['date_block_num', 'item_id']).agg({'item_cnt_month': ['mean']})
group.columns = [ 'date_item_avg_item_cnt' ]
group.reset_index(inplace=True)
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','item_id'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_item_avg_item_cnt')
matrix.drop('date_item_avg_item_cnt', axis=1, inplace=True)

# 月销量（每个商店 ）
group = matrix.groupby(['date_block_num','shop_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_shop_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','shop_id'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_shop_avg_item_cnt')
matrix.drop('date_shop_avg_item_cnt', axis=1, inplace=True)

# 月销量（每个类别）
group = matrix.groupby(['date_block_num','item_category_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_cat_avg_item_cnt']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num','item_category_id'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_cat_avg_item_cnt')
matrix.drop('date_cat_avg_item_cnt', axis=1, inplace=True)

# 月销量（商品类别-商店）
group = matrix.groupby(['date_block_num','item_category_id','shop_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_cat_shop_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','item_category_id','shop_id'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_cat_shop_avg_item_cnt')
matrix.drop('date_cat_shop_avg_item_cnt', axis=1, inplace=True)

# 月销量（商品大类）
group = matrix.groupby(['date_block_num','cat_type_code']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_type_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','cat_type_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_type_avg_item_cnt')
matrix.drop('date_type_avg_item_cnt', axis=1, inplace=True)

# 月销量（商品-商品大类） ++++++++++++ 和 月销量（商品）是重复的，因为每一个商品，类别是确定的，大类也是确定的
group = matrix.groupby(['date_block_num', 'item_id', 'cat_type_code']).agg({'item_cnt_month': ['mean']})
group.columns = ['date_item_type_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num', 'item_id', 'cat_type_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_item_type_avg_item_cnt')
matrix.drop('date_item_type_avg_item_cnt', axis=1, inplace=True)

# 月销量（商店城市）
group = matrix.groupby(['date_block_num','shop_city_code']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_city_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','shop_city_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_city_avg_item_cnt')
matrix.drop('date_city_avg_item_cnt', axis=1, inplace=True)

# 月销量（商品-商店城市）
group = matrix.groupby(['date_block_num', 'item_id', 'shop_city_code']).agg({'item_cnt_month': ['mean']})
group.columns = ['date_item_city_avg_item_cnt']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num', 'item_id', 'shop_city_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_item_city_avg_item_cnt')
matrix.drop('date_item_city_avg_item_cnt', axis=1, inplace=True)

# 趋势特征
group = sales_train.groupby('item_id').agg({'item_price': 'mean'})
group.columns = ['item_avg_item_price']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on='item_id', how='left')

group = sales_train.groupby(['date_block_num','item_id']).agg({'item_price': 'mean'})
group.columns = ['date_item_avg_item_price']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num','item_id'], how='left')

matrix['item_avg_item_price'] = matrix['item_avg_item_price'].astype(np.float16)
matrix['date_item_avg_item_price'] = matrix['date_item_avg_item_price'].astype(np.float16)

# 计算matrix中商品的历史价格
lags = [1,2,3,4,5,6,12]
matrix = lag_features(matrix, lags, 'date_item_avg_item_price')
for i in lags:
    matrix['delta_price_lag_'+str(i)]=(matrix['date_item_avg_item_price_lag_' + str(i)] - matrix['item_avg_item_price']) / matrix['item_avg_item_price']

def select_trend(row):
    for i in lags:
        if pd.notnull(row['delta_price_lag_'+str(i)]):  # 如果不是NaN
            return row['delta_price_lag_'+str(i)]
    return 0

matrix['delta_price_lag']=matrix.apply(select_trend, axis=1)
matrix['delta_price_lag'] = matrix['delta_price_lag'].astype(np.float16)

features_to_drop = ['item_avg_item_price','date_item_avg_item_price']
for i in lags:
    features_to_drop += ['date_item_avg_item_price_lag_'+str(i)]
    features_to_drop += ['delta_price_lag_'+str(i)]
matrix.drop(features_to_drop, axis=1, inplace=True)

# 每个月的天数
matrix['month'] = matrix['date_block_num'] % 12
days = pd.Series([31,28,31,30,31,30,31,31,30,31,30,31])
matrix['days'] = matrix['month'].map(days)
matrix['days'] = matrix['days'].astype(np.int8)

# 开始销量
matrix['item_shop_first_sale'] = matrix['date_block_num'] - matrix.groupby(['item_id','shop_id'])['date_block_num'].transform('min')
matrix['item_first_sale'] = matrix['date_block_num'] - matrix.groupby('item_id')['date_block_num'].transform('min')

ic| name: '!Якутск Орджоникидзе, 56 фран'
ic| name: '!Якутск ТЦ "Центральный" фран'
ic| name: 'Адыгея ТЦ "Мега"'
ic| name: 'Балашиха ТРК "Октябрь-Киномир"'
ic| name: 'Волжский ТЦ "Волга Молл"'
ic| name: 'Вологда ТРЦ "Мармелад"'
ic| name: 'Воронеж (Плехановская, 13)'
ic| name: 'Воронеж ТРЦ "Максимир"'
ic| name: 'Воронеж ТРЦ Сити-Парк "Град"'
ic| name: 'Выездная Торговля'
ic| name: 'Жуковский ул. Чкалова 39м?'
ic| name: 'Жуковский ул. Чкалова 39м²'
ic| name: 'Интернет-магазин ЧС'
ic| name: 'Казань ТЦ "Бехетле"'
ic| name: 'Казань ТЦ "ПаркХаус" II'
ic| name: 'Калуга ТРЦ "XXI век"'
ic| name: 'Коломна ТЦ "Рио"'
ic| name: 'Красноярск ТЦ "Взлетка Плаза"'
ic| name: 'Красноярск ТЦ "Июнь"'
ic| name: 'Курск ТЦ "Пушкинский"'
ic| name: 'Москва "Распродажа"'
ic| name: 'Москва МТРЦ "Афи Молл"'
ic| name: 'Москва Магазин С21'
ic| name: 'Москва ТК "Буденовский" (пав.А2)'
ic| name: 'Москва ТК "Буденовский" (пав.К7)'
ic| name: 'Москва ТРК "Атриум"'
ic| name: 'Москва ТЦ "Ареал" (Беляево)'
ic| name: 'Москва 

In [95]:
# 获取最近6个月销售量为0的数据
six_zero = sales_by_item_id[(sales_by_item_id['28'] == 0) & (sales_by_item_id['29'] == 0) & (sales_by_item_id['30'] == 0) & (sales_by_item_id['31'] == 0) & (sales_by_item_id['32'] == 0) & (sales_by_item_id['33'] == 0)]
six_zero_item_id = list(six_zero['item_id'].values)   # item_id列表

In [177]:
matrix.shape

(11128050, 59)

In [6]:
# matrix_11128050_59 = matrix.copy()

In [175]:
matrix = matrix_11128050_59.copy()

In [176]:
matrix.shape

(11128050, 59)

In [178]:
matrix = lag_features(matrix, [1,2,3,6,12], 'item_cnt_month')

In [179]:
matrix.shape

(11128050, 64)

In [180]:
matrix.columns

Index(['date_block_num', 'shop_id', 'item_id', 'item_cnt_month',
       'item_category_id', 'cat_type_code', 'cat_subtype_code',
       'shop_city_code', 'shop_type_code', 'date_avg_item_cnt_lag_1',
       'date_avg_item_cnt_lag_2', 'date_avg_item_cnt_lag_3',
       'date_avg_item_cnt_lag_6', 'date_avg_item_cnt_lag_12',
       'date_item_avg_item_cnt_lag_1', 'date_item_avg_item_cnt_lag_2',
       'date_item_avg_item_cnt_lag_3', 'date_item_avg_item_cnt_lag_6',
       'date_item_avg_item_cnt_lag_12', 'date_shop_avg_item_cnt_lag_1',
       'date_shop_avg_item_cnt_lag_2', 'date_shop_avg_item_cnt_lag_3',
       'date_shop_avg_item_cnt_lag_6', 'date_shop_avg_item_cnt_lag_12',
       'date_cat_avg_item_cnt_lag_1', 'date_cat_avg_item_cnt_lag_2',
       'date_cat_avg_item_cnt_lag_3', 'date_cat_avg_item_cnt_lag_6',
       'date_cat_avg_item_cnt_lag_12', 'date_cat_shop_avg_item_cnt_lag_1',
       'date_cat_shop_avg_item_cnt_lag_2', 'date_cat_shop_avg_item_cnt_lag_3',
       'date_cat_shop_avg_ite

In [181]:
matrix.drop(['date_item_type_avg_item_cnt_lag_1',
       'date_item_type_avg_item_cnt_lag_2',
       'date_item_type_avg_item_cnt_lag_3',
       'date_item_type_avg_item_cnt_lag_6',
       'date_item_type_avg_item_cnt_lag_12'], axis=1, inplace=True)

In [182]:
matrix.shape

(11128050, 59)

In [183]:
# 月销量（商店类型）
group = matrix.groupby(['date_block_num','shop_type_code']).agg({'item_cnt_month': 'mean'})
group.columns = ['date_shoptype_avg_item_cnt']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['date_block_num','shop_type_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_shoptype_avg_item_cnt')
matrix.drop('date_shoptype_avg_item_cnt', axis=1, inplace=True)

# 月销量（商品-商店类型）
group = matrix.groupby(['date_block_num', 'item_id', 'shop_type_code']).agg({'item_cnt_month': ['mean']})
group.columns = ['date_item_shoptype_avg_item_cnt']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num', 'item_id', 'shop_type_code'], how='left')
matrix = lag_features(matrix, [1,2,3,6,12], 'date_item_shoptype_avg_item_cnt')
matrix.drop('date_item_shoptype_avg_item_cnt', axis=1, inplace=True)

# # 月销量（商店-商品）
# group = matrix.groupby(['date_block_num', 'shop_id', 'item_id']).agg({'item_cnt_month': ['mean']})
# group.columns = [ 'date_shopitem_avg_item_cnt' ]
# group.reset_index(inplace=True)
# matrix = pd.merge(left=matrix, right=group, on=['date_block_num', 'shop_id', 'item_id'], how='left')
# matrix = lag_features(matrix, [1,2,3,6,12], 'date_shopitem_avg_item_cnt')
# matrix.drop('date_shopitem_avg_item_cnt', axis=1, inplace=True)

In [184]:
matrix.shape

(11128050, 69)

In [31]:
matrix.loc[matrix.date_block_num==34, 'item_cnt_month'] = np.nan

In [34]:
matrix[matrix.date_block_num == 34]

Unnamed: 0,date_block_num,shop_id,item_id,item_cnt_month,item_category_id,cat_type_code,cat_subtype_code,shop_city_code,shop_type_code,date_avg_item_cnt_lag_1,...,date_shoptype_avg_item_cnt_lag_1,date_shoptype_avg_item_cnt_lag_2,date_shoptype_avg_item_cnt_lag_3,date_shoptype_avg_item_cnt_lag_6,date_shoptype_avg_item_cnt_lag_12,date_item_shoptype_avg_item_cnt_lag_1,date_item_shoptype_avg_item_cnt_lag_2,date_item_shoptype_avg_item_cnt_lag_3,date_item_shoptype_avg_item_cnt_lag_6,date_item_shoptype_avg_item_cnt_lag_12
10913850,34,5,5037,,19,5,10,4,4,0.258545,...,0.220703,0.230591,0.275146,0.244873,0.30835,0.620605,2.666016,3.310547,2.162109,1.441406
10913851,34,5,5320,,55,13,2,4,4,,...,,,,,,,,,,
10913852,34,5,5233,,19,5,10,4,4,0.258545,...,0.220703,0.230591,0.275146,0.244873,,1.172852,1.900391,3.828125,1.806641,
10913853,34,5,5232,,23,5,16,4,4,0.258545,...,0.220703,0.230591,0.275146,,,0.862305,1.200195,1.655273,,
10913854,34,5,5268,,20,5,11,4,4,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11128045,34,45,18454,,55,13,2,21,4,0.258545,...,0.220703,0.230591,0.275146,0.244873,,0.068970,0.033325,0.034485,0.548340,
11128046,34,45,16188,,64,14,42,21,4,0.258545,...,0.220703,0.230591,,,,0.000000,0.099976,,,
11128047,34,45,15757,,55,13,2,21,4,0.258545,...,0.220703,0.230591,0.275146,0.244873,0.30835,0.137939,0.066650,0.137939,0.129028,0.205933
11128048,34,45,19648,,40,11,4,21,4,0.258545,...,0.220703,0.230591,0.275146,0.244873,,0.034485,0.033325,0.172363,0.096802,


In [35]:
matrix.shape

(11128050, 69)

In [36]:
# **********趋势特征 delta_cnt_month_lag ********************
group = matrix.groupby('item_id').agg({'item_cnt_month': 'mean'})
group.columns = ['trend_item_avg_cnt_month']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on='item_id', how='left')

group = matrix.groupby(['date_block_num','item_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['trend_date_item_avg_cnt_month']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num','item_id'], how='left')

# 计算matrix中商品的历史价格
lags = [1,2,3,4,5,6,12]
matrix = lag_features(matrix, lags, 'trend_date_item_avg_cnt_month')
for i in lags:
    matrix['delta_cnt_month_lag_'+str(i)] = (matrix['trend_date_item_avg_cnt_month_lag_' + str(i)] - matrix['trend_item_avg_cnt_month']) / matrix['trend_item_avg_cnt_month']

def select_trend2(row):
    for i in lags:
        if pd.notnull(row['delta_cnt_month_lag_'+str(i)]):  # 如果不是NaN
            return row['delta_cnt_month_lag_'+str(i)]
    return 0   #  如果delta_price_lag_都为空，那么将趋势设为0，0代表没有趋势

matrix['delta_cnt_month_lag'] = matrix.apply(select_trend2, axis=1)
matrix['delta_cnt_month_lag'] = matrix['delta_cnt_month_lag'].astype(np.float16)

features_to_drop = ['trend_item_avg_cnt_month','trend_date_item_avg_cnt_month']
for i in lags:
    features_to_drop += ['trend_date_item_avg_cnt_month_lag_'+str(i)]
    features_to_drop += ['delta_cnt_month_lag_'+str(i)]
matrix.drop(features_to_drop, axis=1, inplace=True)

In [37]:
matrix.shape

(11128050, 70)

In [38]:
# **********趋势特征 delta2_cnt_month_lag ********************
group = matrix.groupby(['shop_id', 'item_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['qushi_shop_item_avg_cnt_month']
group = group.reset_index()
matrix = pd.merge(left=matrix, right=group, on=['shop_id', 'item_id'], how='left')

group = matrix.groupby(['date_block_num', 'shop_id', 'item_id']).agg({'item_cnt_month': 'mean'})
group.columns = ['qushi_date_shop_item_avg_cnt_month']
group = group.reset_index()
matrix=pd.merge(left=matrix, right=group, on=['date_block_num', 'shop_id', 'item_id'], how='left')

# 计算matrix中商品的历史价格
lags = [1,2,3,4,5,6,12]
matrix = lag_features(matrix, lags, 'qushi_date_shop_item_avg_cnt_month')
for i in lags:
    matrix['delta2_cnt_month_lag_'+str(i)] = (matrix['qushi_date_shop_item_avg_cnt_month_lag_' + str(i)] - matrix['qushi_shop_item_avg_cnt_month']) / matrix['qushi_shop_item_avg_cnt_month']

def select_trend3(row):
    for i in lags:
        if pd.notnull(row['delta2_cnt_month_lag_'+str(i)]):  # 如果不是NaN
            return row['delta2_cnt_month_lag_'+str(i)]
    return 0   #  如果delta_price_lag_都为空，那么将趋势设为0，0代表没有趋势

matrix['delta2_cnt_month_lag'] = matrix.apply(select_trend3, axis=1)
matrix['delta2_cnt_month_lag'] = matrix['delta2_cnt_month_lag'].astype(np.float16)

features_to_drop = ['qushi_shop_item_avg_cnt_month','qushi_date_shop_item_avg_cnt_month']
for i in lags:
    features_to_drop += ['delta2_cnt_month_lag_'+str(i)]
matrix.drop(features_to_drop, axis=1, inplace=True)

In [41]:
matrix.shape

(11128050, 78)

In [40]:
# matrix_11128050_78_bak = matrix.copy()

In [157]:
matrix.shape

(11128050, 64)

In [159]:
dict(matrix.count())

{'date_block_num': 11128050,
 'shop_id': 11128050,
 'item_id': 11128050,
 'item_cnt_month': 11128050,
 'item_category_id': 11128050,
 'cat_type_code': 11128050,
 'cat_subtype_code': 11128050,
 'shop_city_code': 11128050,
 'shop_type_code': 11128050,
 'date_avg_item_cnt_lag_1': 8605461,
 'date_avg_item_cnt_lag_2': 7953134,
 'date_avg_item_cnt_lag_3': 7344764,
 'date_avg_item_cnt_lag_6': 5711598,
 'date_avg_item_cnt_lag_12': 3120910,
 'date_item_avg_item_cnt_lag_1': 8605461,
 'date_item_avg_item_cnt_lag_2': 7953134,
 'date_item_avg_item_cnt_lag_3': 7344764,
 'date_item_avg_item_cnt_lag_6': 5711598,
 'date_item_avg_item_cnt_lag_12': 3120910,
 'date_shop_avg_item_cnt_lag_1': 8605461,
 'date_shop_avg_item_cnt_lag_2': 7953134,
 'date_shop_avg_item_cnt_lag_3': 7344764,
 'date_shop_avg_item_cnt_lag_6': 5711598,
 'date_shop_avg_item_cnt_lag_12': 3120910,
 'date_cat_avg_item_cnt_lag_1': 8605461,
 'date_cat_avg_item_cnt_lag_2': 7953134,
 'date_cat_avg_item_cnt_lag_3': 7344764,
 'date_cat_avg_item

In [166]:
# 新--删除前12个月数据
ts = time.time()

# 因为有12个月的延迟特征（1，2，3，6，12）（1，2，3，4，5，6，12），所以需要删除前12月的数据
matrix = matrix[matrix['date_block_num'] > 11]

def fill_na(df):
    for col in df.columns:
        if ('_lag_' in col) & (df[col].isnull().any()):
            if ('item_cnt' in col):
                df[col].fillna(0, inplace=True)         
    return df

matrix = fill_na(matrix)
time.time() - ts

3.3927719593048096

In [42]:
# 因为有12个月的延迟特征（1，2，3，6，12）（1，2，3，4，5，6，12），所以需要删除前12月的数据
matrix = matrix[matrix['date_block_num'] > 11]

# 找到有NaN值的列，然后把那些列中的NaN值填充0
columns = matrix.columns
column_null = []
for i in columns:
    if len(matrix[matrix[i].isnull()]) > 0:
        column_null.append(i)

for i in column_null:
    matrix[i].fillna(0, inplace=True)

In [167]:
matrix.shape

(6639294, 64)

In [168]:
dict(matrix.count())

{'date_block_num': 6639294,
 'shop_id': 6639294,
 'item_id': 6639294,
 'item_cnt_month': 6639294,
 'item_category_id': 6639294,
 'cat_type_code': 6639294,
 'cat_subtype_code': 6639294,
 'shop_city_code': 6639294,
 'shop_type_code': 6639294,
 'date_avg_item_cnt_lag_1': 6639294,
 'date_avg_item_cnt_lag_2': 6639294,
 'date_avg_item_cnt_lag_3': 6639294,
 'date_avg_item_cnt_lag_6': 6639294,
 'date_avg_item_cnt_lag_12': 6639294,
 'date_item_avg_item_cnt_lag_1': 6639294,
 'date_item_avg_item_cnt_lag_2': 6639294,
 'date_item_avg_item_cnt_lag_3': 6639294,
 'date_item_avg_item_cnt_lag_6': 6639294,
 'date_item_avg_item_cnt_lag_12': 6639294,
 'date_shop_avg_item_cnt_lag_1': 6639294,
 'date_shop_avg_item_cnt_lag_2': 6639294,
 'date_shop_avg_item_cnt_lag_3': 6639294,
 'date_shop_avg_item_cnt_lag_6': 6639294,
 'date_shop_avg_item_cnt_lag_12': 6639294,
 'date_cat_avg_item_cnt_lag_1': 6639294,
 'date_cat_avg_item_cnt_lag_2': 6639294,
 'date_cat_avg_item_cnt_lag_3': 6639294,
 'date_cat_avg_item_cnt_lag_

# 分析

# 训练

In [49]:
matrix.shape

(6639294, 78)

In [50]:
"""建模"""
trainData = matrix[matrix['date_block_num'] < 33]
label_train = trainData['item_cnt_month']
X_train = trainData.drop('item_cnt_month', axis=1)

validData = matrix[matrix['date_block_num'] == 33]
label_valid = validData['item_cnt_month']
X_valid = validData.drop('item_cnt_month', axis=1)333

In [53]:
import lightgbm as lgb
train_data = lgb.Dataset(data=X_train, label=label_train)
valid_data = lgb.Dataset(data=X_valid, label=label_valid)
params = {
    'objective': 'regression',  # 回归
    'metric': 'rmse',   # 回归问题选择rmse
    'n_estimators': 2000,
    'max_depth': 8,
    'num_leaves': 200,   # 每个弱学习器拥有的叶子的数量
    'learning_rate': 0.01,
    'bagging_fraction': 0.9,    # 每次训练“弱学习器”用的数据比例（应该也是随机的），用于加快训练速度和减小过拟合
    'feature_fraction': 0.3,   # 每次迭代过程中，随机选择30%的特征建树（弱学习器）
    'bagging_seed': 0,
    'early_stop_rounds': 50
}
lgb_model = lgb.train(params, train_data, valid_sets=[train_data, valid_data])

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13137
[LightGBM] [Info] Number of data points in the train set: 6186922, number of used features: 77
[LightGBM] [Info] Start training from score 0.288852
[1]	training's rmse: 1.18307	valid_1's rmse: 1.13238
[2]	training's rmse: 1.17795	valid_1's rmse: 1.12848
[3]	training's rmse: 1.17329	valid_1's rmse: 1.12514
[4]	training's rmse: 1.16853	valid_1's rmse: 1.12178
[5]	training's rmse: 1.16373	valid_1's rmse: 1.11823
[6]	training's rmse: 1.15924	valid_1's rmse: 1.11468
[7]	training's rmse: 1.15455	valid_1's rmse: 1.11115
[8]	training's rmse: 1.14977	valid_1's rmse: 1.10751
[9]	training's rmse: 1.1453	valid_1's rmse: 1.10436
[10]	training's rmse: 1.14172	valid_1's rmse: 1.10178
[11]	training's rmse: 1.13761	valid_1's rmse: 1.09884
[12]	training's rmse: 1.1336	valid_1's rmse: 1.09599
[13]	training's rmse: 1.12894	valid_1's rmse: 1.09198
[14]

[139]	training's rmse: 0.877786	valid_1's rmse: 0.927022
[140]	training's rmse: 0.876935	valid_1's rmse: 0.926727
[141]	training's rmse: 0.876215	valid_1's rmse: 0.926457
[142]	training's rmse: 0.875169	valid_1's rmse: 0.925975
[143]	training's rmse: 0.874518	valid_1's rmse: 0.925705
[144]	training's rmse: 0.873733	valid_1's rmse: 0.925395
[145]	training's rmse: 0.872748	valid_1's rmse: 0.924811
[146]	training's rmse: 0.871972	valid_1's rmse: 0.924463
[147]	training's rmse: 0.871261	valid_1's rmse: 0.924296
[148]	training's rmse: 0.870663	valid_1's rmse: 0.923983
[149]	training's rmse: 0.869986	valid_1's rmse: 0.923651
[150]	training's rmse: 0.869013	valid_1's rmse: 0.923162
[151]	training's rmse: 0.868262	valid_1's rmse: 0.92277
[152]	training's rmse: 0.867469	valid_1's rmse: 0.922539
[153]	training's rmse: 0.866975	valid_1's rmse: 0.922292
[154]	training's rmse: 0.866276	valid_1's rmse: 0.922054
[155]	training's rmse: 0.865459	valid_1's rmse: 0.921614
[156]	training's rmse: 0.86484	v

[281]	training's rmse: 0.810584	valid_1's rmse: 0.906901
[282]	training's rmse: 0.810298	valid_1's rmse: 0.906981
[283]	training's rmse: 0.809831	valid_1's rmse: 0.906758
[284]	training's rmse: 0.809662	valid_1's rmse: 0.906806
[285]	training's rmse: 0.809469	valid_1's rmse: 0.906959
[286]	training's rmse: 0.809231	valid_1's rmse: 0.907086
[287]	training's rmse: 0.808949	valid_1's rmse: 0.906933
[288]	training's rmse: 0.808767	valid_1's rmse: 0.906945
[289]	training's rmse: 0.80837	valid_1's rmse: 0.90691
[290]	training's rmse: 0.808121	valid_1's rmse: 0.906832
[291]	training's rmse: 0.807961	valid_1's rmse: 0.90686
[292]	training's rmse: 0.807698	valid_1's rmse: 0.906826
[293]	training's rmse: 0.80755	valid_1's rmse: 0.906919
[294]	training's rmse: 0.807318	valid_1's rmse: 0.906799
[295]	training's rmse: 0.807139	valid_1's rmse: 0.906952
[296]	training's rmse: 0.806899	valid_1's rmse: 0.906954
[297]	training's rmse: 0.80666	valid_1's rmse: 0.90691
[298]	training's rmse: 0.80651	valid_

[404]	training's rmse: 0.789076	valid_1's rmse: 0.908481
[405]	training's rmse: 0.788985	valid_1's rmse: 0.908432
[406]	training's rmse: 0.788836	valid_1's rmse: 0.908456
[407]	training's rmse: 0.788727	valid_1's rmse: 0.908567
[408]	training's rmse: 0.788522	valid_1's rmse: 0.908553
[409]	training's rmse: 0.788447	valid_1's rmse: 0.908543
[410]	training's rmse: 0.788279	valid_1's rmse: 0.908606
[411]	training's rmse: 0.788175	valid_1's rmse: 0.908688
[412]	training's rmse: 0.78806	valid_1's rmse: 0.908705
[413]	training's rmse: 0.787949	valid_1's rmse: 0.908749
[414]	training's rmse: 0.787886	valid_1's rmse: 0.908751
[415]	training's rmse: 0.787761	valid_1's rmse: 0.908734
[416]	training's rmse: 0.787636	valid_1's rmse: 0.908737
[417]	training's rmse: 0.787526	valid_1's rmse: 0.908754
[418]	training's rmse: 0.787386	valid_1's rmse: 0.908836
[419]	training's rmse: 0.787254	valid_1's rmse: 0.908768
[420]	training's rmse: 0.787141	valid_1's rmse: 0.908785
[421]	training's rmse: 0.78704	v

[508]	training's rmse: 0.777686	valid_1's rmse: 0.9085
[509]	training's rmse: 0.777625	valid_1's rmse: 0.908454
[510]	training's rmse: 0.777526	valid_1's rmse: 0.908405
[511]	training's rmse: 0.777425	valid_1's rmse: 0.908447
[512]	training's rmse: 0.777333	valid_1's rmse: 0.908496
[513]	training's rmse: 0.777266	valid_1's rmse: 0.908494
[514]	training's rmse: 0.777155	valid_1's rmse: 0.908583
[515]	training's rmse: 0.777044	valid_1's rmse: 0.908616
[516]	training's rmse: 0.776989	valid_1's rmse: 0.908639
[517]	training's rmse: 0.776894	valid_1's rmse: 0.908688
[518]	training's rmse: 0.776815	valid_1's rmse: 0.908791
[519]	training's rmse: 0.776758	valid_1's rmse: 0.908772
[520]	training's rmse: 0.77666	valid_1's rmse: 0.908725
[521]	training's rmse: 0.77657	valid_1's rmse: 0.908779
[522]	training's rmse: 0.776478	valid_1's rmse: 0.908797
[523]	training's rmse: 0.776363	valid_1's rmse: 0.908811
[524]	training's rmse: 0.776272	valid_1's rmse: 0.908666
[525]	training's rmse: 0.776202	val

[613]	training's rmse: 0.769144	valid_1's rmse: 0.907018
[614]	training's rmse: 0.769036	valid_1's rmse: 0.907014
[615]	training's rmse: 0.768916	valid_1's rmse: 0.906911
[616]	training's rmse: 0.768844	valid_1's rmse: 0.906923
[617]	training's rmse: 0.768447	valid_1's rmse: 0.906798
[618]	training's rmse: 0.768356	valid_1's rmse: 0.906874
[619]	training's rmse: 0.768297	valid_1's rmse: 0.90692
[620]	training's rmse: 0.768198	valid_1's rmse: 0.906939
[621]	training's rmse: 0.76815	valid_1's rmse: 0.906965
[622]	training's rmse: 0.768102	valid_1's rmse: 0.906973
[623]	training's rmse: 0.767996	valid_1's rmse: 0.906957
[624]	training's rmse: 0.76755	valid_1's rmse: 0.907087
[625]	training's rmse: 0.767482	valid_1's rmse: 0.907079
[626]	training's rmse: 0.767369	valid_1's rmse: 0.907053
[627]	training's rmse: 0.767291	valid_1's rmse: 0.906935
[628]	training's rmse: 0.767241	valid_1's rmse: 0.906876
[629]	training's rmse: 0.767199	valid_1's rmse: 0.906879
[630]	training's rmse: 0.767126	va

[705]	training's rmse: 0.761893	valid_1's rmse: 0.905781
[706]	training's rmse: 0.761805	valid_1's rmse: 0.905637
[707]	training's rmse: 0.761769	valid_1's rmse: 0.905645
[708]	training's rmse: 0.761715	valid_1's rmse: 0.905756
[709]	training's rmse: 0.761634	valid_1's rmse: 0.905811
[710]	training's rmse: 0.761578	valid_1's rmse: 0.905858
[711]	training's rmse: 0.761508	valid_1's rmse: 0.905869
[712]	training's rmse: 0.76146	valid_1's rmse: 0.905893
[713]	training's rmse: 0.761435	valid_1's rmse: 0.905891
[714]	training's rmse: 0.761403	valid_1's rmse: 0.905912
[715]	training's rmse: 0.76131	valid_1's rmse: 0.905948
[716]	training's rmse: 0.761252	valid_1's rmse: 0.905927
[717]	training's rmse: 0.761221	valid_1's rmse: 0.905944
[718]	training's rmse: 0.761171	valid_1's rmse: 0.905932
[719]	training's rmse: 0.761096	valid_1's rmse: 0.905987
[720]	training's rmse: 0.761023	valid_1's rmse: 0.905948
[721]	training's rmse: 0.760956	valid_1's rmse: 0.905882
[722]	training's rmse: 0.760926	v

[793]	training's rmse: 0.756362	valid_1's rmse: 0.904879
[794]	training's rmse: 0.756334	valid_1's rmse: 0.904811
[795]	training's rmse: 0.756296	valid_1's rmse: 0.904845
[796]	training's rmse: 0.756268	valid_1's rmse: 0.904839
[797]	training's rmse: 0.756224	valid_1's rmse: 0.904821
[798]	training's rmse: 0.756152	valid_1's rmse: 0.904794
[799]	training's rmse: 0.756123	valid_1's rmse: 0.904776
[800]	training's rmse: 0.756095	valid_1's rmse: 0.904804
[801]	training's rmse: 0.756029	valid_1's rmse: 0.904812
[802]	training's rmse: 0.75596	valid_1's rmse: 0.904846
[803]	training's rmse: 0.755898	valid_1's rmse: 0.904824
[804]	training's rmse: 0.755848	valid_1's rmse: 0.904786
[805]	training's rmse: 0.755813	valid_1's rmse: 0.904769
[806]	training's rmse: 0.755781	valid_1's rmse: 0.90476
[807]	training's rmse: 0.755725	valid_1's rmse: 0.904738
[808]	training's rmse: 0.755633	valid_1's rmse: 0.904732
[809]	training's rmse: 0.75559	valid_1's rmse: 0.904724
[810]	training's rmse: 0.755552	va

[885]	training's rmse: 0.751896	valid_1's rmse: 0.903527
[886]	training's rmse: 0.75187	valid_1's rmse: 0.903521
[887]	training's rmse: 0.751837	valid_1's rmse: 0.903516
[888]	training's rmse: 0.751809	valid_1's rmse: 0.903436
[889]	training's rmse: 0.751745	valid_1's rmse: 0.903461
[890]	training's rmse: 0.75171	valid_1's rmse: 0.903459
[891]	training's rmse: 0.751677	valid_1's rmse: 0.903379
[892]	training's rmse: 0.751634	valid_1's rmse: 0.903403
[893]	training's rmse: 0.751584	valid_1's rmse: 0.903443
[894]	training's rmse: 0.751558	valid_1's rmse: 0.90345
[895]	training's rmse: 0.75151	valid_1's rmse: 0.903437
[896]	training's rmse: 0.751445	valid_1's rmse: 0.903491
[897]	training's rmse: 0.75138	valid_1's rmse: 0.903355
[898]	training's rmse: 0.751318	valid_1's rmse: 0.903363
[899]	training's rmse: 0.751271	valid_1's rmse: 0.903364
[900]	training's rmse: 0.751235	valid_1's rmse: 0.903403
[901]	training's rmse: 0.751191	valid_1's rmse: 0.903418
[902]	training's rmse: 0.751144	vali

[963]	training's rmse: 0.748936	valid_1's rmse: 0.903109
[964]	training's rmse: 0.748883	valid_1's rmse: 0.903065
[965]	training's rmse: 0.74884	valid_1's rmse: 0.902994
[966]	training's rmse: 0.748783	valid_1's rmse: 0.902967
[967]	training's rmse: 0.748733	valid_1's rmse: 0.902981
[968]	training's rmse: 0.748674	valid_1's rmse: 0.90295
[969]	training's rmse: 0.748652	valid_1's rmse: 0.902953
[970]	training's rmse: 0.748624	valid_1's rmse: 0.902896
[971]	training's rmse: 0.748572	valid_1's rmse: 0.902868
[972]	training's rmse: 0.748541	valid_1's rmse: 0.90285
[973]	training's rmse: 0.748501	valid_1's rmse: 0.902864
[974]	training's rmse: 0.748478	valid_1's rmse: 0.90287
[975]	training's rmse: 0.74843	valid_1's rmse: 0.902776
[976]	training's rmse: 0.748401	valid_1's rmse: 0.902786
[977]	training's rmse: 0.748365	valid_1's rmse: 0.902808
[978]	training's rmse: 0.748306	valid_1's rmse: 0.902803
[979]	training's rmse: 0.748241	valid_1's rmse: 0.902785
[980]	training's rmse: 0.748201	vali

[1044]	training's rmse: 0.745854	valid_1's rmse: 0.901823
[1045]	training's rmse: 0.745831	valid_1's rmse: 0.901815
[1046]	training's rmse: 0.745806	valid_1's rmse: 0.90182
[1047]	training's rmse: 0.745773	valid_1's rmse: 0.901787
[1048]	training's rmse: 0.74575	valid_1's rmse: 0.90169
[1049]	training's rmse: 0.745704	valid_1's rmse: 0.901622
[1050]	training's rmse: 0.745657	valid_1's rmse: 0.901566
[1051]	training's rmse: 0.745626	valid_1's rmse: 0.901566
[1052]	training's rmse: 0.745604	valid_1's rmse: 0.901574
[1053]	training's rmse: 0.745581	valid_1's rmse: 0.901575
[1054]	training's rmse: 0.745556	valid_1's rmse: 0.901582
[1055]	training's rmse: 0.745521	valid_1's rmse: 0.901551
[1056]	training's rmse: 0.745467	valid_1's rmse: 0.901626
[1057]	training's rmse: 0.745428	valid_1's rmse: 0.901554
[1058]	training's rmse: 0.74537	valid_1's rmse: 0.901512
[1059]	training's rmse: 0.745326	valid_1's rmse: 0.901535
[1060]	training's rmse: 0.745267	valid_1's rmse: 0.901434
[1061]	training's 

[1128]	training's rmse: 0.742918	valid_1's rmse: 0.900954
[1129]	training's rmse: 0.742883	valid_1's rmse: 0.900969
[1130]	training's rmse: 0.742845	valid_1's rmse: 0.900968
[1131]	training's rmse: 0.742796	valid_1's rmse: 0.900965
[1132]	training's rmse: 0.742766	valid_1's rmse: 0.900852
[1133]	training's rmse: 0.742709	valid_1's rmse: 0.900822
[1134]	training's rmse: 0.742683	valid_1's rmse: 0.900799
[1135]	training's rmse: 0.742636	valid_1's rmse: 0.900843
[1136]	training's rmse: 0.74261	valid_1's rmse: 0.900705
[1137]	training's rmse: 0.742561	valid_1's rmse: 0.900716
[1138]	training's rmse: 0.742513	valid_1's rmse: 0.900708
[1139]	training's rmse: 0.742458	valid_1's rmse: 0.900736
[1140]	training's rmse: 0.742431	valid_1's rmse: 0.90072
[1141]	training's rmse: 0.742387	valid_1's rmse: 0.900776
[1142]	training's rmse: 0.742334	valid_1's rmse: 0.900807
[1143]	training's rmse: 0.742302	valid_1's rmse: 0.900834
[1144]	training's rmse: 0.742262	valid_1's rmse: 0.900829
[1145]	training'

[1208]	training's rmse: 0.74029	valid_1's rmse: 0.900366
[1209]	training's rmse: 0.740248	valid_1's rmse: 0.900362
[1210]	training's rmse: 0.740218	valid_1's rmse: 0.900344
[1211]	training's rmse: 0.740199	valid_1's rmse: 0.90035
[1212]	training's rmse: 0.740174	valid_1's rmse: 0.900345
[1213]	training's rmse: 0.740151	valid_1's rmse: 0.900348
[1214]	training's rmse: 0.740121	valid_1's rmse: 0.900353
[1215]	training's rmse: 0.740088	valid_1's rmse: 0.90035
[1216]	training's rmse: 0.740041	valid_1's rmse: 0.900317
[1217]	training's rmse: 0.740016	valid_1's rmse: 0.900319
[1218]	training's rmse: 0.739991	valid_1's rmse: 0.900305
[1219]	training's rmse: 0.739971	valid_1's rmse: 0.900297
[1220]	training's rmse: 0.739956	valid_1's rmse: 0.900283
[1221]	training's rmse: 0.739939	valid_1's rmse: 0.900279
[1222]	training's rmse: 0.739918	valid_1's rmse: 0.900289
[1223]	training's rmse: 0.739902	valid_1's rmse: 0.900287
[1224]	training's rmse: 0.739857	valid_1's rmse: 0.90035
[1225]	training's 

[1287]	training's rmse: 0.73803	valid_1's rmse: 0.900195
[1288]	training's rmse: 0.737999	valid_1's rmse: 0.90016
[1289]	training's rmse: 0.737953	valid_1's rmse: 0.900215
[1290]	training's rmse: 0.737927	valid_1's rmse: 0.90016
[1291]	training's rmse: 0.737912	valid_1's rmse: 0.900156
[1292]	training's rmse: 0.737892	valid_1's rmse: 0.900147
[1293]	training's rmse: 0.73785	valid_1's rmse: 0.900162
[1294]	training's rmse: 0.737826	valid_1's rmse: 0.900166
[1295]	training's rmse: 0.737804	valid_1's rmse: 0.900181
[1296]	training's rmse: 0.737779	valid_1's rmse: 0.900199
[1297]	training's rmse: 0.737758	valid_1's rmse: 0.9002
[1298]	training's rmse: 0.737734	valid_1's rmse: 0.900231
[1299]	training's rmse: 0.737701	valid_1's rmse: 0.900131
[1300]	training's rmse: 0.737672	valid_1's rmse: 0.900148
[1301]	training's rmse: 0.737649	valid_1's rmse: 0.900092
[1302]	training's rmse: 0.737624	valid_1's rmse: 0.900025
[1303]	training's rmse: 0.7376	valid_1's rmse: 0.900027
[1304]	training's rmse

[1367]	training's rmse: 0.735833	valid_1's rmse: 0.899751
[1368]	training's rmse: 0.735806	valid_1's rmse: 0.899748
[1369]	training's rmse: 0.735778	valid_1's rmse: 0.899766
[1370]	training's rmse: 0.735761	valid_1's rmse: 0.899763
[1371]	training's rmse: 0.735698	valid_1's rmse: 0.899743
[1372]	training's rmse: 0.735677	valid_1's rmse: 0.899737
[1373]	training's rmse: 0.735648	valid_1's rmse: 0.899736
[1374]	training's rmse: 0.735639	valid_1's rmse: 0.899731
[1375]	training's rmse: 0.735615	valid_1's rmse: 0.899728
[1376]	training's rmse: 0.735587	valid_1's rmse: 0.899741
[1377]	training's rmse: 0.735567	valid_1's rmse: 0.899729
[1378]	training's rmse: 0.735541	valid_1's rmse: 0.899733
[1379]	training's rmse: 0.735512	valid_1's rmse: 0.899718
[1380]	training's rmse: 0.735486	valid_1's rmse: 0.899726
[1381]	training's rmse: 0.735468	valid_1's rmse: 0.899715
[1382]	training's rmse: 0.735421	valid_1's rmse: 0.89972
[1383]	training's rmse: 0.735395	valid_1's rmse: 0.899636
[1384]	training

[1446]	training's rmse: 0.733652	valid_1's rmse: 0.899326
[1447]	training's rmse: 0.733604	valid_1's rmse: 0.899264
[1448]	training's rmse: 0.733584	valid_1's rmse: 0.899241
[1449]	training's rmse: 0.733555	valid_1's rmse: 0.899158
[1450]	training's rmse: 0.733529	valid_1's rmse: 0.89918
[1451]	training's rmse: 0.733507	valid_1's rmse: 0.899212
[1452]	training's rmse: 0.733486	valid_1's rmse: 0.899212
[1453]	training's rmse: 0.733467	valid_1's rmse: 0.899208
[1454]	training's rmse: 0.733439	valid_1's rmse: 0.899222
[1455]	training's rmse: 0.733419	valid_1's rmse: 0.899205
[1456]	training's rmse: 0.733393	valid_1's rmse: 0.899204
[1457]	training's rmse: 0.733349	valid_1's rmse: 0.899211
[1458]	training's rmse: 0.733304	valid_1's rmse: 0.899167
[1459]	training's rmse: 0.733288	valid_1's rmse: 0.899174
[1460]	training's rmse: 0.733273	valid_1's rmse: 0.899178
[1461]	training's rmse: 0.733249	valid_1's rmse: 0.899184
[1462]	training's rmse: 0.733224	valid_1's rmse: 0.899175
[1463]	training

[1522]	training's rmse: 0.731691	valid_1's rmse: 0.898992
[1523]	training's rmse: 0.731674	valid_1's rmse: 0.898992
[1524]	training's rmse: 0.731655	valid_1's rmse: 0.899021
[1525]	training's rmse: 0.73164	valid_1's rmse: 0.899019
[1526]	training's rmse: 0.731623	valid_1's rmse: 0.899018
[1527]	training's rmse: 0.731606	valid_1's rmse: 0.899009
[1528]	training's rmse: 0.731586	valid_1's rmse: 0.899004
[1529]	training's rmse: 0.731564	valid_1's rmse: 0.899022
[1530]	training's rmse: 0.731541	valid_1's rmse: 0.899033
[1531]	training's rmse: 0.731503	valid_1's rmse: 0.899035
[1532]	training's rmse: 0.731487	valid_1's rmse: 0.899036
[1533]	training's rmse: 0.731473	valid_1's rmse: 0.899035
[1534]	training's rmse: 0.731457	valid_1's rmse: 0.899028
[1535]	training's rmse: 0.731442	valid_1's rmse: 0.899035
[1536]	training's rmse: 0.731416	valid_1's rmse: 0.898994
[1537]	training's rmse: 0.731358	valid_1's rmse: 0.899011
[1538]	training's rmse: 0.731333	valid_1's rmse: 0.898929
[1539]	training

[1599]	training's rmse: 0.729721	valid_1's rmse: 0.898457
[1600]	training's rmse: 0.729706	valid_1's rmse: 0.898454
[1601]	training's rmse: 0.729672	valid_1's rmse: 0.898455
[1602]	training's rmse: 0.729641	valid_1's rmse: 0.89843
[1603]	training's rmse: 0.729589	valid_1's rmse: 0.898452
[1604]	training's rmse: 0.729559	valid_1's rmse: 0.898391
[1605]	training's rmse: 0.729538	valid_1's rmse: 0.898404
[1606]	training's rmse: 0.729519	valid_1's rmse: 0.898395
[1607]	training's rmse: 0.729498	valid_1's rmse: 0.898409
[1608]	training's rmse: 0.729449	valid_1's rmse: 0.898394
[1609]	training's rmse: 0.729432	valid_1's rmse: 0.898354
[1610]	training's rmse: 0.729414	valid_1's rmse: 0.89836
[1611]	training's rmse: 0.729398	valid_1's rmse: 0.898366
[1612]	training's rmse: 0.729373	valid_1's rmse: 0.89834
[1613]	training's rmse: 0.729354	valid_1's rmse: 0.898312
[1614]	training's rmse: 0.72933	valid_1's rmse: 0.898329
[1615]	training's rmse: 0.729315	valid_1's rmse: 0.898325
[1616]	training's 

[1678]	training's rmse: 0.727916	valid_1's rmse: 0.897738
[1679]	training's rmse: 0.727894	valid_1's rmse: 0.897713
[1680]	training's rmse: 0.727875	valid_1's rmse: 0.897725
[1681]	training's rmse: 0.727852	valid_1's rmse: 0.897748
[1682]	training's rmse: 0.727827	valid_1's rmse: 0.897726
[1683]	training's rmse: 0.727814	valid_1's rmse: 0.897725
[1684]	training's rmse: 0.727794	valid_1's rmse: 0.897733
[1685]	training's rmse: 0.727764	valid_1's rmse: 0.897715
[1686]	training's rmse: 0.727742	valid_1's rmse: 0.897735
[1687]	training's rmse: 0.727726	valid_1's rmse: 0.89773
[1688]	training's rmse: 0.727705	valid_1's rmse: 0.897685
[1689]	training's rmse: 0.727678	valid_1's rmse: 0.897653
[1690]	training's rmse: 0.727664	valid_1's rmse: 0.897646
[1691]	training's rmse: 0.72763	valid_1's rmse: 0.897663
[1692]	training's rmse: 0.727597	valid_1's rmse: 0.897613
[1693]	training's rmse: 0.727581	valid_1's rmse: 0.897627
[1694]	training's rmse: 0.727567	valid_1's rmse: 0.897625
[1695]	training'

[1756]	training's rmse: 0.726218	valid_1's rmse: 0.897351
[1757]	training's rmse: 0.726187	valid_1's rmse: 0.897287
[1758]	training's rmse: 0.726172	valid_1's rmse: 0.897292
[1759]	training's rmse: 0.726152	valid_1's rmse: 0.897279
[1760]	training's rmse: 0.726119	valid_1's rmse: 0.897308
[1761]	training's rmse: 0.726104	valid_1's rmse: 0.897316
[1762]	training's rmse: 0.726073	valid_1's rmse: 0.897273
[1763]	training's rmse: 0.726033	valid_1's rmse: 0.897268
[1764]	training's rmse: 0.726017	valid_1's rmse: 0.897189
[1765]	training's rmse: 0.726001	valid_1's rmse: 0.897204
[1766]	training's rmse: 0.725979	valid_1's rmse: 0.897203
[1767]	training's rmse: 0.725945	valid_1's rmse: 0.89714
[1768]	training's rmse: 0.725931	valid_1's rmse: 0.897137
[1769]	training's rmse: 0.725908	valid_1's rmse: 0.89711
[1770]	training's rmse: 0.725891	valid_1's rmse: 0.897101
[1771]	training's rmse: 0.725876	valid_1's rmse: 0.897105
[1772]	training's rmse: 0.725853	valid_1's rmse: 0.897085
[1773]	training'

[1832]	training's rmse: 0.724502	valid_1's rmse: 0.896772
[1833]	training's rmse: 0.72448	valid_1's rmse: 0.896786
[1834]	training's rmse: 0.72447	valid_1's rmse: 0.896784
[1835]	training's rmse: 0.724457	valid_1's rmse: 0.896785
[1836]	training's rmse: 0.724442	valid_1's rmse: 0.89679
[1837]	training's rmse: 0.724427	valid_1's rmse: 0.896796
[1838]	training's rmse: 0.724414	valid_1's rmse: 0.896795
[1839]	training's rmse: 0.724402	valid_1's rmse: 0.896783
[1840]	training's rmse: 0.724383	valid_1's rmse: 0.89677
[1841]	training's rmse: 0.724356	valid_1's rmse: 0.896741
[1842]	training's rmse: 0.724338	valid_1's rmse: 0.896693
[1843]	training's rmse: 0.724298	valid_1's rmse: 0.89667
[1844]	training's rmse: 0.724274	valid_1's rmse: 0.896676
[1845]	training's rmse: 0.724258	valid_1's rmse: 0.896689
[1846]	training's rmse: 0.724247	valid_1's rmse: 0.896695
[1847]	training's rmse: 0.724225	valid_1's rmse: 0.896697
[1848]	training's rmse: 0.72421	valid_1's rmse: 0.896692
[1849]	training's rm

[1902]	training's rmse: 0.723067	valid_1's rmse: 0.896398
[1903]	training's rmse: 0.723058	valid_1's rmse: 0.896396
[1904]	training's rmse: 0.72303	valid_1's rmse: 0.896422
[1905]	training's rmse: 0.723016	valid_1's rmse: 0.896417
[1906]	training's rmse: 0.723005	valid_1's rmse: 0.896415
[1907]	training's rmse: 0.722989	valid_1's rmse: 0.896399
[1908]	training's rmse: 0.722962	valid_1's rmse: 0.896476
[1909]	training's rmse: 0.722944	valid_1's rmse: 0.896469
[1910]	training's rmse: 0.722926	valid_1's rmse: 0.896455
[1911]	training's rmse: 0.722913	valid_1's rmse: 0.896453
[1912]	training's rmse: 0.722904	valid_1's rmse: 0.896451
[1913]	training's rmse: 0.722895	valid_1's rmse: 0.896456
[1914]	training's rmse: 0.722865	valid_1's rmse: 0.896483
[1915]	training's rmse: 0.722855	valid_1's rmse: 0.896479
[1916]	training's rmse: 0.722844	valid_1's rmse: 0.896479
[1917]	training's rmse: 0.722828	valid_1's rmse: 0.896487
[1918]	training's rmse: 0.722819	valid_1's rmse: 0.896478
[1919]	training

[1973]	training's rmse: 0.721758	valid_1's rmse: 0.896164
[1974]	training's rmse: 0.721753	valid_1's rmse: 0.896166
[1975]	training's rmse: 0.721737	valid_1's rmse: 0.896167
[1976]	training's rmse: 0.72171	valid_1's rmse: 0.89621
[1977]	training's rmse: 0.721695	valid_1's rmse: 0.896222
[1978]	training's rmse: 0.721667	valid_1's rmse: 0.896235
[1979]	training's rmse: 0.721655	valid_1's rmse: 0.896243
[1980]	training's rmse: 0.721637	valid_1's rmse: 0.896241
[1981]	training's rmse: 0.721622	valid_1's rmse: 0.896247
[1982]	training's rmse: 0.721609	valid_1's rmse: 0.896249
[1983]	training's rmse: 0.721595	valid_1's rmse: 0.896256
[1984]	training's rmse: 0.721586	valid_1's rmse: 0.896248
[1985]	training's rmse: 0.721542	valid_1's rmse: 0.896163
[1986]	training's rmse: 0.721529	valid_1's rmse: 0.896166
[1987]	training's rmse: 0.721519	valid_1's rmse: 0.896167
[1988]	training's rmse: 0.72151	valid_1's rmse: 0.896163
[1989]	training's rmse: 0.721502	valid_1's rmse: 0.896166
[1990]	training's

In [62]:
# 验证
from sklearn.metrics import mean_squared_error
valid_prediction = lgb_model.predict(X_valid).clip(0,20)
# valid_prediction
rmse_valid = np.sqrt(mean_squared_error(valid_prediction, label_valid))
rmse_valid

0.8959586618707074

In [69]:
rmse_valid = np.sqrt(mean_squared_error(valid_prediction, label_valid))
rmse_valid

0.8959586618707074

In [70]:
pred_33_df = pd.DataFrame({'item_cnt_month': valid_prediction})
dict(pred_33_df.item_cnt_month.value_counts())

{0.0: 52852,
 0.021887427341717867: 136,
 0.012392117780846399: 136,
 0.3455265551961666: 68,
 0.24419041349259255: 68,
 0.026545321919756892: 68,
 0.2367835393685169: 68,
 0.061109207249286694: 68,
 0.017461475290456933: 68,
 0.0810128069962931: 68,
 0.43670389976087154: 68,
 0.8244429340543955: 68,
 0.05607408554308525: 68,
 0.027647043693439094: 68,
 0.14090044646924477: 68,
 0.10104114505472708: 68,
 0.06546779812974014: 68,
 0.03748443870582796: 68,
 0.08008986438927498: 68,
 0.04923919402900914: 68,
 0.28538852346464577: 68,
 0.06145423313377659: 68,
 0.0014664723191730692: 68,
 0.0010629037175352758: 68,
 0.3231171078950427: 68,
 0.17296414273218086: 68,
 0.7684993299642364: 68,
 0.2541646160715933: 68,
 0.20708042053886874: 68,
 0.3420295705185275: 68,
 0.42265275413901376: 68,
 0.0302149066243018: 42,
 0.03971021618517349: 42,
 0.0058025753963794995: 40,
 0.12247429002485986: 36,
 0.00473559441640541: 36,
 0.1553274417725207: 34,
 0.0035624451123866864: 34,
 0.1458321322116491

In [71]:
# 对sub数据进行变换
def myfun(val):
    return round(val, 0)
pred_33_df['item_cnt_month'] = pred_33_df['item_cnt_month'].apply(myfun)

In [72]:
dict(pred_33_df.item_cnt_month.value_counts())

{0.0: 209542,
 1.0: 20638,
 2.0: 3907,
 3.0: 1334,
 4.0: 938,
 5.0: 707,
 6.0: 314,
 7.0: 171,
 8.0: 111,
 9.0: 76,
 10.0: 67,
 11.0: 61,
 13.0: 51,
 14.0: 47,
 15.0: 46,
 12.0: 44,
 19.0: 32,
 20.0: 22,
 16.0: 22,
 18.0: 22,
 17.0: 20}

In [73]:
m33 = matrix[matrix.date_block_num == 33]
dict(m33.item_cnt_month.value_counts())

{0.0: 206701,
 1.0: 21351,
 2.0: 5070,
 3.0: 1907,
 4.0: 959,
 5.0: 582,
 6.0: 341,
 20.0: 261,
 7.0: 226,
 8.0: 158,
 9.0: 138,
 10.0: 103,
 11.0: 65,
 12.0: 63,
 13.0: 54,
 15.0: 50,
 14.0: 46,
 16.0: 34,
 18.0: 25,
 17.0: 21,
 19.0: 17}

In [74]:
rmse_valid = np.sqrt(mean_squared_error(pred_33_df.item_cnt_month, label_valid))
rmse_valid

0.9143698651566013

# 训练33个月

In [75]:
matrix.shape

(6639294, 78)

In [76]:
"""建模"""
trainData = matrix[matrix['date_block_num'] < 34]
label_train = trainData['item_cnt_month']
X_train = trainData.drop('item_cnt_month', axis=1)

train_data = lgb.Dataset(data=X_train, label=label_train)
params = {
    'objective': 'regression',  # 回归
    'metric': 'rmse',   # 回归问题选择rmse
    'n_estimators': 2000,
    'max_depth': 8,
    'num_leaves': 200,   # 每个弱学习器拥有的叶子的数量
    'learning_rate': 0.01,
    'bagging_fraction': 0.9,    # 每次训练“弱学习器”用的数据比例（应该也是随机的），用于加快训练速度和减小过拟合
    'feature_fraction': 0.3,   # 每次迭代过程中，随机选择30%的特征建树（弱学习器）
    'bagging_seed': 0,
    'early_stop_rounds': 50
}
lgb_model = lgb.train(params, train_data, valid_sets=[train_data])



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13179
[LightGBM] [Info] Number of data points in the train set: 6425094, number of used features: 77
[LightGBM] [Info] Start training from score 0.287729
[1]	training's rmse: 1.18127
[2]	training's rmse: 1.17621
[3]	training's rmse: 1.17162
[4]	training's rmse: 1.16692
[5]	training's rmse: 1.16216
[6]	training's rmse: 1.15775
[7]	training's rmse: 1.15312
[8]	training's rmse: 1.14841
[9]	training's rmse: 1.144
[10]	training's rmse: 1.14047
[11]	training's rmse: 1.1364
[12]	training's rmse: 1.13244
[13]	training's rmse: 1.12785
[14]	training's rmse: 1.12393
[15]	training's rmse: 1.11936
[16]	training's rmse: 1.11626
[17]	training's rmse: 1.11275
[18]	training's rmse: 1.10844
[19]	training's rmse: 1.10472
[20]	training's rmse: 1.10119
[21]	training's rmse: 1.09696
[22]	training's rmse: 1.09389
[23]	training's rmse: 1.08966
[24]	training's r

[247]	training's rmse: 0.824517
[248]	training's rmse: 0.824202
[249]	training's rmse: 0.823775
[250]	training's rmse: 0.823128
[251]	training's rmse: 0.822741
[252]	training's rmse: 0.822433
[253]	training's rmse: 0.822065
[254]	training's rmse: 0.821852
[255]	training's rmse: 0.821606
[256]	training's rmse: 0.821143
[257]	training's rmse: 0.820666
[258]	training's rmse: 0.820423
[259]	training's rmse: 0.820208
[260]	training's rmse: 0.819948
[261]	training's rmse: 0.81973
[262]	training's rmse: 0.819541
[263]	training's rmse: 0.81939
[264]	training's rmse: 0.818932
[265]	training's rmse: 0.818484
[266]	training's rmse: 0.818316
[267]	training's rmse: 0.818123
[268]	training's rmse: 0.817901
[269]	training's rmse: 0.817534
[270]	training's rmse: 0.817139
[271]	training's rmse: 0.816871
[272]	training's rmse: 0.816654
[273]	training's rmse: 0.816495
[274]	training's rmse: 0.816059
[275]	training's rmse: 0.815871
[276]	training's rmse: 0.815656
[277]	training's rmse: 0.815315
[278]	trai

[451]	training's rmse: 0.787285
[452]	training's rmse: 0.787113
[453]	training's rmse: 0.787007
[454]	training's rmse: 0.786898
[455]	training's rmse: 0.786844
[456]	training's rmse: 0.786713
[457]	training's rmse: 0.786626
[458]	training's rmse: 0.786489
[459]	training's rmse: 0.786299
[460]	training's rmse: 0.786214
[461]	training's rmse: 0.7861
[462]	training's rmse: 0.786008
[463]	training's rmse: 0.78591
[464]	training's rmse: 0.785838
[465]	training's rmse: 0.785715
[466]	training's rmse: 0.785609
[467]	training's rmse: 0.785511
[468]	training's rmse: 0.785396
[469]	training's rmse: 0.785277
[470]	training's rmse: 0.78517
[471]	training's rmse: 0.784959
[472]	training's rmse: 0.784832
[473]	training's rmse: 0.784723
[474]	training's rmse: 0.784668
[475]	training's rmse: 0.784589
[476]	training's rmse: 0.784499
[477]	training's rmse: 0.784394
[478]	training's rmse: 0.784313
[479]	training's rmse: 0.784231
[480]	training's rmse: 0.784138
[481]	training's rmse: 0.784047
[482]	traini

[600]	training's rmse: 0.773954
[601]	training's rmse: 0.773897
[602]	training's rmse: 0.77381
[603]	training's rmse: 0.773728
[604]	training's rmse: 0.773652
[605]	training's rmse: 0.77358
[606]	training's rmse: 0.773537
[607]	training's rmse: 0.77347
[608]	training's rmse: 0.773374
[609]	training's rmse: 0.773308
[610]	training's rmse: 0.773277
[611]	training's rmse: 0.773152
[612]	training's rmse: 0.773098
[613]	training's rmse: 0.773035
[614]	training's rmse: 0.772924
[615]	training's rmse: 0.772815
[616]	training's rmse: 0.772754
[617]	training's rmse: 0.772378
[618]	training's rmse: 0.772283
[619]	training's rmse: 0.772214
[620]	training's rmse: 0.772104
[621]	training's rmse: 0.772045
[622]	training's rmse: 0.771985
[623]	training's rmse: 0.771896
[624]	training's rmse: 0.771518
[625]	training's rmse: 0.771445
[626]	training's rmse: 0.771312
[627]	training's rmse: 0.771263
[628]	training's rmse: 0.771224
[629]	training's rmse: 0.771168
[630]	training's rmse: 0.77109
[631]	traini

[730]	training's rmse: 0.764347
[731]	training's rmse: 0.764314
[732]	training's rmse: 0.764273
[733]	training's rmse: 0.7642
[734]	training's rmse: 0.764128
[735]	training's rmse: 0.764089
[736]	training's rmse: 0.764004
[737]	training's rmse: 0.763951
[738]	training's rmse: 0.763838
[739]	training's rmse: 0.763674
[740]	training's rmse: 0.763593
[741]	training's rmse: 0.763518
[742]	training's rmse: 0.763486
[743]	training's rmse: 0.763255
[744]	training's rmse: 0.763167
[745]	training's rmse: 0.763133
[746]	training's rmse: 0.763075
[747]	training's rmse: 0.763043
[748]	training's rmse: 0.76299
[749]	training's rmse: 0.762958
[750]	training's rmse: 0.762883
[751]	training's rmse: 0.762856
[752]	training's rmse: 0.762738
[753]	training's rmse: 0.762703
[754]	training's rmse: 0.762663
[755]	training's rmse: 0.762609
[756]	training's rmse: 0.762563
[757]	training's rmse: 0.762524
[758]	training's rmse: 0.762477
[759]	training's rmse: 0.762427
[760]	training's rmse: 0.762372
[761]	train

[840]	training's rmse: 0.758262
[841]	training's rmse: 0.758209
[842]	training's rmse: 0.758155
[843]	training's rmse: 0.758127
[844]	training's rmse: 0.758094
[845]	training's rmse: 0.758051
[846]	training's rmse: 0.757998
[847]	training's rmse: 0.75793
[848]	training's rmse: 0.75788
[849]	training's rmse: 0.757828
[850]	training's rmse: 0.757773
[851]	training's rmse: 0.757711
[852]	training's rmse: 0.757688
[853]	training's rmse: 0.757634
[854]	training's rmse: 0.757585
[855]	training's rmse: 0.757521
[856]	training's rmse: 0.757493
[857]	training's rmse: 0.757463
[858]	training's rmse: 0.757427
[859]	training's rmse: 0.757393
[860]	training's rmse: 0.757359
[861]	training's rmse: 0.757267
[862]	training's rmse: 0.757226
[863]	training's rmse: 0.757182
[864]	training's rmse: 0.757125
[865]	training's rmse: 0.757094
[866]	training's rmse: 0.757051
[867]	training's rmse: 0.757015
[868]	training's rmse: 0.756993
[869]	training's rmse: 0.756954
[870]	training's rmse: 0.756892
[871]	trai

[954]	training's rmse: 0.753571
[955]	training's rmse: 0.753548
[956]	training's rmse: 0.753523
[957]	training's rmse: 0.753488
[958]	training's rmse: 0.753442
[959]	training's rmse: 0.753417
[960]	training's rmse: 0.753394
[961]	training's rmse: 0.75334
[962]	training's rmse: 0.753316
[963]	training's rmse: 0.753282
[964]	training's rmse: 0.753236
[965]	training's rmse: 0.75319
[966]	training's rmse: 0.753139
[967]	training's rmse: 0.753086
[968]	training's rmse: 0.753022
[969]	training's rmse: 0.752998
[970]	training's rmse: 0.752966
[971]	training's rmse: 0.752924
[972]	training's rmse: 0.752901
[973]	training's rmse: 0.752851
[974]	training's rmse: 0.752821
[975]	training's rmse: 0.752768
[976]	training's rmse: 0.75274
[977]	training's rmse: 0.752709
[978]	training's rmse: 0.752688
[979]	training's rmse: 0.752656
[980]	training's rmse: 0.752617
[981]	training's rmse: 0.752591
[982]	training's rmse: 0.752556
[983]	training's rmse: 0.752528
[984]	training's rmse: 0.752501
[985]	train

[1064]	training's rmse: 0.749615
[1065]	training's rmse: 0.749583
[1066]	training's rmse: 0.749557
[1067]	training's rmse: 0.749497
[1068]	training's rmse: 0.749464
[1069]	training's rmse: 0.749435
[1070]	training's rmse: 0.749387
[1071]	training's rmse: 0.749356
[1072]	training's rmse: 0.749331
[1073]	training's rmse: 0.749278
[1074]	training's rmse: 0.749246
[1075]	training's rmse: 0.749213
[1076]	training's rmse: 0.749174
[1077]	training's rmse: 0.749134
[1078]	training's rmse: 0.749101
[1079]	training's rmse: 0.749065
[1080]	training's rmse: 0.749027
[1081]	training's rmse: 0.748977
[1082]	training's rmse: 0.748947
[1083]	training's rmse: 0.748905
[1084]	training's rmse: 0.748878
[1085]	training's rmse: 0.748831
[1086]	training's rmse: 0.748804
[1087]	training's rmse: 0.74877
[1088]	training's rmse: 0.74874
[1089]	training's rmse: 0.748717
[1090]	training's rmse: 0.748628
[1091]	training's rmse: 0.748578
[1092]	training's rmse: 0.748552
[1093]	training's rmse: 0.748496
[1094]	train

[1170]	training's rmse: 0.746008
[1171]	training's rmse: 0.745988
[1172]	training's rmse: 0.745959
[1173]	training's rmse: 0.745904
[1174]	training's rmse: 0.745882
[1175]	training's rmse: 0.745849
[1176]	training's rmse: 0.745828
[1177]	training's rmse: 0.745793
[1178]	training's rmse: 0.745751
[1179]	training's rmse: 0.745721
[1180]	training's rmse: 0.745699
[1181]	training's rmse: 0.745685
[1182]	training's rmse: 0.745656
[1183]	training's rmse: 0.745628
[1184]	training's rmse: 0.745589
[1185]	training's rmse: 0.745557
[1186]	training's rmse: 0.745537
[1187]	training's rmse: 0.745492
[1188]	training's rmse: 0.745471
[1189]	training's rmse: 0.745435
[1190]	training's rmse: 0.745404
[1191]	training's rmse: 0.745381
[1192]	training's rmse: 0.745355
[1193]	training's rmse: 0.745303
[1194]	training's rmse: 0.74528
[1195]	training's rmse: 0.745242
[1196]	training's rmse: 0.745214
[1197]	training's rmse: 0.745191
[1198]	training's rmse: 0.745148
[1199]	training's rmse: 0.745115
[1200]	trai

[1267]	training's rmse: 0.743207
[1268]	training's rmse: 0.743185
[1269]	training's rmse: 0.743167
[1270]	training's rmse: 0.743147
[1271]	training's rmse: 0.743099
[1272]	training's rmse: 0.743069
[1273]	training's rmse: 0.743051
[1274]	training's rmse: 0.74302
[1275]	training's rmse: 0.742981
[1276]	training's rmse: 0.742968
[1277]	training's rmse: 0.742936
[1278]	training's rmse: 0.742894
[1279]	training's rmse: 0.742871
[1280]	training's rmse: 0.742851
[1281]	training's rmse: 0.742818
[1282]	training's rmse: 0.742799
[1283]	training's rmse: 0.742768
[1284]	training's rmse: 0.742744
[1285]	training's rmse: 0.742718
[1286]	training's rmse: 0.742703
[1287]	training's rmse: 0.742677
[1288]	training's rmse: 0.742647
[1289]	training's rmse: 0.742604
[1290]	training's rmse: 0.742572
[1291]	training's rmse: 0.742555
[1292]	training's rmse: 0.742537
[1293]	training's rmse: 0.742493
[1294]	training's rmse: 0.742468
[1295]	training's rmse: 0.742444
[1296]	training's rmse: 0.74242
[1297]	train

[1371]	training's rmse: 0.74045
[1372]	training's rmse: 0.740429
[1373]	training's rmse: 0.740402
[1374]	training's rmse: 0.740392
[1375]	training's rmse: 0.740359
[1376]	training's rmse: 0.740324
[1377]	training's rmse: 0.740304
[1378]	training's rmse: 0.740285
[1379]	training's rmse: 0.740252
[1380]	training's rmse: 0.740229
[1381]	training's rmse: 0.740204
[1382]	training's rmse: 0.740156
[1383]	training's rmse: 0.740126
[1384]	training's rmse: 0.740084
[1385]	training's rmse: 0.740066
[1386]	training's rmse: 0.740047
[1387]	training's rmse: 0.740015
[1388]	training's rmse: 0.739965
[1389]	training's rmse: 0.739947
[1390]	training's rmse: 0.739927
[1391]	training's rmse: 0.739896
[1392]	training's rmse: 0.739877
[1393]	training's rmse: 0.739847
[1394]	training's rmse: 0.739815
[1395]	training's rmse: 0.739795
[1396]	training's rmse: 0.739781
[1397]	training's rmse: 0.739764
[1398]	training's rmse: 0.73975
[1399]	training's rmse: 0.73972
[1400]	training's rmse: 0.739695
[1401]	traini

[1475]	training's rmse: 0.73782
[1476]	training's rmse: 0.737801
[1477]	training's rmse: 0.73778
[1478]	training's rmse: 0.737738
[1479]	training's rmse: 0.737714
[1480]	training's rmse: 0.737686
[1481]	training's rmse: 0.737666
[1482]	training's rmse: 0.737641
[1483]	training's rmse: 0.737582
[1484]	training's rmse: 0.737563
[1485]	training's rmse: 0.737543
[1486]	training's rmse: 0.737504
[1487]	training's rmse: 0.737484
[1488]	training's rmse: 0.737467
[1489]	training's rmse: 0.73744
[1490]	training's rmse: 0.737419
[1491]	training's rmse: 0.737398
[1492]	training's rmse: 0.737376
[1493]	training's rmse: 0.737352
[1494]	training's rmse: 0.737322
[1495]	training's rmse: 0.737304
[1496]	training's rmse: 0.737288
[1497]	training's rmse: 0.737269
[1498]	training's rmse: 0.737255
[1499]	training's rmse: 0.73724
[1500]	training's rmse: 0.737162
[1501]	training's rmse: 0.737142
[1502]	training's rmse: 0.737119
[1503]	training's rmse: 0.737095
[1504]	training's rmse: 0.737076
[1505]	trainin

[1577]	training's rmse: 0.735377
[1578]	training's rmse: 0.735358
[1579]	training's rmse: 0.735328
[1580]	training's rmse: 0.735293
[1581]	training's rmse: 0.735274
[1582]	training's rmse: 0.735259
[1583]	training's rmse: 0.735229
[1584]	training's rmse: 0.73521
[1585]	training's rmse: 0.735191
[1586]	training's rmse: 0.735171
[1587]	training's rmse: 0.735151
[1588]	training's rmse: 0.735124
[1589]	training's rmse: 0.735101
[1590]	training's rmse: 0.735079
[1591]	training's rmse: 0.735061
[1592]	training's rmse: 0.735048
[1593]	training's rmse: 0.735023
[1594]	training's rmse: 0.734995
[1595]	training's rmse: 0.73497
[1596]	training's rmse: 0.734957
[1597]	training's rmse: 0.734939
[1598]	training's rmse: 0.734923
[1599]	training's rmse: 0.7349
[1600]	training's rmse: 0.734887
[1601]	training's rmse: 0.734863
[1602]	training's rmse: 0.734841
[1603]	training's rmse: 0.734792
[1604]	training's rmse: 0.734749
[1605]	training's rmse: 0.734723
[1606]	training's rmse: 0.734706
[1607]	trainin

[1681]	training's rmse: 0.733065
[1682]	training's rmse: 0.73304
[1683]	training's rmse: 0.733024
[1684]	training's rmse: 0.732995
[1685]	training's rmse: 0.732944
[1686]	training's rmse: 0.732927
[1687]	training's rmse: 0.732916
[1688]	training's rmse: 0.732894
[1689]	training's rmse: 0.73287
[1690]	training's rmse: 0.732856
[1691]	training's rmse: 0.732818
[1692]	training's rmse: 0.732805
[1693]	training's rmse: 0.732789
[1694]	training's rmse: 0.732773
[1695]	training's rmse: 0.732756
[1696]	training's rmse: 0.732714
[1697]	training's rmse: 0.732698
[1698]	training's rmse: 0.732668
[1699]	training's rmse: 0.732645
[1700]	training's rmse: 0.732621
[1701]	training's rmse: 0.732609
[1702]	training's rmse: 0.732596
[1703]	training's rmse: 0.73255
[1704]	training's rmse: 0.732531
[1705]	training's rmse: 0.73252
[1706]	training's rmse: 0.732491
[1707]	training's rmse: 0.732474
[1708]	training's rmse: 0.732444
[1709]	training's rmse: 0.732437
[1710]	training's rmse: 0.732419
[1711]	trainin

[1789]	training's rmse: 0.730605
[1790]	training's rmse: 0.730578
[1791]	training's rmse: 0.730535
[1792]	training's rmse: 0.73052
[1793]	training's rmse: 0.730495
[1794]	training's rmse: 0.730478
[1795]	training's rmse: 0.730461
[1796]	training's rmse: 0.730442
[1797]	training's rmse: 0.730426
[1798]	training's rmse: 0.730418
[1799]	training's rmse: 0.730394
[1800]	training's rmse: 0.730366
[1801]	training's rmse: 0.73035
[1802]	training's rmse: 0.730332
[1803]	training's rmse: 0.730305
[1804]	training's rmse: 0.730288
[1805]	training's rmse: 0.730273
[1806]	training's rmse: 0.730209
[1807]	training's rmse: 0.730197
[1808]	training's rmse: 0.730185
[1809]	training's rmse: 0.730169
[1810]	training's rmse: 0.730147
[1811]	training's rmse: 0.730123
[1812]	training's rmse: 0.73011
[1813]	training's rmse: 0.730089
[1814]	training's rmse: 0.730071
[1815]	training's rmse: 0.730053
[1816]	training's rmse: 0.730038
[1817]	training's rmse: 0.730017
[1818]	training's rmse: 0.729991
[1819]	traini

[1888]	training's rmse: 0.728598
[1889]	training's rmse: 0.728567
[1890]	training's rmse: 0.728557
[1891]	training's rmse: 0.728544
[1892]	training's rmse: 0.728533
[1893]	training's rmse: 0.728501
[1894]	training's rmse: 0.728486
[1895]	training's rmse: 0.728468
[1896]	training's rmse: 0.728454
[1897]	training's rmse: 0.728424
[1898]	training's rmse: 0.72841
[1899]	training's rmse: 0.728389
[1900]	training's rmse: 0.728373
[1901]	training's rmse: 0.728345
[1902]	training's rmse: 0.728317
[1903]	training's rmse: 0.728295
[1904]	training's rmse: 0.728279
[1905]	training's rmse: 0.728263
[1906]	training's rmse: 0.728253
[1907]	training's rmse: 0.72823
[1908]	training's rmse: 0.728208
[1909]	training's rmse: 0.728192
[1910]	training's rmse: 0.728174
[1911]	training's rmse: 0.728158
[1912]	training's rmse: 0.728152
[1913]	training's rmse: 0.728145
[1914]	training's rmse: 0.728115
[1915]	training's rmse: 0.728098
[1916]	training's rmse: 0.72809
[1917]	training's rmse: 0.728076
[1918]	traini

[1976]	training's rmse: 0.726939
[1977]	training's rmse: 0.72692
[1978]	training's rmse: 0.726909
[1979]	training's rmse: 0.726893
[1980]	training's rmse: 0.726881
[1981]	training's rmse: 0.726863
[1982]	training's rmse: 0.726801
[1983]	training's rmse: 0.726789
[1984]	training's rmse: 0.72678
[1985]	training's rmse: 0.726735
[1986]	training's rmse: 0.72672
[1987]	training's rmse: 0.72671
[1988]	training's rmse: 0.726687
[1989]	training's rmse: 0.726665
[1990]	training's rmse: 0.726646
[1991]	training's rmse: 0.726634
[1992]	training's rmse: 0.726595
[1993]	training's rmse: 0.726576
[1994]	training's rmse: 0.726554
[1995]	training's rmse: 0.726532
[1996]	training's rmse: 0.726517
[1997]	training's rmse: 0.726501
[1998]	training's rmse: 0.726486
[1999]	training's rmse: 0.726479
[2000]	training's rmse: 0.726468


In [77]:
six_zero = sales_by_item_id[(sales_by_item_id['28'] == 0) & (sales_by_item_id['29'] == 0) & (sales_by_item_id['30'] == 0) & (sales_by_item_id['31'] == 0) & (sales_by_item_id['32'] == 0) & (sales_by_item_id['33'] == 0)]
six_zero_item_id = list(six_zero['item_id'].values)   # item_id列表

In [78]:
# test数据
testData = matrix[matrix['date_block_num'] == 34]
X_test = testData.drop('item_cnt_month', axis=1)

# 预测&生成文件
y_test = lgb_model.predict(X_test).clip(0, 20)
submission = pd.DataFrame({ 'ID': range(0, 214200), 'item_cnt_month': y_test})

test0 = test[test.item_id.isin(six_zero_item_id)]
ids = list(test0.ID.values)
submission.loc[submission.ID.isin(ids), 'item_cnt_month'] = 0.0
submission.to_csv('./submit/sub1.csv', index=False)

# 分析生成数据

In [90]:
sub = submission.copy()

In [91]:
dict(sub.item_cnt_month.value_counts())

{0.0: 40634,
 0.28960871983713027: 40,
 0.31306224439954544: 40,
 0.3257521661952946: 40,
 0.15455734424698034: 30,
 0.16724726604272952: 30,
 0.13214881600264755: 30,
 0.5606389593078824: 24,
 0.5925064195095856: 24,
 0.6051963413053348: 24,
 0.34484368881809035: 20,
 0.26730109839288346: 20,
 0.25611555617521714: 20,
 0.24444406949205283: 20,
 0.6116608274901834: 20,
 0.2941234411157476: 20,
 0.4518087967761352: 20,
 0.7033786974574552: 20,
 0.6571099795850913: 20,
 1.1167932283511597: 20,
 0.29029480931521773: 20,
 0.22429871211505492: 20,
 0.28277410421464017: 20,
 0.26998387608865454: 20,
 0.3676663355162458: 20,
 0.2794577127593258: 20,
 0.7766794935780167: 20,
 0.2716931616295741: 20,
 0.36219276657063737: 20,
 0.6072537884927857: 20,
 1.2919255699860102: 20,
 0.2805705466966578: 20,
 0.38925965048295186: 20,
 0.3542467262196439: 20,
 0.5986790215953899: 20,
 0.3191045477231047: 20,
 0.5083011027195751: 20,
 0.3651896882496883: 20,
 0.3884026834231535: 20,
 0.2689121668483746: 2

In [92]:
# 对sub数据进行变换

def myfun(val):
    return round(val, 0)
    
sub['item_cnt_month'] = sub['item_cnt_month'].apply(myfun)

In [93]:
dict(sub.item_cnt_month.value_counts())

{0.0: 183539,
 1.0: 22279,
 2.0: 3788,
 3.0: 1465,
 4.0: 1092,
 5.0: 749,
 6.0: 461,
 7.0: 179,
 8.0: 109,
 10.0: 75,
 9.0: 72,
 12.0: 71,
 15.0: 58,
 11.0: 51,
 14.0: 43,
 13.0: 37,
 16.0: 32,
 20.0: 28,
 19.0: 27,
 17.0: 26,
 18.0: 19}

In [94]:
sub.to_csv('./submit/sub1_1.csv', index=False)

In [80]:
m33 = matrix[matrix.date_block_num == 33]
dict(m33.item_cnt_month.value_counts())

{0.0: 206701,
 1.0: 21351,
 2.0: 5070,
 3.0: 1907,
 4.0: 959,
 5.0: 582,
 6.0: 341,
 20.0: 261,
 7.0: 226,
 8.0: 158,
 9.0: 138,
 10.0: 103,
 11.0: 65,
 12.0: 63,
 13.0: 54,
 15.0: 50,
 14.0: 46,
 16.0: 34,
 18.0: 25,
 17.0: 21,
 19.0: 17}

In [77]:
m32 = matrix[matrix.date_block_num == 32]
dict(m32.item_cnt_month.value_counts())

{0.0: 189052,
 1.0: 20128,
 2.0: 4810,
 3.0: 1777,
 4.0: 904,
 5.0: 525,
 6.0: 308,
 20.0: 268,
 7.0: 222,
 8.0: 137,
 9.0: 115,
 10.0: 81,
 11.0: 62,
 12.0: 56,
 13.0: 47,
 19.0: 31,
 16.0: 30,
 14.0: 29,
 15.0: 27,
 17.0: 24,
 18.0: 22}

In [78]:
m31 = matrix[matrix.date_block_num == 31]
dict(m31.item_cnt_month.value_counts())

{0.0: 181110,
 1.0: 22413,
 2.0: 5677,
 3.0: 2151,
 4.0: 1044,
 5.0: 683,
 6.0: 431,
 7.0: 255,
 8.0: 154,
 20.0: 123,
 9.0: 121,
 10.0: 92,
 11.0: 59,
 12.0: 55,
 13.0: 41,
 15.0: 30,
 16.0: 24,
 14.0: 22,
 17.0: 18,
 19.0: 17,
 18.0: 16}

In [79]:
m30 = matrix[matrix.date_block_num == 30]
dict(m30.item_cnt_month.value_counts())

{0.0: 195431,
 1.0: 22977,
 2.0: 5720,
 3.0: 2030,
 4.0: 964,
 5.0: 520,
 6.0: 326,
 7.0: 197,
 8.0: 143,
 20.0: 120,
 9.0: 111,
 10.0: 80,
 11.0: 56,
 12.0: 54,
 14.0: 39,
 13.0: 38,
 15.0: 21,
 16.0: 19,
 18.0: 18,
 17.0: 14,
 19.0: 11}

# 新的开始

In [185]:
matrix.shape

(11128050, 69)

In [186]:
"""建模"""
trainData = matrix[matrix['date_block_num'] < 33]
label_train = trainData['item_cnt_month']
X_train = trainData.drop('item_cnt_month', axis=1)

validData = matrix[matrix['date_block_num'] == 33]
label_valid = validData['item_cnt_month']
X_valid = validData.drop('item_cnt_month', axis=1)

In [187]:
2**10

1024

In [None]:
import lightgbm as lgb
train_data = lgb.Dataset(data=X_train, label=label_train)
valid_data = lgb.Dataset(data=X_valid, label=label_valid)
params = {
    'objective': 'regression',  # 回归
    'metric': 'rmse',   # 回归问题选择rmse
    'n_estimators': 3000,
    'max_depth': 9,
    'num_leaves': 300,   # 每个弱学习器拥有的叶子的数量
    'learning_rate': 0.01,
    'bagging_fraction': 0.9,    # 每次训练“弱学习器”用的数据比例（应该也是随机的），用于加快训练速度和减小过拟合
    'feature_fraction': 0.3,   # 每次迭代过程中，随机选择30%的特征建树（弱学习器）
    'bagging_seed': 0,
    'early_stop_rounds': 50
}
lgb_model = lgb.train(params, train_data, valid_sets=[train_data, valid_data])

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11641
[LightGBM] [Info] Number of data points in the train set: 10675678, number of used features: 68
[LightGBM] [Info] Start training from score 0.299125
[1]	training's rmse: 1.2197	valid_1's rmse: 1.13357
[2]	training's rmse: 1.21523	valid_1's rmse: 1.13003
[3]	training's rmse: 1.2123	valid_1's rmse: 1.12753
[4]	training's rmse: 1.20782	valid_1's rmse: 1.12392
[5]	training's rmse: 1.20355	valid_1's rmse: 1.1208
[6]	training's rmse: 1.20017	valid_1's rmse: 1.11787
[7]	training's rmse: 1.19554	valid_1's rmse: 1.11409
[8]	training's rmse: 1.19168	valid_1's rmse: 1.11111
[9]	training's rmse: 1.18802	valid_1's rmse: 1.10836
[10]	training's rmse: 1.18376	valid_1's rmse: 1.10498
[11]	training's rmse: 1.17996	valid_1's rmse: 1.10205
[12]	training's rmse: 1.1762	valid_1's rmse: 1.09906
[13]	training's rmse: 1.17361	valid_1's rmse: 1.09684
[14]	

[140]	training's rmse: 0.94587	valid_1's rmse: 0.935335
[141]	training's rmse: 0.945163	valid_1's rmse: 0.935025
[142]	training's rmse: 0.94442	valid_1's rmse: 0.934555
[143]	training's rmse: 0.943888	valid_1's rmse: 0.934322
[144]	training's rmse: 0.943132	valid_1's rmse: 0.933978
[145]	training's rmse: 0.942442	valid_1's rmse: 0.933589
[146]	training's rmse: 0.941703	valid_1's rmse: 0.933268
[147]	training's rmse: 0.941064	valid_1's rmse: 0.933005
[148]	training's rmse: 0.940301	valid_1's rmse: 0.932469
[149]	training's rmse: 0.939496	valid_1's rmse: 0.932101
[150]	training's rmse: 0.938944	valid_1's rmse: 0.931827
[151]	training's rmse: 0.93827	valid_1's rmse: 0.931484
[152]	training's rmse: 0.937814	valid_1's rmse: 0.931154
[153]	training's rmse: 0.93726	valid_1's rmse: 0.930885
[154]	training's rmse: 0.936727	valid_1's rmse: 0.930696
[155]	training's rmse: 0.936121	valid_1's rmse: 0.93035
[156]	training's rmse: 0.935492	valid_1's rmse: 0.929886
[157]	training's rmse: 0.93478	valid

[285]	training's rmse: 0.89215	valid_1's rmse: 0.914237
[286]	training's rmse: 0.891981	valid_1's rmse: 0.9142
[287]	training's rmse: 0.891635	valid_1's rmse: 0.914125
[288]	training's rmse: 0.891253	valid_1's rmse: 0.914136
[289]	training's rmse: 0.891089	valid_1's rmse: 0.914158
[290]	training's rmse: 0.890914	valid_1's rmse: 0.914127
[291]	training's rmse: 0.890675	valid_1's rmse: 0.914045
[292]	training's rmse: 0.890532	valid_1's rmse: 0.914027
[293]	training's rmse: 0.890376	valid_1's rmse: 0.914004
[294]	training's rmse: 0.890205	valid_1's rmse: 0.914008
[295]	training's rmse: 0.889996	valid_1's rmse: 0.913917
[296]	training's rmse: 0.88988	valid_1's rmse: 0.913947
[297]	training's rmse: 0.889695	valid_1's rmse: 0.913946
[298]	training's rmse: 0.889546	valid_1's rmse: 0.913966
[299]	training's rmse: 0.889369	valid_1's rmse: 0.913925
[300]	training's rmse: 0.889201	valid_1's rmse: 0.913887
[301]	training's rmse: 0.889013	valid_1's rmse: 0.914008
[302]	training's rmse: 0.888718	val

[430]	training's rmse: 0.870925	valid_1's rmse: 0.911865
[431]	training's rmse: 0.870826	valid_1's rmse: 0.91177
[432]	training's rmse: 0.870723	valid_1's rmse: 0.911756
[433]	training's rmse: 0.870473	valid_1's rmse: 0.911617
[434]	training's rmse: 0.87035	valid_1's rmse: 0.911654
[435]	training's rmse: 0.870195	valid_1's rmse: 0.911639
[436]	training's rmse: 0.870103	valid_1's rmse: 0.911612
[437]	training's rmse: 0.870043	valid_1's rmse: 0.911611
[438]	training's rmse: 0.86995	valid_1's rmse: 0.911603
[439]	training's rmse: 0.86984	valid_1's rmse: 0.911549
[440]	training's rmse: 0.869756	valid_1's rmse: 0.911588
[441]	training's rmse: 0.869638	valid_1's rmse: 0.911582
[442]	training's rmse: 0.869591	valid_1's rmse: 0.911587
[443]	training's rmse: 0.869483	valid_1's rmse: 0.911546
[444]	training's rmse: 0.869417	valid_1's rmse: 0.911522
[445]	training's rmse: 0.869368	valid_1's rmse: 0.911514
[446]	training's rmse: 0.869299	valid_1's rmse: 0.911532
[447]	training's rmse: 0.869224	val

[563]	training's rmse: 0.858673	valid_1's rmse: 0.910156
[564]	training's rmse: 0.858594	valid_1's rmse: 0.910169
[565]	training's rmse: 0.858526	valid_1's rmse: 0.910146
[566]	training's rmse: 0.858467	valid_1's rmse: 0.910154
[567]	training's rmse: 0.858357	valid_1's rmse: 0.910135
[568]	training's rmse: 0.8583	valid_1's rmse: 0.910123
[569]	training's rmse: 0.858238	valid_1's rmse: 0.910103
[570]	training's rmse: 0.858198	valid_1's rmse: 0.91012
[571]	training's rmse: 0.858143	valid_1's rmse: 0.910123
[572]	training's rmse: 0.858101	valid_1's rmse: 0.910113
[573]	training's rmse: 0.85806	valid_1's rmse: 0.910105
[574]	training's rmse: 0.857988	valid_1's rmse: 0.910095
[575]	training's rmse: 0.857905	valid_1's rmse: 0.910137
[576]	training's rmse: 0.857846	valid_1's rmse: 0.910144
[577]	training's rmse: 0.857793	valid_1's rmse: 0.910153
[578]	training's rmse: 0.857665	valid_1's rmse: 0.910144
[579]	training's rmse: 0.8576	valid_1's rmse: 0.910129
[580]	training's rmse: 0.857483	valid

[685]	training's rmse: 0.849818	valid_1's rmse: 0.908553
[686]	training's rmse: 0.849758	valid_1's rmse: 0.908554
[687]	training's rmse: 0.84973	valid_1's rmse: 0.908541
[688]	training's rmse: 0.849669	valid_1's rmse: 0.908543
[689]	training's rmse: 0.849593	valid_1's rmse: 0.908525
[690]	training's rmse: 0.849537	valid_1's rmse: 0.908528
[691]	training's rmse: 0.849475	valid_1's rmse: 0.908514
[692]	training's rmse: 0.849454	valid_1's rmse: 0.908508
[693]	training's rmse: 0.849392	valid_1's rmse: 0.908477
[694]	training's rmse: 0.849345	valid_1's rmse: 0.908473
[695]	training's rmse: 0.849268	valid_1's rmse: 0.908475
[696]	training's rmse: 0.849223	valid_1's rmse: 0.908479
[697]	training's rmse: 0.849193	valid_1's rmse: 0.908462
[698]	training's rmse: 0.849048	valid_1's rmse: 0.908481
[699]	training's rmse: 0.849012	valid_1's rmse: 0.908463
[700]	training's rmse: 0.848954	valid_1's rmse: 0.908491
[701]	training's rmse: 0.848905	valid_1's rmse: 0.908484
[702]	training's rmse: 0.848861	

[805]	training's rmse: 0.842826	valid_1's rmse: 0.907177
[806]	training's rmse: 0.842777	valid_1's rmse: 0.907175
[807]	training's rmse: 0.842685	valid_1's rmse: 0.907097
[808]	training's rmse: 0.842642	valid_1's rmse: 0.907078
[809]	training's rmse: 0.84261	valid_1's rmse: 0.907087
[810]	training's rmse: 0.842578	valid_1's rmse: 0.907081
[811]	training's rmse: 0.842549	valid_1's rmse: 0.907086
[812]	training's rmse: 0.842495	valid_1's rmse: 0.90709
[813]	training's rmse: 0.842443	valid_1's rmse: 0.907094
[814]	training's rmse: 0.842415	valid_1's rmse: 0.907065
[815]	training's rmse: 0.842396	valid_1's rmse: 0.90708
[816]	training's rmse: 0.842307	valid_1's rmse: 0.90707
[817]	training's rmse: 0.842268	valid_1's rmse: 0.907069
[818]	training's rmse: 0.842234	valid_1's rmse: 0.90706
[819]	training's rmse: 0.842203	valid_1's rmse: 0.907049
[820]	training's rmse: 0.842176	valid_1's rmse: 0.907057
[821]	training's rmse: 0.842075	valid_1's rmse: 0.907032
[822]	training's rmse: 0.841991	vali

[918]	training's rmse: 0.837782	valid_1's rmse: 0.906569
[919]	training's rmse: 0.837712	valid_1's rmse: 0.906523
[920]	training's rmse: 0.837691	valid_1's rmse: 0.906517
[921]	training's rmse: 0.837621	valid_1's rmse: 0.906544
[922]	training's rmse: 0.837578	valid_1's rmse: 0.906532
[923]	training's rmse: 0.837533	valid_1's rmse: 0.906506
[924]	training's rmse: 0.837491	valid_1's rmse: 0.906515
[925]	training's rmse: 0.83747	valid_1's rmse: 0.906504
[926]	training's rmse: 0.837418	valid_1's rmse: 0.90646
[927]	training's rmse: 0.837398	valid_1's rmse: 0.906459
[928]	training's rmse: 0.837374	valid_1's rmse: 0.906458
[929]	training's rmse: 0.837347	valid_1's rmse: 0.906453
[930]	training's rmse: 0.837321	valid_1's rmse: 0.906454
[931]	training's rmse: 0.837281	valid_1's rmse: 0.906469
[932]	training's rmse: 0.837251	valid_1's rmse: 0.906467
[933]	training's rmse: 0.837179	valid_1's rmse: 0.906424
[934]	training's rmse: 0.83716	valid_1's rmse: 0.906422
[935]	training's rmse: 0.837118	va

[1016]	training's rmse: 0.834081	valid_1's rmse: 0.906483
[1017]	training's rmse: 0.834059	valid_1's rmse: 0.906461
[1018]	training's rmse: 0.834022	valid_1's rmse: 0.906466
[1019]	training's rmse: 0.834	valid_1's rmse: 0.906461
[1020]	training's rmse: 0.83397	valid_1's rmse: 0.906457
[1021]	training's rmse: 0.833942	valid_1's rmse: 0.906478
[1022]	training's rmse: 0.833912	valid_1's rmse: 0.90645
[1023]	training's rmse: 0.833893	valid_1's rmse: 0.906444
[1024]	training's rmse: 0.833853	valid_1's rmse: 0.906421
[1025]	training's rmse: 0.833834	valid_1's rmse: 0.906372
[1026]	training's rmse: 0.833816	valid_1's rmse: 0.906372
[1027]	training's rmse: 0.83379	valid_1's rmse: 0.90635
[1028]	training's rmse: 0.833763	valid_1's rmse: 0.906351
[1029]	training's rmse: 0.833713	valid_1's rmse: 0.906347
[1030]	training's rmse: 0.833673	valid_1's rmse: 0.906325
[1031]	training's rmse: 0.833637	valid_1's rmse: 0.906312
[1032]	training's rmse: 0.833611	valid_1's rmse: 0.906328
[1033]	training's rms

[1114]	training's rmse: 0.830926	valid_1's rmse: 0.906088
[1115]	training's rmse: 0.83091	valid_1's rmse: 0.906069
[1116]	training's rmse: 0.830874	valid_1's rmse: 0.906073
[1117]	training's rmse: 0.830856	valid_1's rmse: 0.906076
[1118]	training's rmse: 0.830825	valid_1's rmse: 0.906091
[1119]	training's rmse: 0.830801	valid_1's rmse: 0.906092
[1120]	training's rmse: 0.83077	valid_1's rmse: 0.906086
[1121]	training's rmse: 0.830753	valid_1's rmse: 0.906095
[1122]	training's rmse: 0.830737	valid_1's rmse: 0.906098
[1123]	training's rmse: 0.83068	valid_1's rmse: 0.906097
[1124]	training's rmse: 0.83066	valid_1's rmse: 0.906098
[1125]	training's rmse: 0.830636	valid_1's rmse: 0.906134
[1126]	training's rmse: 0.830595	valid_1's rmse: 0.906178
[1127]	training's rmse: 0.830571	valid_1's rmse: 0.906173
[1128]	training's rmse: 0.830548	valid_1's rmse: 0.90617
[1129]	training's rmse: 0.830529	valid_1's rmse: 0.906169
[1130]	training's rmse: 0.8305	valid_1's rmse: 0.906157
[1131]	training's rms

[1200]	training's rmse: 0.828532	valid_1's rmse: 0.906284
[1201]	training's rmse: 0.828516	valid_1's rmse: 0.906265
[1202]	training's rmse: 0.8285	valid_1's rmse: 0.906274
[1203]	training's rmse: 0.828477	valid_1's rmse: 0.906283
[1204]	training's rmse: 0.828462	valid_1's rmse: 0.906277
[1205]	training's rmse: 0.828449	valid_1's rmse: 0.906274
[1206]	training's rmse: 0.828418	valid_1's rmse: 0.906284
[1207]	training's rmse: 0.828405	valid_1's rmse: 0.906272
[1208]	training's rmse: 0.82838	valid_1's rmse: 0.906276
[1209]	training's rmse: 0.828332	valid_1's rmse: 0.906286
[1210]	training's rmse: 0.828313	valid_1's rmse: 0.90631
[1211]	training's rmse: 0.828292	valid_1's rmse: 0.906302
[1212]	training's rmse: 0.82827	valid_1's rmse: 0.906318
[1213]	training's rmse: 0.828257	valid_1's rmse: 0.906317
[1214]	training's rmse: 0.828243	valid_1's rmse: 0.90632
[1215]	training's rmse: 0.828224	valid_1's rmse: 0.906322
[1216]	training's rmse: 0.828202	valid_1's rmse: 0.906351
[1217]	training's rm

[1280]	training's rmse: 0.826512	valid_1's rmse: 0.906085
[1281]	training's rmse: 0.826466	valid_1's rmse: 0.906091
[1282]	training's rmse: 0.826435	valid_1's rmse: 0.906077
[1283]	training's rmse: 0.826407	valid_1's rmse: 0.906071
[1284]	training's rmse: 0.826387	valid_1's rmse: 0.90606
[1285]	training's rmse: 0.826364	valid_1's rmse: 0.906073
[1286]	training's rmse: 0.826331	valid_1's rmse: 0.906065
[1287]	training's rmse: 0.826325	valid_1's rmse: 0.906064
[1288]	training's rmse: 0.826316	valid_1's rmse: 0.906063
[1289]	training's rmse: 0.826304	valid_1's rmse: 0.906066
[1290]	training's rmse: 0.826288	valid_1's rmse: 0.90607
[1291]	training's rmse: 0.826281	valid_1's rmse: 0.906062
[1292]	training's rmse: 0.826262	valid_1's rmse: 0.906056
[1293]	training's rmse: 0.826234	valid_1's rmse: 0.906035
[1294]	training's rmse: 0.826198	valid_1's rmse: 0.906049
[1295]	training's rmse: 0.826158	valid_1's rmse: 0.906052
[1296]	training's rmse: 0.826144	valid_1's rmse: 0.906055
[1297]	training'

[1364]	training's rmse: 0.824543	valid_1's rmse: 0.906325
[1365]	training's rmse: 0.824519	valid_1's rmse: 0.90631
[1366]	training's rmse: 0.824486	valid_1's rmse: 0.906321
[1367]	training's rmse: 0.824459	valid_1's rmse: 0.90632
[1368]	training's rmse: 0.824442	valid_1's rmse: 0.906313
[1369]	training's rmse: 0.824426	valid_1's rmse: 0.906313
[1370]	training's rmse: 0.824419	valid_1's rmse: 0.906308
[1371]	training's rmse: 0.824395	valid_1's rmse: 0.906293
[1372]	training's rmse: 0.82438	valid_1's rmse: 0.9063
[1373]	training's rmse: 0.824365	valid_1's rmse: 0.90631
[1374]	training's rmse: 0.824344	valid_1's rmse: 0.906302
[1375]	training's rmse: 0.824321	valid_1's rmse: 0.906293
[1376]	training's rmse: 0.824311	valid_1's rmse: 0.906282
[1377]	training's rmse: 0.824295	valid_1's rmse: 0.906289
[1378]	training's rmse: 0.824275	valid_1's rmse: 0.90629
[1379]	training's rmse: 0.824267	valid_1's rmse: 0.906284
[1380]	training's rmse: 0.824246	valid_1's rmse: 0.906284
[1381]	training's rms

[1448]	training's rmse: 0.822771	valid_1's rmse: 0.906642
[1449]	training's rmse: 0.822755	valid_1's rmse: 0.906633
[1450]	training's rmse: 0.822737	valid_1's rmse: 0.906622
[1451]	training's rmse: 0.822705	valid_1's rmse: 0.906618
[1452]	training's rmse: 0.822697	valid_1's rmse: 0.906617
[1453]	training's rmse: 0.822665	valid_1's rmse: 0.906627
[1454]	training's rmse: 0.822629	valid_1's rmse: 0.906646
[1455]	training's rmse: 0.822607	valid_1's rmse: 0.906642
[1456]	training's rmse: 0.82258	valid_1's rmse: 0.906659
[1457]	training's rmse: 0.822553	valid_1's rmse: 0.906679
[1458]	training's rmse: 0.822433	valid_1's rmse: 0.90684
[1459]	training's rmse: 0.822421	valid_1's rmse: 0.906838
[1460]	training's rmse: 0.822394	valid_1's rmse: 0.906843
[1461]	training's rmse: 0.822371	valid_1's rmse: 0.906846
[1462]	training's rmse: 0.822359	valid_1's rmse: 0.906845
[1463]	training's rmse: 0.822334	valid_1's rmse: 0.906848
[1464]	training's rmse: 0.822321	valid_1's rmse: 0.906844
[1465]	training'

[1539]	training's rmse: 0.820678	valid_1's rmse: 0.906942
[1540]	training's rmse: 0.820663	valid_1's rmse: 0.906935
[1541]	training's rmse: 0.820632	valid_1's rmse: 0.906923
[1542]	training's rmse: 0.820607	valid_1's rmse: 0.906921
[1543]	training's rmse: 0.820572	valid_1's rmse: 0.906911
[1544]	training's rmse: 0.820559	valid_1's rmse: 0.906921
[1545]	training's rmse: 0.820518	valid_1's rmse: 0.906911
[1546]	training's rmse: 0.820499	valid_1's rmse: 0.906918
[1547]	training's rmse: 0.820485	valid_1's rmse: 0.906916
[1548]	training's rmse: 0.820456	valid_1's rmse: 0.906913
[1549]	training's rmse: 0.820441	valid_1's rmse: 0.906895
[1550]	training's rmse: 0.820433	valid_1's rmse: 0.9069
[1551]	training's rmse: 0.820412	valid_1's rmse: 0.906923
[1552]	training's rmse: 0.820396	valid_1's rmse: 0.906922
[1553]	training's rmse: 0.820349	valid_1's rmse: 0.906923
[1554]	training's rmse: 0.820335	valid_1's rmse: 0.90691
[1555]	training's rmse: 0.820328	valid_1's rmse: 0.906909
[1556]	training's

[1622]	training's rmse: 0.819	valid_1's rmse: 0.907119
[1623]	training's rmse: 0.818976	valid_1's rmse: 0.90712
[1624]	training's rmse: 0.818951	valid_1's rmse: 0.907134
[1625]	training's rmse: 0.818932	valid_1's rmse: 0.90714
[1626]	training's rmse: 0.818923	valid_1's rmse: 0.907138
[1627]	training's rmse: 0.818893	valid_1's rmse: 0.90713
[1628]	training's rmse: 0.818871	valid_1's rmse: 0.907127
[1629]	training's rmse: 0.818863	valid_1's rmse: 0.90712
[1630]	training's rmse: 0.818832	valid_1's rmse: 0.90712
[1631]	training's rmse: 0.818815	valid_1's rmse: 0.907134
[1632]	training's rmse: 0.818804	valid_1's rmse: 0.907128
[1633]	training's rmse: 0.818776	valid_1's rmse: 0.907125
[1634]	training's rmse: 0.818754	valid_1's rmse: 0.907124
[1635]	training's rmse: 0.818744	valid_1's rmse: 0.907114
[1636]	training's rmse: 0.818722	valid_1's rmse: 0.907111
[1637]	training's rmse: 0.818686	valid_1's rmse: 0.907109
[1638]	training's rmse: 0.818651	valid_1's rmse: 0.907097
[1639]	training's rmse

[1705]	training's rmse: 0.817337	valid_1's rmse: 0.906918
[1706]	training's rmse: 0.817307	valid_1's rmse: 0.906909
[1707]	training's rmse: 0.817294	valid_1's rmse: 0.906901
[1708]	training's rmse: 0.817285	valid_1's rmse: 0.906921
[1709]	training's rmse: 0.817275	valid_1's rmse: 0.906904
[1710]	training's rmse: 0.817264	valid_1's rmse: 0.906895
[1711]	training's rmse: 0.817253	valid_1's rmse: 0.906889
[1712]	training's rmse: 0.817238	valid_1's rmse: 0.906884
[1713]	training's rmse: 0.817227	valid_1's rmse: 0.906885
[1714]	training's rmse: 0.817186	valid_1's rmse: 0.906878
[1715]	training's rmse: 0.817176	valid_1's rmse: 0.906879
[1716]	training's rmse: 0.817164	valid_1's rmse: 0.906885
[1717]	training's rmse: 0.817149	valid_1's rmse: 0.906893
[1718]	training's rmse: 0.817121	valid_1's rmse: 0.906899
[1719]	training's rmse: 0.817114	valid_1's rmse: 0.906899
[1720]	training's rmse: 0.817079	valid_1's rmse: 0.906922
[1721]	training's rmse: 0.817068	valid_1's rmse: 0.906934
[1722]	trainin

[1785]	training's rmse: 0.815936	valid_1's rmse: 0.906894
[1786]	training's rmse: 0.815911	valid_1's rmse: 0.906902
[1787]	training's rmse: 0.81589	valid_1's rmse: 0.906881
[1788]	training's rmse: 0.815876	valid_1's rmse: 0.906876
[1789]	training's rmse: 0.815855	valid_1's rmse: 0.906888
[1790]	training's rmse: 0.815833	valid_1's rmse: 0.906895
[1791]	training's rmse: 0.815811	valid_1's rmse: 0.90689
[1792]	training's rmse: 0.815801	valid_1's rmse: 0.906936
[1793]	training's rmse: 0.815773	valid_1's rmse: 0.906929
[1794]	training's rmse: 0.815747	valid_1's rmse: 0.906922
[1795]	training's rmse: 0.815731	valid_1's rmse: 0.906921
[1796]	training's rmse: 0.815712	valid_1's rmse: 0.906927
[1797]	training's rmse: 0.815674	valid_1's rmse: 0.90692
[1798]	training's rmse: 0.815643	valid_1's rmse: 0.906927
[1799]	training's rmse: 0.815631	valid_1's rmse: 0.906927
[1800]	training's rmse: 0.815617	valid_1's rmse: 0.906933
[1801]	training's rmse: 0.81559	valid_1's rmse: 0.906941
[1802]	training's 

[1864]	training's rmse: 0.814531	valid_1's rmse: 0.906794
[1865]	training's rmse: 0.814515	valid_1's rmse: 0.906781
[1866]	training's rmse: 0.814489	valid_1's rmse: 0.906773
[1867]	training's rmse: 0.814477	valid_1's rmse: 0.906775
[1868]	training's rmse: 0.81445	valid_1's rmse: 0.90676
[1869]	training's rmse: 0.814432	valid_1's rmse: 0.906767
[1870]	training's rmse: 0.814422	valid_1's rmse: 0.906765
[1871]	training's rmse: 0.814399	valid_1's rmse: 0.906755
[1872]	training's rmse: 0.814376	valid_1's rmse: 0.906755
[1873]	training's rmse: 0.814285	valid_1's rmse: 0.906723
[1874]	training's rmse: 0.814268	valid_1's rmse: 0.90672
[1875]	training's rmse: 0.814243	valid_1's rmse: 0.906739
[1876]	training's rmse: 0.814236	valid_1's rmse: 0.906744
[1877]	training's rmse: 0.81421	valid_1's rmse: 0.906758
[1878]	training's rmse: 0.814191	valid_1's rmse: 0.906777
[1879]	training's rmse: 0.814178	valid_1's rmse: 0.906769
[1880]	training's rmse: 0.814172	valid_1's rmse: 0.906769
[1881]	training's 

[1940]	training's rmse: 0.813193	valid_1's rmse: 0.906668
[1941]	training's rmse: 0.813173	valid_1's rmse: 0.906662
[1942]	training's rmse: 0.81316	valid_1's rmse: 0.906651
[1943]	training's rmse: 0.813152	valid_1's rmse: 0.906663
[1944]	training's rmse: 0.813138	valid_1's rmse: 0.906658
[1945]	training's rmse: 0.813117	valid_1's rmse: 0.906691
[1946]	training's rmse: 0.813103	valid_1's rmse: 0.906698
[1947]	training's rmse: 0.813085	valid_1's rmse: 0.906697
[1948]	training's rmse: 0.813062	valid_1's rmse: 0.906699
[1949]	training's rmse: 0.813034	valid_1's rmse: 0.9067
[1950]	training's rmse: 0.813017	valid_1's rmse: 0.9067
[1951]	training's rmse: 0.813007	valid_1's rmse: 0.906698
[1952]	training's rmse: 0.812986	valid_1's rmse: 0.906697
[1953]	training's rmse: 0.812976	valid_1's rmse: 0.906705
[1954]	training's rmse: 0.812971	valid_1's rmse: 0.9067
[1955]	training's rmse: 0.812965	valid_1's rmse: 0.906702
[1956]	training's rmse: 0.81296	valid_1's rmse: 0.906703
[1957]	training's rmse

[2011]	training's rmse: 0.812221	valid_1's rmse: 0.906652
[2012]	training's rmse: 0.812206	valid_1's rmse: 0.906672
[2013]	training's rmse: 0.81218	valid_1's rmse: 0.906647
[2014]	training's rmse: 0.812165	valid_1's rmse: 0.906641
[2015]	training's rmse: 0.812156	valid_1's rmse: 0.906629
[2016]	training's rmse: 0.812147	valid_1's rmse: 0.906625
[2017]	training's rmse: 0.81213	valid_1's rmse: 0.906633
[2018]	training's rmse: 0.812096	valid_1's rmse: 0.906634
[2019]	training's rmse: 0.812065	valid_1's rmse: 0.906619
[2020]	training's rmse: 0.812053	valid_1's rmse: 0.906625
[2021]	training's rmse: 0.812039	valid_1's rmse: 0.906623
[2022]	training's rmse: 0.812031	valid_1's rmse: 0.906622
[2023]	training's rmse: 0.812023	valid_1's rmse: 0.906622
[2024]	training's rmse: 0.812007	valid_1's rmse: 0.906639
[2025]	training's rmse: 0.811992	valid_1's rmse: 0.906642
[2026]	training's rmse: 0.811978	valid_1's rmse: 0.906647
[2027]	training's rmse: 0.811962	valid_1's rmse: 0.906651
[2028]	training'

[2084]	training's rmse: 0.811151	valid_1's rmse: 0.906594
[2085]	training's rmse: 0.811133	valid_1's rmse: 0.906594
[2086]	training's rmse: 0.81111	valid_1's rmse: 0.906593
[2087]	training's rmse: 0.811079	valid_1's rmse: 0.906604
[2088]	training's rmse: 0.811061	valid_1's rmse: 0.906604
[2089]	training's rmse: 0.811051	valid_1's rmse: 0.906609
[2090]	training's rmse: 0.811031	valid_1's rmse: 0.906599
[2091]	training's rmse: 0.811022	valid_1's rmse: 0.906596
[2092]	training's rmse: 0.811001	valid_1's rmse: 0.906596
[2093]	training's rmse: 0.810994	valid_1's rmse: 0.906601
[2094]	training's rmse: 0.810987	valid_1's rmse: 0.906587
[2095]	training's rmse: 0.810978	valid_1's rmse: 0.906583
[2096]	training's rmse: 0.810972	valid_1's rmse: 0.906585
[2097]	training's rmse: 0.810947	valid_1's rmse: 0.90658
[2098]	training's rmse: 0.810838	valid_1's rmse: 0.906733
[2099]	training's rmse: 0.810826	valid_1's rmse: 0.906724
[2100]	training's rmse: 0.810819	valid_1's rmse: 0.906726
[2101]	training'

[2154]	training's rmse: 0.810055	valid_1's rmse: 0.906669
[2155]	training's rmse: 0.810036	valid_1's rmse: 0.906678
[2156]	training's rmse: 0.81002	valid_1's rmse: 0.906662
[2157]	training's rmse: 0.81	valid_1's rmse: 0.90663
[2158]	training's rmse: 0.809981	valid_1's rmse: 0.90663
[2159]	training's rmse: 0.80997	valid_1's rmse: 0.906642
[2160]	training's rmse: 0.809947	valid_1's rmse: 0.906659
[2161]	training's rmse: 0.809937	valid_1's rmse: 0.906665
[2162]	training's rmse: 0.809923	valid_1's rmse: 0.906661
[2163]	training's rmse: 0.80991	valid_1's rmse: 0.906677
[2164]	training's rmse: 0.80989	valid_1's rmse: 0.906674
[2165]	training's rmse: 0.80986	valid_1's rmse: 0.906669
[2166]	training's rmse: 0.809852	valid_1's rmse: 0.906663
[2167]	training's rmse: 0.809845	valid_1's rmse: 0.90666
[2168]	training's rmse: 0.80983	valid_1's rmse: 0.906658
[2169]	training's rmse: 0.809783	valid_1's rmse: 0.906688
[2170]	training's rmse: 0.809771	valid_1's rmse: 0.90669
[2171]	training's rmse: 0.80

[2232]	training's rmse: 0.808867	valid_1's rmse: 0.90665
[2233]	training's rmse: 0.808856	valid_1's rmse: 0.906652
[2234]	training's rmse: 0.808846	valid_1's rmse: 0.906654
[2235]	training's rmse: 0.808828	valid_1's rmse: 0.906655
[2236]	training's rmse: 0.808821	valid_1's rmse: 0.906653
[2237]	training's rmse: 0.808806	valid_1's rmse: 0.906651
[2238]	training's rmse: 0.808802	valid_1's rmse: 0.906652
[2239]	training's rmse: 0.808722	valid_1's rmse: 0.906685
[2240]	training's rmse: 0.80868	valid_1's rmse: 0.906685
[2241]	training's rmse: 0.808667	valid_1's rmse: 0.906674
[2242]	training's rmse: 0.808649	valid_1's rmse: 0.906676
[2243]	training's rmse: 0.808638	valid_1's rmse: 0.906671
[2244]	training's rmse: 0.80863	valid_1's rmse: 0.906668
[2245]	training's rmse: 0.808621	valid_1's rmse: 0.906667
[2246]	training's rmse: 0.808594	valid_1's rmse: 0.906653
[2247]	training's rmse: 0.808579	valid_1's rmse: 0.90664
[2248]	training's rmse: 0.808564	valid_1's rmse: 0.906647
[2249]	training's 

[2309]	training's rmse: 0.807658	valid_1's rmse: 0.906231
[2310]	training's rmse: 0.807647	valid_1's rmse: 0.906231
[2311]	training's rmse: 0.807632	valid_1's rmse: 0.906226
[2312]	training's rmse: 0.807598	valid_1's rmse: 0.906198
[2313]	training's rmse: 0.807582	valid_1's rmse: 0.906209
[2314]	training's rmse: 0.807565	valid_1's rmse: 0.906211
[2315]	training's rmse: 0.807539	valid_1's rmse: 0.906202
[2316]	training's rmse: 0.80749	valid_1's rmse: 0.906218
[2317]	training's rmse: 0.807479	valid_1's rmse: 0.906221
[2318]	training's rmse: 0.807468	valid_1's rmse: 0.906219
[2319]	training's rmse: 0.807445	valid_1's rmse: 0.906223
[2320]	training's rmse: 0.807429	valid_1's rmse: 0.906206
[2321]	training's rmse: 0.807419	valid_1's rmse: 0.906204
[2322]	training's rmse: 0.807403	valid_1's rmse: 0.906196
[2323]	training's rmse: 0.807394	valid_1's rmse: 0.906187
[2324]	training's rmse: 0.807386	valid_1's rmse: 0.906189
[2325]	training's rmse: 0.80738	valid_1's rmse: 0.906194
[2326]	training'

[2388]	training's rmse: 0.806497	valid_1's rmse: 0.906023
[2389]	training's rmse: 0.80647	valid_1's rmse: 0.906059
[2390]	training's rmse: 0.806459	valid_1's rmse: 0.906054
[2391]	training's rmse: 0.806451	valid_1's rmse: 0.906051
[2392]	training's rmse: 0.806446	valid_1's rmse: 0.90603
[2393]	training's rmse: 0.806439	valid_1's rmse: 0.906061
[2394]	training's rmse: 0.806426	valid_1's rmse: 0.906079
[2395]	training's rmse: 0.806417	valid_1's rmse: 0.906074
[2396]	training's rmse: 0.806401	valid_1's rmse: 0.906073
[2397]	training's rmse: 0.806377	valid_1's rmse: 0.906069
[2398]	training's rmse: 0.806344	valid_1's rmse: 0.906066
[2399]	training's rmse: 0.806335	valid_1's rmse: 0.906071
[2400]	training's rmse: 0.80633	valid_1's rmse: 0.906069
[2401]	training's rmse: 0.806316	valid_1's rmse: 0.906074
[2402]	training's rmse: 0.806269	valid_1's rmse: 0.906065
[2403]	training's rmse: 0.806259	valid_1's rmse: 0.906064
[2404]	training's rmse: 0.806254	valid_1's rmse: 0.906064
[2405]	training's

[2464]	training's rmse: 0.805395	valid_1's rmse: 0.905911
[2465]	training's rmse: 0.805385	valid_1's rmse: 0.905913
[2466]	training's rmse: 0.805368	valid_1's rmse: 0.905935
[2467]	training's rmse: 0.805352	valid_1's rmse: 0.905945
[2468]	training's rmse: 0.805343	valid_1's rmse: 0.905939
[2469]	training's rmse: 0.805332	valid_1's rmse: 0.905937
[2470]	training's rmse: 0.805323	valid_1's rmse: 0.905951
[2471]	training's rmse: 0.805281	valid_1's rmse: 0.90595
[2472]	training's rmse: 0.805224	valid_1's rmse: 0.905916
[2473]	training's rmse: 0.805218	valid_1's rmse: 0.905914
[2474]	training's rmse: 0.805207	valid_1's rmse: 0.9059
[2475]	training's rmse: 0.805204	valid_1's rmse: 0.9059
[2476]	training's rmse: 0.805194	valid_1's rmse: 0.905894
[2477]	training's rmse: 0.805184	valid_1's rmse: 0.905922
[2478]	training's rmse: 0.80517	valid_1's rmse: 0.905917
[2479]	training's rmse: 0.805154	valid_1's rmse: 0.905918
[2480]	training's rmse: 0.805146	valid_1's rmse: 0.905895
[2481]	training's rm

[2536]	training's rmse: 0.804446	valid_1's rmse: 0.905832
[2537]	training's rmse: 0.804436	valid_1's rmse: 0.90583
[2538]	training's rmse: 0.804429	valid_1's rmse: 0.905825
[2539]	training's rmse: 0.804413	valid_1's rmse: 0.905826
[2540]	training's rmse: 0.804408	valid_1's rmse: 0.905825
[2541]	training's rmse: 0.804397	valid_1's rmse: 0.905822
[2542]	training's rmse: 0.80439	valid_1's rmse: 0.905818
[2543]	training's rmse: 0.804379	valid_1's rmse: 0.905826
[2544]	training's rmse: 0.804375	valid_1's rmse: 0.905828
[2545]	training's rmse: 0.804366	valid_1's rmse: 0.905818
[2546]	training's rmse: 0.804332	valid_1's rmse: 0.905717
[2547]	training's rmse: 0.804328	valid_1's rmse: 0.905718
[2548]	training's rmse: 0.804321	valid_1's rmse: 0.905717
[2549]	training's rmse: 0.804306	valid_1's rmse: 0.905712
[2550]	training's rmse: 0.804293	valid_1's rmse: 0.905669
[2551]	training's rmse: 0.804281	valid_1's rmse: 0.905668
[2552]	training's rmse: 0.804273	valid_1's rmse: 0.905659
[2553]	training'

[2605]	training's rmse: 0.803595	valid_1's rmse: 0.905693
[2606]	training's rmse: 0.803587	valid_1's rmse: 0.905691
[2607]	training's rmse: 0.80358	valid_1's rmse: 0.905689
[2608]	training's rmse: 0.803576	valid_1's rmse: 0.905692
[2609]	training's rmse: 0.803567	valid_1's rmse: 0.905692
[2610]	training's rmse: 0.803553	valid_1's rmse: 0.905687
[2611]	training's rmse: 0.803543	valid_1's rmse: 0.905686
[2612]	training's rmse: 0.803538	valid_1's rmse: 0.905685
[2613]	training's rmse: 0.803529	valid_1's rmse: 0.905696
[2614]	training's rmse: 0.803518	valid_1's rmse: 0.905701
[2615]	training's rmse: 0.803507	valid_1's rmse: 0.905712
[2616]	training's rmse: 0.80348	valid_1's rmse: 0.905677
[2617]	training's rmse: 0.803467	valid_1's rmse: 0.905678
[2618]	training's rmse: 0.80346	valid_1's rmse: 0.90568
[2619]	training's rmse: 0.803457	valid_1's rmse: 0.905682
[2620]	training's rmse: 0.803439	valid_1's rmse: 0.905673
[2621]	training's rmse: 0.803425	valid_1's rmse: 0.905686
[2622]	training's 

In [139]:
valid_prediction = lgb_model.predict(X_valid).clip(0,20)

In [140]:
rmse_valid = np.sqrt(mean_squared_error(valid_prediction, label_valid))
rmse_valid

0.8986837882141382

In [141]:
pred_33_df = pd.DataFrame({'item_cnt_month': valid_prediction})
# 对sub数据进行变换
def myfun(val):
    return round(val, 0)
pred_33_df['item_cnt_month'] = pred_33_df['item_cnt_month'].apply(myfun)

In [142]:
rmse_valid = np.sqrt(mean_squared_error(pred_33_df['item_cnt_month'], label_valid))
rmse_valid

0.914353793551493

In [143]:
# 训练33个月数据
trainData = matrix[matrix['date_block_num'] < 34]
label_train = trainData['item_cnt_month']
X_train = trainData.drop('item_cnt_month', axis=1)

train_data = lgb.Dataset(data=X_train, label=label_train)
params = {
    'objective': 'regression',  # 回归
    'metric': 'rmse',   # 回归问题选择rmse
    'n_estimators': 400,
    'max_depth': 10,
    'num_leaves': 400,   # 每个弱学习器拥有的叶子的数量
    'learning_rate': 0.01,
    'bagging_fraction': 0.9,    # 每次训练“弱学习器”用的数据比例（应该也是随机的），用于加快训练速度和减小过拟合
    'feature_fraction': 0.3,   # 每次迭代过程中，随机选择30%的特征建树（弱学习器）
    'bagging_seed': 0,
    'early_stop_rounds': 50
}
lgb_model = lgb.train(params, train_data, valid_sets=[train_data])



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12627
[LightGBM] [Info] Number of data points in the train set: 6425094, number of used features: 73
[LightGBM] [Info] Start training from score 0.287729
[1]	training's rmse: 1.18113
[2]	training's rmse: 1.17602
[3]	training's rmse: 1.17077
[4]	training's rmse: 1.16592
[5]	training's rmse: 1.16092
[6]	training's rmse: 1.15622
[7]	training's rmse: 1.15128
[8]	training's rmse: 1.14652
[9]	training's rmse: 1.14165
[10]	training's rmse: 1.13707
[11]	training's rmse: 1.13277
[12]	training's rmse: 1.12832
[13]	training's rmse: 1.12401
[14]	training's rmse: 1.11985
[15]	training's rmse: 1.11573
[16]	training's rmse: 1.11259
[17]	training's rmse: 1.10928
[18]	training's rmse: 1.10549
[19]	training's rmse: 1.1016
[20]	training's rmse: 1.09744
[21]	training's rmse: 1.09414
[22]	training's rmse: 1.09024
[23]	training's rmse: 1.08687
[24]	training's

[247]	training's rmse: 0.814577
[248]	training's rmse: 0.814312
[249]	training's rmse: 0.814009
[250]	training's rmse: 0.813757
[251]	training's rmse: 0.813546
[252]	training's rmse: 0.813333
[253]	training's rmse: 0.813041
[254]	training's rmse: 0.812772
[255]	training's rmse: 0.812545
[256]	training's rmse: 0.812265
[257]	training's rmse: 0.812044
[258]	training's rmse: 0.811576
[259]	training's rmse: 0.811284
[260]	training's rmse: 0.810965
[261]	training's rmse: 0.810565
[262]	training's rmse: 0.8103
[263]	training's rmse: 0.809949
[264]	training's rmse: 0.809749
[265]	training's rmse: 0.809455
[266]	training's rmse: 0.80916
[267]	training's rmse: 0.808934
[268]	training's rmse: 0.808656
[269]	training's rmse: 0.808324
[270]	training's rmse: 0.807968
[271]	training's rmse: 0.807654
[272]	training's rmse: 0.807468
[273]	training's rmse: 0.807282
[274]	training's rmse: 0.806939
[275]	training's rmse: 0.806729
[276]	training's rmse: 0.806516
[277]	training's rmse: 0.80621
[278]	traini

In [145]:
# test数据
testData = matrix[matrix['date_block_num'] == 34]
X_test = testData.drop('item_cnt_month', axis=1)

# 预测&生成文件
y_test = lgb_model.predict(X_test).clip(0, 20)
submission = pd.DataFrame({ 'ID': range(0, 214200), 'item_cnt_month': y_test})

test0 = test[test.item_id.isin(six_zero_item_id)]
ids = list(test0.ID.values)
submission.loc[submission.ID.isin(ids), 'item_cnt_month'] = 0.0
submission.to_csv('./submit/sub3.csv', index=False)