In [33]:
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from calendar import monthrange

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Data Processing

In [36]:
sales = pd.read_csv("data/source/sales_train.csv")

In [38]:
sales["date"] = sales["date"].apply(lambda x: datetime.strptime(x, "%d.%m.%Y"))
sales["item_price"] = (sales["item_price"] * 100).apply(lambda x: round(x, 0)).astype("Int32")
display(sales.head(5))
sales.shape

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day
0,2013-01-02,0,59,22154,99900,1.0
1,2013-01-03,0,25,2552,89900,1.0
2,2013-01-05,0,25,2552,89900,-1.0
3,2013-01-06,0,25,2554,170905,1.0
4,2013-01-15,0,25,2555,109900,1.0


(2935849, 6)

In [39]:
items = pd.read_csv("data/source/items.csv")
cats = pd.read_csv("data/source/item_categories.csv")

In [40]:
cats["item_category_1"] = cats.item_category_name.apply(lambda x: (x.split(" - ")[0]).split("(")[0])
cats["item_category_2"] = cats.item_category_name.apply(
    lambda x: x.split(" - ")[1] if " - " in x else "None"
)

cats.sort_values(by="item_category_2")

Unnamed: 0,item_category_name,item_category_id,item_category_1,item_category_2
73,Программы - 1С:Предприятие 8,73,Программы,1С:Предприятие 8
37,Кино - Blu-Ray,37,Кино,Blu-Ray
38,Кино - Blu-Ray 3D,38,Кино,Blu-Ray 3D
39,Кино - Blu-Ray 4K,39,Кино,Blu-Ray 4K
55,Музыка - CD локального производства,55,Музыка,CD локального производства
56,Музыка - CD фирменного производства,56,Музыка,CD фирменного производства
40,Кино - DVD,40,Кино,DVD
33,Карты оплаты - Live!,33,Карты оплаты,Live!
34,Карты оплаты - Live! (Цифра),34,Карты оплаты,Live! (Цифра)
74,Программы - MAC (Цифра),74,Программы,MAC (Цифра)


In [41]:
items = items.join(cats.set_index("item_category_id"), how="left", on="item_category_id")

items

Unnamed: 0,item_name,item_id,item_category_id,item_category_name,item_category_1,item_category_2
0,! ВО ВЛАСТИ НАВАЖДЕНИЯ (ПЛАСТ.) D,0,40,Кино - DVD,Кино,DVD
1,!ABBYY FineReader 12 Professional Edition Full...,1,76,Программы - Для дома и офиса (Цифра),Программы,Для дома и офиса (Цифра)
2,***В ЛУЧАХ СЛАВЫ (UNV) D,2,40,Кино - DVD,Кино,DVD
3,***ГОЛУБАЯ ВОЛНА (Univ) D,3,40,Кино - DVD,Кино,DVD
4,***КОРОБКА (СТЕКЛО) D,4,40,Кино - DVD,Кино,DVD
...,...,...,...,...,...,...
22165,"Ядерный титбит 2 [PC, Цифровая версия]",22165,31,Игры PC - Цифра,Игры PC,Цифра
22166,Язык запросов 1С:Предприятия [Цифровая версия],22166,54,Книги - Цифра,Книги,Цифра
22167,Язык запросов 1С:Предприятия 8 (+CD). Хрустале...,22167,49,Книги - Методические материалы 1С,Книги,Методические материалы 1С
22168,Яйцо для Little Inu,22168,62,"Подарки - Гаджеты, роботы, спорт",Подарки,"Гаджеты, роботы, спорт"


In [42]:
shops = pd.read_csv("data/source/shops.csv")

shops.head(5)

Unnamed: 0,shop_name,shop_id
0,"!Якутск Орджоникидзе, 56 фран",0
1,"!Якутск ТЦ ""Центральный"" фран",1
2,"Адыгея ТЦ ""Мега""",2
3,"Балашиха ТРК ""Октябрь-Киномир""",3
4,"Волжский ТЦ ""Волга Молл""",4


In [43]:
# based on https://www.kaggle.com/ahmedmurad1990/predict-future-sales

shops.loc[shops.shop_name == 'Сергиев Посад ТЦ "7Я"',"shop_name"] = 'СергиевПосад ТЦ "7Я"'
shops["shop_city"] = shops.shop_name.str.split(" ").map( lambda x: x[0] )
shops["shop_category"] = shops.shop_name.str.split(" ").map( lambda x: x[1] )
shops.loc[shops.shop_city == "!Якутск", "shop_city"] = "Якутск"
shops.shop_category = shops.shop_category.apply(lambda x: x if x in ["ТК", "ТРК", "ТРЦ", "ТЦ"] else "OTHER")

shops.sort_values(by="shop_city")

Unnamed: 0,shop_name,shop_id,shop_city,shop_category
2,"Адыгея ТЦ ""Мега""",2,Адыгея,ТЦ
3,"Балашиха ТРК ""Октябрь-Киномир""",3,Балашиха,ТРК
4,"Волжский ТЦ ""Волга Молл""",4,Волжский,ТЦ
5,"Вологда ТРЦ ""Мармелад""",5,Вологда,ТРЦ
6,"Воронеж (Плехановская, 13)",6,Воронеж,OTHER
7,"Воронеж ТРЦ ""Максимир""",7,Воронеж,ТРЦ
8,"Воронеж ТРЦ Сити-Парк ""Град""",8,Воронеж,ТРЦ
9,Выездная Торговля,9,Выездная,OTHER
11,Жуковский ул. Чкалова 39м²,11,Жуковский,OTHER
10,Жуковский ул. Чкалова 39м?,10,Жуковский,OTHER


In [44]:
test = pd.read_csv("data/source/test.csv")
test

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320
2,2,5,5233
3,3,5,5232
4,4,5,5268
...,...,...,...
214195,214195,45,18454
214196,214196,45,16188
214197,214197,45,15757
214198,214198,45,19648


In [45]:
# we merge all shop_id, item_id combinations from train and test set
shop_item = sales[["shop_id", "item_id"]].drop_duplicates().join(
    test.set_index(["shop_id", "item_id"]), on=["shop_id", "item_id"], how="outer"
)[["shop_id", "item_id"]].reset_index(drop=True)


# add item and shop metadata
shop_item = shop_item.join(items.set_index("item_id"), how="left", on="item_id")
shop_item = shop_item.join(shops.set_index("shop_id"), on="shop_id", how="left")

# replace categories by codes and remove detailed names
for col in ["item_category_1", "item_category_2", "shop_city", "shop_category"]:
    shop_item[col] = shop_item[col].astype("category").cat.codes
    
for col in ["item_name", "item_category_name", "shop_name"]:
    del shop_item[col]

shop_item

Unnamed: 0,shop_id,item_id,item_category_id,item_category_1,item_category_2,shop_city,shop_category
0,59,22154,37,11,1,30,4
1,25,2552,58,13,29,13,2
2,25,2554,58,13,29,13,2
3,25,2555,56,13,5,13,2
4,25,2564,59,13,40,13,2
...,...,...,...,...,...,...,...
526915,45,9500,40,11,6,20,4
526916,45,16007,64,14,42,20,4
526917,45,18027,70,14,56,20,4
526918,45,16188,64,14,42,20,4


In [46]:
shop_item.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 526920 entries, 0 to 526919
Data columns (total 7 columns):
 #   Column            Non-Null Count   Dtype
---  ------            --------------   -----
 0   shop_id           526920 non-null  int64
 1   item_id           526920 non-null  int64
 2   item_category_id  526920 non-null  int64
 3   item_category_1   526920 non-null  int8 
 4   item_category_2   526920 non-null  int8 
 5   shop_city         526920 non-null  int8 
 6   shop_category     526920 non-null  int8 
dtypes: int64(3), int8(4)
memory usage: 14.1 MB


In [47]:
# cross join with all date block nums to create a merged dataset later
shop_item = shop_item.merge(pd.DataFrame({"date_block_num": range(34)}), how="cross")

#reduce some memory usage
print(shop_item.info())

for col in ["shop_id", "item_id", "item_category_id","date_block_num"]:
    shop_item[col] = shop_item[col].astype("Int16")

shop_item.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17915280 entries, 0 to 17915279
Data columns (total 8 columns):
 #   Column            Dtype
---  ------            -----
 0   shop_id           int64
 1   item_id           int64
 2   item_category_id  int64
 3   item_category_1   int8 
 4   item_category_2   int8 
 5   shop_city         int8 
 6   shop_category     int8 
 7   date_block_num    int64
dtypes: int64(4), int8(4)
memory usage: 751.8 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 17915280 entries, 0 to 17915279
Data columns (total 8 columns):
 #   Column            Dtype
---  ------            -----
 0   shop_id           Int16
 1   item_id           Int16
 2   item_category_id  Int16
 3   item_category_1   int8 
 4   item_category_2   int8 
 5   shop_city         int8 
 6   shop_category     int8 
 7   date_block_num    Int16
dtypes: Int16(4), int8(4)
memory usage: 410.0 MB


In [65]:
sales = sales.sort_values(by=["shop_id", "item_id", "date"]).reset_index(drop=True)

price_change = sales.groupby(["shop_id", "item_id"]).item_price.rolling(window=2).apply(
    lambda x: x.iloc[1] - x.iloc[0]
)

sales["price_change_day"] = price_change.reset_index()["item_price"]

sales

In [89]:
sales.head(n=200)

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,price_change_day
0,2013-02-15,1,0,30,26500,2.0,
1,2013-02-16,1,0,30,26500,9.0,0.0
2,2013-02-17,1,0,30,26500,4.0,0.0
3,2013-02-18,1,0,30,26500,4.0,0.0
4,2013-02-20,1,0,30,26500,2.0,0.0
5,2013-02-21,1,0,30,26500,2.0,0.0
6,2013-02-22,1,0,30,26500,2.0,0.0
7,2013-02-23,1,0,30,26500,3.0,0.0
8,2013-02-26,1,0,30,26500,3.0,0.0
9,2013-02-15,1,0,31,43400,3.0,


In [91]:
# aggregate sales by month
sales_monthly = sales.groupby(["date_block_num", "shop_id", "item_id"]).agg({
    "item_cnt_day": ["sum", "min", "max"],
    "date": ["min", "max", "count"],
    "item_price": ["mean", "max", "min"],
    "price_change_day": ["sum"]
}).reset_index()

# flatten column names an fill na
sales_monthly.columns = [c[0] if c[1] == "" else c[1] + "_" + c[0] for c in sales_monthly.columns]
sales_monthly = sales_monthly.fillna(0) # should only affect std of breakouts of count 1

# replace min and max dates by their day of month
sales_monthly["min_date"] = sales_monthly.min_date.apply(lambda x: x.day)
sales_monthly["max_date"] = sales_monthly.max_date.apply(lambda x: x.day)

sales_monthly = sales_monthly.rename({
    "sum_item_cnt_day": "item_cnt_month",
}, axis=1)

sales_monthly["mean_item_price"] = (sales_monthly["mean_item_price"]).apply(lambda x: round(x, 0)).astype("Int32")

sales_monthly

Unnamed: 0,date_block_num,shop_id,item_id,item_cnt_month,min_item_cnt_day,max_item_cnt_day,min_date,max_date,count_date,mean_item_price,max_item_price,min_item_price,sum_price_change_day
0,0,0,32,6.0,1.0,2.0,3,31,4,22100,22100,22100,0.0
1,0,0,33,3.0,1.0,1.0,3,28,3,34700,34700,34700,0.0
2,0,0,35,1.0,1.0,1.0,31,31,1,24700,24700,24700,0.0
3,0,0,43,1.0,1.0,1.0,31,31,1,22100,22100,22100,0.0
4,0,0,51,2.0,1.0,1.0,13,31,2,12850,13000,12700,-300.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1609119,33,59,22087,6.0,1.0,3.0,5,23,3,11900,11900,11900,0.0
1609120,33,59,22088,2.0,1.0,1.0,3,27,2,11900,11900,11900,0.0
1609121,33,59,22091,1.0,1.0,1.0,3,3,1,17900,17900,17900,0.0
1609122,33,59,22100,1.0,1.0,1.0,18,18,1,62900,62900,62900,0.0


In [92]:
%%time

# extend to all item_id, shop_id combinations and add metadata
join_keys = ["shop_id", "item_id", "date_block_num"]
sales_monthly = shop_item.join(sales_monthly.set_index(join_keys),
                               on=join_keys, how="left")

sales_monthly

CPU times: user 9.84 s, sys: 3.23 s, total: 13.1 s
Wall time: 30.2 s


Unnamed: 0,shop_id,item_id,item_category_id,item_category_1,item_category_2,shop_city,shop_category,date_block_num,item_cnt_month,min_item_cnt_day,max_item_cnt_day,min_date,max_date,count_date,mean_item_price,max_item_price,min_item_price,sum_price_change_day
0,59,22154,37,11,1,30,4,0,1.0,1.0,1.0,2.0,2.0,1.0,99900,99900,99900,0.0
1,59,22154,37,11,1,30,4,1,,,,,,,,,,
2,59,22154,37,11,1,30,4,2,,,,,,,,,,
3,59,22154,37,11,1,30,4,3,,,,,,,,,,
4,59,22154,37,11,1,30,4,4,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17915275,45,19648,40,11,6,20,4,29,,,,,,,,,,
17915276,45,19648,40,11,6,20,4,30,,,,,,,,,,
17915277,45,19648,40,11,6,20,4,31,,,,,,,,,,
17915278,45,19648,40,11,6,20,4,32,,,,,,,,,,


In [93]:
base_date = datetime.strptime("201301", "%Y%m")
def date_block_num_to_month(date_block_num):
    return (base_date + relativedelta(months=date_block_num)).month

def date_block_num_to_days_in_month(date_block_num):
    block_month = base_date + relativedelta(months=date_block_num)
    return monthrange(block_month.year, block_month.month)[1]

date_block_num_to_month(25), date_block_num_to_days_in_month(25)

(2, 28)

In [94]:
%%time

# add month characteristics
sales_monthly["month"] = sales_monthly["date_block_num"].apply(date_block_num_to_month)
sales_monthly["days_in_month"] = sales_monthly["date_block_num"].apply(date_block_num_to_days_in_month)

sales_monthly

CPU times: user 8min 6s, sys: 2.14 s, total: 8min 8s
Wall time: 8min 28s


Unnamed: 0,shop_id,item_id,item_category_id,item_category_1,item_category_2,shop_city,shop_category,date_block_num,item_cnt_month,min_item_cnt_day,max_item_cnt_day,min_date,max_date,count_date,mean_item_price,max_item_price,min_item_price,sum_price_change_day,month,days_in_month
0,59,22154,37,11,1,30,4,0,1.0,1.0,1.0,2.0,2.0,1.0,99900,99900,99900,0.0,1,31
1,59,22154,37,11,1,30,4,1,,,,,,,,,,,2,28
2,59,22154,37,11,1,30,4,2,,,,,,,,,,,3,31
3,59,22154,37,11,1,30,4,3,,,,,,,,,,,4,30
4,59,22154,37,11,1,30,4,4,,,,,,,,,,,5,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17915275,45,19648,40,11,6,20,4,29,,,,,,,,,,,6,30
17915276,45,19648,40,11,6,20,4,30,,,,,,,,,,,7,31
17915277,45,19648,40,11,6,20,4,31,,,,,,,,,,,8,31
17915278,45,19648,40,11,6,20,4,32,,,,,,,,,,,9,30


In [97]:
#reduce memory usage
print(sales_monthly.info())

for col in ["item_cnt_month", "min_item_cnt_day", "max_item_cnt_day",
           "min_date", "max_date", "count_date", "shop_id"]:
    sales_monthly[col] = sales_monthly[col].astype("Int16")
    

for col in ["month", "days_in_month", "date_block_num"]:
    sales_monthly[col] = sales_monthly[col].astype("Int8")
    
sales_monthly["sum_price_change_day"] = sales_monthly["sum_price_change_day"].apply(
    lambda x: round(x, 0)
).astype("Int32")

sales_monthly.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17915280 entries, 0 to 17915279
Data columns (total 20 columns):
 #   Column                Dtype  
---  ------                -----  
 0   shop_id               Int16  
 1   item_id               Int16  
 2   item_category_id      Int16  
 3   item_category_1       int8   
 4   item_category_2       int8   
 5   shop_city             int8   
 6   shop_category         int8   
 7   date_block_num        Int8   
 8   item_cnt_month        Int16  
 9   min_item_cnt_day      Int16  
 10  max_item_cnt_day      Int16  
 11  min_date              Int16  
 12  max_date              Int16  
 13  count_date            Int16  
 14  mean_item_price       Int32  
 15  max_item_price        Int32  
 16  min_item_price        Int32  
 17  sum_price_change_day  float64
 18  month                 Int8   
 19  days_in_month         Int8   
dtypes: Int16(9), Int32(3), Int8(3), float64(1), int8(4)
memory usage: 1.1 GB
None
<class 'pandas.core.frame.DataFr

In [98]:
sales_monthly.to_pickle("data/processed/sales_monthly_full.pickle")

## create wide format dataframe

When working on low memory, restart the notebook now.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from calendar import monthrange

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
%%time

sales_monthly_wide = pd.read_pickle("data/processed/sales_monthly_full.pickle").set_index([
    "shop_id",
    "item_id",
    "item_category_id",
    "item_category_1",
    "item_category_2",
    "shop_city",
    "shop_category",
    "date_block_num",
]).unstack()

sales_monthly_wide = sales_monthly_wide.reset_index().fillna(0)

sales_monthly_wide.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 526920 entries, 0 to 526919
Columns: 415 entries, ('shop_id', '') to ('days_in_month', 33)
dtypes: Int16(204), Int32(136), Int8(68), int64(7)
memory usage: 745.7 MB
CPU times: user 35.4 s, sys: 5.37 s, total: 40.7 s
Wall time: 41.2 s


In [3]:
sales_monthly_wide.columns = [
    c[0] if c[1] == '' else (
        c[0] + "_" + str(c[1])
    ).replace("_day", "_month") for c in sales_monthly_wide.columns
]

In [4]:
sales_monthly_wide

Unnamed: 0,shop_id,item_id,item_category_id,item_category_1,item_category_2,shop_city,shop_category,item_cnt_month_0,item_cnt_month_1,item_cnt_month_2,item_cnt_month_3,item_cnt_month_4,item_cnt_month_5,item_cnt_month_6,item_cnt_month_7,item_cnt_month_8,item_cnt_month_9,item_cnt_month_10,item_cnt_month_11,item_cnt_month_12,item_cnt_month_13,item_cnt_month_14,item_cnt_month_15,item_cnt_month_16,item_cnt_month_17,item_cnt_month_18,item_cnt_month_19,item_cnt_month_20,item_cnt_month_21,item_cnt_month_22,item_cnt_month_23,item_cnt_month_24,item_cnt_month_25,item_cnt_month_26,item_cnt_month_27,item_cnt_month_28,item_cnt_month_29,item_cnt_month_30,item_cnt_month_31,item_cnt_month_32,item_cnt_month_33,min_item_cnt_month_0,min_item_cnt_month_1,min_item_cnt_month_2,min_item_cnt_month_3,min_item_cnt_month_4,min_item_cnt_month_5,min_item_cnt_month_6,min_item_cnt_month_7,min_item_cnt_month_8,min_item_cnt_month_9,min_item_cnt_month_10,min_item_cnt_month_11,min_item_cnt_month_12,min_item_cnt_month_13,min_item_cnt_month_14,min_item_cnt_month_15,min_item_cnt_month_16,min_item_cnt_month_17,min_item_cnt_month_18,min_item_cnt_month_19,min_item_cnt_month_20,min_item_cnt_month_21,min_item_cnt_month_22,min_item_cnt_month_23,min_item_cnt_month_24,min_item_cnt_month_25,min_item_cnt_month_26,min_item_cnt_month_27,min_item_cnt_month_28,min_item_cnt_month_29,min_item_cnt_month_30,min_item_cnt_month_31,min_item_cnt_month_32,min_item_cnt_month_33,max_item_cnt_month_0,max_item_cnt_month_1,max_item_cnt_month_2,max_item_cnt_month_3,max_item_cnt_month_4,max_item_cnt_month_5,max_item_cnt_month_6,max_item_cnt_month_7,max_item_cnt_month_8,max_item_cnt_month_9,max_item_cnt_month_10,max_item_cnt_month_11,max_item_cnt_month_12,max_item_cnt_month_13,max_item_cnt_month_14,max_item_cnt_month_15,max_item_cnt_month_16,max_item_cnt_month_17,max_item_cnt_month_18,max_item_cnt_month_19,max_item_cnt_month_20,max_item_cnt_month_21,max_item_cnt_month_22,max_item_cnt_month_23,max_item_cnt_month_24,max_item_cnt_month_25,max_item_cnt_month_26,max_item_cnt_month_27,max_item_cnt_month_28,max_item_cnt_month_29,max_item_cnt_month_30,max_item_cnt_month_31,max_item_cnt_month_32,max_item_cnt_month_33,min_date_0,min_date_1,min_date_2,min_date_3,min_date_4,min_date_5,min_date_6,min_date_7,min_date_8,min_date_9,min_date_10,min_date_11,min_date_12,min_date_13,min_date_14,min_date_15,min_date_16,min_date_17,min_date_18,min_date_19,min_date_20,min_date_21,min_date_22,min_date_23,min_date_24,min_date_25,min_date_26,min_date_27,min_date_28,min_date_29,min_date_30,min_date_31,min_date_32,min_date_33,max_date_0,max_date_1,max_date_2,max_date_3,max_date_4,max_date_5,max_date_6,max_date_7,max_date_8,max_date_9,max_date_10,max_date_11,max_date_12,max_date_13,max_date_14,max_date_15,max_date_16,max_date_17,max_date_18,max_date_19,max_date_20,max_date_21,max_date_22,max_date_23,max_date_24,max_date_25,max_date_26,max_date_27,max_date_28,max_date_29,max_date_30,max_date_31,max_date_32,max_date_33,count_date_0,count_date_1,count_date_2,count_date_3,count_date_4,count_date_5,count_date_6,count_date_7,count_date_8,count_date_9,count_date_10,count_date_11,count_date_12,count_date_13,count_date_14,count_date_15,count_date_16,count_date_17,count_date_18,count_date_19,count_date_20,count_date_21,count_date_22,count_date_23,count_date_24,count_date_25,count_date_26,count_date_27,count_date_28,count_date_29,count_date_30,count_date_31,count_date_32,count_date_33,mean_item_price_0,mean_item_price_1,mean_item_price_2,mean_item_price_3,mean_item_price_4,mean_item_price_5,mean_item_price_6,mean_item_price_7,mean_item_price_8,mean_item_price_9,mean_item_price_10,mean_item_price_11,mean_item_price_12,mean_item_price_13,mean_item_price_14,mean_item_price_15,mean_item_price_16,mean_item_price_17,mean_item_price_18,mean_item_price_19,mean_item_price_20,mean_item_price_21,mean_item_price_22,mean_item_price_23,mean_item_price_24,mean_item_price_25,mean_item_price_26,mean_item_price_27,mean_item_price_28,mean_item_price_29,mean_item_price_30,mean_item_price_31,mean_item_price_32,mean_item_price_33,max_item_price_0,max_item_price_1,max_item_price_2,max_item_price_3,max_item_price_4,max_item_price_5,max_item_price_6,max_item_price_7,max_item_price_8,max_item_price_9,max_item_price_10,max_item_price_11,max_item_price_12,max_item_price_13,max_item_price_14,max_item_price_15,max_item_price_16,max_item_price_17,max_item_price_18,max_item_price_19,max_item_price_20,max_item_price_21,max_item_price_22,max_item_price_23,max_item_price_24,max_item_price_25,max_item_price_26,max_item_price_27,max_item_price_28,max_item_price_29,max_item_price_30,max_item_price_31,max_item_price_32,max_item_price_33,min_item_price_0,min_item_price_1,min_item_price_2,min_item_price_3,min_item_price_4,min_item_price_5,min_item_price_6,min_item_price_7,min_item_price_8,min_item_price_9,min_item_price_10,min_item_price_11,min_item_price_12,min_item_price_13,min_item_price_14,min_item_price_15,min_item_price_16,min_item_price_17,min_item_price_18,min_item_price_19,min_item_price_20,min_item_price_21,min_item_price_22,min_item_price_23,min_item_price_24,min_item_price_25,min_item_price_26,min_item_price_27,min_item_price_28,min_item_price_29,min_item_price_30,min_item_price_31,min_item_price_32,min_item_price_33,sum_price_change_month_0,sum_price_change_month_1,sum_price_change_month_2,sum_price_change_month_3,sum_price_change_month_4,sum_price_change_month_5,sum_price_change_month_6,sum_price_change_month_7,sum_price_change_month_8,sum_price_change_month_9,sum_price_change_month_10,sum_price_change_month_11,sum_price_change_month_12,sum_price_change_month_13,sum_price_change_month_14,sum_price_change_month_15,sum_price_change_month_16,sum_price_change_month_17,sum_price_change_month_18,sum_price_change_month_19,sum_price_change_month_20,sum_price_change_month_21,sum_price_change_month_22,sum_price_change_month_23,sum_price_change_month_24,sum_price_change_month_25,sum_price_change_month_26,sum_price_change_month_27,sum_price_change_month_28,sum_price_change_month_29,sum_price_change_month_30,sum_price_change_month_31,sum_price_change_month_32,sum_price_change_month_33,month_0,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,month_13,month_14,month_15,month_16,month_17,month_18,month_19,month_20,month_21,month_22,month_23,month_24,month_25,month_26,month_27,month_28,month_29,month_30,month_31,month_32,month_33,days_in_month_0,days_in_month_1,days_in_month_2,days_in_month_3,days_in_month_4,days_in_month_5,days_in_month_6,days_in_month_7,days_in_month_8,days_in_month_9,days_in_month_10,days_in_month_11,days_in_month_12,days_in_month_13,days_in_month_14,days_in_month_15,days_in_month_16,days_in_month_17,days_in_month_18,days_in_month_19,days_in_month_20,days_in_month_21,days_in_month_22,days_in_month_23,days_in_month_24,days_in_month_25,days_in_month_26,days_in_month_27,days_in_month_28,days_in_month_29,days_in_month_30,days_in_month_31,days_in_month_32,days_in_month_33
0,0,30,40,11,6,29,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
1,0,31,37,11,1,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
2,0,32,40,11,6,29,0,6,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
3,0,33,37,11,1,29,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
4,0,35,40,11,6,29,0,1,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
526915,59,22162,40,11,6,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,1,17,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,26,17,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5000,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526916,59,22163,40,11,6,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526917,59,22164,37,11,1,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,29,22,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,29,30,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5000,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526918,59,22166,54,12,60,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31


In [5]:
sales_monthly_wide.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 526920 entries, 0 to 526919
Columns: 415 entries, shop_id to days_in_month_33
dtypes: Int16(204), Int32(136), Int8(68), int64(7)
memory usage: 745.7 MB


In [6]:
sales_monthly_wide.to_pickle("data/processed/sales_monthly_wide_full.pickle")

# Models

In [4]:
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from calendar import monthrange
from tqdm.notebook import tqdm

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

sales_monthly_wide = pd.read_pickle("data/processed/sales_monthly_wide_full.pickle")

In [2]:
sales_monthly_wide

Unnamed: 0,shop_id,item_id,item_category_id,item_category_1,item_category_2,shop_city,shop_category,item_cnt_month_0,item_cnt_month_1,item_cnt_month_2,item_cnt_month_3,item_cnt_month_4,item_cnt_month_5,item_cnt_month_6,item_cnt_month_7,item_cnt_month_8,item_cnt_month_9,item_cnt_month_10,item_cnt_month_11,item_cnt_month_12,item_cnt_month_13,item_cnt_month_14,item_cnt_month_15,item_cnt_month_16,item_cnt_month_17,item_cnt_month_18,item_cnt_month_19,item_cnt_month_20,item_cnt_month_21,item_cnt_month_22,item_cnt_month_23,item_cnt_month_24,item_cnt_month_25,item_cnt_month_26,item_cnt_month_27,item_cnt_month_28,item_cnt_month_29,item_cnt_month_30,item_cnt_month_31,item_cnt_month_32,item_cnt_month_33,min_item_cnt_month_0,min_item_cnt_month_1,min_item_cnt_month_2,min_item_cnt_month_3,min_item_cnt_month_4,min_item_cnt_month_5,min_item_cnt_month_6,min_item_cnt_month_7,min_item_cnt_month_8,min_item_cnt_month_9,min_item_cnt_month_10,min_item_cnt_month_11,min_item_cnt_month_12,min_item_cnt_month_13,min_item_cnt_month_14,min_item_cnt_month_15,min_item_cnt_month_16,min_item_cnt_month_17,min_item_cnt_month_18,min_item_cnt_month_19,min_item_cnt_month_20,min_item_cnt_month_21,min_item_cnt_month_22,min_item_cnt_month_23,min_item_cnt_month_24,min_item_cnt_month_25,min_item_cnt_month_26,min_item_cnt_month_27,min_item_cnt_month_28,min_item_cnt_month_29,min_item_cnt_month_30,min_item_cnt_month_31,min_item_cnt_month_32,min_item_cnt_month_33,max_item_cnt_month_0,max_item_cnt_month_1,max_item_cnt_month_2,max_item_cnt_month_3,max_item_cnt_month_4,max_item_cnt_month_5,max_item_cnt_month_6,max_item_cnt_month_7,max_item_cnt_month_8,max_item_cnt_month_9,max_item_cnt_month_10,max_item_cnt_month_11,max_item_cnt_month_12,max_item_cnt_month_13,max_item_cnt_month_14,max_item_cnt_month_15,max_item_cnt_month_16,max_item_cnt_month_17,max_item_cnt_month_18,max_item_cnt_month_19,max_item_cnt_month_20,max_item_cnt_month_21,max_item_cnt_month_22,max_item_cnt_month_23,max_item_cnt_month_24,max_item_cnt_month_25,max_item_cnt_month_26,max_item_cnt_month_27,max_item_cnt_month_28,max_item_cnt_month_29,max_item_cnt_month_30,max_item_cnt_month_31,max_item_cnt_month_32,max_item_cnt_month_33,min_date_0,min_date_1,min_date_2,min_date_3,min_date_4,min_date_5,min_date_6,min_date_7,min_date_8,min_date_9,min_date_10,min_date_11,min_date_12,min_date_13,min_date_14,min_date_15,min_date_16,min_date_17,min_date_18,min_date_19,min_date_20,min_date_21,min_date_22,min_date_23,min_date_24,min_date_25,min_date_26,min_date_27,min_date_28,min_date_29,min_date_30,min_date_31,min_date_32,min_date_33,max_date_0,max_date_1,max_date_2,max_date_3,max_date_4,max_date_5,max_date_6,max_date_7,max_date_8,max_date_9,max_date_10,max_date_11,max_date_12,max_date_13,max_date_14,max_date_15,max_date_16,max_date_17,max_date_18,max_date_19,max_date_20,max_date_21,max_date_22,max_date_23,max_date_24,max_date_25,max_date_26,max_date_27,max_date_28,max_date_29,max_date_30,max_date_31,max_date_32,max_date_33,count_date_0,count_date_1,count_date_2,count_date_3,count_date_4,count_date_5,count_date_6,count_date_7,count_date_8,count_date_9,count_date_10,count_date_11,count_date_12,count_date_13,count_date_14,count_date_15,count_date_16,count_date_17,count_date_18,count_date_19,count_date_20,count_date_21,count_date_22,count_date_23,count_date_24,count_date_25,count_date_26,count_date_27,count_date_28,count_date_29,count_date_30,count_date_31,count_date_32,count_date_33,mean_item_price_0,mean_item_price_1,mean_item_price_2,mean_item_price_3,mean_item_price_4,mean_item_price_5,mean_item_price_6,mean_item_price_7,mean_item_price_8,mean_item_price_9,mean_item_price_10,mean_item_price_11,mean_item_price_12,mean_item_price_13,mean_item_price_14,mean_item_price_15,mean_item_price_16,mean_item_price_17,mean_item_price_18,mean_item_price_19,mean_item_price_20,mean_item_price_21,mean_item_price_22,mean_item_price_23,mean_item_price_24,mean_item_price_25,mean_item_price_26,mean_item_price_27,mean_item_price_28,mean_item_price_29,mean_item_price_30,mean_item_price_31,mean_item_price_32,mean_item_price_33,max_item_price_0,max_item_price_1,max_item_price_2,max_item_price_3,max_item_price_4,max_item_price_5,max_item_price_6,max_item_price_7,max_item_price_8,max_item_price_9,max_item_price_10,max_item_price_11,max_item_price_12,max_item_price_13,max_item_price_14,max_item_price_15,max_item_price_16,max_item_price_17,max_item_price_18,max_item_price_19,max_item_price_20,max_item_price_21,max_item_price_22,max_item_price_23,max_item_price_24,max_item_price_25,max_item_price_26,max_item_price_27,max_item_price_28,max_item_price_29,max_item_price_30,max_item_price_31,max_item_price_32,max_item_price_33,min_item_price_0,min_item_price_1,min_item_price_2,min_item_price_3,min_item_price_4,min_item_price_5,min_item_price_6,min_item_price_7,min_item_price_8,min_item_price_9,min_item_price_10,min_item_price_11,min_item_price_12,min_item_price_13,min_item_price_14,min_item_price_15,min_item_price_16,min_item_price_17,min_item_price_18,min_item_price_19,min_item_price_20,min_item_price_21,min_item_price_22,min_item_price_23,min_item_price_24,min_item_price_25,min_item_price_26,min_item_price_27,min_item_price_28,min_item_price_29,min_item_price_30,min_item_price_31,min_item_price_32,min_item_price_33,sum_price_change_month_0,sum_price_change_month_1,sum_price_change_month_2,sum_price_change_month_3,sum_price_change_month_4,sum_price_change_month_5,sum_price_change_month_6,sum_price_change_month_7,sum_price_change_month_8,sum_price_change_month_9,sum_price_change_month_10,sum_price_change_month_11,sum_price_change_month_12,sum_price_change_month_13,sum_price_change_month_14,sum_price_change_month_15,sum_price_change_month_16,sum_price_change_month_17,sum_price_change_month_18,sum_price_change_month_19,sum_price_change_month_20,sum_price_change_month_21,sum_price_change_month_22,sum_price_change_month_23,sum_price_change_month_24,sum_price_change_month_25,sum_price_change_month_26,sum_price_change_month_27,sum_price_change_month_28,sum_price_change_month_29,sum_price_change_month_30,sum_price_change_month_31,sum_price_change_month_32,sum_price_change_month_33,month_0,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,month_13,month_14,month_15,month_16,month_17,month_18,month_19,month_20,month_21,month_22,month_23,month_24,month_25,month_26,month_27,month_28,month_29,month_30,month_31,month_32,month_33,days_in_month_0,days_in_month_1,days_in_month_2,days_in_month_3,days_in_month_4,days_in_month_5,days_in_month_6,days_in_month_7,days_in_month_8,days_in_month_9,days_in_month_10,days_in_month_11,days_in_month_12,days_in_month_13,days_in_month_14,days_in_month_15,days_in_month_16,days_in_month_17,days_in_month_18,days_in_month_19,days_in_month_20,days_in_month_21,days_in_month_22,days_in_month_23,days_in_month_24,days_in_month_25,days_in_month_26,days_in_month_27,days_in_month_28,days_in_month_29,days_in_month_30,days_in_month_31,days_in_month_32,days_in_month_33
0,0,30,40,11,6,29,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
1,0,31,37,11,1,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
2,0,32,40,11,6,29,0,6,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22100,22100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
3,0,33,37,11,1,29,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34700,34700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
4,0,35,40,11,6,29,0,1,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24700,24700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
526915,59,22162,40,11,6,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,1,17,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,26,17,29,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39900,39900,34900,34900,0,0,34900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5000,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526916,59,22163,40,11,6,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526917,59,22164,37,11,1,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,29,22,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,29,30,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74900,74900,69900,0,0,69900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5000,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31
526918,59,22166,54,12,60,30,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,30,31,30,31,31,30,31


In [3]:
def get_train_set_for_month(month, n, X_only=False):
    cols = list(sales_monthly_wide.columns)
    
    train_cols = ["shop_id", "item_id", "item_category_id", "item_category_1",
                  "item_category_2", "shop_city", "shop_category"]
    cols = cols[7:]
    
    train_cols += [c for c in cols if (
        int(c.split("_")[-1]) < month and
        int(c.split("_")[-1]) > month - n
    )]
    X = sales_monthly_wide[train_cols]
    if not X_only:
        y = sales_monthly_wide[[f"item_cnt_month_{month}"]]
        return X.values, y.values.reshape(-1), train_cols

    return X.values, train_cols

# small test
#get_train_set_for_month(33, 13, X_only=False)

In [5]:
import numpy as np

def clip(array):
    return np.array([min(x, 20) for x in array])

clip(np.array([1, 2, 34, 45, 45, -111]))

array([   1,    2,   20,   20,   20, -111])

In [6]:
%%time

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rf = RandomForestRegressor(
    n_estimators = 20,
    max_depth=10,
    min_samples_leaf=100,
    n_jobs=2,  # -1 is all processors used
)

n = 13
for i, month in tqdm(enumerate([33, 32, 31, 30, 29])):
    if i == 0:
        X, y, cols = get_train_set_for_month(month, n)
        continue
    
    X_inc, y_inc, _ = get_train_set_for_month(month, n)
    X = np.concatenate((X, X_inc), axis=0)
    y = np.concatenate((y, y_inc), axis=0)
    
    del X_inc, y_inc
    
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

rf.fit(X_train, clip(y_train))

print(mean_squared_error(y_true=clip(y_train), y_pred=clip(rf.predict(X_train)), squared=False))
print(mean_squared_error(y_true=clip(y_test), y_pred=clip(rf.predict(X_test)), squared=False))

0it [00:00, ?it/s]

0.5539023525016861
0.5724087187732684
CPU times: user 40min 56s, sys: 16.3 s, total: 41min 12s
Wall time: 23min 11s


In [None]:
"""
rf = RandomForestRegressor(
    n_estimators = 20,
    max_depth=10,
    min_samples_leaf=100,
    n_jobs=2,  # -1 is all processors used
)

n = 13
for i, month in enumerate([33, 32, 31, 30, 29]):
    if i == 0:
        X, y, cols = get_train_set_for_month(month, n)
        continue
    
    X_inc, y_inc, _ = get_train_set_for_month(month, n)
    X = np.concatenate((X, X_inc), axis=0)
    y = np.concatenate((y, y_inc), axis=0)
    
    del X_inc, y_inc
    
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

rf.fit(X_train, clip(y_train))


>>>
0.5538917590850159
0.5723062831596758
CPU times: user 31min 21s, sys: 39.7 s, total: 32min 1s
Wall time: 21min 57s
"""

In [7]:
print(f"Train set size MB: {X_train.nbytes / 1000 ** 2}")

Train set size MB: 2132.339856


In [8]:
feature_imp = list(zip(cols, rf.feature_importances_))

feature_imp.sort(key= lambda x: x[1], reverse=True)

feature_imp

[('item_cnt_month_32', 0.6979527010341855),
 ('count_date_32', 0.042170393400043575),
 ('max_item_price_32', 0.030531796442508852),
 ('item_cnt_month_31', 0.028817653431921015),
 ('max_item_price_31', 0.021751351847775675),
 ('item_cnt_month_25', 0.019315649894483755),
 ('item_cnt_month_24', 0.017078422548373),
 ('item_cnt_month_30', 0.014869510840029012),
 ('max_date_32', 0.013546473475776727),
 ('item_cnt_month_27', 0.009723762364995813),
 ('min_date_32', 0.008607334387338673),
 ('max_item_cnt_month_32', 0.00709860598194535),
 ('count_date_30', 0.004813836849147611),
 ('count_date_31', 0.004651537695780357),
 ('item_category_id', 0.004632845117631781),
 ('item_id', 0.004375620016364618),
 ('item_category_2', 0.0038462124332460422),
 ('count_date_23', 0.0037155086322018414),
 ('min_item_price_31', 0.0035918631693149126),
 ('item_cnt_month_26', 0.003534182086006176),
 ('count_date_25', 0.003410365996650365),
 ('count_date_26', 0.0033352169836939867),
 ('mean_item_price_30', 0.003185758

In [None]:
# predict for Nov!

X, cols = get_train_set_for_month(34, 13, X_only=True)

In [None]:
X

In [None]:
y_pred = rf.predict(X)

In [None]:
y_pred

In [None]:
X = pd.DataFrame(X, columns=cols)[["shop_id", "item_id"]]
X["item_cnt_month"] = clip(y_pred)
X

In [None]:
X.describe()

In [None]:
submission = pd.read_csv("data/source/test.csv")

submission = submission.join(
    X.set_index(["shop_id", "item_id"]), on=["shop_id", "item_id"], how="left"
)[["ID", "item_cnt_month"]]

display(submission)

submission.to_csv("data/submission_full_3.csv", index=False)

In [None]:
"""TODO's:

- Feature enginnering on names of shops, items, item_cats -done
- features for price delta
- features on overall shop and item performance
- add model pickle section
"""