In [36]:
import warnings
warnings.filterwarnings("ignore")

from fbprophet import Prophet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

from scipy import stats
import statsmodels.api as sm
from itertools import product
from datetime import timedelta

import pandas as pd
pd.options.display.max_columns = 999
import numpy as np

from sklearn.metrics import mean_squared_error
#RMSE = mean_squared_error(y, y_pred)**0.5

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import graph_objs as go
%matplotlib inline
import matplotlib.pyplot as plt

In [37]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


## data preparation

In [92]:
df = pd.read_csv('data/player_price.csv')
df['ds'] = pd.to_datetime(df['timestamp'], unit='ms')
df=df[df.ds>='2017-06-01']
df=df.loc[df["player_id"] == 1]
df.drop(["timestamp", "ps_price", "player_name", "player_id"], axis=1, inplace=True)
df.rename(columns={"xbox_price": "y"}, inplace=True)
df=df.replace(0,1)
print(df.shape)
df.head(10)

(22, 2)


Unnamed: 0,y,ds
251,3227500,2017-06-01
252,3115720,2017-06-02
253,3263857,2017-06-03
254,3351619,2017-06-04
255,3132091,2017-06-05
256,3535840,2017-06-06
257,3508952,2017-06-07
258,3515515,2017-06-08
259,3289895,2017-06-09
260,3258917,2017-06-10


In [93]:
#id-шники для предикшина
sample_subm = pd.read_csv('data/kaggle_sample_submission.csv')
sample_subm["player_id"] = sample_subm["id"].apply(lambda x: int(x.split("_")[0]))
player_id=sample_subm.groupby(['player_id'])['price'].count()
player_id.index

Int64Index([    1,     2,     3,     4,     5,     6,     7,     8,     9,
               10,
            ...
            18336, 18337, 18338, 18339, 18340, 18341, 18342, 18344, 18345,
            18370],
           dtype='int64', name='player_id', length=1253)

## prediction

In [138]:
predictions = 7
#train_df=df
train_df = df[:-predictions]

In [139]:
%%time
m = Prophet()
m.fit(train_df)
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)
future.tail(7)

Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
Wall time: 3.07 s


In [140]:
cmp_df = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(df.set_index('ds'))

cmp_df['e'] = cmp_df['y'] - cmp_df['yhat']
cmp_df['p'] = 100*cmp_df['e']/cmp_df['y']
print('MAPE', np.mean(abs(cmp_df[-predictions:]['p'])))
print('MAE', np.mean(abs(cmp_df[-predictions:]['e'])))
print(mean_squared_error(df.y[-7:], cmp_df.yhat[-7:])**0.5)

MAPE 7.495783421829282
MAE 238515.0399445612
251768.789117


---

### validation

In [141]:
predictions = 7
train_df = df[:-predictions]

In [142]:
%%time
m = Prophet()
m.fit(train_df)
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)
future.tail(7)

Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
Wall time: 1.93 s


In [143]:
cmp_df = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(df.set_index('ds'))
cmp_df.tail(7)

Unnamed: 0_level_0,yhat,yhat_lower,yhat_upper,y
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-06-16,3383420.0,3232611.0,3533953.0,3212762
2017-06-17,3420175.0,3272220.0,3576612.0,3312852
2017-06-18,3427870.0,3283284.0,3573526.0,3227893
2017-06-19,3433423.0,3282959.0,3584672.0,3142655
2017-06-20,3458722.0,3306969.0,3606256.0,3135387
2017-06-21,3477677.0,3326052.0,3634103.0,3248943
2017-06-22,3485954.0,3331920.0,3645646.0,3137143


In [121]:
qwe=cmp_df.yhat[-7:]    
#print(qwe)

pp=pd.DataFrame(columns=['y','ds'])
pp['ds']=qwe.index.astype(str)
pp.y=qwe.values
pp.y=pp.y.astype(int)
pp

Unnamed: 0,y,ds
0,3097255,2017-06-16
1,3136083,2017-06-17
2,3129383,2017-06-18
3,3087740,2017-06-19
4,3240035,2017-06-20
5,3206386,2017-06-21
6,3133730,2017-06-22


In [122]:
df_new=train_df
df_new.tail()

Unnamed: 0,y,ds
261,3196091,2017-06-11
262,3369791,2017-06-12
263,3311636,2017-06-13
264,3308667,2017-06-14
265,3269308,2017-06-15


In [124]:
df_new=df_new.append(pp,ignore_index=True)
df_new.tail(10)

Unnamed: 0,y,ds
12,3311636,2017-06-13 00:00:00
13,3308667,2017-06-14 00:00:00
14,3269308,2017-06-15 00:00:00
15,3097255,2017-06-16
16,3136083,2017-06-17
17,3129383,2017-06-18
18,3087740,2017-06-19
19,3240035,2017-06-20
20,3206386,2017-06-21
21,3133730,2017-06-22


In [125]:
%%time
m = Prophet()
m.fit(df_new)
future = m.make_future_dataframe(periods=7)
forecast = m.predict(future)
future.tail(7)

Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
Wall time: 1.99 s


In [126]:
cmp_df_new = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(df.set_index('ds'))
cmp_df_new.tail(7)

Unnamed: 0_level_0,yhat,yhat_lower,yhat_upper,y
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-06-23,2948646.0,2871044.0,3026023.0,
2017-06-24,2987141.0,2909318.0,3069214.0,
2017-06-25,2981143.0,2899615.0,3061322.0,
2017-06-26,2939954.0,2859732.0,3018517.0,
2017-06-27,3093834.0,3008773.0,3176259.0,
2017-06-28,3061287.0,2975877.0,3146325.0,
2017-06-29,2986164.0,2905922.0,3074350.0,


---

In [146]:
df_all = pd.read_csv('data/player_price.csv')
df_all['ds'] = pd.to_datetime(df_all['timestamp'], unit='ms')
df_all=df_all[df_all.ds>='2017-02-01']
df_all.drop(["timestamp", "ps_price", "player_name"], axis=1, inplace=True)
df_all.rename(columns={"xbox_price": "y"}, inplace=True)

df_all.head()

Unnamed: 0,y,player_id,ds
131,5700917,1,2017-02-01
132,5577531,1,2017-02-02
133,5512164,1,2017-02-03
134,5692598,1,2017-02-04
135,5602358,1,2017-02-05


In [147]:
#id-шники для предикшина
sample_subm = pd.read_csv('data/kaggle_sample_submission.csv')
sample_subm["player_id"] = sample_subm["id"].apply(lambda x: int(x.split("_")[0]))
player_id=sample_subm.groupby(['player_id'])['price'].count()
#player_id.index

In [157]:
%%time
k=0
for i in range(1,3):#player_id.index[350:len(player_id.index)]:#player_id.index
    print('==================================')
    print('id={}'.format(i))
       
    df=df_all.loc[df_all['player_id'] == i]
    df.drop(['player_id'], axis=1, inplace=True)
    df=df.replace(0,1)    
    
    #validation (1 fold)
    predictions = 7
    trainFirst_df = df[:-predictions]    
    
    m = Prophet()
    m.fit(trainFirst_df)    
    future_validation = m.make_future_dataframe(periods=predictions)
    forecast_validation = m.predict(future_validation)    
    
    cmp_validation_df = forecast_validation.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(df.set_index('ds'))
    qwe_validation=cmp_validation_df.yhat[-predictions:]    
    #print(qwe)
    print('rmse: ',mean_squared_error(df.y[-7:], cmp_validation_df.yhat[-7:])**0.5)
    
    pp_validation=pd.DataFrame(columns=['y','ds'])
    pp_validation['ds']=qwe_validation.index.astype(str)
    pp_validation.y=qwe_validation.values
    pp_validation.y=pp_validation.y.astype(int)
    
    train_1Fold_df=trainFirst_df[7:]
    train_1Fold_df=train_1Fold_df.append(pp_validation,ignore_index=True)
    
    #prediction for 7 day
    m = Prophet()
    m.fit(train_1Fold_df)
    
    future = m.make_future_dataframe(periods=7)
    forecast = m.predict(future)
        
    cmp_df = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']].join(train_1Fold_df.set_index('ds'))
    
    qwe=cmp_df.yhat[-7:]    
    #print(qwe)
    
    pp=pd.DataFrame(columns=['id','price'])
    pp['id']=str(i)+'_'+qwe.index.astype(str)
    pp.price=qwe.values
    pp.price=pp.price.astype(int)
    print(pp)
    
    if k==0:
        pp.to_csv('csv/prediction_fbpophet9.csv',sep=',',header=True,index=False)
        k=1
    else:
        with open('csv/prediction_fbpophet9.csv', 'a') as f:
            pp.to_csv(f, header=False,index=False,sep=',')

id=1
Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
rmse:  251768.789117
Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
             id    price
0  1_2017-06-23  3496383
1  1_2017-06-24  3522347
2  1_2017-06-25  3533008
3  1_2017-06-26  3544336
4  1_2017-06-27  3580401
5  1_2017-06-28  3588784
6  1_2017-06-29  3599955
id=2
Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
rmse:  50920.9698483
Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
             id   price
0  2_2017-06-23  839224
1  2_2017-06-24  825889
2  2_2017-06-25  827267
3  2_2017-06-26  835669
4  2_2017-06-27  839934
5  2_2017-06-28  847413
6  2_2017-06-29  852226
Wall time: 9.7 s
