In [5]:
import pandas as pd 
import numpy as np
from prophet import Prophet
import datetime
%matplotlib notebook
import matplotlib.pyplot as plt


In [6]:
# Load the revenue data; the first CSV column holds the saved row index.
revenue_df = pd.read_csv(
    filepath_or_buffer='rev.csv',
    index_col=0,
)
# Preview the first rows to check the columns that were read.
revenue_df.head(5)

Unnamed: 0,date,rev,discount
0,2019-01-01,860331.5,512860.0
1,2019-01-02,1494803.0,864634.0
2,2019-01-03,1004543.0,397134.0
3,2019-01-04,1011044.0,361481.0
4,2019-01-05,1025176.0,408829.0


## Plot the time series of total discounts so we can find campaign days and exclude their anomalous revenue from our model

In [7]:
# Plot total discounts over time so that discount spikes (campaign days) stand out.
# The 'date' column is read from the CSV as plain strings; plotting strings makes
# matplotlib treat every value as a separate category and draw one tick label per
# row, so the dates are parsed for the x-axis only (revenue_df['date'] is unchanged).
fig, ax = plt.subplots()
ax.plot(pd.to_datetime(revenue_df['date']), revenue_df['discount'])
ax.set(title='Total discount over time', xlabel='Date', ylabel='Total discount')
fig.autofmt_xdate()  # slant the date labels so they do not overlap

# 'discount' is only needed for the visual check above; drop it before modelling.
# NOTE: this reassigns revenue_df, so re-running this cell without reloading the
# data raises KeyError on 'discount'.
revenue_df = revenue_df.drop(columns=['discount'])

<IPython.core.display.Javascript object>

In [8]:
# Rename the columns to the 'ds' (date) / 'y' (target) names used when fitting
# Prophet, and order the rows by date.
revenue_df = (
    revenue_df
    .rename(columns={'date': 'ds', 'rev': 'y'})
    .sort_values(by='ds')
)
revenue_df.head(10)

Unnamed: 0,ds,y
0,2019-01-01,860331.5
1,2019-01-02,1494803.0
2,2019-01-03,1004543.0
3,2019-01-04,1011044.0
4,2019-01-05,1025176.0
5,2019-01-06,996386.3
6,2019-01-07,1168613.0
7,2019-01-08,1094639.0
8,2019-01-09,1335641.0
9,2019-01-10,1545129.0


In [9]:

# Campaign dates, passed to Prophet as "holidays" in the model cell.
# Every date shares the same 'campaign_week' label.
campaign_days = pd.DataFrame(
    dict(
        holiday='campaign_week',
        ds=pd.to_datetime(
            [
                '2019-05-01',
                '2019-11-01',
                '2019-11-11',
                '2019-11-22',
                '2019-11-29',
            ]
        ),
    )
)






In [10]:
# Build the Prophet model: yearly seasonality on, campaign days supplied as
# holidays, and a small changepoint prior scale (0.001).
forecast = Prophet(
    holidays=campaign_days,
    yearly_seasonality=True,
    changepoint_prior_scale=0.001,
)
forecast.fit(revenue_df)

# Extend the date range by 30 periods beyond the history, then predict over
# both the historical and the future dates.
future = forecast.make_future_dataframe(periods=30)
results = forecast.predict(future)

# Show the point forecast with its lower/upper bounds for the first rows.
results.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head(5)


INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2019-01-01,1070009.0,73547.519886,2051232.0
1,2019-01-02,1180327.0,241645.925841,2181567.0
2,2019-01-03,1312933.0,377810.040178,2304830.0
3,2019-01-04,1316438.0,384853.543303,2322040.0
4,2019-01-05,1330909.0,333007.854244,2364153.0


In [11]:
# Plot the forecast produced by the fitted model.
fig1 = forecast.plot(fcst=results)

<IPython.core.display.Javascript object>

In [12]:
# Plot the individual components of the forecast.
fig2 = forecast.plot_components(fcst=results)

<IPython.core.display.Javascript object>

In [14]:
# Inspect the last 30 rows of the prediction (matches the 30 future periods
# requested from make_future_dataframe).
results.loc[:, ['ds', 'yhat_lower', 'yhat', 'yhat_upper']].tail(n=30)

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper
365,2020-01-01,1752263.0,2739896.0,3681972.0
366,2020-01-02,1882112.0,2876809.0,3836685.0
367,2020-01-03,1859182.0,2885252.0,3903831.0
368,2020-01-04,1883040.0,2905113.0,3836171.0
369,2020-01-05,1839769.0,2827701.0,3880156.0
370,2020-01-06,1857610.0,2836320.0,3837738.0
371,2020-01-07,1948590.0,2912965.0,3909409.0
372,2020-01-08,2006897.0,2998789.0,3984694.0
373,2020-01-09,2190357.0,3106079.0,4081035.0
374,2020-01-10,2092723.0,3084248.0,4155431.0


In [15]:
# Save the date, point forecast and its lower/upper bounds for every predicted
# row to CSV (the DataFrame index is written as the first column).
results.loc[:, ['ds', 'yhat_lower', 'yhat', 'yhat_upper']].to_csv(
    path_or_buf='revenue_forecast.csv'
)