In [1]:
import os
import pandas as pd

# Folder holding the Olist CSV exports. It can be overridden with the
# OLIST_DATA_DIR environment variable so the notebook is not tied to one
# machine; the original location is kept as the default.
path = os.environ.get('OLIST_DATA_DIR', "c:\\temp\\python_play_ground\\sensor\\archive")
order_items = pd.read_csv(os.path.join(path, 'olist_order_items_dataset.csv'))
orders = pd.read_csv(os.path.join(path, 'olist_orders_dataset.csv'))
orders = orders[['order_id', 'order_purchase_timestamp']]
data = pd.merge(order_items, orders, on='order_id')

data_long = data[['product_id', 'order_purchase_timestamp']].copy()

# Keep only the calendar day of each purchase and drop everything before 2017.
data_long['order_purchase_timestamp'] = pd.to_datetime(data_long['order_purchase_timestamp']).dt.normalize()
# .copy() so the column assignment below works on an independent frame, not a slice.
data_long = data_long[data_long['order_purchase_timestamp'] >= '2017-01-01'].copy()

# Map each purchase day to the Sunday on or before it (Monday=0 ... Sunday=6,
# so Sunday is 0 days back, Monday 1, ..., Saturday 6). Vectorised equivalent
# of shifting by one day and subtracting pd.offsets.Week(weekday=6) row by row.
purchase_day = data_long['order_purchase_timestamp']
days_since_sunday = (purchase_day.dt.weekday + 1) % 7
data_long['week_start_date'] = purchase_day - pd.to_timedelta(days_since_sunday, unit='D')

# Units sold per product and week = number of order-item rows.
# NOTE(review): in the public Olist schema `order_item_id` is a sequence number
# inside an order (1, 2, 3, ...), so summing it overstates the quantity;
# rows are counted instead. Confirm against the dataset documentation.
data_grouped = (
    data_long.groupby(['product_id', 'week_start_date'])
    .size()
    .reset_index(name='quantity_sold')
)

# Keep the 100 products that appear in the largest number of distinct weeks.
top100 = data_grouped['product_id'].value_counts().head(100).index
data_grouped = data_grouped[data_grouped['product_id'].isin(top100)]

# Pivot to a product x week grid so weeks without sales become explicit zeros,
# then go back to long format.
data_pivoted = data_grouped.pivot(index='product_id', columns='week_start_date', values='quantity_sold').fillna(0)
data_long = data_pivoted.stack().rename('quantity_sold').reset_index()

# Every product must now span the same number of weeks.
weeks_per_product = data_long.groupby('product_id').size()
assert weeks_per_product.nunique() == 1, 'products do not all cover the same number of weeks'

data_long = data_long.rename(columns={'week_start_date': 'ds', 'quantity_sold': 'y', 'product_id': 'unique_id'})

# Time-based split: 2017 for training, January-February 2018 for validation.
train = data_long[data_long['ds'] < '2018-01-01']
valid = data_long[(data_long['ds'] >= '2018-01-01') & (data_long['ds'] < '2018-03-01')]
# Forecast horizon = number of distinct weeks in the validation window.
h = valid['ds'].nunique()
print('h =', h)

print('train:', train)
print('valid:', valid)

h = 8
train:                              unique_id         ds    y
0     0152f69b6cf919bcdaf117aa8c43e5a2 2017-01-08  0.0
1     0152f69b6cf919bcdaf117aa8c43e5a2 2017-01-15  0.0
2     0152f69b6cf919bcdaf117aa8c43e5a2 2017-01-22  0.0
3     0152f69b6cf919bcdaf117aa8c43e5a2 2017-01-29  0.0
4     0152f69b6cf919bcdaf117aa8c43e5a2 2017-02-05  0.0
...                                ...        ...  ...
8561  fc1d8637c0268af3db482c14b7ef8e75 2017-12-03  1.0
8562  fc1d8637c0268af3db482c14b7ef8e75 2017-12-10  1.0
8563  fc1d8637c0268af3db482c14b7ef8e75 2017-12-17  0.0
8564  fc1d8637c0268af3db482c14b7ef8e75 2017-12-24  1.0
8565  fc1d8637c0268af3db482c14b7ef8e75 2017-12-31  1.0

[5200 rows x 3 columns]
valid:                              unique_id         ds    y
52    0152f69b6cf919bcdaf117aa8c43e5a2 2018-01-07  2.0
53    0152f69b6cf919bcdaf117aa8c43e5a2 2018-01-14  1.0
54    0152f69b6cf919bcdaf117aa8c43e5a2 2018-01-21  1.0
55    0152f69b6cf919bcdaf117aa8c43e5a2 2018-01-28  0.0
56    0152f69b6cf919

In [2]:
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, WindowAverage, SeasonalWindowAverage

# Baseline forecasters; the seasonal variants use a season of 4 periods.
baseline_models = [
    Naive(),
    SeasonalNaive(season_length=4),
    WindowAverage(window_size=4),
    SeasonalWindowAverage(window_size=2, season_length=4),
]
model = StatsForecast(models=baseline_models, freq='W', n_jobs=-1)

  from tqdm.autonotebook import tqdm


In [3]:
# Fit the configured models on the training frame; the object returned by
# fit() is the last expression, so its repr is shown as the cell output.
model.fit(train)

StatsForecast(models=[Naive,SeasonalNaive,WindowAverage,SeasWA])

In [4]:
# Produce the h-step forecasts from the models fitted by model.fit(train).
# predict() reuses that fit, whereas forecast() trains again from scratch and,
# in recent statsforecast releases, requires the training frame as `df`
# (NOTE(review): confirm against the installed statsforecast version).
p = model.predict(h=h)



In [8]:
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, HoltWinters

# Statistical models, both configured with a season of 4 periods;
# Holt-Winters uses the additive error type ('A').
statistical_models = [
    AutoARIMA(season_length=4),
    HoltWinters(season_length=4, error_type='A'),
]
model = StatsForecast(models=statistical_models, freq='W', n_jobs=-1)

In [9]:
model.fit(train)

# Forecast h steps ahead with 90% prediction intervals, reusing the fit above.
# forecast() would train every model a second time and, in recent statsforecast
# releases, requires the training frame as `df`
# (NOTE(review): confirm against the installed statsforecast version).
p = model.predict(h=h, level=[90])

# Sales quantities cannot be negative, so floor the point forecasts and the
# interval bounds at 0. The value columns are selected by name instead of by
# position because, depending on the statsforecast version, `unique_id` is
# either the index or a regular column of the result.
cols = p.columns.difference(['unique_id', 'ds'], sort=False)
p[cols] = p[cols].clip(lower=0)

# Make sure `unique_id` is a column, then attach the observed values from
# `valid` for evaluation.
if 'unique_id' not in p.columns:
    p = p.reset_index()
p = p.merge(valid, on=['ds', 'unique_id'], how='left')

