# Imports

In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorboard
import random as rand

# Load data

In [46]:
# Load the raw sales records. header=0 marks the file's first row as a header
# row to be replaced, and names= supplies the column labels used instead.
data = pd.read_csv(
    'sales_quantity.csv',
    header=0,
    names=['date', 'item_code', 'quantity'],
)
data.head()

Unnamed: 0,date,item_code,quantity
0,2022-08-26,1000,15
1,2022-08-26,500,14
2,2023-01-01,8991102380706,13
3,2023-01-01,8991102381017,13
4,2023-01-01,8886008101053,20


In [47]:
# Transform data

In [48]:
# Parse the date column into datetimes, then derive calendar features from it.
data['date'] = pd.to_datetime(data['date'])

# Calendar position of each record.
data['year'], data['month'], data['day'] = (
    data['date'].dt.year,
    data['date'].dt.month,
    data['date'].dt.day,
)

# Position within the week and within the year, as reported by the .dt accessor.
data['day_of_week'], data['day_of_year'] = (
    data['date'].dt.dayofweek,
    data['date'].dt.dayofyear,
)



In [59]:
# Pivot to one row per date and one column per item_code:
#   1. sum the quantity for every (item_code, date) combination,
#   2. move item_code out of the row index and into the columns,
#   3. treat item/date combinations with no record (NaN) as 0 sold,
#   4. turn the remaining date index levels back into ordinary columns.
item_sales = (
    data
    .groupby(['item_code', 'date', 'year', 'month', 'day', 'day_of_week', 'day_of_year'])['quantity']
    .sum()
    .unstack(level='item_code')
    .fillna(0)
    .reset_index()
)
item_sales.head()

item_code,date,year,month,day,day_of_week,day_of_year,(90)NA18210500154(91)2403,(90)NA18211207820(91)2410,00000001,00000002,...,CL000448327,CL000450943,COS LT,COSLT-228,EC0102190002,EC0102191301,EC0103190002,EC0106190101,MP-2203,SLM0958266
0,2022-01-03,2022,1,3,0,3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2022-01-04,2022,1,4,1,4,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2022-01-05,2022,1,5,2,5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2022-01-06,2022,1,6,3,6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2022-01-07,2022,1,7,4,7,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
# Look up the column label at position 7. The frame starts with six date-derived
# columns (positions 0-5), so position 7 is the second item-code column.
item_sales.columns[7]

'(90)NA18211207820(91)2410'

In [82]:
#convert each item's sales series to a tensorflow dataset
#positions 0-5 of item_sales hold date, year, month, day, day_of_week and
#day_of_year, so the item columns begin at position 6; starting the slice at 7
#would silently leave out the first item
dataset = [tf.data.Dataset.from_tensor_slices(item_sales[column]) for column in item_sales.columns[6:]]
dataset[0].element_spec

TensorSpec(shape=(), dtype=tf.float64, name=None)

In [0]:
#function to window the dataset
def windowed_dataset(ds, window_size):
    """Turn a dataset into (input window, following element) pairs.

    Args:
        ds: a dataset exposing ``window``, ``flat_map`` and ``map``.
        window_size: number of consecutive elements in each input window.

    Returns:
        A dataset of ``(inputs, label)`` tuples: ``inputs`` is the first
        ``window_size`` elements of each sliding window and ``label`` is the
        one element after them, kept as a length-1 slice.
    """
    span = window_size + 1  # the inputs plus the single element used as label

    def to_chunk(window):
        # Collapse one window (itself a dataset) into a single batched element.
        return window.batch(span)

    def split_inputs_and_label(chunk):
        return chunk[:-1], chunk[-1:]

    # Slide one element at a time and discard trailing windows shorter than span.
    sliding = ds.window(span, shift=1, drop_remainder=True)
    return sliding.flat_map(to_chunk).map(split_inputs_and_label)

In [92]:
#window every item's series into 21-step inputs, grouped into batches of 32
#windowed_dataset(ds, window_size) has no batch-size parameter (passing a third
#argument raises TypeError), so the batching is applied to its result here
windowed = [windowed_dataset(series, 21).batch(32) for series in dataset]

windowed

KeyboardInterrupt: 

In [91]:
# Pull the first element of the first item's windowed dataset as NumPy arrays.
next(windowed[0].take(1).as_numpy_iterator())

(array([[0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.]]),
 array([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]]))

In [44]:
#Tokenizer to tokenize the item codes
def create_tokenizer(item_code):
    """Fit a Keras Tokenizer that assigns one integer id to each item code.

    Args:
        item_code: iterable of item-code strings to build the vocabulary from.

    Returns:
        The fitted tokenizer; ``word_index`` maps each item code to its id.
    """
    # Item codes are opaque identifiers, so no characters are filtered and case
    # is preserved. Some codes contain a space (e.g. 'COS LT'); the Tokenizer
    # splits on ' ' by default, which would break such a code into several
    # tokens. Splitting on NUL, which is not expected inside a code, keeps
    # every code as a single token both here and in later texts_to_sequences
    # calls made with the returned tokenizer.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', lower=False, split='\x00')
    tokenizer.fit_on_texts(item_code)
    return tokenizer
# Fit the tokenizer on the first column of item_rows, skipping its first five rows.
# NOTE(review): item_rows is not assigned in any cell visible in this notebook —
# confirm where it comes from so this cell runs on a fresh kernel.
tokenizer = create_tokenizer(item_rows.iloc[5:,0])

# Display the token -> integer id mapping built during fitting.
tokenizer.word_index

{'(90)NA18210500154(91)2403': 1,
 '(90)NA18211207820(91)2410': 2,
 '00000001': 3,
 '00000002': 4,
 '00000003': 5,
 '00000008': 6,
 '00000010': 7,
 '00000011': 8,
 '00000012': 9,
 '00000013': 10,
 '00000014': 11,
 '00000015': 12,
 '00000016': 13,
 '00000017': 14,
 '00000019': 15,
 '00000020': 16,
 '00000021': 17,
 '00000022': 18,
 '00000023': 19,
 '00000024': 20,
 '00000025': 21,
 '00000026': 22,
 '00000027': 23,
 '00000030': 24,
 '00000031': 25,
 '00000032': 26,
 '00000034': 27,
 '00000035': 28,
 '00000036': 29,
 '00000037': 30,
 '00000038': 31,
 '00000039': 32,
 '00000040': 33,
 '00000041': 34,
 '00000042': 35,
 '00000044': 36,
 '00000045': 37,
 '00000046': 38,
 '00000047': 39,
 '00000048': 40,
 '00000049': 41,
 '00000050': 42,
 '00000051': 43,
 '00000052': 44,
 '00000054': 45,
 '00000057': 46,
 '00000058': 47,
 '00000059': 48,
 '00000060': 49,
 '00000061': 50,
 '00000062': 51,
 '00000063': 52,
 '00000064': 53,
 '00000065': 54,
 '00000067': 55,
 '00000069': 56,
 '00000070': 57,
 '0000

In [8]:
#map the first column of item_rows (from row 5 onward) to token ids
#NOTE(review): texts_to_sequences presumably returns nested Python lists rather
#than a tensor; the conversion to a tensor would then happen in tf.transpose below — confirm
item_tensor = tokenizer.texts_to_sequences(item_rows.iloc[5:,0])

In [9]:
#transpose tensor to get item codes as rows
#NOTE(review): item_tensor is overwritten with its own transpose, so running this
#cell a second time presumably flips it back — re-run the previous cell first
item_tensor = tf.transpose(item_tensor)

In [10]:
#inspect the entry at index 4 along the first axis of the transposed tensor
item_tensor[4]

<tf.Tensor: shape=(451,), dtype=float64, numpy=
array([  3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,  12.,  13.,
        14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,
        25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,  34.,  35.,
        36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,
        47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,
        58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,
        69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,  78.,  79.,
        80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,  89.,  90.,
        91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99., 100., 101.,
       102., 103., 104., 105., 106., 107., 108., 109., 110., 111., 112.,
       113., 114., 115., 116., 117., 118., 119., 120., 121., 124., 125.,
       126., 127., 128., 129., 130., 131., 132., 133., 134., 135., 136.,
       137., 138., 139., 140., 141., 142., 143., 144., 145., 146., 147.,
   

In [11]:
#window the dataset
def windowed_dataset(series, window_size, batch_size, shuffle_buffer=None):
    """Build batched (input window, following element) pairs from a series.

    NOTE(review): an earlier cell defines a function with this same name but the
    signature ``(ds, window_size)``; whichever cell ran last is the one in effect.

    Args:
        series: values sliced along their first axis into dataset elements.
        window_size: number of consecutive elements in each input window.
        batch_size: number of (inputs, label) pairs per batch.
        shuffle_buffer: buffer size used to shuffle the windows before they are
            split and batched. ``None`` (the default) or 0 keeps the windows in
            their original order.

    Returns:
        A dataset of ``(inputs, label)`` batches, prefetching one batch ahead.
        ``inputs`` holds the first ``window_size`` elements of each window and
        ``label`` the one element after them, kept as a length-1 slice.
    """
    ds = tf.data.Dataset.from_tensor_slices(series)
    # Slide one element at a time; drop trailing windows that are too short.
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # Each window is itself a dataset; collapse it into one batched element.
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    # Shuffle whole windows, so the order inside each window is preserved.
    # Previously shuffle_buffer was accepted but never used.
    if shuffle_buffer:
        ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda windows: (windows[:-1], windows[-1:]))
    return ds.batch(batch_size).prefetch(1)

# Window entries 5 and 6 of item_tensor into 7-element chunks, then split each
# chunk into its first six elements (inputs) and its last element (label).
windowed = (
    tf.data.Dataset.from_tensor_slices(item_tensor[5:7])
    .window(7, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(7))
    .map(lambda windows: (windows[:-1], windows[-1:]))
)

# NOTE(review): the slice [5:7] holds at most two entries, fewer than the 7 a
# full window needs; with drop_remainder=True this presumably yields no
# elements, so the loop prints nothing — confirm the intended slice.
for each in windowed:
    print(each)

In [12]:
# Limit the dataset to its first element. As the cell's last expression this
# displays the resulting dataset object (its element_spec), not the values.
windowed.take(1)

<_TakeDataset element_spec=(TensorSpec(shape=(None, 451), dtype=tf.float64, name=None), TensorSpec(shape=(None, 451), dtype=tf.float64, name=None))>