# Introduction
This is a notebook using machine learning to output the schedule of suppliers.
First import the data in the form of a csv file. The input of the model will be a date and the output will be a list of days corresponding to the schedule of the suppliers and when they will arrive next.

# Imports

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import datetime

# Load Dataset

In [2]:
filepath = 'supplier_schedule.csv'#supplier schedule on local drive
#filepath = '/content/drive/MyDrive/Documents/uni_work/Bangkit2023/capstone/supplier_schedule.csv'
df = pd.read_csv(filepath, dtype = {'supplier':str,'tanggal':str})
df['supplier'] = df['supplier'].str.replace(' ','')



# Data Transformation

Transform the code of suppliers into their tokenized forms using the tokenizer.

In [3]:
def supplier_tokenizer(supplier):
  """
  Instantiates the tokenizer for the supplier kodes
  Args:
    supplier (list): upper case list of supplier codes
  
  Returns:
    tokenizer (object): an instance of the Tokenizer class containing the dict
  """

  tokenizer = Tokenizer()
  tokenizer.fit_on_texts(supplier)

  index = tokenizer.word_index
  sequence = tokenizer.texts_to_sequences(supplier)

  return index, sequence

supplier_index, supplier_sequence = supplier_tokenizer(df['supplier'])

print(supplier_index)





{'mount': 1, 'inn': 2, 'sm': 3, 'kbj': 4, 'vg': 5, 'nv': 6, 'retur': 7, 'cd': 8, 'fajar': 9, 'aba': 10, 'ajw': 11, 'sk': 12, 'mara': 13, 'kampas': 14, 'okhl': 15, 'gmp': 16, 'asih': 17, 'kr': 18, 'adr': 19, 'cola': 20, 'yip': 21, 'ngetop': 22, 'pr': 23, 'mitra': 24, 'sampoerna': 25, 'cr': 26, 'uti': 27, 'bm': 28, 'enseval': 29, 'elsie': 30, 'umj': 31, 'suryam': 32, 'ds': 33, 'makdah': 34, 'ub': 35, 'esse': 36, 'dbt': 37, 'pp': 38, 'tuah': 39, 'bhk': 40, 'yusra': 41, 'wt': 42, 'tempo': 43, 'ina': 44, 'pt': 45, 'iap': 46, 'eb': 47, 'bukwati': 48, 'jakarta': 49, 'db': 50, 'tiara': 51, 'dba': 52, 'mja': 53, 'lina': 54, 'utj': 55, 'permata': 56, 'mas': 57, 'ppl': 58, 'sb': 59, 'herman': 60, 'sjma': 61, 'pgi': 62, 'ys': 63, 'jailolo': 64, 'bcrs': 65, 'aps': 66, 'lg': 67, 'elka': 68, 'bml': 69, 'mbs': 70, 'ida': 71, 'elyana': 72, 'bsp': 73, 'mutiara': 74, 'srb': 75, 'fd': 76, 'iub': 77, 'abs': 78, 'sab': 79, 'heri': 80, 'abd': 81, 'ip': 82, 'woi': 83, 'bsm': 84, 'scm': 85, 'rahmat': 86, 'kgak

In [4]:
#multi_hot encode the suppliers based of the date and if they visited on that date or not
df['supplier'] = np.array([i[0] for i in supplier_sequence],dtype = np.int64)

multi_hot = pd.get_dummies(df, columns = ['supplier'],prefix = '', prefix_sep = '')
multi_hot = multi_hot.groupby('tanggal').sum()
multi_hot['1'] = [0 for i in range(len(multi_hot['2']))]
multi_hot.insert(0,'1',multi_hot.pop('1'))
multi_hot = multi_hot.reset_index()
multi_hot

Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,2022-01-03,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2022-01-04,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,2022-01-05,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2022-01-06,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2022-01-07,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,2023-03-28,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
386,2023-03-29,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
387,2023-03-30,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
388,2023-03-31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
#convert date to epoch
epoch = pd.to_datetime(multi_hot['tanggal'], format = "%Y-%m-%d")
epoch = (epoch - datetime.datetime(1970,1,1))
epoch = epoch.map(lambda x: x.total_seconds())
multi_hot['tanggal'] = epoch
multi_hot


Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,1.641168e+09,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.641254e+09,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,1.641341e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1.641427e+09,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.641514e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
386,1.680048e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
387,1.680134e+09,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
388,1.680221e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
#sort from multi_hot date
multi_hot = multi_hot.sort_values('tanggal')#, ascending= False)
multi_hot = multi_hot.reset_index()
multi_hot.pop('index')
multi_hot

Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,1.641168e+09,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.641254e+09,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,1.641341e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1.641427e+09,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.641514e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
386,1.680048e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
387,1.680134e+09,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
388,1.680221e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
#add previous date column
def previous_date(dataframe):
  """
  Adds a column containing the previous date
  Args:
    dataframe (dataframe): dataframe containing the dates

  Returns:
    df (dataframe): dataframe containing the previous date
  """
  dataframe['previous_tanggal'] = dataframe['tanggal'].shift(1)
  return dataframe
multi_hot = previous_date(multi_hot)
multi_hot

Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,191,192,193,194,195,196,197,198,199,previous_tanggal
0,1.641168e+09,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
1,1.641254e+09,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,1.641168e+09
2,1.641341e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.641254e+09
3,1.641427e+09,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1.641341e+09
4,1.641514e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.641427e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1.679875e+09
386,1.680048e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.679962e+09
387,1.680134e+09,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.680048e+09
388,1.680221e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.680134e+09


In [8]:
#Get the time difference between the current date and the previous date and convert it to days, keep the first row as 0
def time_diff(dataframe):
  """
  Adds a column containing the time difference between the current date and the previous date
  Args:
    dataframe (dataframe): dataframe containing the dates

  Returns:
    df (dataframe): dataframe containing the time difference between the current date and the previous date
  """
  dataframe['time_diff'] = dataframe['tanggal'] - dataframe['previous_tanggal']
  dataframe['time_diff'] = dataframe['time_diff'].map(lambda x: x / 86400)
  dataframe['time_diff'] = dataframe['time_diff'].fillna(1)
  return dataframe
multi_hot = time_diff(multi_hot)
multi_hot


Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,192,193,194,195,196,197,198,199,previous_tanggal,time_diff
0,1.641168e+09,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,,1.0
1,1.641254e+09,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,1.641168e+09,1.0
2,1.641341e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1.641254e+09,1.0
3,1.641427e+09,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,1.641341e+09,1.0
4,1.641514e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.641427e+09,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1.679875e+09,1.0
386,1.680048e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1.679962e+09,1.0
387,1.680134e+09,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.680048e+09,1.0
388,1.680221e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.680134e+09,1.0


In [9]:
#swap values of 0 and 1 in the first row of the dataframe (because the first row is the lamultihot date for supplier visits)
def swap_0_1(dataframe):
  """
  Swaps the values of 0 and 1 in the first row of the dataframe
  Args:
    dataframe (dataframe): dataframe containing the dates

  Returns:
    df (dataframe): dataframe containing the swapped values of 0 and 1 in the first row of the dataframe
  """
  for i in range(1, len(dataframe.columns)):
    if dataframe.iloc[0,i] == 0:
      dataframe.iloc[0,i] = 1
    else:
      dataframe.iloc[0,i] = 0
  return dataframe
multi_hot = swap_0_1(multi_hot)
multi_hot

Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,192,193,194,195,196,197,198,199,previous_tanggal,time_diff
0,1.641168e+09,1,0,1,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,0.000000e+00,0.0
1,1.641254e+09,0,0,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,1.641168e+09,1.0
2,1.641341e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1.641254e+09,1.0
3,1.641427e+09,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,1.641341e+09,1.0
4,1.641514e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.641427e+09,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,0,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1.679875e+09,1.0
386,1.680048e+09,0,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1.679962e+09,1.0
387,1.680134e+09,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.680048e+09,1.0
388,1.680221e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1.680134e+09,1.0


In [11]:
#in order to determine days till next visit, we need to set the rows of each column that has 0 to time_diff + previous value, if value is 1, then set it to 0 (because suppier visited on that day)
def days_till_next_visit(dataframe):
  """
  Adds a column containing the days till the next visit
  Args:
    dataframe (dataframe): dataframe containing the dates

  Returns:
    df (dataframe): dataframe containing the days till the next visit
  """
  dataframe = dataframe.copy()
  for i in range(1, len(dataframe.columns)-1):
    for j in range(1, len(dataframe)):
      if dataframe.iloc[j,i] == 0:
        dataframe.iloc[j,i] = dataframe.iloc[j - 1,i] + dataframe.iloc[j,dataframe.columns.get_loc('time_diff')]
      else:
        dataframe.iloc[j,i] = 0
  return dataframe
multi_hot = days_till_next_visit(multi_hot)
multi_hot

Unnamed: 0,tanggal,1,2,3,4,5,6,7,8,9,...,192,193,194,195,196,197,198,199,previous_tanggal,time_diff
0,1.641168e+09,1,0,1,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,0.0,0.0
1,1.641254e+09,2,1,0,1,1,0,2,0,0,...,2,2,2,2,2,2,2,2,0.0,1.0
2,1.641341e+09,3,0,0,2,0,1,3,1,1,...,3,3,3,3,3,3,3,3,0.0,1.0
3,1.641427e+09,4,1,0,0,1,0,4,2,2,...,4,4,4,4,4,4,4,4,0.0,1.0
4,1.641514e+09,5,2,1,1,2,1,5,3,3,...,5,5,5,5,5,5,5,5,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1.679962e+09,450,1,0,0,0,4,203,4,0,...,332,314,225,223,197,171,113,11,0.0,1.0
386,1.680048e+09,451,0,0,1,0,5,204,5,1,...,333,315,226,224,198,172,114,12,0.0,1.0
387,1.680134e+09,452,1,1,0,1,6,205,6,2,...,334,316,227,225,199,173,115,13,0.0,1.0
388,1.680221e+09,453,2,2,1,2,7,206,7,3,...,335,317,228,226,200,174,116,14,0.0,1.0


# Model Architecture

# Analysis