# Introduction
This notebook uses machine learning to predict the schedule of suppliers.
First, the data is imported from a CSV file. The model takes a date as input and outputs a list of days corresponding to the suppliers' schedule, i.e. when each supplier will arrive next.

# Imports

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
import datetime

# Load Dataset

In [2]:
# Path of the supplier schedule CSV on the local drive.
filepath = 'supplier_schedule.csv'
#filepath = '/content/drive/MyDrive/Documents/uni_work/Bangkit2023/capstone/supplier_schedule.csv'
# Read both columns as strings so supplier codes and dates are not reinterpreted as numbers.
df = pd.read_csv(filepath, dtype={'supplier': str, 'tanggal': str})
# Strip spaces and periods from the supplier codes.
# regex=False makes both replacements literal: on pandas versions where
# str.replace defaults to regex=True, the pattern '.' matches every character
# and would blank out all supplier codes.
# NOTE(review): only spaces and periods are removed; other punctuation is kept.
df['supplier'] = df['supplier'].str.replace(' ', '', regex=False)
df['supplier'] = df['supplier'].str.replace('.', '', regex=False)


FileNotFoundError: ignored


# Data Transformation

Transform the code of suppliers into their tokenized forms using the tokenizer.

In [None]:
#multi-hot encode the suppliers: one row per date, one column per supplier code
#NOTE(review): the per-date sum yields 2 (not 1) if a supplier is listed twice on one date -- confirm the data has no such duplicates
multi_hot = (
  pd.get_dummies(df, columns=['supplier'], prefix='', prefix_sep='')
  .groupby('tanggal')
  .sum()
  .reset_index()
)
multi_hot

In [None]:
#convert the 'tanggal' date strings (YYYY-MM-DD) to seconds elapsed since 1970-01-01
epoch = (
  pd.to_datetime(multi_hot['tanggal'], format="%Y-%m-%d")
  - datetime.datetime(1970, 1, 1)
).dt.total_seconds()
multi_hot['tanggal'] = epoch
multi_hot


In [None]:
#sort the rows by date (ascending) and renumber them 0..n-1
#drop=True discards the old index directly instead of inserting it as an 'index'
#column and popping it afterwards (which fails if a column is already called 'index')
multi_hot = multi_hot.sort_values('tanggal', ascending=True).reset_index(drop=True)
multi_hot

In [None]:
#add previous date column
def previous_date(dataframe):
  """
  Adds a 'previous_tanggal' column holding the 'tanggal' value of the row above

  The first row has no row above it, so its 'previous_tanggal' is missing (NaN).
  The dataframe is modified in place and also returned.

  Args:
    dataframe (dataframe): dataframe with a 'tanggal' column

  Returns:
    df (dataframe): the same dataframe object with the extra column
  """
  one_row_earlier = dataframe['tanggal'].shift(periods=1)
  dataframe['previous_tanggal'] = one_row_earlier
  return dataframe
# Attach the previous row's date to every row; previous_date also modifies multi_hot in place.
multi_hot = previous_date(multi_hot)


In [None]:
#Time difference in days between each date and the previous one; the first row has no previous date and is set to 1
def time_diff(dataframe):
  """
  Adds a 'time_diff' column with the gap between 'tanggal' and 'previous_tanggal' in days

  The absolute difference of the two columns is divided by 86400 (seconds per
  day). Rows where the difference is missing are filled with 1.
  The dataframe is modified in place and also returned.

  Args:
    dataframe (dataframe): dataframe with 'tanggal' and 'previous_tanggal' columns

  Returns:
    df (dataframe): the same dataframe object with the extra 'time_diff' column
  """
  seconds_per_day = 86400
  gap_in_seconds = (dataframe['tanggal'] - dataframe['previous_tanggal']).abs()
  dataframe['time_diff'] = (gap_in_seconds / seconds_per_day).fillna(1)
  return dataframe
# Add the day gap between consecutive dates, then drop the helper column that is no longer needed.
multi_hot = time_diff(multi_hot)
multi_hot.pop('previous_tanggal')


In [None]:
#dictionary of sum of 1s for each supplier
def supplier_dict(dataframe):
  """
  Builds a dictionary with the column sum of every column except the first

  Args:
    dataframe (dataframe): dataframe whose first column is skipped

  Returns:
    supplier_dict (dict): column name -> sum of that column
  """
  totals = {name: dataframe[name].sum() for name in dataframe.columns[1:]}
  return totals
# NOTE(review): this rebinds the name `supplier_dict` from the function to its result,
# so running this cell a second time fails ('dict' object is not callable) until the
# defining cell is executed again.
supplier_dict = supplier_dict(multi_hot)
supplier_dict

In [None]:
#re-code the first row of the dataframe: 0 becomes -1 and every other value becomes 0
def swap_0_1(dataframe):
  """
  Re-codes the first row of every column except the first

  A value of 0 is replaced by -1; any other value is replaced by 0.
  The dataframe is modified in place and also returned.

  Args:
    dataframe (dataframe): dataframe whose first row is re-coded

  Returns:
    df (dataframe): the same dataframe object with the re-coded first row
  """
  for column_position in range(1, dataframe.shape[1]):
    was_zero = dataframe.iloc[0, column_position] == 0
    dataframe.iloc[0, column_position] = -1 if was_zero else 0
  return dataframe
# swap_0_1 modifies multi_hot in place, so the first row stays re-coded for every later cell.
# NOTE(review): multi_hot is later turned into the training labels -- confirm the -1 values in its first row are intended there.
multi_hot = swap_0_1(multi_hot)
multi_hot

In [None]:
#update each column
def days_since_last_visit(dataframe):
  """
  Turns each visit column into a running count of days since the last visit

  Works on a copy. For every column except the first and the last, rows are
  processed top to bottom starting at the second row:
    - a non-zero value becomes 0
    - a 0 whose previous row is -1 becomes -1
    - any other 0 becomes the previous row's value plus this row's 'time_diff'

  Args:
    dataframe (dataframe): dataframe with a 'time_diff' column; the first and last columns are left untouched

  Returns:
    df (dataframe): a new dataframe holding the running counts
  """
  result = dataframe.copy()
  row_count = len(result)
  for col in range(1, len(result.columns) - 1):
    for row in range(1, row_count):
      if result.iloc[row, col] != 0:
        # Non-zero entry: the counter restarts at 0 on this row.
        result.iloc[row, col] = 0
        continue
      above = result.iloc[row - 1, col]
      if above == -1:
        # The row above holds the -1 marker, so carry it forward.
        result.iloc[row, col] = -1
      else:
        gap = result.iloc[row, result.columns.get_loc('time_diff')]
        result.iloc[row, col] = above + gap
  return result
# days_since_last_visit works on a copy, so multi_hot itself is left unchanged here.
days_diff = days_since_last_visit(multi_hot)
days_diff

In [None]:
#Remove supplier columns whose last row is -1 and, when max_days is given, those whose last row exceeds max_days
def remove_unvisited(dataframe, max_days=None):
  """
  Removes columns of suppliers that were never visited or, optionally, not visited recently

  A column is dropped when its value in the last row is -1. When max_days is
  given, a column is also dropped when its value in the last row is greater
  than max_days (e.g. max_days=186). The first and the last column are never
  examined or dropped. The dataframe is modified in place and also returned.

  Args:
    dataframe (dataframe): dataframe whose last row decides which columns are kept
    max_days (float, optional): largest last-row value allowed; None (default) disables this check

  Returns:
    df (dataframe): the same dataframe object without the dropped columns
  """
  if dataframe.empty:
    # There is no last row to inspect, so nothing can be classified.
    return dataframe
  stale = []
  # Decide on every column first and drop afterwards: removing columns while
  # walking over their positions shifts the remaining ones, which skips the
  # next column and can run past the end of the frame.
  for position in range(1, len(dataframe.columns) - 1):
    latest = dataframe.iloc[-1, position]
    if latest == -1 or (max_days is not None and latest > max_days):
      stale.append(dataframe.columns[position])
  dataframe.drop(columns=stale, inplace=True)
  return dataframe
# Drop the unvisited supplier columns, then remove the helper 'time_diff' column from both frames.
days_diff = remove_unvisited(days_diff)
days_diff.pop('time_diff')
multi_hot.pop('time_diff')

In [None]:
def supplier_tokenizer(supplier):
  """
  Fits a Keras Tokenizer on the supplier codes

  Args:
    supplier (iterable of str): supplier codes to index

  Returns:
    index (dict): the fitted tokenizer's word_index
    sequence (list): result of texts_to_sequences for the same supplier codes
  """
  fitted = Tokenizer()
  fitted.fit_on_texts(supplier)
  return fitted.word_index, fitted.texts_to_sequences(supplier)

#build the index from every column name after the first one
supplier_index, supplier_sequence = supplier_tokenizer(multi_hot.columns[1:])

print(supplier_index)





In [None]:
#rename columns to supplier codes from supplier index
def rename_columns(dataframe):
  """
  Renames every column except the first to its entry in supplier_index

  Each column name is lower-cased and looked up in the global supplier_index;
  the value found becomes the new column name. The input is not modified.

  Args:
    dataframe (dataframe): dataframe whose columns after the first are string names present in supplier_index

  Returns:
    df (dataframe): a new dataframe with the renamed columns
  """
  new_names = {name: supplier_index[name.lower()] for name in dataframe.columns[1:]}
  return dataframe.rename(columns=new_names)
renamed = rename_columns(multi_hot)
renamed

In [None]:
#convert dataframe to tensor
def convert_to_tensor(dataframe):
  """
  Converts the dataframe to a TensorFlow tensor

  Args:
    dataframe (dataframe): dataframe to convert

  Returns:
    tensor (tensor): result of tf.convert_to_tensor on the dataframe
  """
  return tf.convert_to_tensor(dataframe)
#from here on `renamed` refers to a tensor, no longer to a dataframe
renamed = convert_to_tensor(renamed)
renamed

In [None]:
#split first column of tensor into features and rest into labels
def split_tensor(dataframe):
  """
  Splits a 2-D tensor column-wise into features and labels

  Args:
    dataframe (tensor): 2-D tensor (or array) indexed as [row, column]

  Returns:
    features (tensor): the first column
    labels (tensor): all remaining columns
  """
  first_column, remaining_columns = dataframe[:, 0], dataframe[:, 1:]
  return first_column, remaining_columns
# features: first column of `renamed`; labels: every remaining column.
features, labels = split_tensor(renamed)
features


In [None]:
#create tensor dataset
def create_dataset(features, labels):
  """
  Pairs features and labels in a tf.data dataset, one element per row

  Args:
    features (tensor): tensor containing the features
    labels (tensor): tensor containing the labels

  Returns:
    dataset (object): dataset built by tf.data.Dataset.from_tensor_slices
  """
  paired = (features, labels)
  return tf.data.Dataset.from_tensor_slices(paired)
dataset = create_dataset(features, labels)
dataset


# Model Architecture

In [None]:
#create a model with variable learning rate
def create_model(learning_rate):
  """
  Builds and compiles a dense network for the given learning rate

  The network takes one input value, has three hidden layers of 64 ReLU units
  and a sigmoid output layer with one unit per entry of the global supplier_index.
  It is compiled with the Adam optimizer, binary cross-entropy loss and the
  accuracy metric.

  Args:
    learning_rate (float): learning rate passed to the Adam optimizer

  Returns:
    model (object): the compiled instance of the Sequential class
  """
  dense = tf.keras.layers.Dense
  network = tf.keras.Sequential([
    dense(64, activation='relu', input_shape=(1,)),
    dense(64, activation='relu'),
    dense(64, activation='relu'),
    dense(len(supplier_index), activation='sigmoid'),
  ])
  adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  network.compile(optimizer=adam, loss='BinaryCrossentropy', metrics=['accuracy'])
  return network


In [None]:
#compile one model per candidate learning rate
learning_rates = [0.001, 0.01, 0.1, 0.2, 0.3]
models = []
for learning_rate in learning_rates:
  models.append(create_model(learning_rate))

#train every model for 10 epochs and keep its History so the learning rates can be compared
# NOTE(review): train_dataset and val_dataset are not defined anywhere in the visible
# notebook (only `dataset` is created) -- they must be defined before this cell runs.
histories = []
for model in models:
  history = model.fit(train_dataset.batch(1), epochs=10, validation_data=val_dataset.batch(1), verbose=1)
  histories.append(history)


# Analysis

In [None]:
#plot the loss and accuracy for each learning rate
def plot_loss(history):
  """
  Plots the training and validation loss per epoch on the current figure

  Only the 'loss' and 'val_loss' series are drawn. The caller is expected to
  add a title and call plt.show().

  Args:
    history (object): an instance of the History class with 'loss' and 'val_loss' entries

  Returns:
    None
  """
  plt.plot(history.history['loss'], label='loss')
  plt.plot(history.history['val_loss'], label='val_loss')
  plt.xlabel('Epoch')
  # The previous label 'Error [MPG]' came from an unrelated example; the curves are losses.
  plt.ylabel('Loss')
  plt.ylim([0, 10])
  plt.legend()
  plt.grid(True)

#one figure per training run, titled with the learning rate at the same position
for i, run_history in enumerate(histories):
    plot_loss(run_history)
    plt.title('Learning Rate: ' + str(learning_rates[i]))
    plt.show()



In [None]:
#train a fresh model for 100 epochs with the chosen learning rate
# NOTE(review): the learning rate is hard-coded here, not derived from `histories`.
# NOTE(review): train_dataset and val_dataset are not defined in the visible notebook.
best_learning_rate = 0.001
best_model = create_model(best_learning_rate)
best_model.fit(train_dataset.batch(1), epochs=100, validation_data=val_dataset.batch(1), verbose=1)


In [None]:
#plot the training and validation loss of the best model
# best_model.history is passed to plot_loss, which reads its 'loss' and 'val_loss' entries.
plot_loss(best_model.history)
plt.title('Best Model')
plt.show()

In [None]:
#evaluate the best model on the test dataset and print its accuracy
# NOTE(review): test_dataset is not defined in the visible notebook.
loss, accuracy = best_model.evaluate(test_dataset.batch(1), verbose=1)
print("Accuracy", accuracy)


In [None]:
#predict the next visit for each supplier
def predict_next_visit(dataframe):
  """
  Replaces the values after the first column in the last row with the model's predictions

  The first-column value of the last row is passed to the global best_model as
  a single sample with one feature. The returned vector overwrites the
  remaining columns of that row. The input dataframe is not modified.

  Args:
    dataframe (dataframe): first column holds the model input, the remaining columns correspond one-to-one to the model outputs

  Returns:
    df (dataframe): copy of the dataframe whose last row holds the predictions

  Raises:
    ValueError: if the dataframe has no rows, or the number of model outputs differs from the number of columns after the first
  """
  if len(dataframe) == 0:
    raise ValueError("dataframe must contain at least one row")
  output_columns = dataframe.columns[1:]
  # Cast the output columns to float first (astype returns a copy) so the
  # floating-point predictions are not forced into integer columns.
  result = dataframe.astype({column: 'float64' for column in output_columns})
  # predict expects a batch, not a scalar: one sample with one feature -> shape (1, 1).
  model_input = result.iloc[[-1], [0]].to_numpy()
  # Predict once and write the whole row, instead of predicting once per column.
  prediction = best_model.predict(model_input)[0]
  if len(prediction) != len(output_columns):
    raise ValueError(
      "model returned %d outputs but the dataframe has %d output columns"
      % (len(prediction), len(output_columns))
    )
  result.iloc[-1, 1:] = prediction
  return result
# `renamed` was rebound to a tensor earlier in the notebook, so rebuild the
# renamed dataframe from multi_hot before predicting.
predicted = predict_next_visit(rename_columns(multi_hot))
predicted
