## Load data & libraries

In [3]:
import xgboost as xgb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from bayes_opt import BayesianOptimization

In [25]:
# Load the raw sales data; the path is relative to the notebook's working directory.
df = pd.read_csv("../dataset/interview-test.csv")
# Preview the first three rows.
df.head(3)

Unnamed: 0,salesdate,menuid,menuname,menu_group,qty_total
0,2022-10-09,518,Premium Beef Deal,0,1
1,2022-10-09,518,Premium Beef Deal,0,1
2,2022-10-09,518,Premium Beef Deal,0,1


- salesdate: the day the sale happened
- menuid: identifier of the menu item (note: the same menuname is not guaranteed to always have the same menuid)
- menuname: name of the menu item (FS means flash sale, B1G1 means buy 1 get 1, etc.)
- qty_total: total quantity of the menu item ordered

In [33]:
# Column dtypes and non-null counts.
# NOTE(review): the saved output below shows a DatetimeIndex and only 4 columns, i.e. this
# cell was last executed after "salesdate" had been set as the index in a later cell. On a
# fresh top-to-bottom run salesdate is still a regular column at this point.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8902 entries, 2022-10-09 to 2022-12-02
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   menuid      8902 non-null   int64 
 1   menuname    8902 non-null   object
 2   menu_group  8902 non-null   object
 3   qty_total   8902 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 347.7+ KB


In [34]:
# Summary statistics for the object, integer and float columns, transposed so that each
# column of df becomes one row of the summary.
# NOTE(review): the saved output has no salesdate row, so it was also produced after
# salesdate was moved into the index by a later cell.
df.describe(include = ['object','int','float']).T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
menuid,8902.0,,,,457.300719,238.769876,1.0,234.0,515.0,638.0,994.0
menuname,8902.0,370.0,Plastic Spoon,191.0,,,,,,,
menu_group,8902.0,63.0,0,4665.0,,,,,,,
qty_total,8902.0,,,,1.132105,0.609869,1.0,1.0,1.0,1.0,21.0


In [27]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(8902, 5)

## Preprocessing data for EDA

In [28]:
# Distinct values of the menu_group and menuname columns.
menu_grup_unique = df["menu_group"].unique()
menu_name_unique = df["menuname"].unique()

In [29]:
# menu_dict = {menu_group: [menuname, ...]}
# Used to see how many different menu names belong to each menu_group.
#
# Built with a single groupby instead of a temporary tuple column plus nested loops, so
# `df` is never modified by this cell.
#   * sort=False keeps the groups in order of first appearance in df.
#   * .unique() keeps each group's names in order of first appearance.
#   * dropna=False keeps rows whose menu_group is missing instead of discarding them.
# Together this yields the same keys, values and ordering as inserting every distinct
# (menu_group, menuname) pair one by one.
menu_dict = {
    group: list(names)
    for group, names in (
        df.groupby("menu_group", sort=False, dropna=False)["menuname"].unique().items()
    )
}

In [30]:
# Show every menu_group together with the menu names collected for it, one group per line.
# print() already terminates each line with a newline, so no explicit `end` is needed.
for menu_group, menu_name in menu_dict.items():
    print(f"{menu_group} : {menu_name}")

0 : ['Premium Beef Deal', '[FS] Mac and Cheese', 'Plastic Spoon', '[FS] Sei Ayam Regular', 'Sambal Belimbing Wuluh', 'Sei Ayam Regular', 'Truffle Oil', 'Free Mac and Cheese', 'Caramelised Butter Chicken Rice', 'Premium Beef Deal (20K)', 'Plain Rice', 'JUJUTIZEN SET', 'Juju Daebak', 'Korean Seasoned Rice', 'Honey Butter Soy', 'Samyang', 'Seaweed Pepper', 'JUJU PARTY', 'Garlic Parmesan', 'Chefs Special', 'Voucher Red Rider Set', 'RED RIDER BUNDLE', 'Teriyaki Gyudon', 'Sayur Singkong + Telur Barendo - Longboard', 'Ninniku Gyudon', 'Sei Sultan Embe Regular', 'Sayur Daun Singkong + Telur Barendo - Small', 'Double Sayur Daun Singkong - Small', 'NaSei Goreng Pedas', 'Truffle Duo', 'Tambah 20RB dapat RYUJIN BEEF BOWL', 'Tambah 15RB dapat SEKFAN CHICKEN RICE BOWL', 'Sekfan Weekly Best Seller', 'Ryujin weekly Best Seller', '(ALC)Caramelised Butter Chicken', 'Legit Group Care', 'Sayur Daun Singkong + Telur Mata Sapi', 'Sambal Korek', 'Sambal Matah | Level 2 Judes Nagih', 'Tahu Crispy Cereal', 'Ma

- Berdasarkan data pada variabel <b>menu_dict</b>, secara keseluruhan data pada kolom <b>menu_group</b> sudah mewakili jenis menu makanan itu sendiri dan pada data <b>menu_name</b> merupakan tambahan dari menu yang terdapat pada <b>menu_group</b> seperti adanya <i>flash sale</i>,  <i>buy 1 get 1</i>, tambahan porsi, dll. Oleh karena itu, perancangan model <i>demand forecasting</i> dilakukan pada <b>menu_group</b>
- Based on the data in the <b>menu_dict</b> variable, the <b>menu_group</b> column overall already represents the type of food menu itself, while <b>menu_name</b> adds detail to the menus in <b>menu_group</b>, such as <i>flash sale</i>, <i>buy 1 get 1</i>, <i>extra portion</i>, etc. Therefore, the <i>demand forecasting</i> model is designed at the <b>menu_group</b> level.

In [31]:
# Parse the "YYYY-MM-DD" salesdate strings into datetimes and make them the index of df.
# The guard makes the cell safe to re-run: once salesdate is the index it is no longer a
# column, so an unguarded second execution would raise KeyError on df["salesdate"].
# If salesdate is neither the index nor a column, the KeyError is still raised.
if df.index.name != "salesdate":
    df["salesdate"] = pd.to_datetime(df["salesdate"], format="%Y-%m-%d")
    df.set_index("salesdate", inplace=True)

## EDA