In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from os.path import join
import matplotlib.pyplot as plt
import datetime as dt
import re


## Local Load

In [4]:
# path = join(os.getcwd(), "data")
offline_raw = pd.read_excel("./9._offline_total.xlsx")

## Colab Load

In [5]:
# from google.colab import drive
# drive.mount('/content/drive')

# # 데이터 불러오기
# offline_df = pd.read_excel('/content/drive/MyDrive/9. offline_total.xlsx')

# plt.rcParams['font.family'] = 'AppleGothic' # 폰트 변경
# plt.rcParams['axes.unicode_minus'] = False # 축 값 마이너스 깨짐 해결

In [6]:
def convert_to_nan(data):
    
    df = data.copy()
    
    columns = df.columns.to_list()
    for col in columns:
        df[col] = df[col].apply(lambda x: np.nan if x == "-" else x)
    
    return df


def drop_columns(data):
    
    df = data.copy()
    
    rm_columns = ["온라인 스토어", "사용 포인트", "적립 포인트", "사용 선불권", "배달팁(매출 포함x)", "결제메모", "주문 채널"]
    df = df.drop(rm_columns, axis = 1)
    
    return df


def date_conversion(data):
    
    df = data.copy()
    
    df["결제일시"] = df.loc[:, "결제일"] + " " + df.loc[:, "결제시간"]
    df["결제일시"] = pd.to_datetime(df["결제일시"])
    df["year"] = df["결제일시"].apply(lambda x: x.year)
    df["month"] = df["결제일시"].apply(lambda x: x.month)
    df["day"] = df["결제일시"].apply(lambda x: x.day)
    df["hour"] = df["결제일시"].apply(lambda x: x.hour)
    df["day_name"] = df["결제일시"].apply(lambda x: x.day_name())
    df["year_month"] = pd.to_datetime(df["결제일"]).dt.strftime("%Y-%m")
    
    return df


def add_weekend(data):
    
    df = data.copy()
    
    # 평일, 주말 구분
    # 0 = 평일, 1 = 주말
    df["is_weekend"] = df["day_name"].apply(lambda x: 1 if (x == "Sunday") | (x == "Saturday") else 0)
    
    return df


def add_season(data):
    
    df = data.copy()
    
    # 계절 추가하기
    # 봄(3~5월) = 1, 여름(6~8월) = 2, 가을(9~11월0) = 3, 겨울(12~2월) = 4
    seasons = [1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1]
    season_dict = dict(zip(range(1,13), seasons))
    df["season"] = df["month"].map(season_dict)

    return df


def add_holiday(data):
    
    df = data.copy()
    
    conditionlist = [
        (df['결제일'] == '2022-03-01') | (df['결제일'] == '2022-05-05') | (df['결제일'] == '2022-05-08') |
        (df['결제일'] == '2022-06-06') | (df['결제일'] == '2022-08-15') | (df['결제일'] == '2022-09-09') |
        (df['결제일'] == '2022-09-10') | (df['결제일'] == '2022-09-11') | (df['결제일'] == '2022-10-09') |
        (df['결제일'] == '2022-10-03') | (df['결제일'] == '2022-12-25') | 
        (df['결제일'] == '2023-01-01') | (df['결제일'] == '2023-01-21') | (df['결제일'] == '2023-01-22') |
        (df['결제일'] == '2023-01-23') | (df['결제일'] == '2023-03-01') | (df['결제일'] == '2023-05-05') |
        (df['결제일'] == '2023-05-26') | (df['결제일'] == '2023-06-06')]

    choicelist = [1]
    df['is_holiday'] = np.select(conditionlist, choicelist, default= 0)
    df["weekend_n_holiday"] = df["is_weekend"] + df["is_holiday"]
    
    return df

def drop_row(data):
    
    df = data.copy()
    df["상품명"] = df["상품명"].apply(lambda x: re.sub(r"\s", "", x))
    
    drop_lst = ['야외',
                '포장',
                '무료시음권', 
                '캐리어',
                '종이백',
                '포크',
                '⚪️',
                '⚪', # 위 emoji 와 별개
                '일회용컵',
               ]

    custom_lst = ['덜달게',
                  '1샷추가', 
                  '오틀리', 
                  '연하게', 
                  '시럽', 
                  '얼음적게',
                  '오트사이드', 
                  '물적게', 
                  '바닐라시럽', 
                  '2샷추가',
                  '얼음X',
                  '샷추가',
                 ]

    idx = df.loc[df["상품명"].str.contains("|".join(drop_lst)),"상품명"].index
    df = df.drop(idx, axis = 0).reset_index(drop = True)
    
    idx = df.loc[df["상품명"].str.contains("|".join(custom_lst)),"상품명"].index
    df.loc[idx, "카테고리"] = "커스텀"
    
    return df


In [7]:
def preprocess_productname(data):
    tmp = data.copy()
    tmp["카테고리"] = tmp["카테고리"].apply(lambda x: re.sub(r"\s", "", x))
    pattern = r'\s*_\s*'
    
    
    # 카테고리 - basic_ice
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"플랫_", "플랫화이트_", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"템플", "I", x))

    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"][tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
        
    # 카테고리 - basic
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(r"\s|\(H\)", "", x))
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains("아메리카노|카페라떼|플랫화이트|카푸치노|바닐라라떼")].index
    tmp.loc[idx, "상품명"] = tmp.loc[idx, "상품명"].apply(lambda x: "(H)"+x)
    
    
    # 카테고리 - 시그니처
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"아이스텐라", "아이스텐저린라떼", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"유자아메리카노|아이스유자아메리카노", "아이스유자아메리카노", x))
    
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"][tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    tmp.loc[tmp["상품명"] == "텐저린카푸치노", "상품명"] = "텐저린카푸치노_쥬시"
    tmp.loc[tmp["상품명"] == "아이스텐저린라떼", "상품명"] = "아이스텐저린라떼_쥬시"
    tmp.loc[tmp["상품명"] == "아이스유자아메리카노", "상품명"] = "아이스유자아메리카노_쥬시"
        
    # 카테고리 - beverage
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    
    rename_dict = {"차가운어린이우유": "(I)어린이우유",
                   "따뜻한어린이우유": "(H)어린이우유",
                   
                   "얼그레이밀크티": "(H)얼그레이밀크티",
                   
                   "제주유기농귤피주스" : "(I)제주유기농귤피주스",
                   "문경선암리사과주스" : "(I)문경선암리사과주스",
                   "제주유기농감귤주스" : "(I)제주유기농감귤주스",
                   "어린이감귤주스" : "(I)어린이감귤주스",
                   
                   "시나몬플럼" : "(H)시나몬플럼",
                   "트로피칼루이보스" : "(H)트로피칼루이보스",
                   "카모마일" : "(H)카모마일"
                  }
    
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x: rename_dict[x] if x in rename_dict.keys() else x)
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].value_counts()
    
    
    # 카테고리 - 디저트
    tmp.loc[tmp["카테고리"] == "디저트", "상품명"] = tmp.loc[tmp["카테고리"] == "디저트", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 - 블랜딩원두
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 = 세트
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub(r"\s", '', x))
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub("Set.", "", x))
    
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "세트", "상품명"][tmp.loc[tmp["카테고리"] == "세트", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"] = tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"].apply(lambda x : "(H)" + x)
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x: "Set_" + x)
    
    
    # 카테고리 - 드립백/캡슐
    tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"] = tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 - 에스프레소
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_", "스페셜_", "샘플_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"][tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])

    # 카테고리 - 핸드드립, 싱글원두
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].str.strip()
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\)\s', ")", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\s*:\s*', "_", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\s+', "_", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(강배전\)|\(강\)', "강배전", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(중강배전\)|\(중\)', "중강배전", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(디카프\)', "디카프", x))
    
    tmp["상품명_원산지"] = tmp["상품명"].copy()   
    idx = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"][tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.contains("ㅡ")].index
    tmp = tmp.drop(idx, axis = 0).reset_index(drop = True)

    rename_dict = {"디카페인 우일라 200g" : "디카페인 콜롬비아 우일라 200g",
                   "디카페인콜롬비아 리치 200g" : "디카페인 콜롬비아 리치 200g",
                   "엘리다 카투아이 100g" : "파나마 엘리다 카투아이 100g",
                   "엘리다 카투아이 ASD 100g" : "파나마 엘리다 카투아이 100g",
                   "엘파라이소 디카프 100g" : "콜롬비아 엘파라이소 디카프 100g",
                   "엘파라이소 리치 100g" : "콜롬비아 엘파라이소 리치 100g",
                   "엘파라이소 리치" : "콜롬비아 엘파라이소 리치",
                   "부산제 200g" : "르완다 부산제 200g",
                   "르완다부산제 200g" : "르완다 부산제 200g",
                   "에콰100g" : "에콰도르 100g",
                   "세로아줄 게이샤" : "콜롬비아 세로아줄 게이샤",
                   "페루게이샤" : "페루 게이샤",
                   "페루게이샤 100g" : "페루 게이샤 100g",
                   "니카라과강배전" : "니카라과 강배전",
                   "케냐키티투 200g" : "케냐 키티투 200g",
                   "케냐캄왕기. 200g" : "케냐 캄왕기 200g",
                   "쿠쿠세" : "에티오피아 쿠쿠세",
                   "(할인) 케냐카루만디 200g" : "(할인) 케냐 카루만디 200g",
                   "니카라과핀카케냐바티안" : "니카라과 핀카케냐바티안",
                   "온다라스 엘 케브라초 파라이네마 200g" : "온두라스 엘 케브라초 파라이네마 200g",
                   "페루엘사포테 200g" : "페루 엘사포테 200g",
                   "니카라과리틀 레드 200g" : "니카라과 리틀 레드 200g",
                   "(디카페인)콜롬비아 리치 200g" : "(디카페인) 콜롬비아 리치 200g",
                   "(디카페인)콜롬비아 리치 100g" : "(디카페인) 콜롬비아 리치 100g",
                   "콜룸비니 엘 파라이소 리치 100g" : "콜롬비아 엘 파라이소 리치 100g",
                   "(할인) 디카프 / 콜롬비아 엘 파라이소 리치" : "(할인) 디카프 콜롬비아 엘 파라이소 리치",
                   "[로우카페인] 시티트래블러" : "시티트래블러 로우카페인",

                   "(I)디카프_에티오피아" : "(I)에티오피아_디카프",
                   "(H)디카프_에티오피아" : "(H)에티오피아_디카프",
                   "(H)과테_레드_파카마라" : "(H)과테말라_레드_파카마라",
                   "(I)과테_레드_파카마라" : "(I)과테말라_레드_파카마라",
                   "(I)과테말라엘모리또" : "(I)과테말라_엘모리또",
                   "(H)과테말라엘모리또" : "(H)과테말라_엘모리또",
                   "(H)케냐띠리쿠" : "(H)케냐_띠리쿠",
                   "(H)콰트로_콜롬비아" : "(H)콜롬비아_콰트로",
                   "(I)콰트로_콜롬비아" : "(I)콜롬비아_콰트로",
                   "(H)디카페인_콜롬비아" : "(H)콜롬비아_디카페인",
                   "(I)디카페인_콜롬비아" : "(I)콜롬비아_디카페인",
                   "(H)디카페인_니카라과" : "(H)니카라과_디카페인",
                   "(I)디카페인_니카라과" : "(I)니카라과_디카페인",
                   "(I)디카페인_에티오피아" : "(I)에티오피아_디카페인",
                   "(H)디카페인_에티오피아" : "(H)에티오피아_디카페인",
                   "(I)오늘의커피" : "(I)오늘의_커피",
                   "(H)오늘의커피" : "(H)오늘의_커피"
                }
    
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.strip()
    tmp.loc[:, "상품명_원산지"] = tmp.loc[:, "상품명_원산지"].replace(rename_dict)
    
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x : re.sub("예맨", "예멘", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x: x.split("_")[0])
    
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: re.sub(pattern, " ", x))
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub("[()]", "", x)) 
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub(r"_?[0-9]*g|할인|강배전", "", x))
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.strip()
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub(r"\s", "_", x))
    
    lst = ["디카페인_", "디카프_", "콰트로_"]
    for i in lst:
        tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: x[len(i):] + "_" + i[:-1] if x[:len(i)] == i else x)
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: x.split("_")[0])

    
    return tmp

In [88]:
pd.options.display.max_columns = None

# offline_raw = pd.read_excel(join(path, "9. offline_total.xlsx"))
offline_df = convert_to_nan(offline_raw)

offline_df = drop_columns(offline_df)
offline_df = date_conversion(offline_df)
offline_df = add_weekend(offline_df)
offline_df = add_season(offline_df)
offline_df = add_holiday(offline_df)
offline_df = preprocess_productname(offline_df)
offline_df = drop_row(offline_df)

offline_df



Unnamed: 0,결제일,결제시간,결제내역,합계,상품별 할인,결제 할인,카드 결제,현금 결제,간편 결제,기타 결제,환불,환불 일시,카테고리,상품명,옵션,수량,상품별 단가,상품별 합계,결제일시,year,month,day,hour,day_name,year_month,is_weekend,season,is_holiday,weekend_n_holiday,상품명_원산지
0,2022-02-07,19:28:53,샘플 캐모마일,,,,,,,,4500.0,2022-02-07 19:29:37,에스프레소,캐모마일_샘플,,1,4500,,2022-02-07 19:28:53,2022,2,7,19,Monday,2022-02,0,1,0,0,캐모마일_샘플
1,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(I)콜롬비아_로꼬_소르베,,1,10500,10500.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,콜롬비아
2,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)니카라과_COE#1,,1,12000,12000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,니카라과
3,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)과테_레드_파카마라,,1,10000,10000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,과테말라
4,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,시그니처,아이스텐저린라떼_쥬시,,1,7000,7000.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0,아이스텐저린라떼_쥬시
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140770,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,드립백/캡슐,드립백_쥬시,,1,18000,18000.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0,드립백_쥬시
140771,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,드립백/캡슐,드립백_클래식,,1,18000,18000.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0,드립백_클래식
140772,2023-05-31,17:22:16,(KCW) 기념 뱃지,6000.0,,,6000.0,,,,,,MD,(KCW)기념뱃지,,1,6000,6000.0,2023-05-31 17:22:16,2023,5,31,17,Wednesday,2023-05,0,2,0,0,(KCW) 기념 뱃지
140773,2023-05-31,17:24:37,(I) 오미자 에이드 외 1건,14000.0,,,14000.0,,,,,,비버리지,(I)오미자에이드,,1,7000,7000.0,2023-05-31 17:24:37,2023,5,31,17,Wednesday,2023-05,0,2,0,0,(I)오미자에이드


# apriori

휘낭시에 쌍 포함, 휘낭시에 쌍 제거
* 전체(밑에 있는 명단 전부)
* 커피류(핸드드립 제외, 비버리지 제외)
* 커피류 + 디저트
* 시그니처
* 시그니처 + 디저트
* 에스프레소
* 에스프레소 + 디저트
* 베이직(베이직 아이스 포함)
* 베이직 + 디저트
* 비버리지
* 비버리지 + 디저트


In [11]:
# 커피류('시그니처','Basic_ice','에스프레소','Basic')

In [128]:
offline_df['카테고리2'] = offline_df['카테고리'].replace({'시그니처' : '시그니처', 'Basic_ice' : '베이직', 
                                                     '에스프레소' : '에스프레소', 'Basic' : '베이직'})

In [129]:
offline_df2 = offline_df[offline_df['카테고리2'].isin(['에스프레소', '비버리지', '시그니처', '디저트', '베이직'])]

In [130]:
menu_df_set = pd.read_csv('./menu_repl_df.csv')

In [131]:
offline_df2 = pd.merge(offline_df2, menu_df_set[['상품명', '상품재분류']], how = 'left')

### 1. 휘낭시에 쌍 포함

In [132]:
from matplotlib.colors import LinearSegmentedColormap
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules


In [133]:
records = offline_df2[["결제일시", "상품재분류", "카테고리"]]
records = records.dropna()
records.loc[records["카테고리"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리"] == "핸드드립", "상품재분류"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리"] == "핸드드립", "상품재분류"].apply(lambda x : x.split("_")[0])
records.loc[records["카테고리"] == "핸드드립", "상품재분류"].value_counts()


records = records.groupby("결제일시").agg({"상품재분류" : lambda x : list(x)}).reset_index(drop = True)


records = records[records["상품재분류"].apply(lambda x : True if len(x) > 1 else False)]

In [134]:
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품재분류"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)
te_df

Unnamed: 0,바닐라라떼,브라우니,슈퍼클린,아메리카노,에스프레소,유자아메리카노,잠봉뵈르,치즈케이크,카페라떼,카페루이지,카푸치노,텐저린라떼,텐저린카푸치노,프레도,플래터,플랫화이트,휘낭시에
0,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False
1,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False
2,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False
3,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False
4,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32455,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False
32456,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
32457,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False
32458,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False


In [135]:
itemset = apriori(te_df,
                  min_support=0.005, 
                  max_len=5, 
                  use_colnames=True, 
                  verbose=1,
                 )
itemset['length'] = itemset['itemsets'].map(lambda x: len(x))
itemset = itemset.sort_values(by = 'support',ascending=False)
itemset

Processing 210 combinations | Sampling itemset size 2Processing 693 combinations | Sampling itemset size 3Processing 144 combinations | Sampling itemset size 4


Unnamed: 0,support,itemsets,length
11,0.438078,(텐저린라떼),1
3,0.400955,(아메리카노),1
5,0.260967,(유자아메리카노),1
2,0.224368,(슈퍼클린),1
12,0.219778,(텐저린카푸치노),1
...,...,...,...
87,0.005699,"(아메리카노, 카페라떼, 치즈케이크)",3
54,0.005545,"(카페라떼, 잠봉뵈르)",2
76,0.005299,"(브라우니, 카페라떼, 아메리카노)",3
82,0.005176,"(텐저린카푸치노, 슈퍼클린, 아메리카노)",3


In [136]:
from mlxtend.frequent_patterns import association_rules
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
6,(카페루이지),(슈퍼클린),0.067468,0.224368,0.027973,0.414612,1.847906,0.012835,1.324987
7,(슈퍼클린),(카페루이지),0.224368,0.067468,0.027973,0.124674,1.847906,0.012835,1.065354
23,"(아메리카노, 치즈케이크)",(브라우니),0.041497,0.108688,0.007425,0.178916,1.64615,0.002914,1.085531
24,(브라우니),"(아메리카노, 치즈케이크)",0.108688,0.041497,0.007425,0.068311,1.64615,0.002914,1.028779
25,(치즈케이크),"(브라우니, 아메리카노)",0.115465,0.04276,0.007425,0.064301,1.503753,0.002487,1.023021
22,"(브라우니, 아메리카노)",(치즈케이크),0.04276,0.115465,0.007425,0.173631,1.503753,0.002487,1.070387
8,(브라우니),(치즈케이크),0.108688,0.115465,0.018638,0.171485,1.485169,0.006089,1.067615
9,(치즈케이크),(브라우니),0.115465,0.108688,0.018638,0.161419,1.485169,0.006089,1.062882
31,(치즈케이크),"(텐저린라떼, 브라우니)",0.115465,0.04045,0.006624,0.057364,1.418152,0.001953,1.017943
28,"(텐저린라떼, 브라우니)",(치즈케이크),0.04045,0.115465,0.006624,0.163747,1.418152,0.001953,1.057736


In [137]:
records = offline_df2[["결제일시", "상품재분류", "카테고리2"]]
records = records.dropna()
records.loc[records["카테고리2"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리2"] == "핸드드립", "상품재분류"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리2"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리2"] == "핸드드립", "상품재분류"].apply(lambda x : x.split("_")[0])


records = records.groupby("결제일시").agg({"카테고리2" : lambda x : '_'.join(sorted(list(set(x))))}).reset_index()
records = records[records["카테고리2"].apply(lambda x : True if len(x) > 5 else False)]
records['결제시간'] = records['결제일시'].map(lambda x: x.hour)

In [138]:
records.카테고리2.value_counts()

베이직_시그니처              6627
시그니처_에스프레소            3571
디저트_시그니처              3205
디저트_베이직               2742
디저트_베이직_시그니처          1982
베이직_에스프레소             1683
디저트_시그니처_에스프레소         880
베이직_시그니처_에스프레소         704
디저트_에스프레소              541
디저트_베이직_에스프레소          447
디저트_베이직_시그니처_에스프레소     178
Name: 카테고리2, dtype: int64

### 시간대별 동시 판매 품목

In [139]:
records['주말여부'] = records['결제일시'].map(lambda x : x.dayofweek in [6, 0])
records.groupby(['주말여부', '카테고리2']).count().unstack()

Unnamed: 0_level_0,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간
카테고리2,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소
주말여부,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
False,1745,1245,111,294,2156,584,379,4346,488,1150,2435,1745,1245,111,294,2156,584,379,4346,488,1150,2435
True,997,737,67,153,1049,296,162,2281,216,533,1136,997,737,67,153,1049,296,162,2281,216,533,1136


In [140]:
records.groupby(['결제시간', '카테고리2']).count().unstack()

Unnamed: 0_level_0,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부,주말여부
카테고리2,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소
결제시간,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
9,53.0,26.0,4.0,11.0,79.0,24.0,13.0,93.0,11.0,20.0,45.0,53.0,26.0,4.0,11.0,79.0,24.0,13.0,93.0,11.0,20.0,45.0
10,246.0,210.0,31.0,46.0,338.0,126.0,76.0,597.0,89.0,204.0,407.0,246.0,210.0,31.0,46.0,338.0,126.0,76.0,597.0,89.0,204.0,407.0
11,262.0,203.0,20.0,48.0,294.0,107.0,56.0,785.0,80.0,215.0,427.0,262.0,203.0,20.0,48.0,294.0,107.0,56.0,785.0,80.0,215.0,427.0
12,359.0,263.0,34.0,74.0,460.0,119.0,97.0,1135.0,119.0,222.0,534.0,359.0,263.0,34.0,74.0,460.0,119.0,97.0,1135.0,119.0,222.0,534.0
13,463.0,322.0,28.0,66.0,473.0,115.0,68.0,1154.0,108.0,261.0,553.0,463.0,322.0,28.0,66.0,473.0,115.0,68.0,1154.0,108.0,261.0,553.0
14,455.0,345.0,24.0,73.0,515.0,127.0,77.0,1072.0,113.0,258.0,523.0,455.0,345.0,24.0,73.0,515.0,127.0,77.0,1072.0,113.0,258.0,523.0
15,439.0,304.0,21.0,59.0,550.0,123.0,73.0,839.0,95.0,238.0,436.0,439.0,304.0,21.0,59.0,550.0,123.0,73.0,839.0,95.0,238.0,436.0
16,322.0,230.0,11.0,44.0,351.0,96.0,54.0,620.0,47.0,159.0,376.0,322.0,230.0,11.0,44.0,351.0,96.0,54.0,620.0,47.0,159.0,376.0
17,122.0,69.0,5.0,20.0,127.0,37.0,24.0,286.0,38.0,93.0,220.0,122.0,69.0,5.0,20.0,127.0,37.0,24.0,286.0,38.0,93.0,220.0
18,21.0,10.0,,6.0,18.0,6.0,3.0,46.0,4.0,13.0,50.0,21.0,10.0,,6.0,18.0,6.0,3.0,46.0,4.0,13.0,50.0


### 2. 휘낭시에 쌍 제거 

In [141]:
records = offline_df2[offline_df2['상품재분류'] != '휘낭시에'][["결제일시", "상품재분류", "카테고리"]]
records = records.dropna()
records.loc[records["카테고리"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리"] == "핸드드립", "상품재분류"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리"] == "핸드드립", "상품재분류"].apply(lambda x : x.split("_")[0])
records.loc[records["카테고리"] == "핸드드립", "상품재분류"].value_counts()


records = records.groupby("결제일시").agg({"상품재분류" : lambda x : list(x)}).reset_index(drop = True)


records = records[records["상품재분류"].apply(lambda x : True if len(x) > 1 else False)]

In [142]:
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품재분류"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)
te_df

Unnamed: 0,바닐라라떼,브라우니,슈퍼클린,아메리카노,에스프레소,유자아메리카노,잠봉뵈르,치즈케이크,카페라떼,카페루이지,카푸치노,텐저린라떼,텐저린카푸치노,프레도,플래터,플랫화이트
0,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False
1,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True
2,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True
3,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False
4,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31168,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False
31169,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False
31170,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True
31171,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False


In [143]:
itemset = apriori(te_df,
                  min_support=0.005, 
                  max_len=5, 
                  use_colnames=True, 
                  verbose=1,
                 )
itemset['length'] = itemset['itemsets'].map(lambda x: len(x))
itemset = itemset.sort_values(by = 'support',ascending=False)
itemset

Processing 182 combinations | Sampling itemset size 2Processing 552 combinations | Sampling itemset size 3Processing 156 combinations | Sampling itemset size 4


Unnamed: 0,support,itemsets,length
11,0.447599,(텐저린라떼),1
3,0.410483,(아메리카노),1
5,0.267924,(유자아메리카노),1
2,0.230745,(슈퍼클린),1
12,0.223527,(텐저린카푸치노),1
...,...,...,...
62,0.005325,"(텐저린라떼, 카푸치노)",2
84,0.005197,"(텐저린라떼, 아메리카노, 플랫화이트)",3
66,0.005165,"(텐저린라떼, 슈퍼클린, 브라우니)",3
53,0.005165,"(카페루이지, 치즈케이크)",2


In [144]:
from mlxtend.frequent_patterns import association_rules
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
7,(슈퍼클린),(카페루이지),0.230745,0.06974,0.029128,0.126234,1.810068,0.013036,1.064656
6,(카페루이지),(슈퍼클린),0.06974,0.230745,0.029128,0.417663,1.810068,0.013036,1.320981
21,"(아메리카노, 치즈케이크)",(브라우니),0.04321,0.111571,0.007731,0.178916,1.603609,0.00291,1.08202
22,(브라우니),"(아메리카노, 치즈케이크)",0.111571,0.04321,0.007731,0.069293,1.603609,0.00291,1.028024
8,(브라우니),(치즈케이크),0.111571,0.1185,0.019408,0.173951,1.467937,0.006187,1.067127
9,(치즈케이크),(브라우니),0.1185,0.111571,0.019408,0.163779,1.467937,0.006187,1.062433
23,(치즈케이크),"(브라우니, 아메리카노)",0.1185,0.044526,0.007731,0.065241,1.465242,0.002455,1.022161
20,"(브라우니, 아메리카노)",(치즈케이크),0.044526,0.1185,0.007731,0.173631,1.465242,0.002455,1.066715
27,(치즈케이크),"(텐저린라떼, 브라우니)",0.1185,0.04212,0.006897,0.058202,1.381833,0.001906,1.017077
24,"(텐저린라떼, 브라우니)",(치즈케이크),0.04212,0.1185,0.006897,0.163747,1.381833,0.001906,1.054107


In [145]:
records = offline_df2[offline_df2['상품재분류'] != '휘낭시에'][["결제일시", "상품재분류", "카테고리2"]]
records = records.dropna()
records.loc[records["카테고리2"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리2"] == "핸드드립", "상품재분류"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리2"] == "핸드드립", "상품재분류"] = records.loc[records["카테고리2"] == "핸드드립", "상품재분류"].apply(lambda x : x.split("_")[0])


records = records.groupby("결제일시").agg({"카테고리2" : lambda x : '_'.join(sorted(list(set(x))))}).reset_index()
records = records[records["카테고리2"].apply(lambda x : True if len(x) > 5 else False)]
records['결제시간'] = records['결제일시'].map(lambda x: x.hour)

In [146]:
records.카테고리2.value_counts()

베이직_시그니처              7038
시그니처_에스프레소            3788
디저트_시그니처              2402
디저트_베이직               2140
베이직_에스프레소             1777
디저트_베이직_시그니처          1571
베이직_시그니처_에스프레소         735
디저트_시그니처_에스프레소         663
디저트_에스프레소              405
디저트_베이직_에스프레소          353
디저트_베이직_시그니처_에스프레소     147
Name: 카테고리2, dtype: int64

### 시간대별 동시판매품목

In [147]:
records.groupby(['결제시간', '카테고리2']).count().unstack()

Unnamed: 0_level_0,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시
카테고리2,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소
결제시간,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
9,25.0,10.0,3.0,5.0,53.0,17.0,7.0,109.0,12.0,26.0,52.0
10,205.0,179.0,27.0,36.0,271.0,105.0,55.0,628.0,93.0,214.0,428.0
11,221.0,161.0,14.0,38.0,227.0,76.0,38.0,827.0,86.0,225.0,458.0
12,274.0,202.0,27.0,60.0,339.0,86.0,73.0,1196.0,126.0,236.0,567.0
13,362.0,257.0,22.0,54.0,345.0,84.0,56.0,1219.0,114.0,273.0,584.0
14,335.0,273.0,22.0,60.0,388.0,94.0,57.0,1144.0,115.0,271.0,556.0
15,351.0,238.0,18.0,45.0,416.0,93.0,52.0,905.0,98.0,252.0,466.0
16,253.0,189.0,10.0,34.0,258.0,75.0,46.0,661.0,48.0,169.0,397.0
17,102.0,54.0,4.0,16.0,90.0,28.0,19.0,301.0,39.0,97.0,229.0
18,12.0,8.0,,5.0,15.0,5.0,2.0,48.0,4.0,14.0,51.0


In [148]:
records['주말여부'] = records['결제일시'].map(lambda x : x.dayofweek in [6, 0])
records.groupby(['주말여부', '카테고리2']).count().unstack()

Unnamed: 0_level_0,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제일시,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간,결제시간
카테고리2,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소,디저트_베이직,디저트_베이직_시그니처,디저트_베이직_시그니처_에스프레소,디저트_베이직_에스프레소,디저트_시그니처,디저트_시그니처_에스프레소,디저트_에스프레소,베이직_시그니처,베이직_시그니처_에스프레소,베이직_에스프레소,시그니처_에스프레소
주말여부,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
False,1382,989,93,237,1600,453,289,4602,506,1207,2566,1382,989,93,237,1600,453,289,4602,506,1207,2566
True,758,582,54,116,802,210,116,2436,229,570,1222,758,582,54,116,802,210,116,2436,229,570,1222
