In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from os.path import join
import matplotlib.pyplot as plt
import datetime as dt
import re
from tabulate import tabulate


## Local Load

In [4]:
# path = join(os.getcwd(), "data")
offline_raw = pd.read_excel("./9._offline_total.xlsx")

## Colab Load

In [5]:
# from google.colab import drive
# drive.mount('/content/drive')

# # 데이터 불러오기
# offline_df = pd.read_excel('/content/drive/MyDrive/9. offline_total.xlsx')

# plt.rcParams['font.family'] = 'AppleGothic' # 폰트 변경
# plt.rcParams['axes.unicode_minus'] = False # 축 값 마이너스 깨짐 해결

In [6]:
def convert_to_nan(data):
    
    df = data.copy()
    
    columns = df.columns.to_list()
    for col in columns:
        df[col] = df[col].apply(lambda x: np.nan if x == "-" else x)
    
    return df


def drop_columns(data):
    
    df = data.copy()
    
    rm_columns = ["온라인 스토어", "사용 포인트", "적립 포인트", "사용 선불권", "배달팁(매출 포함x)", "결제메모", "주문 채널"]
    df = df.drop(rm_columns, axis = 1)
    
    return df


def date_conversion(data):
    
    df = data.copy()
    
    df["결제일시"] = df.loc[:, "결제일"] + " " + df.loc[:, "결제시간"]
    df["결제일시"] = pd.to_datetime(df["결제일시"])
    df["year"] = df["결제일시"].apply(lambda x: x.year)
    df["month"] = df["결제일시"].apply(lambda x: x.month)
    df["day"] = df["결제일시"].apply(lambda x: x.day)
    df["hour"] = df["결제일시"].apply(lambda x: x.hour)
    df["day_name"] = df["결제일시"].apply(lambda x: x.day_name())
    df["year_month"] = pd.to_datetime(df["결제일"]).dt.strftime("%Y-%m")
    
    return df


def add_weekend(data):
    
    df = data.copy()
    
    # 평일, 주말 구분
    # 0 = 평일, 1 = 주말
    df["is_weekend"] = df["day_name"].apply(lambda x: 1 if (x == "Sunday") | (x == "Saturday") else 0)
    
    return df


def add_season(data):
    
    df = data.copy()
    
    # 계절 추가하기
    # 봄(3~5월) = 1, 여름(6~8월) = 2, 가을(9~11월0) = 3, 겨울(12~2월) = 4
    seasons = [1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1]
    season_dict = dict(zip(range(1,13), seasons))
    df["season"] = df["month"].map(season_dict)

    return df


def add_holiday(data):
    
    df = data.copy()
    
    conditionlist = [
        (df['결제일'] == '2022-03-01') | (df['결제일'] == '2022-05-05') | (df['결제일'] == '2022-05-08') |
        (df['결제일'] == '2022-06-06') | (df['결제일'] == '2022-08-15') | (df['결제일'] == '2022-09-09') |
        (df['결제일'] == '2022-09-10') | (df['결제일'] == '2022-09-11') | (df['결제일'] == '2022-10-09') |
        (df['결제일'] == '2022-10-03') | (df['결제일'] == '2022-12-25') | 
        (df['결제일'] == '2023-01-01') | (df['결제일'] == '2023-01-21') | (df['결제일'] == '2023-01-22') |
        (df['결제일'] == '2023-01-23') | (df['결제일'] == '2023-03-01') | (df['결제일'] == '2023-05-05') |
        (df['결제일'] == '2023-05-26') | (df['결제일'] == '2023-06-06')]

    choicelist = [1]
    df['is_holiday'] = np.select(conditionlist, choicelist, default= 0)
    df["weekend_n_holiday"] = df["is_weekend"] + df["is_holiday"]
    
    return df

def drop_row(data):
    
    df = data.copy()
    df["상품명"] = df["상품명"].apply(lambda x: re.sub(r"\s", "", x))
    
    drop_lst = ['야외',
                '포장',
                '무료시음권', 
                '캐리어',
                '종이백',
                '포크',
                '⚪️',
                '⚪', # 위 emoji 와 별개
                '일회용컵',
               ]

    custom_lst = ['덜달게',
                  '1샷추가', 
                  '오틀리', 
                  '연하게', 
                  '시럽', 
                  '얼음적게',
                  '오트사이드', 
                  '물적게', 
                  '바닐라시럽', 
                  '2샷추가',
                  '얼음X',
                  '샷추가',
                 ]

    idx = df.loc[df["상품명"].str.contains("|".join(drop_lst)),"상품명"].index
    df = df.drop(idx, axis = 0).reset_index(drop = True)
    
    idx = df.loc[df["상품명"].str.contains("|".join(custom_lst)),"상품명"].index
    df.loc[idx, "카테고리"] = "커스텀"
    
    return df


In [7]:
def preprocess_productname(data):
    tmp = data.copy()
    tmp["카테고리"] = tmp["카테고리"].apply(lambda x: re.sub(r"\s", "", x))
    pattern = r'\s*_\s*'
    
    
    # 카테고리 - basic_ice
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"플랫_", "플랫화이트_", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"템플", "I", x))

    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"][tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
        
    # 카테고리 - basic
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(r"\s|\(H\)", "", x))
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains("아메리카노|카페라떼|플랫화이트|카푸치노|바닐라라떼")].index
    tmp.loc[idx, "상품명"] = tmp.loc[idx, "상품명"].apply(lambda x: "(H)"+x)
    
    
    # 카테고리 - 시그니처
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"아이스텐라", "아이스텐저린라떼", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"유자아메리카노|아이스유자아메리카노", "아이스유자아메리카노", x))
    
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"][tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    tmp.loc[tmp["상품명"] == "텐저린카푸치노", "상품명"] = "텐저린카푸치노_쥬시"
    tmp.loc[tmp["상품명"] == "아이스텐저린라떼", "상품명"] = "아이스텐저린라떼_쥬시"
    tmp.loc[tmp["상품명"] == "아이스유자아메리카노", "상품명"] = "아이스유자아메리카노_쥬시"
        
    # 카테고리 - beverage
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    
    rename_dict = {"차가운어린이우유": "(I)어린이우유",
                   "따뜻한어린이우유": "(H)어린이우유",
                   
                   "얼그레이밀크티": "(H)얼그레이밀크티",
                   
                   "제주유기농귤피주스" : "(I)제주유기농귤피주스",
                   "문경선암리사과주스" : "(I)문경선암리사과주스",
                   "제주유기농감귤주스" : "(I)제주유기농감귤주스",
                   "어린이감귤주스" : "(I)어린이감귤주스",
                   
                   "시나몬플럼" : "(H)시나몬플럼",
                   "트로피칼루이보스" : "(H)트로피칼루이보스",
                   "카모마일" : "(H)카모마일"
                  }
    
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x: rename_dict[x] if x in rename_dict.keys() else x)
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].value_counts()
    
    
    # 카테고리 - 디저트
    tmp.loc[tmp["카테고리"] == "디저트", "상품명"] = tmp.loc[tmp["카테고리"] == "디저트", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 - 블랜딩원두
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 = 세트
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub(r"\s", '', x))
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub("Set.", "", x))
    
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "세트", "상품명"][tmp.loc[tmp["카테고리"] == "세트", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"] = tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"].apply(lambda x : "(H)" + x)
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x: "Set_" + x)
    
    
    # 카테고리 - 드립백/캡슐
    tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"] = tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 - 에스프레소
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_", "스페셜_", "샘플_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"][tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])

    # 카테고리 - 핸드드립, 싱글원두
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].str.strip()
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\)\s', ")", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\s*:\s*', "_", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\s+', "_", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(강배전\)|\(강\)', "강배전", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(중강배전\)|\(중\)', "중강배전", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명"].apply(lambda x: re.sub(r'\(디카프\)', "디카프", x))
    
    tmp["상품명_원산지"] = tmp["상품명"].copy()   
    idx = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"][tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.contains("ㅡ")].index
    tmp = tmp.drop(idx, axis = 0).reset_index(drop = True)

    rename_dict = {"디카페인 우일라 200g" : "디카페인 콜롬비아 우일라 200g",
                   "디카페인콜롬비아 리치 200g" : "디카페인 콜롬비아 리치 200g",
                   "엘리다 카투아이 100g" : "파나마 엘리다 카투아이 100g",
                   "엘리다 카투아이 ASD 100g" : "파나마 엘리다 카투아이 100g",
                   "엘파라이소 디카프 100g" : "콜롬비아 엘파라이소 디카프 100g",
                   "엘파라이소 리치 100g" : "콜롬비아 엘파라이소 리치 100g",
                   "엘파라이소 리치" : "콜롬비아 엘파라이소 리치",
                   "부산제 200g" : "르완다 부산제 200g",
                   "르완다부산제 200g" : "르완다 부산제 200g",
                   "에콰100g" : "에콰도르 100g",
                   "세로아줄 게이샤" : "콜롬비아 세로아줄 게이샤",
                   "페루게이샤" : "페루 게이샤",
                   "페루게이샤 100g" : "페루 게이샤 100g",
                   "니카라과강배전" : "니카라과 강배전",
                   "케냐키티투 200g" : "케냐 키티투 200g",
                   "케냐캄왕기. 200g" : "케냐 캄왕기 200g",
                   "쿠쿠세" : "에티오피아 쿠쿠세",
                   "(할인) 케냐카루만디 200g" : "(할인) 케냐 카루만디 200g",
                   "니카라과핀카케냐바티안" : "니카라과 핀카케냐바티안",
                   "온다라스 엘 케브라초 파라이네마 200g" : "온두라스 엘 케브라초 파라이네마 200g",
                   "페루엘사포테 200g" : "페루 엘사포테 200g",
                   "니카라과리틀 레드 200g" : "니카라과 리틀 레드 200g",
                   "(디카페인)콜롬비아 리치 200g" : "(디카페인) 콜롬비아 리치 200g",
                   "(디카페인)콜롬비아 리치 100g" : "(디카페인) 콜롬비아 리치 100g",
                   "콜룸비니 엘 파라이소 리치 100g" : "콜롬비아 엘 파라이소 리치 100g",
                   "(할인) 디카프 / 콜롬비아 엘 파라이소 리치" : "(할인) 디카프 콜롬비아 엘 파라이소 리치",
                   "[로우카페인] 시티트래블러" : "시티트래블러 로우카페인",

                   "(I)디카프_에티오피아" : "(I)에티오피아_디카프",
                   "(H)디카프_에티오피아" : "(H)에티오피아_디카프",
                   "(H)과테_레드_파카마라" : "(H)과테말라_레드_파카마라",
                   "(I)과테_레드_파카마라" : "(I)과테말라_레드_파카마라",
                   "(I)과테말라엘모리또" : "(I)과테말라_엘모리또",
                   "(H)과테말라엘모리또" : "(H)과테말라_엘모리또",
                   "(H)케냐띠리쿠" : "(H)케냐_띠리쿠",
                   "(H)콰트로_콜롬비아" : "(H)콜롬비아_콰트로",
                   "(I)콰트로_콜롬비아" : "(I)콜롬비아_콰트로",
                   "(H)디카페인_콜롬비아" : "(H)콜롬비아_디카페인",
                   "(I)디카페인_콜롬비아" : "(I)콜롬비아_디카페인",
                   "(H)디카페인_니카라과" : "(H)니카라과_디카페인",
                   "(I)디카페인_니카라과" : "(I)니카라과_디카페인",
                   "(I)디카페인_에티오피아" : "(I)에티오피아_디카페인",
                   "(H)디카페인_에티오피아" : "(H)에티오피아_디카페인",
                   "(I)오늘의커피" : "(I)오늘의_커피",
                   "(H)오늘의커피" : "(H)오늘의_커피"
                }
    
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.strip()
    tmp.loc[:, "상품명_원산지"] = tmp.loc[:, "상품명_원산지"].replace(rename_dict)
    
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x : re.sub("예맨", "예멘", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x))
    tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "핸드드립", "상품명_원산지"].apply(lambda x: x.split("_")[0])
    
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: re.sub(pattern, " ", x))
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub("[()]", "", x)) 
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub(r"_?[0-9]*g|할인|강배전", "", x))
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].str.strip()
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x : re.sub(r"\s", "_", x))
    
    lst = ["디카페인_", "디카프_", "콰트로_"]
    for i in lst:
        tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: x[len(i):] + "_" + i[:-1] if x[:len(i)] == i else x)
    tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"] = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"].apply(lambda x: x.split("_")[0])

    
    return tmp

In [185]:
pd.options.display.max_columns = None

# offline_raw = pd.read_excel(join(path, "9. offline_total.xlsx"))
offline_df = convert_to_nan(offline_raw)

offline_df = drop_columns(offline_df)
offline_df = date_conversion(offline_df)
offline_df = add_weekend(offline_df)
offline_df = add_season(offline_df)
offline_df = add_holiday(offline_df)
offline_df = preprocess_productname(offline_df)
offline_df = drop_row(offline_df)

offline_df



Unnamed: 0,결제일,결제시간,결제내역,합계,상품별 할인,결제 할인,카드 결제,현금 결제,간편 결제,기타 결제,환불,환불 일시,카테고리,상품명,옵션,수량,상품별 단가,상품별 합계,결제일시,year,month,day,hour,day_name,year_month,is_weekend,season,is_holiday,weekend_n_holiday,상품명_원산지
0,2022-02-07,19:28:53,샘플 캐모마일,,,,,,,,4500.0,2022-02-07 19:29:37,에스프레소,캐모마일_샘플,,1,4500,,2022-02-07 19:28:53,2022,2,7,19,Monday,2022-02,0,1,0,0,캐모마일_샘플
1,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(I)콜롬비아_로꼬_소르베,,1,10500,10500.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,콜롬비아
2,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)니카라과_COE#1,,1,12000,12000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,니카라과
3,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)과테_레드_파카마라,,1,10000,10000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0,과테말라
4,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,시그니처,아이스텐저린라떼_쥬시,,1,7000,7000.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0,아이스텐저린라떼_쥬시
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140770,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,드립백/캡슐,드립백_쥬시,,1,18000,18000.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0,드립백_쥬시
140771,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,드립백/캡슐,드립백_클래식,,1,18000,18000.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0,드립백_클래식
140772,2023-05-31,17:22:16,(KCW) 기념 뱃지,6000.0,,,6000.0,,,,,,MD,(KCW)기념뱃지,,1,6000,6000.0,2023-05-31 17:22:16,2023,5,31,17,Wednesday,2023-05,0,2,0,0,(KCW) 기념 뱃지
140773,2023-05-31,17:24:37,(I) 오미자 에이드 외 1건,14000.0,,,14000.0,,,,,,비버리지,(I)오미자에이드,,1,7000,7000.0,2023-05-31 17:24:37,2023,5,31,17,Wednesday,2023-05,0,2,0,0,(I)오미자에이드


# apriori

휘낭시에 쌍 포함, 휘낭시에 쌍 제거
* 전체(밑에 있는 명단 전부)
* 커피류(핸드드립 제외, 비버리지 제외)
* 커피류 + 디저트
* 시그니처
* 시그니처 + 디저트
* 에스프레소
* 에스프레소 + 디저트
* 베이직(베이직 아이스 포함)
* 베이직 + 디저트
* 비버리지
* 비버리지 + 디저트


In [186]:
# 커피류('시그니처','Basic_ice','에스프레소','Basic')

In [187]:
offline_df['카테고리2'] = offline_df['카테고리'].replace({'시그니처' : '시그니처', 'Basic_ice' : '베이직', 
                                                     '에스프레소' : '에스프레소', 'Basic' : '베이직'})

offline_df2 = offline_df[offline_df['카테고리2'].isin(['에스프레소', '비버리지', '시그니처', '디저트', '베이직'])]

menu_df_set = pd.read_csv('./menu_repl_df.csv')

offline_df2 = pd.merge(offline_df2, menu_df_set[['상품명', '상품재분류']], how = 'left')

### 1. 휘낭시에 쌍 포함

In [189]:
from matplotlib.colors import LinearSegmentedColormap
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [190]:
records = offline_df2[["결제일시", "상품명", "카테고리"]]
records = records.dropna()
records.loc[records["카테고리"] == "핸드드립", "상품명"] = records.loc[records["카테고리"] == "핸드드립", "상품명"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리"] == "핸드드립", "상품명"] = records.loc[records["카테고리"] == "핸드드립", "상품명"].apply(lambda x : x.split("_")[0])
records.loc[records["카테고리"] == "핸드드립", "상품명"].value_counts()


records = records.groupby("결제일시").agg({"상품명" : lambda x : list(x)}).reset_index(drop = True)


records = records[records["상품명"].apply(lambda x : True if len(x) > 1 else False)]

In [240]:
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)

In [239]:
itemset = apriori(te_df,
                  min_support=0.005, 
                  max_len=5, 
                  use_colnames=True, 
                  verbose=1,
                 )
itemset['length'] = itemset['itemsets'].map(lambda x: len(x))
itemset = itemset.sort_values(by = 'support',ascending=False)

Processing 16 combinations | Sampling itemset size 4 3


In [193]:
from mlxtend.frequent_patterns import association_rules
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
10,(플레인_휘낭시에),(무화과_휘낭시에),0.042909,0.042088,0.013428,0.312938,7.435281,0.011622,1.394215
11,(무화과_휘낭시에),(플레인_휘낭시에),0.042088,0.042909,0.013428,0.319038,7.435281,0.011622,1.4055
36,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.095909,0.035142,0.008095,0.084403,2.401764,0.004725,1.053802
37,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.035142,0.095909,0.008095,0.23035,2.401764,0.004725,1.174679
13,(슈퍼클린_클래식),(카페_루이지),0.10745,0.05579,0.013045,0.121405,2.176124,0.00705,1.074682
12,(카페_루이지),(슈퍼클린_클래식),0.05579,0.10745,0.013045,0.233824,2.176124,0.00705,1.164941
32,((H)아메리카노_클래식),((H)카페라떼_클래식),0.095909,0.043319,0.008751,0.091246,2.106379,0.004597,1.052739
33,((H)카페라떼_클래식),((H)아메리카노_클래식),0.043319,0.095909,0.008751,0.20202,2.106379,0.004597,1.132975
38,(카페_루이지),(슈퍼클린_쥬시),0.05579,0.080238,0.007821,0.140196,1.747243,0.003345,1.069734
39,(슈퍼클린_쥬시),(카페_루이지),0.080238,0.05579,0.007821,0.097478,1.747243,0.003345,1.046191


In [194]:
offline_dfset= offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

repl_dict = {}

for x, y in offline_dfset[['상품명', '카테고리2']].values:
    repl_dict[x] = y
    
res = []
for x in association_df['antecedents'] :
    tmp = []
    for t in x :
        tmp.append(repl_dict[t])
    res.append('_'.join(tmp))
len(res)
association_df['antecedents_cat'] = res

res = []
for x in association_df['consequents'] :
    tmp = []
    for t in x :
        tmp.append(repl_dict[t])
        
    res.append('_'.join(tmp))
association_df['consequents_cat'] = res

association_df['cat'] = association_df['antecedents_cat']+'_'+association_df['consequents_cat']

전체

In [215]:
select_zip = {'커피류' : ['베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처', '시그니처_에스프레소', '시그니처_시그니처'],
 '커피류+디저트' : ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트', '베이직_디저트', '디저트_베이직',
             '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트', '디저트_시그니처_시그니처'],
 '시그니처' : ['시그니처_시그니처'],
 '시그니처+디저트' : ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트', 
             '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트', '디저트_시그니처_시그니처'],
 '에스프레소' : ['에스프레소_에스프레소'],
 '에스프레소+디저트' : [],
 '베이직' : ['베이직_베이직'],
 '베이직+디저트' : ['베이직_디저트', '디저트_베이직'],
 '비버리지' : [],
 '비버리지+디저트' : ['비버리지_디저트', '디저트_비버리지']}

커피류

In [228]:
association_df[association_df['cat'].isin(select_zip['커피류'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.117896,0.291757,1.228227
1,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.117896,0.496316,1.228227
2,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.022617,0.114734,1.067791
3,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.022617,0.210486,1.067791
6,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018077,0.097882,1.417493
7,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018077,0.261782,1.417493
12,(카페_루이지),(슈퍼클린_클래식),0.013045,0.233824,2.176124
13,(슈퍼클린_클래식),(카페_루이지),0.013045,0.121405,2.176124
14,((I)아메리카노_클래식),((I)카페라떼_클래식),0.01206,0.065304,1.320751
15,((I)카페라떼_클래식),((I)아메리카노_클래식),0.01206,0.243916,1.320751


커피류+디저트

In [229]:
association_df[association_df['cat'].isin(select_zip['커피류+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.019554,0.105879,1.072159
5,(브라우니),((I)아메리카노_클래식),0.019554,0.198006,1.072159
20,(브라우니),((H)아메리카노_클래식),0.01031,0.104403,1.088568
21,((H)아메리카노_클래식),(브라우니),0.01031,0.107499,1.088568
22,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009271,0.266509,1.121942
23,"(브라우니, 아이스유자아메리카노_쥬시)",(아이스텐저린라떼_쥬시),0.009271,0.489177,1.210562
24,(아이스텐저린라떼_쥬시),"(브라우니, 아이스유자아메리카노_쥬시)",0.009271,0.022943,1.210562
25,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009271,0.039028,1.121942
26,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.009162,0.241877,1.018246
27,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.009162,0.476529,1.179261


시그니처

In [231]:
association_df[association_df['cat'].isin(select_zip['시그니처'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.117896,0.291757,1.228227
1,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.117896,0.496316,1.228227


시그니처+디저트

In [232]:
association_df[association_df['cat'].isin(select_zip['시그니처+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
22,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009271,0.266509,1.121942
23,"(브라우니, 아이스유자아메리카노_쥬시)",(아이스텐저린라떼_쥬시),0.009271,0.489177,1.210562
24,(아이스텐저린라떼_쥬시),"(브라우니, 아이스유자아메리카노_쥬시)",0.009271,0.022943,1.210562
25,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009271,0.039028,1.121942
26,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.009162,0.241877,1.018246
27,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.009162,0.476529,1.179261
28,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.009162,0.022672,1.179261
29,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.009162,0.038568,1.018246
30,(텐저린카푸치노_쥬시),(잠봉뵈르),0.008833,0.044811,1.057142
31,(잠봉뵈르),(텐저린카푸치노_쥬시),0.008833,0.208387,1.057142


에스프레소

In [233]:
association_df[association_df['cat'].isin(select_zip['에스프레소'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
12,(카페_루이지),(슈퍼클린_클래식),0.013045,0.233824,2.176124
13,(슈퍼클린_클래식),(카페_루이지),0.013045,0.121405,2.176124
18,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011185,0.104098,1.297354
19,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011185,0.1394,1.297354
38,(카페_루이지),(슈퍼클린_쥬시),0.007821,0.140196,1.747243
39,(슈퍼클린_쥬시),(카페_루이지),0.007821,0.097478,1.747243


에스프레소+디저트

In [234]:
association_df[association_df['cat'].isin(select_zip['에스프레소+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift


베이직

In [235]:
association_df[association_df['cat'].isin(select_zip['베이직'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
6,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018077,0.097882,1.417493
7,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018077,0.261782,1.417493
14,((I)아메리카노_클래식),((I)카페라떼_클래식),0.01206,0.065304,1.320751
15,((I)카페라떼_클래식),((I)아메리카노_클래식),0.01206,0.243916,1.320751
32,((H)아메리카노_클래식),((H)카페라떼_클래식),0.008751,0.091246,2.106379
33,((H)카페라떼_클래식),((H)아메리카노_클래식),0.008751,0.20202,2.106379
34,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.008286,0.044869,1.062615
35,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.008286,0.196244,1.062615
36,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.008095,0.084403,2.401764
37,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.008095,0.23035,2.401764


베이직+디저트

In [236]:
association_df[association_df['cat'].isin(select_zip['베이직+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.019554,0.105879,1.072159
5,(브라우니),((I)아메리카노_클래식),0.019554,0.198006,1.072159
20,(브라우니),((H)아메리카노_클래식),0.01031,0.104403,1.088568
21,((H)아메리카노_클래식),(브라우니),0.01031,0.107499,1.088568
56,((H)카페라떼_클래식),(바스크_치즈케이크),0.005251,0.121212,1.160273
57,(바스크_치즈케이크),((H)카페라떼_클래식),0.005251,0.050262,1.160273


비버리지

In [237]:
association_df[association_df['cat'].isin(select_zip['비버리지'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift


비버리지+디저트

In [238]:
association_df[association_df['cat'].isin(select_zip['비버리지+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
48,(브라우니),((I)얼그레이밀크티),0.00577,0.058433,1.315668
49,((I)얼그레이밀크티),(브라우니),0.00577,0.129926,1.315668


### 2. 휘낭시에 쌍 제거 

In [243]:
records = offline_df2[offline_df2['상품재분류'] != '휘낭시에'][["결제일시", "상품명", "카테고리"]]
records = records.dropna()
records.loc[records["카테고리"] == "핸드드립", "상품명"] = records.loc[records["카테고리"] == "핸드드립", "상품명"].apply(lambda x : re.sub(r"\(H\)|\(I\)", "", x ))
records.loc[records["카테고리"] == "핸드드립", "상품명"] = records.loc[records["카테고리"] == "핸드드립", "상품명"].apply(lambda x : x.split("_")[0])
records.loc[records["카테고리"] == "핸드드립", "상품명"].value_counts()


records = records.groupby("결제일시").agg({"상품명" : lambda x : list(x)}).reset_index(drop = True)


records = records[records["상품명"].apply(lambda x : True if len(x) > 1 else False)]

In [246]:
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)

In [249]:
itemset = apriori(te_df,
                  min_support=0.005, 
                  max_len=5, 
                  use_colnames=True, 
                  verbose=1,
                 )
itemset['length'] = itemset['itemsets'].map(lambda x: len(x))
itemset = itemset.sort_values(by = 'support',ascending=False)

Processing 16 combinations | Sampling itemset size 4 3


In [250]:
from mlxtend.frequent_patterns import association_rules
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
33,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.035815,0.097928,0.008334,0.232704,2.376288,0.004827,1.175652
32,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.097928,0.035815,0.008334,0.085106,2.376288,0.004827,1.053877
11,(슈퍼클린_클래식),(카페_루이지),0.109556,0.057129,0.013431,0.122591,2.145849,0.007172,1.074608
10,(카페_루이지),(슈퍼클린_클래식),0.057129,0.109556,0.013431,0.235091,2.145849,0.007172,1.164117
29,((H)카페라떼_클래식),((H)아메리카노_클래식),0.043896,0.097928,0.00901,0.20526,2.096034,0.004711,1.135053
28,((H)아메리카노_클래식),((H)카페라떼_클래식),0.097928,0.043896,0.00901,0.092007,2.096034,0.004711,1.052986
35,(슈퍼클린_쥬시),(카페_루이지),0.08185,0.057129,0.008053,0.098383,1.722118,0.003377,1.045756
34,(카페_루이지),(슈퍼클린_쥬시),0.057129,0.08185,0.008053,0.140956,1.722118,0.003377,1.068804
8,(브라우니),(바스크_치즈케이크),0.100405,0.106262,0.017035,0.169658,1.5966,0.006365,1.076349
9,(바스크_치즈케이크),(브라우니),0.106262,0.100405,0.017035,0.160307,1.5966,0.006365,1.071338


In [252]:
offline_dfset= offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

repl_dict = {}

for x, y in offline_dfset[['상품명', '카테고리2']].values:
    repl_dict[x] = y
    
res = []
for x in association_df['antecedents'] :
    tmp = []
    for t in x :
        tmp.append(repl_dict[t])
    res.append('_'.join(tmp))
len(res)
association_df['antecedents_cat'] = res

res = []
for x in association_df['consequents'] :
    tmp = []
    for t in x :
        tmp.append(repl_dict[t])
        
    res.append('_'.join(tmp))
association_df['consequents_cat'] = res

association_df['cat'] = association_df['antecedents_cat']+'_'+association_df['consequents_cat']

In [253]:
## 전체

select_zip = {'커피류' : ['베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처', '시그니처_에스프레소', '시그니처_시그니처'],
 '커피류+디저트' : ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트', '베이직_디저트', '디저트_베이직',
             '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트', '디저트_시그니처_시그니처'],
 '시그니처' : ['시그니처_시그니처'],
 '시그니처+디저트' : ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트', 
             '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트', '디저트_시그니처_시그니처'],
 '에스프레소' : ['에스프레소_에스프레소'],
 '에스프레소+디저트' : [],
 '베이직' : ['베이직_베이직'],
 '베이직+디저트' : ['베이직_디저트', '디저트_베이직'],
 '비버리지' : [],
 '비버리지+디저트' : ['비버리지_디저트', '디저트_비버리지']}

## 커피류 
association_df[association_df['cat'].isin(select_zip['커피류'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.121382,0.29574,1.220344
1,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.121382,0.500871,1.220344
2,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.023285,0.116857,1.066643
3,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.023285,0.212542,1.066643
6,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018611,0.098849,1.400919
7,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018611,0.263767,1.400919
10,(카페_루이지),(슈퍼클린_클래식),0.013431,0.235091,2.145849
11,(슈퍼클린_클래식),(카페_루이지),0.013431,0.122591,2.145849
12,((I)아메리카노_클래식),((I)카페라떼_클래식),0.012417,0.065949,1.31071
13,((I)카페라떼_클래식),((I)아메리카노_클래식),0.012417,0.246782,1.31071


In [254]:

## 커피류+디저트
association_df[association_df['cat'].isin(select_zip['커피류+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.020132,0.106924,1.064921
5,(브라우니),((I)아메리카노_클래식),0.020132,0.200505,1.064921
18,(브라우니),((H)아메리카노_클래식),0.010615,0.105721,1.079579
19,((H)아메리카노_클래식),(브라우니),0.010615,0.108396,1.079579
20,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009545,0.266509,1.099727
21,"(브라우니, 아이스유자아메리카노_쥬시)",(아이스텐저린라떼_쥬시),0.009545,0.489177,1.191852
22,(아이스텐저린라떼_쥬시),"(브라우니, 아이스유자아메리카노_쥬시)",0.009545,0.023256,1.191852
23,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009545,0.039387,1.099727
24,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.009432,0.476529,1.161035
25,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.009432,0.022981,1.161035


In [255]:

## 시그니처
association_df[association_df['cat'].isin(select_zip['시그니처'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.121382,0.29574,1.220344
1,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.121382,0.500871,1.220344


In [256]:

## 시그니처+디저트
association_df[association_df['cat'].isin(select_zip['시그니처+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
20,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009545,0.266509,1.099727
21,"(브라우니, 아이스유자아메리카노_쥬시)",(아이스텐저린라떼_쥬시),0.009545,0.489177,1.191852
22,(아이스텐저린라떼_쥬시),"(브라우니, 아이스유자아메리카노_쥬시)",0.009545,0.023256,1.191852
23,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009545,0.039387,1.099727
24,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.009432,0.476529,1.161035
25,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.009432,0.022981,1.161035
26,(텐저린카푸치노_쥬시),(잠봉뵈르),0.009094,0.045641,1.052584
27,(잠봉뵈르),(텐저린카푸치노_쥬시),0.009094,0.20974,1.052584
46,"(아이스텐저린라떼_쥬시, 브라우니)",(바스크_치즈케이크),0.005885,0.164308,1.546256
47,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(브라우니),0.005885,0.150903,1.502932


In [257]:

## 에스프레소
association_df[association_df['cat'].isin(select_zip['에스프레소'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
10,(카페_루이지),(슈퍼클린_클래식),0.013431,0.235091,2.145849
11,(슈퍼클린_클래식),(카페_루이지),0.013431,0.122591,2.145849
16,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011516,0.105114,1.284225
17,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011516,0.140695,1.284225
34,(카페_루이지),(슈퍼클린_쥬시),0.008053,0.140956,1.722118
35,(슈퍼클린_쥬시),(카페_루이지),0.008053,0.098383,1.722118


In [258]:

## 에스프레소+디저트
association_df[association_df['cat'].isin(select_zip['에스프레소+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [259]:

## 베이직
association_df[association_df['cat'].isin(select_zip['베이직'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
6,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018611,0.098849,1.400919
7,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018611,0.263767,1.400919
12,((I)아메리카노_클래식),((I)카페라떼_클래식),0.012417,0.065949,1.31071
13,((I)카페라떼_클래식),((I)아메리카노_클래식),0.012417,0.246782,1.31071
28,((H)아메리카노_클래식),((H)카페라떼_클래식),0.00901,0.092007,2.096034
29,((H)카페라떼_클래식),((H)아메리카노_클래식),0.00901,0.20526,2.096034
30,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.008531,0.045312,1.046355
31,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.008531,0.197009,1.046355
32,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.008334,0.085106,2.376288
33,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.008334,0.232704,2.376288


In [260]:

## 베이직+디저트
association_df[association_df['cat'].isin(select_zip['베이직+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.020132,0.106924,1.064921
5,(브라우니),((I)아메리카노_클래식),0.020132,0.200505,1.064921
18,(브라우니),((H)아메리카노_클래식),0.010615,0.105721,1.079579
19,((H)아메리카노_클래식),(브라우니),0.010615,0.108396,1.079579
52,((H)카페라떼_클래식),(바스크_치즈케이크),0.005406,0.123156,1.158984
53,(바스크_치즈케이크),((H)카페라떼_클래식),0.005406,0.050874,1.158984


In [261]:

## 비버리지
association_df[association_df['cat'].isin(select_zip['비버리지'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [262]:

## 비버리지+디저트
association_df[association_df['cat'].isin(select_zip['비버리지+디저트'])][['antecedents', 'consequents', 
                                                               'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
44,(브라우니),((I)얼그레이밀크티),0.005941,0.05917,1.298011
45,((I)얼그레이밀크티),(브라우니),0.005941,0.130327,1.298011
