In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from os.path import join
import matplotlib.pyplot as plt
import datetime as dt
import re
from tabulate import tabulate


## Local Load

In [2]:
# Load the raw offline sales export from the notebook's working directory.
# NOTE(review): the commented-out loaders elsewhere in this notebook spell the
# file "9. offline_total.xlsx" (with a space) — confirm which name is correct.
# path = join(os.getcwd(), "data")
offline_raw = pd.read_excel("./9._offline_total.xlsx")

## Colab Load

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

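# # Load the data (must be named `offline_raw`, which the preprocessing cell reads)
# offline_raw = pd.read_excel('/content/drive/MyDrive/9. offline_total.xlsx')

# plt.rcParams['font.family'] = 'AppleGothic' # change the plot font
# plt.rcParams['axes.unicode_minus'] = False # fix broken minus signs on axis values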

In [4]:
def convert_to_nan(data):
    """Return a copy of ``data`` where every cell equal to "-" is replaced by NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the "-" placeholder turned into ``np.nan``.
    """
    def dash_to_nan(cell):
        # Only the exact string "-" is treated as a missing-value marker.
        return np.nan if cell == "-" else cell

    cleaned = data.copy()
    for column in list(cleaned.columns):
        cleaned[column] = cleaned[column].apply(dash_to_nan)
    return cleaned


def drop_columns(data):
    """Return a copy of ``data`` without the columns this analysis does not use.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it must contain every column listed below, otherwise
        ``DataFrame.drop`` raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the listed columns removed.
    """
    unused_columns = [
        "온라인 스토어",
        "사용 포인트",
        "적립 포인트",
        "사용 선불권",
        "배달팁(매출 포함x)",
        "결제메모",
        "주문 채널",
    ]
    return data.copy().drop(columns=unused_columns)


def date_conversion(data):
    """Add a parsed payment timestamp and calendar features to a copy of ``data``.

    The string columns 결제일 and 결제시간 are joined with a space and parsed
    with ``pd.to_datetime`` into 결제일시. From that timestamp the columns
    ``year``, ``month``, ``day``, ``hour`` and ``day_name`` are derived;
    ``year_month`` ("YYYY-MM") is derived from 결제일 alone.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 결제일 and 결제시간.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the seven new columns appended in the order
        결제일시, year, month, day, hour, day_name, year_month.
    """
    df = data.copy()

    stamps = pd.to_datetime(df["결제일"] + " " + df["결제시간"])
    df["결제일시"] = stamps

    # Element-wise extraction of the plain calendar attributes.
    for part in ("year", "month", "day", "hour"):
        df[part] = stamps.apply(lambda ts, part=part: getattr(ts, part))

    df["day_name"] = stamps.apply(lambda ts: ts.day_name())
    df["year_month"] = pd.to_datetime(df["결제일"]).dt.strftime("%Y-%m")

    return df


def add_weekend(data):
    """Add an ``is_weekend`` flag derived from the ``day_name`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``day_name`` column holding English day names.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with ``is_weekend``: 1 for "Saturday"/"Sunday",
        0 for every other value.
    """
    weekend_names = ("Sunday", "Saturday")

    df = data.copy()
    # 0 = weekday, 1 = weekend
    df["is_weekend"] = df["day_name"].apply(lambda name: int(name in weekend_names))
    return df


def add_season(data):
    """Add a ``season`` code derived from the ``month`` column.

    Encoding: spring (Mar-May) = 1, summer (Jun-Aug) = 2,
    autumn (Sep-Nov) = 3, winter (Dec-Feb) = 4.

    The previous lookup table was rotated by one season relative to this
    documented encoding (December-February were mapped to 1, March-May to 2,
    and so on); the table below follows the documented encoding.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an integer ``month`` column with values 1-12.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the ``season`` column added. Months outside
        1-12 map to NaN (behaviour of ``Series.map`` with a dict).
    """
    df = data.copy()

    # Season code for January..December, in calendar order.
    seasons = [4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4]
    season_dict = dict(zip(range(1, 13), seasons))
    df["season"] = df["month"].map(season_dict)

    return df


def add_holiday(data):
    """Add ``is_holiday`` and ``weekend_n_holiday`` columns.

    ``is_holiday`` is 1 when 결제일 equals one of the hard-coded date strings
    below and 0 otherwise. ``weekend_n_holiday`` is the plain sum
    ``is_weekend + is_holiday``, so it is 2 for a holiday that falls on a
    weekend.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 결제일 ("YYYY-MM-DD" strings) and ``is_weekend``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the two columns added.
    """
    # NOTE(review): '2023-05-26' looks like it may be meant as Buddha's
    # Birthday 2023 — confirm the date against an official holiday calendar.
    holiday_dates = [
        '2022-03-01', '2022-05-05', '2022-05-08',
        '2022-06-06', '2022-08-15', '2022-09-09',
        '2022-09-10', '2022-09-11', '2022-10-09',
        '2022-10-03', '2022-12-25',
        '2023-01-01', '2023-01-21', '2023-01-22',
        '2023-01-23', '2023-03-01', '2023-05-05',
        '2023-05-26', '2023-06-06',
    ]

    df = data.copy()
    on_holiday = df['결제일'].isin(holiday_dates)
    df['is_holiday'] = np.select([on_holiday], [1], default=0)
    df["weekend_n_holiday"] = df["is_weekend"] + df["is_holiday"]

    return df

def drop_row(data):
    """Drop non-product rows and tag drink customisations.

    Steps, applied to a copy of ``data``:

    1. All whitespace is removed from 상품명.
    2. Rows whose 상품명 contains any keyword of ``excluded_keywords`` are
       dropped and the index is renumbered.
    3. Rows whose 상품명 contains any keyword of ``custom_keywords`` get
       카테고리 set to "커스텀".

    The keyword lists are joined with "|" and used as a regular expression
    by ``Series.str.contains``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column 상품명.

    Returns
    -------
    pandas.DataFrame
        Filtered and re-indexed copy of ``data``.
    """
    excluded_keywords = ['야외',
                         '포장',
                         '무료시음권',
                         '캐리어',
                         '종이백',
                         '포크',
                         '⚪️',
                         '⚪',  # listed separately from the emoji above
                         '일회용컵',
                         ]

    custom_keywords = ['덜달게',
                       '1샷추가',
                       '오틀리',
                       '연하게',
                       '시럽',
                       '얼음적게',
                       '오트사이드',
                       '물적게',
                       '바닐라시럽',
                       '2샷추가',
                       '얼음X',
                       '샷추가',
                       ]

    df = data.copy()
    df["상품명"] = df["상품명"].apply(lambda name: re.sub(r"\s", "", name))

    excluded_rows = df.index[df["상품명"].str.contains("|".join(excluded_keywords))]
    df = df.drop(index=excluded_rows).reset_index(drop=True)

    is_custom = df["상품명"].str.contains("|".join(custom_keywords))
    df.loc[is_custom, "카테고리"] = "커스텀"

    return df


In [5]:
def _apply_in_category(df, category, func, column="상품명"):
    """Apply ``func`` to every ``column`` value of rows whose 카테고리 equals ``category`` (in place)."""
    mask = df["카테고리"] == category
    df.loc[mask, column] = df.loc[mask, column].apply(func)


def _sub_in_category(df, category, regex, repl, column="상품명"):
    """Run ``re.sub(regex, repl, value)`` over ``column`` for one category (in place)."""
    _apply_in_category(df, category, lambda value: re.sub(regex, repl, value), column)


def _move_bean_to_suffix(df, category, beans, column="상품명"):
    """Rewrite "<bean>_<name>" as "<name>_<bean>" for one category (in place).

    Each entry of ``beans`` ends with "_". Rows are selected when the token
    occurs anywhere in the value, and the first ``len(bean)`` characters are
    then cut off.
    NOTE(review): this assumes the token is always a prefix — confirm against the data.
    """
    for bean in beans:
        names = df.loc[df["카테고리"] == category, column]
        idx = names[names.str.contains(bean)].index
        df.loc[idx, column] = df.loc[idx, column].apply(
            lambda value: value[len(bean):] + "_" + bean[:-1]
        )


def preprocess_productname(data):
    """Normalise 상품명 per 카테고리 and derive the 상품명_원산지 column.

    Working on a copy of ``data``:

    * whitespace is removed from 카테고리;
    * 상품명 is cleaned with category-specific rules (whitespace/underscore
      normalisation, moving the bean token to the end, adding "(H)"/"(I)" or
      "Set_" prefixes, fixed renames);
    * 상품명_원산지 starts as a copy of 상품명; for 핸드드립 and 싱글원두 rows
      it is reduced to the first "_"-separated token after further cleaning;
    * 싱글원두 rows whose name contains "ㅡ" are dropped and the index is
      renumbered.

    Changes relative to the previous implementation:

    * The 세트 "(H)" prefix test used ``str.contains("(I)")``, which pandas
      treats as a regular expression with a capture group and therefore
      matches any name containing the letter "I". It is now a literal
      substring test (``regex=False``).
    * A discarded ``value_counts()`` expression was removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain string columns 카테고리 and 상품명.

    Returns
    -------
    pandas.DataFrame
        Cleaned copy of ``data`` with the extra column 상품명_원산지.
    """
    tmp = data.copy()
    tmp["카테고리"] = tmp["카테고리"].apply(lambda x: re.sub(r"\s", "", x))
    pattern = r'\s*_\s*'  # an underscore with optional whitespace on either side
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]

    # Category - Basic_ice
    _sub_in_category(tmp, "Basic_ice", pattern, r'_')
    _sub_in_category(tmp, "Basic_ice", r"\s", "")
    _sub_in_category(tmp, "Basic_ice", r"플랫_", "플랫화이트_")
    # NOTE(review): "템플" is replaced by a bare "I"; confirm this is the intended spelling.
    _sub_in_category(tmp, "Basic_ice", r"템플", "I")
    _move_bean_to_suffix(tmp, "Basic_ice", beans_lst)

    # Category - Basic: strip "(H)", move the bean token, then prefix "(H)" on the listed drinks
    _sub_in_category(tmp, "Basic", pattern, r'_')
    _sub_in_category(tmp, "Basic", r"\s|\(H\)", "")
    _move_bean_to_suffix(tmp, "Basic", beans_lst)

    basic_names = tmp.loc[tmp["카테고리"] == "Basic", "상품명"]
    idx = basic_names[basic_names.str.contains("아메리카노|카페라떼|플랫화이트|카푸치노|바닐라라떼")].index
    tmp.loc[idx, "상품명"] = tmp.loc[idx, "상품명"].apply(lambda x: "(H)" + x)

    # Category - 시그니처
    _sub_in_category(tmp, "시그니처", r"\s", "")
    _sub_in_category(tmp, "시그니처", r"아이스텐라", "아이스텐저린라떼")
    _sub_in_category(tmp, "시그니처", r"유자아메리카노|아이스유자아메리카노", "아이스유자아메리카노")
    _move_bean_to_suffix(tmp, "시그니처", beans_lst)

    # These exact names get the "_쥬시" suffix regardless of category.
    tmp.loc[tmp["상품명"] == "텐저린카푸치노", "상품명"] = "텐저린카푸치노_쥬시"
    tmp.loc[tmp["상품명"] == "아이스텐저린라떼", "상품명"] = "아이스텐저린라떼_쥬시"
    tmp.loc[tmp["상품명"] == "아이스유자아메리카노", "상품명"] = "아이스유자아메리카노_쥬시"

    # Category - 비버리지
    _sub_in_category(tmp, "비버리지", r"\s", "")

    rename_dict = {"차가운어린이우유": "(I)어린이우유",
                   "따뜻한어린이우유": "(H)어린이우유",

                   "얼그레이밀크티": "(H)얼그레이밀크티",

                   "제주유기농귤피주스" : "(I)제주유기농귤피주스",
                   "문경선암리사과주스" : "(I)문경선암리사과주스",
                   "제주유기농감귤주스" : "(I)제주유기농감귤주스",
                   "어린이감귤주스" : "(I)어린이감귤주스",

                   "시나몬플럼" : "(H)시나몬플럼",
                   "트로피칼루이보스" : "(H)트로피칼루이보스",
                   "카모마일" : "(H)카모마일"
                  }
    _apply_in_category(tmp, "비버리지", lambda x: rename_dict.get(x, x))

    # Category - 디저트
    _sub_in_category(tmp, "디저트", r"\s", r'_')

    # Category - 블렌딩원두
    _sub_in_category(tmp, "블렌딩원두", pattern, r'_')
    _sub_in_category(tmp, "블렌딩원두", r"\s", r'_')

    # Category - 세트
    _sub_in_category(tmp, "세트", r"\s", '')
    # NOTE(review): the "." is an unescaped regex wildcard, so "Set" plus any one character is removed.
    _sub_in_category(tmp, "세트", "Set.", "")
    _move_bean_to_suffix(tmp, "세트", beans_lst)

    # Set names without a literal "(I)" marker get "(H)"; then every set name gets "Set_".
    is_set = tmp["카테고리"] == "세트"
    has_ice_marker = tmp["상품명"].str.contains("(I)", regex=False, na=False)
    needs_hot_prefix = is_set & ~has_ice_marker
    tmp.loc[needs_hot_prefix, "상품명"] = tmp.loc[needs_hot_prefix, "상품명"].apply(lambda x: "(H)" + x)
    _apply_in_category(tmp, "세트", lambda x: "Set_" + x)

    # Category - 드립백/캡슐
    _sub_in_category(tmp, "드립백/캡슐", r"\s", r'_')

    # Category - 에스프레소 (uses two additional bean tokens)
    _sub_in_category(tmp, "에스프레소", pattern, r'_')
    _sub_in_category(tmp, "에스프레소", r"\s", r'_')
    espresso_beans = ["클래식_", "쥬시_", "싱글_", "디카프_", "스페셜_", "샘플_"]
    _move_bean_to_suffix(tmp, "에스프레소", espresso_beans)

    # Category - 핸드드립
    is_handdrip = tmp["카테고리"] == "핸드드립"
    tmp.loc[is_handdrip, "상품명"] = tmp.loc[is_handdrip, "상품명"].str.strip()
    _sub_in_category(tmp, "핸드드립", r'\)\s', ")")
    _sub_in_category(tmp, "핸드드립", r'\s*:\s*', "_")
    _sub_in_category(tmp, "핸드드립", r'\s+', "_")
    _sub_in_category(tmp, "핸드드립", r'\(강배전\)|\(강\)', "강배전")
    _sub_in_category(tmp, "핸드드립", r'\(중강배전\)|\(중\)', "중강배전")
    _sub_in_category(tmp, "핸드드립", r'\(디카프\)', "디카프")

    # Origin column: starts as a copy of the cleaned product name.
    tmp["상품명_원산지"] = tmp["상품명"].copy()
    single_names = tmp.loc[tmp["카테고리"] == "싱글원두", "상품명_원산지"]
    idx = single_names[single_names.str.contains("ㅡ")].index
    tmp = tmp.drop(idx, axis = 0).reset_index(drop = True)

    rename_dict = {"디카페인 우일라 200g" : "디카페인 콜롬비아 우일라 200g",
                   "디카페인콜롬비아 리치 200g" : "디카페인 콜롬비아 리치 200g",
                   "엘리다 카투아이 100g" : "파나마 엘리다 카투아이 100g",
                   "엘리다 카투아이 ASD 100g" : "파나마 엘리다 카투아이 100g",
                   "엘파라이소 디카프 100g" : "콜롬비아 엘파라이소 디카프 100g",
                   "엘파라이소 리치 100g" : "콜롬비아 엘파라이소 리치 100g",
                   "엘파라이소 리치" : "콜롬비아 엘파라이소 리치",
                   "부산제 200g" : "르완다 부산제 200g",
                   "르완다부산제 200g" : "르완다 부산제 200g",
                   "에콰100g" : "에콰도르 100g",
                   "세로아줄 게이샤" : "콜롬비아 세로아줄 게이샤",
                   "페루게이샤" : "페루 게이샤",
                   "페루게이샤 100g" : "페루 게이샤 100g",
                   "니카라과강배전" : "니카라과 강배전",
                   "케냐키티투 200g" : "케냐 키티투 200g",
                   "케냐캄왕기. 200g" : "케냐 캄왕기 200g",
                   "쿠쿠세" : "에티오피아 쿠쿠세",
                   "(할인) 케냐카루만디 200g" : "(할인) 케냐 카루만디 200g",
                   "니카라과핀카케냐바티안" : "니카라과 핀카케냐바티안",
                   "온다라스 엘 케브라초 파라이네마 200g" : "온두라스 엘 케브라초 파라이네마 200g",
                   "페루엘사포테 200g" : "페루 엘사포테 200g",
                   "니카라과리틀 레드 200g" : "니카라과 리틀 레드 200g",
                   "(디카페인)콜롬비아 리치 200g" : "(디카페인) 콜롬비아 리치 200g",
                   "(디카페인)콜롬비아 리치 100g" : "(디카페인) 콜롬비아 리치 100g",
                   "콜룸비니 엘 파라이소 리치 100g" : "콜롬비아 엘 파라이소 리치 100g",
                   "(할인) 디카프 / 콜롬비아 엘 파라이소 리치" : "(할인) 디카프 콜롬비아 엘 파라이소 리치",
                   "[로우카페인] 시티트래블러" : "시티트래블러 로우카페인",

                   "(I)디카프_에티오피아" : "(I)에티오피아_디카프",
                   "(H)디카프_에티오피아" : "(H)에티오피아_디카프",
                   "(H)과테_레드_파카마라" : "(H)과테말라_레드_파카마라",
                   "(I)과테_레드_파카마라" : "(I)과테말라_레드_파카마라",
                   "(I)과테말라엘모리또" : "(I)과테말라_엘모리또",
                   "(H)과테말라엘모리또" : "(H)과테말라_엘모리또",
                   "(H)케냐띠리쿠" : "(H)케냐_띠리쿠",
                   "(H)콰트로_콜롬비아" : "(H)콜롬비아_콰트로",
                   "(I)콰트로_콜롬비아" : "(I)콜롬비아_콰트로",
                   "(H)디카페인_콜롬비아" : "(H)콜롬비아_디카페인",
                   "(I)디카페인_콜롬비아" : "(I)콜롬비아_디카페인",
                   "(H)디카페인_니카라과" : "(H)니카라과_디카페인",
                   "(I)디카페인_니카라과" : "(I)니카라과_디카페인",
                   "(I)디카페인_에티오피아" : "(I)에티오피아_디카페인",
                   "(H)디카페인_에티오피아" : "(H)에티오피아_디카페인",
                   "(I)오늘의커피" : "(I)오늘의_커피",
                   "(H)오늘의커피" : "(H)오늘의_커피"
                }

    # The mask is recomputed here because the index was renumbered above.
    is_single = tmp["카테고리"] == "싱글원두"
    tmp.loc[is_single, "상품명_원산지"] = tmp.loc[is_single, "상품명_원산지"].str.strip()
    # Whole-value replacement across all categories.
    tmp.loc[:, "상품명_원산지"] = tmp.loc[:, "상품명_원산지"].replace(rename_dict)

    # 핸드드립 origin: fix a spelling, drop the (H)/(I) marker, keep the first "_" token.
    _sub_in_category(tmp, "핸드드립", "예맨", "예멘", column="상품명_원산지")
    _sub_in_category(tmp, "핸드드립", r"\(H\)|\(I\)", "", column="상품명_원산지")
    _apply_in_category(tmp, "핸드드립", lambda x: x.split("_")[0], column="상품명_원산지")

    # 싱글원두 origin: strip brackets, weights and discount/roast words, then underscore-join.
    _sub_in_category(tmp, "싱글원두", pattern, " ", column="상품명_원산지")
    _sub_in_category(tmp, "싱글원두", "[()]", "", column="상품명_원산지")
    _sub_in_category(tmp, "싱글원두", r"_?[0-9]*g|할인|강배전", "", column="상품명_원산지")
    is_single = tmp["카테고리"] == "싱글원두"
    tmp.loc[is_single, "상품명_원산지"] = tmp.loc[is_single, "상품명_원산지"].str.strip()
    _sub_in_category(tmp, "싱글원두", r"\s", "_", column="상품명_원산지")

    # Move a leading qualifier to the end so the first token is kept by the split below.
    lst = ["디카페인_", "디카프_", "콰트로_"]
    for i in lst:
        _apply_in_category(
            tmp,
            "싱글원두",
            lambda x: x[len(i):] + "_" + i[:-1] if x[:len(i)] == i else x,
            column="상품명_원산지",
        )
    _apply_in_category(tmp, "싱글원두", lambda x: x.split("_")[0], column="상품명_원산지")

    return tmp

In [6]:
pd.options.display.max_columns = None

# Run the preprocessing steps in order; every step takes and returns a DataFrame.
offline_df = (
    offline_raw
    .pipe(convert_to_nan)
    .pipe(drop_columns)
    .pipe(date_conversion)
    .pipe(add_weekend)
    .pipe(add_season)
    .pipe(add_holiday)
    .pipe(preprocess_productname)
    .pipe(drop_row)
)

# Keep only the rows flagged as weekdays (is_weekend == 0) and renumber the index.
offline_df = offline_df.loc[offline_df['is_weekend'] == 0].reset_index(drop=True)



# apriori

휘낭시에 쌍 포함, 휘낭시에 쌍 제거
* 전체(밑에 있는 명단 전부)
* 커피류(핸드드립 제외, 비버리지 제외)
* 커피류 + 디저트
* 시그니처
* 시그니처 + 디저트
* 에스프레소
* 에스프레소 + 디저트
* 베이직(베이직 아이스 포함)
* 베이직 + 디저트
* 비버리지
* 비버리지 + 디저트


In [7]:
# Coffee group (커피류) = the categories '시그니처', 'Basic_ice', '에스프레소', 'Basic'

In [8]:
# Coarse category: the two basic menus are merged into '베이직';
# every other category keeps its original name.
offline_df['카테고리2'] = offline_df['카테고리'].replace({'Basic_ice': '베이직', 'Basic': '베이직'})

# Restrict the analysis to the five coarse categories of interest.
offline_df2 = offline_df.loc[
    offline_df['카테고리2'].isin(['에스프레소', '비버리지', '시그니처', '디저트', '베이직'])
]

menu_df_set = pd.read_csv('./menu_repl_df.csv')

# Left join; no `on` is given, so pandas joins on the columns the two frames share.
offline_df2 = offline_df2.merge(menu_df_set[['상품명', '상품재분류']], how='left')

### 1. 휘낭시에 쌍 포함

In [9]:
from matplotlib.colors import LinearSegmentedColormap
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [10]:
# Build one basket (list of product names) per payment timestamp.
records = offline_df2[["결제일시", "상품명", "카테고리"]].dropna()

# 핸드드립 names are reduced to their first "_" token without the (H)/(I) marker.
# NOTE(review): offline_df2 is filtered to categories that do not include
# 핸드드립, so this step presumably selects no rows — confirm.
is_handdrip = records["카테고리"] == "핸드드립"
records.loc[is_handdrip, "상품명"] = records.loc[is_handdrip, "상품명"].apply(
    lambda name: re.sub(r"\(H\)|\(I\)", "", name).split("_")[0]
)

# Rows sharing the same 결제일시 are treated as one transaction.
records = records.groupby("결제일시").agg({"상품명": list}).reset_index(drop=True)

# Keep baskets with more than one line item.
records = records[records["상품명"].apply(len) > 1]

In [11]:
# Encode the baskets as a table with one row per basket and one column per product.
te = TransactionEncoder()
te_ary = te.fit(records["상품명"]).transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns=te.columns_)

In [12]:
# Frequent itemsets with support >= 0.005 and at most 5 items,
# annotated with their size and ordered by descending support.
itemset = (
    apriori(te_df, min_support=0.005, max_len=5, use_colnames=True, verbose=1)
    .assign(length=lambda found: found['itemsets'].map(len))
    .sort_values(by='support', ascending=False)
)

Processing 16 combinations | Sampling itemset size 4 3


In [13]:
# `association_rules` is already imported alongside `apriori` in the mlxtend
# import cell earlier in this notebook, so it is not re-imported here.
# Keep the rules whose lift is at least 1 and show them by descending lift.
association_df = association_rules(itemset, metric="lift", min_threshold=1)
association_df.sort_values(by="lift", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
16,(무화과_휘낭시에),(플레인_휘낭시에),0.040308,0.041562,0.012586,0.312241,7.512623,0.010910,1.393566
17,(플레인_휘낭시에),(무화과_휘낭시에),0.041562,0.040308,0.012586,0.302817,7.512623,0.010910,1.376528
27,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.094121,0.037172,0.008948,0.095069,2.557555,0.005449,1.063980
26,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.037172,0.094121,0.008948,0.240720,2.557555,0.005449,1.193076
12,(카페_루이지),(슈퍼클린_클래식),0.060420,0.107083,0.014175,0.234602,2.190841,0.007705,1.166605
...,...,...,...,...,...,...,...,...,...
50,(아이스텐저린라떼_쥬시),((I)제주유기농감귤주스),0.403830,0.014802,0.006063,0.015013,1.014299,0.000085,1.000215
41,(플레인_휘낭시에),(텐저린카푸치노_쥬시),0.041562,0.197566,0.008237,0.198189,1.003152,0.000026,1.000777
40,(텐저린카푸치노_쥬시),(플레인_휘낭시에),0.197566,0.041562,0.008237,0.041693,1.003152,0.000026,1.000137
8,(텐저린카푸치노_쥬시),(슈퍼클린_쥬시),0.197566,0.084839,0.016767,0.084868,1.000343,0.000006,1.000032


In [14]:
offline_dfset = offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

# Product name -> coarse category. If a name occurs with several categories,
# the last occurrence wins.
repl_dict = dict(zip(offline_dfset['상품명'], offline_dfset['카테고리2']))

# Label each side of a rule with the categories of its items joined by "_".
# NOTE(review): the categories are joined in the iteration order of each
# itemset; if these are (frozen)sets, that order is not guaranteed to be
# stable between kernel sessions — confirm.
association_df['antecedents_cat'] = [
    '_'.join(repl_dict[item] for item in items)
    for items in association_df['antecedents']
]
association_df['consequents_cat'] = [
    '_'.join(repl_dict[item] for item in items)
    for items in association_df['consequents']
]

# Combined label "<antecedent categories>_<consequent categories>".
association_df['cat'] = association_df['antecedents_cat'] + '_' + association_df['consequents_cat']

전체

In [15]:
# Report group -> the `cat` labels ("<antecedent categories>_<consequent categories>")
# that belong to it. The labels are built by joining categories in itemset
# iteration order, so every ordering of a category combination is listed.
# '디저트_디저트_시그니처' was the one ordering of {시그니처, 디저트, 디저트} that
# was missing from the signature+dessert lists.
select_zip = {
    '커피류': ['베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처',
            '시그니처_에스프레소', '시그니처_시그니처'],
    '커피류+디저트': ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
                '베이직_디저트', '디저트_베이직',
                '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
                '디저트_시그니처_시그니처', '디저트_디저트_시그니처'],
    '시그니처': ['시그니처_시그니처'],
    '시그니처+디저트': ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
                 '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
                 '디저트_시그니처_시그니처', '디저트_디저트_시그니처'],
    '에스프레소': ['에스프레소_에스프레소'],
    '에스프레소+디저트': [],
    '베이직': ['베이직_베이직'],
    '베이직+디저트': ['베이직_디저트', '디저트_베이직'],
    '비버리지': [],
    '비버리지+디저트': ['비버리지_디저트', '디저트_비버리지'],
}

커피류

In [16]:
# Rules whose category label belongs to the '커피류' group.
association_df.loc[
    association_df['cat'].isin(select_zip['커피류']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.11854,0.49702,1.230764
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.11854,0.293539,1.230764
2,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.02283,0.115556,1.07912
3,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.02283,0.213198,1.07912
6,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.017896,0.264361,1.469654
7,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.017896,0.099489,1.469654
8,(텐저린카푸치노_쥬시),(슈퍼클린_쥬시),0.016767,0.084868,1.000343
9,(슈퍼클린_쥬시),(텐저린카푸치노_쥬시),0.016767,0.197634,1.000343
12,(카페_루이지),(슈퍼클린_클래식),0.014175,0.234602,2.190841
13,(슈퍼클린_클래식),(카페_루이지),0.014175,0.13237,2.190841


커피류+디저트

In [17]:
# Rules whose category label belongs to the '커피류+디저트' group.
association_df.loc[
    association_df['cat'].isin(select_zip['커피류+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.018314,0.101813,1.096336
5,(브라우니),((I)아메리카노_클래식),0.018314,0.197208,1.096336
24,((H)아메리카노_클래식),(브라우니),0.009115,0.096846,1.042848
25,(브라우니),((H)아메리카노_클래식),0.009115,0.098154,1.042848
28,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.008823,0.497642,1.232304
29,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.008823,0.270166,1.132767
30,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.008823,0.036992,1.132767
31,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.008823,0.021847,1.232304
34,(텐저린카푸치노_쥬시),(잠봉뵈르),0.008572,0.043386,1.031437
35,(잠봉뵈르),(텐저린카푸치노_쥬시),0.008572,0.203777,1.031437


시그니처

In [18]:
# Rules whose category label belongs to the '시그니처' group.
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.11854,0.49702,1.230764
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.11854,0.293539,1.230764


시그니처+디저트

In [19]:
# Rules whose category label belongs to the '시그니처+디저트' group.
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
28,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.008823,0.497642,1.232304
29,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.008823,0.270166,1.132767
30,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.008823,0.036992,1.132767
31,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.008823,0.021847,1.232304
34,(텐저린카푸치노_쥬시),(잠봉뵈르),0.008572,0.043386,1.031437
35,(잠봉뵈르),(텐저린카푸치노_쥬시),0.008572,0.203777,1.031437
38,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.008321,0.471564,1.167729
39,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.008321,0.020605,1.167729
40,(텐저린카푸치노_쥬시),(플레인_휘낭시에),0.008237,0.041693,1.003152
41,(플레인_휘낭시에),(텐저린카푸치노_쥬시),0.008237,0.198189,1.003152


에스프레소

In [20]:
# Rules whose category label belongs to the '에스프레소' group.
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
12,(카페_루이지),(슈퍼클린_클래식),0.014175,0.234602,2.190841
13,(슈퍼클린_클래식),(카페_루이지),0.014175,0.13237,2.190841
20,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011039,0.103085,1.215069
21,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011039,0.130113,1.215069
22,(카페_루이지),(슈퍼클린_쥬시),0.009199,0.152249,1.794574
23,(슈퍼클린_쥬시),(카페_루이지),0.009199,0.108428,1.794574


에스프레소+디저트

In [21]:
# Rules whose category label belongs to the '에스프레소+디저트' group
# (its label list in select_zip is empty, so this selects no rows).
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift


베이직

In [22]:
# Rules whose category label belongs to the '베이직' group.
association_df.loc[
    association_df['cat'].isin(select_zip['베이직']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
6,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.017896,0.264361,1.469654
7,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.017896,0.099489,1.469654
18,((I)카페라떼_클래식),((I)아메리카노_클래식),0.01108,0.235975,1.31185
19,((I)아메리카노_클래식),((I)카페라떼_클래식),0.01108,0.061599,1.31185
26,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.008948,0.24072,2.557555
27,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.008948,0.095069,2.557555
32,((H)카페라떼_클래식),((H)아메리카노_클래식),0.008781,0.203883,2.166183
33,((H)아메리카노_클래식),((H)카페라떼_클래식),0.008781,0.093292,2.166183
42,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.008112,0.189084,1.051169
43,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.008112,0.045095,1.051169


베이직+디저트

In [23]:
# Rules whose category label belongs to the '베이직+디저트' group.
association_df.loc[
    association_df['cat'].isin(select_zip['베이직+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.018314,0.101813,1.096336
5,(브라우니),((I)아메리카노_클래식),0.018314,0.197208,1.096336
24,((H)아메리카노_클래식),(브라우니),0.009115,0.096846,1.042848
25,(브라우니),((H)아메리카노_클래식),0.009115,0.098154,1.042848


비버리지

In [24]:
# Rules whose category label belongs to the '비버리지' group
# (its label list in select_zip is empty, so this selects no rows).
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift


비버리지+디저트

In [25]:
# Rules whose category label belongs to the '비버리지+디저트' group.
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
52,((I)제주유기농귤피주스),(바스크_치즈케이크),0.005937,0.101356,1.039912
53,(바스크_치즈케이크),((I)제주유기농귤피주스),0.005937,0.060918,1.039912
58,((I)얼그레이밀크티),(브라우니),0.005478,0.121747,1.310989
59,(브라우니),((I)얼그레이밀크티),0.005478,0.058982,1.310989


### 2. 휘낭시에 쌍 제거 

In [26]:
# Same basket construction as above, but rows whose 상품재분류 is '휘낭시에'
# are excluded first.
not_financier = offline_df2['상품재분류'] != '휘낭시에'
records = offline_df2.loc[not_financier, ["결제일시", "상품명", "카테고리"]].dropna()

# 핸드드립 names are reduced to their first "_" token without the (H)/(I) marker.
# NOTE(review): offline_df2 is filtered to categories that do not include
# 핸드드립, so this step presumably selects no rows — confirm.
is_handdrip = records["카테고리"] == "핸드드립"
records.loc[is_handdrip, "상품명"] = records.loc[is_handdrip, "상품명"].apply(
    lambda name: re.sub(r"\(H\)|\(I\)", "", name).split("_")[0]
)

# Rows sharing the same 결제일시 are treated as one transaction.
records = records.groupby("결제일시").agg({"상품명": list}).reset_index(drop=True)

# Keep baskets with more than one line item.
records = records[records["상품명"].apply(len) > 1]

In [27]:
# Encode the baskets as a table with one row per basket and one column per product.
te = TransactionEncoder()
te_ary = te.fit(records["상품명"]).transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns=te.columns_)

In [28]:
# Frequent itemsets with support >= 0.005 and at most 5 items,
# annotated with their size and ordered by descending support.
itemset = (
    apriori(te_df, min_support=0.005, max_len=5, use_colnames=True, verbose=1)
    .assign(length=lambda found: found['itemsets'].map(len))
    .sort_values(by='support', ascending=False)
)

Processing 16 combinations | Sampling itemset size 4 3


In [29]:
# `association_rules` is already imported alongside `apriori` in the mlxtend
# import cell earlier in this notebook, so it is not re-imported here.
# Keep the rules whose lift is at least 1 and show them by descending lift.
association_df = association_rules(itemset, metric="lift", min_threshold=1)
association_df.sort_values(by="lift", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
27,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.096034,0.037888,0.009224,0.096050,2.535115,0.005586,1.064342
26,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.037888,0.096034,0.009224,0.243458,2.535115,0.005586,1.194866
32,((H)카페라떼_클래식),((H)아메리카노_클래식),0.043621,0.096034,0.009052,0.207510,2.160785,0.004863,1.140665
33,((H)아메리카노_클래식),((H)카페라떼_클래식),0.096034,0.043621,0.009052,0.094255,2.160785,0.004863,1.055903
15,(슈퍼클린_클래식),(카페_루이지),0.109526,0.061940,0.014612,0.133412,2.153904,0.007828,1.082476
...,...,...,...,...,...,...,...,...,...
50,((I)아메리카노_클래식),((I)문경선암리사과주스),0.183448,0.032586,0.006078,0.033130,1.016679,0.000100,1.000562
5,(바스크_치즈케이크),(텐저린카푸치노_쥬시),0.099009,0.199397,0.019784,0.199826,1.002153,0.000043,1.000537
4,(텐저린카푸치노_쥬시),(바스크_치즈케이크),0.199397,0.099009,0.019784,0.099222,1.002153,0.000043,1.000237
11,(슈퍼클린_쥬시),(텐저린카푸치노_쥬시),0.086595,0.199397,0.017284,0.199602,1.001029,0.000018,1.000256


In [30]:
offline_dfset = offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

# Product name -> coarse category. If a name occurs with several categories,
# the last occurrence wins.
repl_dict = dict(zip(offline_dfset['상품명'], offline_dfset['카테고리2']))

# Label each side of a rule with the categories of its items joined by "_".
# NOTE(review): the categories are joined in the iteration order of each
# itemset; if these are (frozen)sets, that order is not guaranteed to be
# stable between kernel sessions — confirm.
association_df['antecedents_cat'] = [
    '_'.join(repl_dict[item] for item in items)
    for items in association_df['antecedents']
]
association_df['consequents_cat'] = [
    '_'.join(repl_dict[item] for item in items)
    for items in association_df['consequents']
]

# Combined label "<antecedent categories>_<consequent categories>".
association_df['cat'] = association_df['antecedents_cat'] + '_' + association_df['consequents_cat']

In [31]:
## All groups
# Report group -> the `cat` labels ("<antecedent categories>_<consequent categories>")
# that belong to it. The labels are built by joining categories in itemset
# iteration order, so every ordering of a category combination is listed.
# '디저트_디저트_시그니처' was the one ordering of {시그니처, 디저트, 디저트} that
# was missing from the signature+dessert lists.
select_zip = {
    '커피류': ['베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처',
            '시그니처_에스프레소', '시그니처_시그니처'],
    '커피류+디저트': ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
                '베이직_디저트', '디저트_베이직',
                '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
                '디저트_시그니처_시그니처', '디저트_디저트_시그니처'],
    '시그니처': ['시그니처_시그니처'],
    '시그니처+디저트': ['시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
                 '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
                 '디저트_시그니처_시그니처', '디저트_디저트_시그니처'],
    '에스프레소': ['에스프레소_에스프레소'],
    '에스프레소+디저트': [],
    '베이직': ['베이직_베이직'],
    '베이직+디저트': ['베이직_디저트', '디저트_베이직'],
    '비버리지': [],
    '비버리지+디저트': ['비버리지_디저트', '디저트_비버리지'],
}

## Coffee group (커피류)
association_df.loc[
    association_df['cat'].isin(select_zip['커피류']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.122198,0.502392,1.223161
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.122198,0.297513,1.223161
2,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.023534,0.118029,1.077632
3,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.023534,0.214876,1.077632
8,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018448,0.265509,1.447322
9,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018448,0.100564,1.447322
10,(텐저린카푸치노_쥬시),(슈퍼클린_쥬시),0.017284,0.086684,1.001029
11,(슈퍼클린_쥬시),(텐저린카푸치노_쥬시),0.017284,0.199602,1.001029
14,(카페_루이지),(슈퍼클린_클래식),0.014612,0.235908,2.153904
15,(슈퍼클린_클래식),(카페_루이지),0.014612,0.133412,2.153904


In [32]:

## Coffee + dessert (커피류+디저트)
association_df.loc[
    association_df['cat'].isin(select_zip['커피류+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,(텐저린카푸치노_쥬시),(바스크_치즈케이크),0.019784,0.099222,1.002153
5,(바스크_치즈케이크),(텐저린카푸치노_쥬시),0.019784,0.199826,1.002153
6,((I)아메리카노_클래식),(브라우니),0.018879,0.102914,1.088734
7,(브라우니),((I)아메리카노_클래식),0.018879,0.199726,1.088734
24,((H)아메리카노_클래식),(브라우니),0.009397,0.097846,1.03512
25,(브라우니),((H)아메리카노_클래식),0.009397,0.099407,1.03512
28,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.009095,0.497642,1.211594
29,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009095,0.270166,1.110732
30,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009095,0.037391,1.110732
31,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.009095,0.022143,1.211594


In [33]:

## Signature (시그니처)
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.122198,0.502392,1.223161
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.122198,0.297513,1.223161


In [34]:

## Signature + dessert (시그니처+디저트)
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,(텐저린카푸치노_쥬시),(바스크_치즈케이크),0.019784,0.099222,1.002153
5,(바스크_치즈케이크),(텐저린카푸치노_쥬시),0.019784,0.199826,1.002153
28,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.009095,0.497642,1.211594
29,"(아이스텐저린라떼_쥬시, 브라우니)",(아이스유자아메리카노_쥬시),0.009095,0.270166,1.110732
30,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 브라우니)",0.009095,0.037391,1.110732
31,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.009095,0.022143,1.211594
34,(텐저린카푸치노_쥬시),(잠봉뵈르),0.008836,0.044315,1.030162
35,(잠봉뵈르),(텐저린카푸치노_쥬시),0.008836,0.205411,1.030162
38,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.008578,0.471564,1.148104
39,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.008578,0.020884,1.148104


In [35]:

## Espresso (에스프레소)
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
14,(카페_루이지),(슈퍼클린_클래식),0.014612,0.235908,2.153904
15,(슈퍼클린_클래식),(카페_루이지),0.014612,0.133412,2.153904
20,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011379,0.103896,1.199796
21,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011379,0.131409,1.199796
22,(카페_루이지),(슈퍼클린_쥬시),0.009483,0.153097,1.767966
23,(슈퍼클린_쥬시),(카페_루이지),0.009483,0.109507,1.767966


In [36]:

## Espresso + dessert (에스프레소+디저트)
# The label list for this group in select_zip is empty, so this selects no rows.
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [37]:

## Basic (베이직)
association_df.loc[
    association_df['cat'].isin(select_zip['베이직']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
8,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018448,0.265509,1.447322
9,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018448,0.100564,1.447322
18,((I)카페라떼_클래식),((I)아메리카노_클래식),0.011422,0.238954,1.302569
19,((I)아메리카노_클래식),((I)카페라떼_클래식),0.011422,0.062265,1.302569
26,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.009224,0.243458,2.535115
27,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.009224,0.09605,2.535115
32,((H)카페라떼_클래식),((H)아메리카노_클래식),0.009052,0.20751,2.160785
33,((H)아메리카노_클래식),((H)카페라떼_클래식),0.009052,0.094255,2.160785
40,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.008362,0.189638,1.033743
41,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.008362,0.045583,1.033743


In [38]:

## Basic + dessert (베이직+디저트)
association_df.loc[
    association_df['cat'].isin(select_zip['베이직+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
6,((I)아메리카노_클래식),(브라우니),0.018879,0.102914,1.088734
7,(브라우니),((I)아메리카노_클래식),0.018879,0.199726,1.088734
24,((H)아메리카노_클래식),(브라우니),0.009397,0.097846,1.03512
25,(브라우니),((H)아메리카노_클래식),0.009397,0.099407,1.03512
60,(바스크_치즈케이크),((H)카페라떼_클래식),0.005086,0.051371,1.177683
61,((H)카페라떼_클래식),(바스크_치즈케이크),0.005086,0.116601,1.177683


In [39]:

## Beverage (비버리지)
# The label list for this group in select_zip is empty, so this selects no rows.
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [40]:

## Beverage + dessert (비버리지+디저트)
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
48,((I)제주유기농귤피주스),(바스크_치즈케이크),0.006121,0.101719,1.027377
49,(바스크_치즈케이크),((I)제주유기농귤피주스),0.006121,0.06182,1.027377
54,((I)얼그레이밀크티),(브라우니),0.005647,0.122201,1.292784
55,(브라우니),((I)얼그레이밀크티),0.005647,0.059736,1.292784
