In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from os.path import join
import matplotlib.pyplot as plt
import datetime as dt
import re
from tabulate import tabulate


## Local Load

In [2]:
# Read the raw export into `offline_raw`; the path is relative to the
# notebook's working directory.
# NOTE(review): the commented-out Colab loader below refers to
# '9. offline_total.xlsx' (space, no underscore) — confirm which file name is current.
# path = join(os.getcwd(), "data")
offline_raw = pd.read_excel("./9._offline_total.xlsx")

## Colab Load

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

# # 데이터 불러오기 (이후 전처리 셀이 사용하는 변수명은 offline_raw)
# offline_raw = pd.read_excel('/content/drive/MyDrive/9. offline_total.xlsx')

# plt.rcParams['font.family'] = 'AppleGothic' # 폰트 변경
# plt.rcParams['axes.unicode_minus'] = False # 축 값 마이너스 깨짐 해결

In [4]:
def convert_to_nan(data):
    """Return a copy of ``data`` in which every cell equal to "-" is NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the "-" placeholder replaced cell by cell.
    """

    def _dash_to_nan(value):
        # Only an exact "-" counts as a placeholder; everything else passes through.
        if value == "-":
            return np.nan
        return value

    cleaned = data.copy()
    for column_name in cleaned.columns.to_list():
        cleaned[column_name] = cleaned[column_name].apply(_dash_to_nan)

    return cleaned


def drop_columns(data):
    """Return a copy of ``data`` without the columns listed below.

    Raises ``KeyError`` (from pandas) if any of the listed columns is absent.
    """
    unused_columns = [
        "온라인 스토어",
        "사용 포인트",
        "적립 포인트",
        "사용 선불권",
        "배달팁(매출 포함x)",
        "결제메모",
        "주문 채널",
    ]
    return data.copy().drop(columns=unused_columns)


def date_conversion(data):
    """Add a parsed payment timestamp and calendar features derived from it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the string columns "결제일" (date) and "결제시간" (time);
        they are concatenated with a space and parsed by ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with these columns added, in this order: "결제일시"
        (datetime), "year", "month", "day", "hour", "day_name" (English weekday
        name) and "year_month" ("YYYY-MM" string built from "결제일").

    Notes
    -----
    The parts are taken with the vectorised ``.dt`` accessor rather than a
    Python-level ``apply`` per row. Values are unchanged; the integer columns
    use whatever integer dtype pandas returns for ``.dt`` fields.
    """
    df = data.copy()

    df["결제일시"] = pd.to_datetime(df["결제일"] + " " + df["결제시간"])

    paid_at = df["결제일시"].dt
    df["year"] = paid_at.year
    df["month"] = paid_at.month
    df["day"] = paid_at.day
    df["hour"] = paid_at.hour
    df["day_name"] = paid_at.day_name()
    df["year_month"] = pd.to_datetime(df["결제일"]).dt.strftime("%Y-%m")

    return df


def add_weekend(data):
    """Return a copy of ``data`` with an ``is_weekend`` flag.

    ``is_weekend`` is 1 when "day_name" is "Saturday" or "Sunday" and 0 otherwise
    (0 = weekday, 1 = weekend).
    """
    flagged = data.copy()
    weekend_names = ["Sunday", "Saturday"]
    flagged["is_weekend"] = flagged["day_name"].isin(weekend_names).astype("int64")
    return flagged


def add_season(data):
    """Return a copy of ``data`` with a ``season`` code derived from "month".

    Encoding: spring (Mar-May) = 1, summer (Jun-Aug) = 2,
    autumn (Sep-Nov) = 3, winter (Dec-Feb) = 4.

    The previous lookup list was shifted by one season against this documented
    encoding (January and February mapped to 1, March to 2, ...). The mapping
    below is written month by month so it matches the encoding exactly.
    Months outside 1-12 map to NaN.
    """
    df = data.copy()

    season_by_month = {
        3: 1, 4: 1, 5: 1,     # spring
        6: 2, 7: 2, 8: 2,     # summer
        9: 3, 10: 3, 11: 3,   # autumn
        12: 4, 1: 4, 2: 4,    # winter
    }
    df["season"] = df["month"].map(season_by_month)

    return df


def add_holiday(data):
    """Return a copy of ``data`` with holiday flags.

    Adds ``is_holiday`` (1 when "결제일" equals one of the hard-coded dates,
    else 0) and ``weekend_n_holiday`` (``is_weekend + is_holiday``).

    NOTE(review): ``weekend_n_holiday`` is a sum, so it is 2 when a listed
    holiday falls on a weekend — confirm that consumers expect 0/1/2.
    NOTE(review): '2023-05-26' looks off by one day for a public holiday —
    verify the list against an official calendar.
    """
    holiday_dates = [
        '2022-03-01', '2022-05-05', '2022-05-08',
        '2022-06-06', '2022-08-15', '2022-09-09',
        '2022-09-10', '2022-09-11', '2022-10-09',
        '2022-10-03', '2022-12-25',
        '2023-01-01', '2023-01-21', '2023-01-22',
        '2023-01-23', '2023-03-01', '2023-05-05',
        '2023-05-26', '2023-06-06',
    ]

    df = data.copy()

    # OR together one equality test per listed date.
    on_holiday = np.logical_or.reduce([df['결제일'] == day for day in holiday_dates])
    df['is_holiday'] = np.where(on_holiday, 1, 0)
    df["weekend_n_holiday"] = df["is_weekend"] + df["is_holiday"]

    return df

def drop_row(data):
    """Remove non-product rows and tag customisation rows.

    Steps, applied to a copy of ``data``:
    1. Strip every whitespace character from "상품명".
    2. Drop rows whose name contains any of the ``excluded`` keywords, then
       renumber the index from 0.
    3. Set "카테고리" to "커스텀" for rows whose name contains any of the
       ``custom_options`` keywords.

    Keywords are joined with "|" and matched as a regular expression.
    """
    excluded = [
        '야외', '포장', '무료시음권', '캐리어', '종이백', '포크',
        '⚪️',
        '⚪',  # distinct from the emoji sequence on the previous line
        '일회용컵',
    ]
    custom_options = [
        '덜달게', '1샷추가', '오틀리', '연하게', '시럽', '얼음적게',
        '오트사이드', '물적게', '바닐라시럽', '2샷추가', '얼음X', '샷추가',
    ]

    df = data.copy()
    df["상품명"] = df["상품명"].apply(lambda name: re.sub(r"\s", "", name))

    excluded_mask = df["상품명"].str.contains("|".join(excluded))
    df = df.drop(df.index[excluded_mask], axis = 0).reset_index(drop = True)

    custom_mask = df["상품명"].str.contains("|".join(custom_options))
    df.loc[df.index[custom_mask], "카테고리"] = "커스텀"

    return df


In [5]:
def _sub_names(df, category, pattern, repl, column="상품명"):
    """Apply ``re.sub(pattern, repl, value)`` in place to ``column`` for rows of ``category``."""
    rows = df["카테고리"] == category
    df.loc[rows, column] = df.loc[rows, column].apply(lambda value: re.sub(pattern, repl, value))


def _move_bean_to_suffix(df, category, beans, column="상품명"):
    """Turn "<bean>_<name>" into "<name>_<bean>" in place for rows of ``category``."""
    for bean in beans:
        names = df.loc[df["카테고리"] == category, column]
        idx = names[names.str.contains(bean)].index
        # NOTE(review): rows are selected with `contains`, but the slice below
        # assumes the bean marker is at the very start of the name — confirm
        # the data never has it mid-string.
        df.loc[idx, column] = df.loc[idx, column].apply(
            lambda value: value[len(bean):] + "_" + bean[:-1]
        )


def preprocess_productname(data):
    """Normalise "상품명" per category and derive an origin column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain string columns "카테고리" and "상품명". Not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with:
        * whitespace removed from "카테고리";
        * "상품명" rewritten by category-specific rules (separator clean-up,
          bean marker moved to a suffix, "(H)"/"(I)" markers, "Set_" prefix, ...);
        * a new column "상품명_원산지", reduced to the text before the first
          "_" for the "핸드드립" and "싱글원두" categories;
        * "싱글원두" rows whose name contains "ㅡ" dropped and the index reset.

    Changes from the previous implementation
    ----------------------------------------
    * The set-menu check for the "(I)" marker now matches the literal text.
      It was parsed as a regular expression, where "(I)" is a capture group
      matching any "I", so a set name containing "I" anywhere was never given
      the "(H)" prefix.
    * A ``value_counts()`` call whose result was discarded has been removed.
    * The repeated per-category statements are routed through two helpers;
      the order of operations is unchanged.
    """
    tmp = data.copy()
    tmp["카테고리"] = tmp["카테고리"].apply(lambda x: re.sub(r"\s", "", x))
    pattern = r'\s*_\s*'  # an underscore with optional surrounding whitespace
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]

    # Category: Basic_ice
    _sub_names(tmp, "Basic_ice", pattern, r'_')
    _sub_names(tmp, "Basic_ice", r"\s", "")
    _sub_names(tmp, "Basic_ice", r"플랫_", "플랫화이트_")
    _sub_names(tmp, "Basic_ice", r"템플", "I")
    _move_bean_to_suffix(tmp, "Basic_ice", beans_lst)

    # Category: Basic — drop any existing "(H)" marker, then re-add it as a
    # prefix for the listed drinks.
    _sub_names(tmp, "Basic", pattern, r'_')
    _sub_names(tmp, "Basic", r"\s|\(H\)", "")
    _move_bean_to_suffix(tmp, "Basic", beans_lst)

    basic_names = tmp.loc[tmp["카테고리"] == "Basic", "상품명"]
    idx = basic_names[basic_names.str.contains("아메리카노|카페라떼|플랫화이트|카푸치노|바닐라라떼")].index
    tmp.loc[idx, "상품명"] = tmp.loc[idx, "상품명"].apply(lambda x: "(H)" + x)

    # Category: 시그니처 (signature)
    _sub_names(tmp, "시그니처", r"\s", "")
    _sub_names(tmp, "시그니처", r"아이스텐라", "아이스텐저린라떼")
    _sub_names(tmp, "시그니처", r"유자아메리카노|아이스유자아메리카노", "아이스유자아메리카노")
    _move_bean_to_suffix(tmp, "시그니처", beans_lst)

    # These exact-name renames are applied across all categories.
    tmp.loc[tmp["상품명"] == "텐저린카푸치노", "상품명"] = "텐저린카푸치노_쥬시"
    tmp.loc[tmp["상품명"] == "아이스텐저린라떼", "상품명"] = "아이스텐저린라떼_쥬시"
    tmp.loc[tmp["상품명"] == "아이스유자아메리카노", "상품명"] = "아이스유자아메리카노_쥬시"

    # Category: 비버리지 (beverage)
    _sub_names(tmp, "비버리지", r"\s", "")

    beverage_renames = {"차가운어린이우유": "(I)어린이우유",
                        "따뜻한어린이우유": "(H)어린이우유",

                        "얼그레이밀크티": "(H)얼그레이밀크티",

                        "제주유기농귤피주스" : "(I)제주유기농귤피주스",
                        "문경선암리사과주스" : "(I)문경선암리사과주스",
                        "제주유기농감귤주스" : "(I)제주유기농감귤주스",
                        "어린이감귤주스" : "(I)어린이감귤주스",

                        "시나몬플럼" : "(H)시나몬플럼",
                        "트로피칼루이보스" : "(H)트로피칼루이보스",
                        "카모마일" : "(H)카모마일"
                       }

    is_beverage = tmp["카테고리"] == "비버리지"
    tmp.loc[is_beverage, "상품명"] = tmp.loc[is_beverage, "상품명"].apply(lambda x: beverage_renames.get(x, x))

    # Category: 디저트 (dessert)
    _sub_names(tmp, "디저트", r"\s", r'_')

    # Category: 블렌딩원두 (blended beans)
    _sub_names(tmp, "블렌딩원두", pattern, r'_')
    _sub_names(tmp, "블렌딩원두", r"\s", r'_')

    # Category: 세트 (set menus)
    _sub_names(tmp, "세트", r"\s", '')
    # NOTE(review): the "." here is a regex wildcard, so "Set" plus ANY next
    # character is removed — confirm whether a literal "Set." was intended.
    _sub_names(tmp, "세트", "Set.", "")
    _move_bean_to_suffix(tmp, "세트", beans_lst)

    # Sets without the literal "(I)" marker get "(H)"; regex=False keeps the
    # parentheses from being read as a capture group.
    hot_set = (tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)", regex=False))
    tmp.loc[hot_set, "상품명"] = tmp.loc[hot_set, "상품명"].apply(lambda x: "(H)" + x)
    is_set = tmp["카테고리"] == "세트"
    tmp.loc[is_set, "상품명"] = tmp.loc[is_set, "상품명"].apply(lambda x: "Set_" + x)

    # Category: 드립백/캡슐 (drip bags / capsules)
    _sub_names(tmp, "드립백/캡슐", r"\s", r'_')

    # Category: 에스프레소 (espresso) — uses an extended bean list.
    _sub_names(tmp, "에스프레소", pattern, r'_')
    _sub_names(tmp, "에스프레소", r"\s", r'_')
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_", "스페셜_", "샘플_"]
    _move_bean_to_suffix(tmp, "에스프레소", beans_lst)

    # Category: 핸드드립 (hand drip)
    is_handdrip = tmp["카테고리"] == "핸드드립"
    tmp.loc[is_handdrip, "상품명"] = tmp.loc[is_handdrip, "상품명"].str.strip()
    _sub_names(tmp, "핸드드립", r'\)\s', ")")
    _sub_names(tmp, "핸드드립", r'\s*:\s*', "_")
    _sub_names(tmp, "핸드드립", r'\s+', "_")
    _sub_names(tmp, "핸드드립", r'\(강배전\)|\(강\)', "강배전")
    _sub_names(tmp, "핸드드립", r'\(중강배전\)|\(중\)', "중강배전")
    _sub_names(tmp, "핸드드립", r'\(디카프\)', "디카프")

    # Origin column: starts as a copy of the cleaned product name.
    origin = "상품명_원산지"
    tmp[origin] = tmp["상품명"].copy()

    # Drop single-origin rows whose name contains "ㅡ". The index is renumbered,
    # so masks are recomputed from here on.
    single_names = tmp.loc[tmp["카테고리"] == "싱글원두", origin]
    idx = single_names[single_names.str.contains("ㅡ")].index
    tmp = tmp.drop(idx, axis = 0).reset_index(drop = True)

    # Exact-match corrections applied to the origin column.
    rename_dict = {"디카페인 우일라 200g" : "디카페인 콜롬비아 우일라 200g",
                   "디카페인콜롬비아 리치 200g" : "디카페인 콜롬비아 리치 200g",
                   "엘리다 카투아이 100g" : "파나마 엘리다 카투아이 100g",
                   "엘리다 카투아이 ASD 100g" : "파나마 엘리다 카투아이 100g",
                   "엘파라이소 디카프 100g" : "콜롬비아 엘파라이소 디카프 100g",
                   "엘파라이소 리치 100g" : "콜롬비아 엘파라이소 리치 100g",
                   "엘파라이소 리치" : "콜롬비아 엘파라이소 리치",
                   "부산제 200g" : "르완다 부산제 200g",
                   "르완다부산제 200g" : "르완다 부산제 200g",
                   "에콰100g" : "에콰도르 100g",
                   "세로아줄 게이샤" : "콜롬비아 세로아줄 게이샤",
                   "페루게이샤" : "페루 게이샤",
                   "페루게이샤 100g" : "페루 게이샤 100g",
                   "니카라과강배전" : "니카라과 강배전",
                   "케냐키티투 200g" : "케냐 키티투 200g",
                   "케냐캄왕기. 200g" : "케냐 캄왕기 200g",
                   "쿠쿠세" : "에티오피아 쿠쿠세",
                   "(할인) 케냐카루만디 200g" : "(할인) 케냐 카루만디 200g",
                   "니카라과핀카케냐바티안" : "니카라과 핀카케냐바티안",
                   "온다라스 엘 케브라초 파라이네마 200g" : "온두라스 엘 케브라초 파라이네마 200g",
                   "페루엘사포테 200g" : "페루 엘사포테 200g",
                   "니카라과리틀 레드 200g" : "니카라과 리틀 레드 200g",
                   "(디카페인)콜롬비아 리치 200g" : "(디카페인) 콜롬비아 리치 200g",
                   "(디카페인)콜롬비아 리치 100g" : "(디카페인) 콜롬비아 리치 100g",
                   "콜룸비니 엘 파라이소 리치 100g" : "콜롬비아 엘 파라이소 리치 100g",
                   "(할인) 디카프 / 콜롬비아 엘 파라이소 리치" : "(할인) 디카프 콜롬비아 엘 파라이소 리치",
                   "[로우카페인] 시티트래블러" : "시티트래블러 로우카페인",

                   "(I)디카프_에티오피아" : "(I)에티오피아_디카프",
                   "(H)디카프_에티오피아" : "(H)에티오피아_디카프",
                   "(H)과테_레드_파카마라" : "(H)과테말라_레드_파카마라",
                   "(I)과테_레드_파카마라" : "(I)과테말라_레드_파카마라",
                   "(I)과테말라엘모리또" : "(I)과테말라_엘모리또",
                   "(H)과테말라엘모리또" : "(H)과테말라_엘모리또",
                   "(H)케냐띠리쿠" : "(H)케냐_띠리쿠",
                   "(H)콰트로_콜롬비아" : "(H)콜롬비아_콰트로",
                   "(I)콰트로_콜롬비아" : "(I)콜롬비아_콰트로",
                   "(H)디카페인_콜롬비아" : "(H)콜롬비아_디카페인",
                   "(I)디카페인_콜롬비아" : "(I)콜롬비아_디카페인",
                   "(H)디카페인_니카라과" : "(H)니카라과_디카페인",
                   "(I)디카페인_니카라과" : "(I)니카라과_디카페인",
                   "(I)디카페인_에티오피아" : "(I)에티오피아_디카페인",
                   "(H)디카페인_에티오피아" : "(H)에티오피아_디카페인",
                   "(I)오늘의커피" : "(I)오늘의_커피",
                   "(H)오늘의커피" : "(H)오늘의_커피"
                }

    is_single = tmp["카테고리"] == "싱글원두"
    tmp.loc[is_single, origin] = tmp.loc[is_single, origin].str.strip()
    tmp.loc[:, origin] = tmp.loc[:, origin].replace(rename_dict)

    # Hand drip: fix a spelling, drop the (H)/(I) marker, keep the first "_" token.
    _sub_names(tmp, "핸드드립", "예맨", "예멘", column=origin)
    _sub_names(tmp, "핸드드립", r"\(H\)|\(I\)", "", column=origin)
    is_handdrip = tmp["카테고리"] == "핸드드립"
    tmp.loc[is_handdrip, origin] = tmp.loc[is_handdrip, origin].apply(lambda x: x.split("_")[0])

    # Single origin: strip parentheses, weights and discount / roast words,
    # then join the remaining words with "_".
    _sub_names(tmp, "싱글원두", pattern, " ", column=origin)
    _sub_names(tmp, "싱글원두", "[()]", "", column=origin)
    _sub_names(tmp, "싱글원두", r"_?[0-9]*g|할인|강배전", "", column=origin)
    tmp.loc[is_single, origin] = tmp.loc[is_single, origin].str.strip()
    _sub_names(tmp, "싱글원두", r"\s", "_", column=origin)

    # Move a leading decaf / "콰트로" qualifier behind the origin, so the first
    # "_" token kept below is what followed the qualifier.
    for prefix in ["디카페인_", "디카프_", "콰트로_"]:
        tmp.loc[is_single, origin] = tmp.loc[is_single, origin].apply(
            lambda x: x[len(prefix):] + "_" + prefix[:-1] if x[:len(prefix)] == prefix else x
        )
    tmp.loc[is_single, origin] = tmp.loc[is_single, origin].apply(lambda x: x.split("_")[0])

    return tmp

In [6]:
pd.options.display.max_columns = None

# Run every cleaning / feature step in order, always starting from the
# untouched `offline_raw`.
offline_df = (
    offline_raw
    .pipe(convert_to_nan)
    .pipe(drop_columns)
    .pipe(date_conversion)
    .pipe(add_weekend)
    .pipe(add_season)
    .pipe(add_holiday)
    .pipe(preprocess_productname)
    .pipe(drop_row)
)

# Keep only weekend rows (is_weekend == 1).
offline_df = offline_df.loc[offline_df['is_weekend'] == 1].reset_index(drop = True)



# apriori

휘낭시에 쌍 포함, 휘낭시에 쌍 제거
* 전체(밑에 있는 명단 전부)
* 커피류(핸드드립 제외, 비버리지 제외)
* 커피류 + 디저트
* 시그니처
* 시그니처 + 디저트
* 에스프레소
* 에스프레소 + 디저트
* 베이직(베이직 아이스 포함)
* 베이직 + 디저트
* 비버리지
* 비버리지 + 디저트


In [7]:
# 커피류('시그니처','Basic_ice','에스프레소','Basic')

In [8]:
# Coarse category: the two "Basic" categories collapse into '베이직'; every
# other category keeps its own name, so no identity mappings are needed.
offline_df['카테고리2'] = offline_df['카테고리'].replace({'Basic_ice' : '베이직', 'Basic' : '베이직'})

# Restrict the analysis to these five coarse categories.
offline_df2 = offline_df[offline_df['카테고리2'].isin(['에스프레소', '비버리지', '시그니처', '디저트', '베이직'])]

menu_df_set = pd.read_csv('./menu_repl_df.csv')

# Attach the product re-classification. The join key is spelled out so the
# merge cannot silently switch to other overlapping columns.
# NOTE(review): if menu_repl_df.csv lists a product name more than once, this
# left join duplicates sales rows — consider validate='many_to_one'.
offline_df2 = pd.merge(offline_df2, menu_df_set[['상품명', '상품재분류']], how = 'left', on = '상품명')

### 1. 휘낭시에 쌍 포함

In [9]:
from matplotlib.colors import LinearSegmentedColormap
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [10]:
# One row per purchased line item; rows missing any of the three fields are dropped.
records = offline_df2[["결제일시", "상품명", "카테고리"]].dropna()

# Hand-drip names: remove the (H)/(I) marker and keep the text before the first "_".
is_handdrip = records["카테고리"] == "핸드드립"
records.loc[is_handdrip, "상품명"] = records.loc[is_handdrip, "상품명"].apply(
    lambda name: re.sub(r"\(H\)|\(I\)", "", name).split("_")[0]
)

# Rows sharing a payment timestamp form one basket.
# NOTE(review): two separate orders paid at the same timestamp would be merged
# — confirm no order id is available.
records = records.groupby("결제일시").agg({"상품명" : list}).reset_index(drop = True)

# Keep baskets with more than one line item.
# NOTE(review): the count includes repeated names, so a basket holding the same
# product twice passes this filter — confirm that is intended.
records = records[records["상품명"].map(len) > 1]

In [11]:
# Encode the basket item lists as a table with one column per distinct item
# (column names come from te.columns_).
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)

In [12]:
# Frequent itemsets (support >= 0.005, at most 5 items), each annotated with
# its size and ordered by descending support.
itemset = (
    apriori(te_df, min_support=0.005, max_len=5, use_colnames=True, verbose=1)
    .assign(length=lambda found: found['itemsets'].map(len))
    .sort_values(by='support', ascending=False)
)

Processing 2862 combinations | Sampling itemset size 2Processing 2076 combinations | Sampling itemset size 3Processing 24 combinations | Sampling itemset size 4


In [13]:
# NOTE(review): association_rules is already imported together with apriori in
# an earlier cell; this re-import is redundant.
from mlxtend.frequent_patterns import association_rules
# Build association rules from the frequent itemsets, using lift as the metric
# with a minimum threshold of 1.
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
# Display only: the sorted frame is not assigned back, so association_df keeps
# its original row order.
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
70,(무화과_휘낭시에),(레몬_휘낭시에),0.045455,0.013043,0.005296,0.116522,8.933333,0.004704,1.117126
71,(레몬_휘낭시에),(무화과_휘낭시에),0.013043,0.045455,0.005296,0.406061,8.933333,0.004704,1.607143
10,(플레인_휘낭시에),(무화과_휘낭시에),0.045455,0.045455,0.015020,0.330435,7.269565,0.012954,1.425620
11,(무화과_휘낭시에),(플레인_휘낭시에),0.045455,0.045455,0.015020,0.330435,7.269565,0.012954,1.425620
18,(카페_루이지),(슈퍼클린_클래식),0.047036,0.108142,0.010909,0.231933,2.144700,0.005823,1.161171
...,...,...,...,...,...,...,...,...,...
3,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.196285,0.108142,0.022213,0.113170,1.046487,0.000987,1.005669
5,(브라우니),((I)아메리카노_클래식),0.109881,0.193755,0.021897,0.199281,1.028519,0.000607,1.006901
4,((I)아메리카노_클래식),(브라우니),0.193755,0.109881,0.021897,0.113015,1.028519,0.000607,1.003533
39,((I)문경선암리사과주스),((I)아메리카노_클래식),0.037075,0.193755,0.007194,0.194030,1.001419,0.000010,1.000341


In [14]:
offline_dfset = offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

# Product name -> coarse category. If a name appears with several categories,
# the last one wins.
repl_dict = dict(zip(offline_dfset['상품명'], offline_dfset['카테고리2']))

# Category signature of each rule side: the categories of its items joined by "_".
# The labels are sorted because the item collections are set-like and their
# iteration order is not reproducible between kernel sessions; without sorting
# the same rule could be labelled '시그니처_디저트' in one run and
# '디저트_시그니처' in the next.
association_df['antecedents_cat'] = [
    '_'.join(sorted(repl_dict[item] for item in items))
    for items in association_df['antecedents']
]
association_df['consequents_cat'] = [
    '_'.join(sorted(repl_dict[item] for item in items))
    for items in association_df['consequents']
]

# Full signature: "<antecedent categories>_<consequent categories>".
association_df['cat'] = association_df['antecedents_cat']+'_'+association_df['consequents_cat']

전체

In [15]:
# Report group -> list of rule category signatures (values of the 'cat' column)
# that belong to the group.
# NOTE(review): '에스프레소+디저트' and '비버리지' are empty lists, so those
# reports are always empty regardless of the data — confirm this is intended.
select_zip = {
    '커피류': [
        '베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처',
        '시그니처_에스프레소', '시그니처_시그니처',
    ],
    '커피류+디저트': [
        '시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
        '베이직_디저트', '디저트_베이직',
        '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
        '디저트_시그니처_시그니처',
    ],
    '시그니처': ['시그니처_시그니처'],
    '시그니처+디저트': [
        '시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
        '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
        '디저트_시그니처_시그니처',
    ],
    '에스프레소': ['에스프레소_에스프레소'],
    '에스프레소+디저트': [],
    '베이직': ['베이직_베이직'],
    '베이직+디저트': ['베이직_디저트', '디저트_베이직'],
    '비버리지': [],
    '비버리지+디저트': ['비버리지_디저트', '디저트_비버리지'],
}

커피류

In [16]:
# Rules whose category signature is listed under '커피류' (coffee drinks).
association_df.loc[
    association_df['cat'].isin(select_zip['커피류']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.11668,0.49497,1.223401
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.11668,0.288394,1.223401
2,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.022213,0.205409,1.046487
3,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.022213,0.11317,1.046487
8,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018419,0.095063,1.327318
9,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018419,0.257174,1.327318
12,((I)카페라떼_클래식),((I)아메리카노_클래식),0.013913,0.256934,1.326079
13,((I)아메리카노_클래식),((I)카페라떼_클래식),0.013913,0.071807,1.326079
16,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011462,0.105994,1.481576
17,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011462,0.160221,1.481576


커피류+디저트

In [17]:
# Rules whose category signature is listed under '커피류+디저트' (coffee + dessert).
association_df.loc[
    association_df['cat'].isin(select_zip['커피류+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.021897,0.113015,1.028519
5,(브라우니),((I)아메리카노_클래식),0.021897,0.199281,1.028519
14,(브라우니),((H)아메리카노_클래식),0.012569,0.114388,1.152082
15,((H)아메리카노_클래식),(브라우니),0.012569,0.126592,1.152082
20,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.010751,0.483986,1.196252
21,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.010751,0.251386,1.066411
22,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.010751,0.045607,1.066411
23,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.010751,0.026573,1.196252
26,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.010119,0.475836,1.17611
27,"(브라우니, 아이스텐저린라떼_쥬시)",(아이스유자아메리카노_쥬시),0.010119,0.260692,1.105889


시그니처

In [18]:
# Rules whose category signature is listed under '시그니처' (signature drinks).
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.11668,0.49497,1.223401
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.11668,0.288394,1.223401


시그니처+디저트

In [19]:
# Rules whose category signature is listed under '시그니처+디저트' (signature + dessert).
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
20,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.010751,0.483986,1.196252
21,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.010751,0.251386,1.066411
22,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.010751,0.045607,1.066411
23,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.010751,0.026573,1.196252
26,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.010119,0.475836,1.17611
27,"(브라우니, 아이스텐저린라떼_쥬시)",(아이스유자아메리카노_쥬시),0.010119,0.260692,1.105889
28,(아이스유자아메리카노_쥬시),"(브라우니, 아이스텐저린라떼_쥬시)",0.010119,0.042924,1.105889
29,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.010119,0.02501,1.17611
30,(잠봉뵈르),(텐저린카푸치노_쥬시),0.009328,0.216912,1.105088
31,(텐저린카푸치노_쥬시),(잠봉뵈르),0.009328,0.047523,1.105088


에스프레소

In [20]:
# Rules whose category signature is listed under '에스프레소' (espresso).
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
16,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011462,0.105994,1.481576
17,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011462,0.160221,1.481576
18,(카페_루이지),(슈퍼클린_클래식),0.010909,0.231933,2.1447
19,(슈퍼클린_클래식),(카페_루이지),0.010909,0.100877,2.1447
72,(카페_루이지),(슈퍼클린_쥬시),0.005217,0.110924,1.55049
73,(슈퍼클린_쥬시),(카페_루이지),0.005217,0.072928,1.55049


에스프레소+디저트

In [21]:
# Rules whose category signature is listed under '에스프레소+디저트' (espresso + dessert).
# NOTE(review): that list is empty in select_zip, so this is always empty.
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift


베이직

In [22]:
# Rules whose category signature is listed under '베이직' (basic drinks).
association_df.loc[
    association_df['cat'].isin(select_zip['베이직']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
8,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018419,0.095063,1.327318
9,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018419,0.257174,1.327318
12,((I)카페라떼_클래식),((I)아메리카노_클래식),0.013913,0.256934,1.326079
13,((I)아메리카노_클래식),((I)카페라떼_클래식),0.013913,0.071807,1.326079
34,((H)카페라떼_클래식),((H)아메리카노_클래식),0.008696,0.198556,1.999787
35,((H)아메리카노_클래식),((H)카페라떼_클래식),0.008696,0.08758,1.999787
36,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.008617,0.044472,1.086035
37,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.008617,0.210425,1.086035
48,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.006482,0.207071,2.085545
49,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.006482,0.065287,2.085545


베이직+디저트

In [23]:
# Rules whose category signature is listed under '베이직+디저트' (basic + dessert).
association_df.loc[
    association_df['cat'].isin(select_zip['베이직+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.021897,0.113015,1.028519
5,(브라우니),((I)아메리카노_클래식),0.021897,0.199281,1.028519
14,(브라우니),((H)아메리카노_클래식),0.012569,0.114388,1.152082
15,((H)아메리카노_클래식),(브라우니),0.012569,0.126592,1.152082
58,((H)카페라떼_클래식),(바스크_치즈케이크),0.00585,0.133574,1.134796
59,(바스크_치즈케이크),((H)카페라떼_클래식),0.00585,0.049698,1.134796
64,((H)카페라떼_클래식),(브라우니),0.005455,0.124549,1.133483
65,(브라우니),((H)카페라떼_클래식),0.005455,0.04964,1.133483
74,(잠봉뵈르),((H)아메리카노_클래식),0.005217,0.121324,1.221929
75,((H)아메리카노_클래식),(잠봉뵈르),0.005217,0.052548,1.221929


비버리지

In [24]:
# Rules whose category signature is listed under '비버리지' (beverage).
# NOTE(review): that list is empty in select_zip, so this is always empty.
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift


비버리지+디저트

In [25]:
# Rules whose category signature is listed under '비버리지+디저트' (beverage + dessert).
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
50,(브라우니),((I)얼그레이밀크티),0.006324,0.057554,1.328572
51,((I)얼그레이밀크티),(브라우니),0.006324,0.145985,1.328572
60,((I)얼그레이밀크티),(바스크_치즈케이크),0.005613,0.129562,1.100712
61,(바스크_치즈케이크),((I)얼그레이밀크티),0.005613,0.047683,1.100712


### 2. 휘낭시에 쌍 제거 

In [26]:
# Same basket construction as before, but line items re-classified as
# '휘낭시에' are excluded first. Rows with a missing re-classification compare
# as "not equal" and are kept.
not_financier = offline_df2['상품재분류'] != '휘낭시에'
records = offline_df2.loc[not_financier, ["결제일시", "상품명", "카테고리"]].dropna()

# Hand-drip names: remove the (H)/(I) marker and keep the text before the first "_".
is_handdrip = records["카테고리"] == "핸드드립"
records.loc[is_handdrip, "상품명"] = records.loc[is_handdrip, "상품명"].apply(
    lambda name: re.sub(r"\(H\)|\(I\)", "", name).split("_")[0]
)

# Rows sharing a payment timestamp form one basket.
records = records.groupby("결제일시").agg({"상품명" : list}).reset_index(drop = True)

# Keep baskets with more than one line item (repeated names are counted).
records = records[records["상품명"].map(len) > 1]

In [27]:
# Re-encode the filtered baskets as a table with one column per distinct item
# (column names come from te.columns_). This overwrites te / te_ary / te_df
# from the first analysis.
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)

In [28]:
# Frequent itemsets for the filtered baskets (support >= 0.005, at most 5
# items), each annotated with its size and ordered by descending support.
itemset = (
    apriori(te_df, min_support=0.005, max_len=5, use_colnames=True, verbose=1)
    .assign(length=lambda found: found['itemsets'].map(len))
    .sort_values(by='support', ascending=False)
)

Processing 2352 combinations | Sampling itemset size 2Processing 1623 combinations | Sampling itemset size 3Processing 24 combinations | Sampling itemset size 4


In [29]:
# NOTE(review): association_rules is already imported together with apriori in
# an earlier cell; this re-import is redundant.
from mlxtend.frequent_patterns import association_rules
# Rebuild the rules from the filtered itemsets (lift metric, minimum threshold
# 1); this overwrites association_df from the first analysis.
association_df = association_rules(itemset, metric="lift", min_threshold= 1)
# Display only: the sorted frame is not assigned back.
association_df.sort_values(by = "lift", ascending = False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
17,(카페_루이지),(슈퍼클린_클래식),0.048068,0.109614,0.011205,0.233108,2.126637,0.005936,1.161033
16,(슈퍼클린_클래식),(카페_루이지),0.109614,0.048068,0.011205,0.102222,2.126637,0.005936,1.060321
42,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.031910,0.101494,0.006658,0.208651,2.055801,0.003419,1.135411
43,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.101494,0.031910,0.006658,0.065600,2.055801,0.003419,1.036056
32,((H)카페라떼_클래식),((H)아메리카노_클래식),0.044414,0.101494,0.008931,0.201097,1.981367,0.004424,1.124675
...,...,...,...,...,...,...,...,...,...
2,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.109614,0.199009,0.022816,0.208148,1.045921,0.001002,1.011541
19,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.043927,0.240663,0.011043,0.251386,1.044559,0.000471,1.014325
20,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.240663,0.043927,0.011043,0.045884,1.044559,0.000471,1.002051
5,(브라우니),((I)아메리카노_클래식),0.111481,0.197386,0.022491,0.201748,1.022101,0.000486,1.005465


In [30]:
offline_dfset = offline_df2[['상품명', '상품재분류', '카테고리2']].drop_duplicates()

# Product name -> coarse category. If a name appears with several categories,
# the last one wins.
repl_dict = dict(zip(offline_dfset['상품명'], offline_dfset['카테고리2']))

# Category signature of each rule side: the categories of its items joined by "_".
# The labels are sorted because the item collections are set-like and their
# iteration order is not reproducible between kernel sessions; without sorting
# the same rule could be labelled '시그니처_디저트' in one run and
# '디저트_시그니처' in the next.
association_df['antecedents_cat'] = [
    '_'.join(sorted(repl_dict[item] for item in items))
    for items in association_df['antecedents']
]
association_df['consequents_cat'] = [
    '_'.join(sorted(repl_dict[item] for item in items))
    for items in association_df['consequents']
]

# Full signature: "<antecedent categories>_<consequent categories>".
association_df['cat'] = association_df['antecedents_cat']+'_'+association_df['consequents_cat']

In [31]:
## All groups
# Report group -> list of rule category signatures (values of the 'cat' column)
# that belong to the group.
# NOTE(review): '에스프레소+디저트' and '비버리지' are empty lists, so those
# reports are always empty regardless of the data — confirm this is intended.
select_zip = {
    '커피류': [
        '베이직_베이직', '에스프레소_에스프레소', '에스프레소_시그니처',
        '시그니처_에스프레소', '시그니처_시그니처',
    ],
    '커피류+디저트': [
        '시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
        '베이직_디저트', '디저트_베이직',
        '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
        '디저트_시그니처_시그니처',
    ],
    '시그니처': ['시그니처_시그니처'],
    '시그니처+디저트': [
        '시그니처_디저트_시그니처', '시그니처_시그니처_디저트', '시그니처_디저트_디저트',
        '디저트_시그니처_디저트', '디저트_시그니처', '시그니처_디저트',
        '디저트_시그니처_시그니처',
    ],
    '에스프레소': ['에스프레소_에스프레소'],
    '에스프레소+디저트': [],
    '베이직': ['베이직_베이직'],
    '베이직+디저트': ['베이직_디저트', '디저트_베이직'],
    '비버리지': [],
    '비버리지+디저트': ['비버리지_디저트', '디저트_비버리지'],
}

## Coffee drinks ('커피류')
association_df.loc[
    association_df['cat'].isin(select_zip['커피류']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.119844,0.497976,1.21495
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.119844,0.292393,1.21495
2,(슈퍼클린_클래식),(텐저린카푸치노_쥬시),0.022816,0.208148,1.045921
3,(텐저린카푸치노_쥬시),(슈퍼클린_클래식),0.022816,0.114647,1.045921
8,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018918,0.095845,1.320393
9,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018918,0.260626,1.320393
10,((I)카페라떼_클래식),((I)아메리카노_클래식),0.01429,0.259587,1.315127
11,((I)아메리카노_클래식),((I)카페라떼_클래식),0.01429,0.072398,1.315127
14,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011773,0.107407,1.473084
15,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011773,0.16147,1.473084


In [32]:

## Coffee + dessert ('커피류+디저트')
association_df.loc[
    association_df['cat'].isin(select_zip['커피류+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.022491,0.113945,1.022101
5,(브라우니),((I)아메리카노_클래식),0.022491,0.201748,1.022101
12,(브라우니),((H)아메리카노_클래식),0.01291,0.115805,1.141002
13,((H)아메리카노_클래식),(브라우니),0.01291,0.1272,1.141002
18,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.011043,0.483986,1.180818
19,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.011043,0.251386,1.044559
20,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.011043,0.045884,1.044559
21,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.011043,0.026941,1.180818
24,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.010393,0.475836,1.160935
25,"(브라우니, 아이스텐저린라떼_쥬시)",(아이스유자아메리카노_쥬시),0.010393,0.260692,1.083228


In [33]:

## Signature drinks ('시그니처')
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(아이스유자아메리카노_쥬시),(아이스텐저린라떼_쥬시),0.119844,0.497976,1.21495
1,(아이스텐저린라떼_쥬시),(아이스유자아메리카노_쥬시),0.119844,0.292393,1.21495


In [34]:

## Signature + dessert ('시그니처+디저트')
association_df.loc[
    association_df['cat'].isin(select_zip['시그니처+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
18,"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",(아이스텐저린라떼_쥬시),0.011043,0.483986,1.180818
19,"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",(아이스유자아메리카노_쥬시),0.011043,0.251386,1.044559
20,(아이스유자아메리카노_쥬시),"(아이스텐저린라떼_쥬시, 바스크_치즈케이크)",0.011043,0.045884,1.044559
21,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 바스크_치즈케이크)",0.011043,0.026941,1.180818
24,"(아이스유자아메리카노_쥬시, 브라우니)",(아이스텐저린라떼_쥬시),0.010393,0.475836,1.160935
25,"(브라우니, 아이스텐저린라떼_쥬시)",(아이스유자아메리카노_쥬시),0.010393,0.260692,1.083228
26,(아이스유자아메리카노_쥬시),"(브라우니, 아이스텐저린라떼_쥬시)",0.010393,0.043185,1.083228
27,(아이스텐저린라떼_쥬시),"(아이스유자아메리카노_쥬시, 브라우니)",0.010393,0.025357,1.160935
28,(잠봉뵈르),(텐저린카푸치노_쥬시),0.009581,0.217712,1.093979
29,(텐저린카푸치노_쥬시),(잠봉뵈르),0.009581,0.048144,1.093979


In [35]:

## Espresso ('에스프레소')
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
14,(슈퍼클린_클래식),(슈퍼클린_쥬시),0.011773,0.107407,1.473084
15,(슈퍼클린_쥬시),(슈퍼클린_클래식),0.011773,0.16147,1.473084
16,(슈퍼클린_클래식),(카페_루이지),0.011205,0.102222,2.126637
17,(카페_루이지),(슈퍼클린_클래식),0.011205,0.233108,2.126637
58,(카페_루이지),(슈퍼클린_쥬시),0.005359,0.111486,1.529028
59,(슈퍼클린_쥬시),(카페_루이지),0.005359,0.073497,1.529028


In [36]:

## Espresso + dessert ('에스프레소+디저트')
# NOTE(review): that list is empty in select_zip, so this is always empty.
association_df.loc[
    association_df['cat'].isin(select_zip['에스프레소+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [37]:

## Basic drinks ('베이직')
association_df.loc[
    association_df['cat'].isin(select_zip['베이직']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
8,((I)아메리카노_클래식),((I)아메리카노_쥬시),0.018918,0.095845,1.320393
9,((I)아메리카노_쥬시),((I)아메리카노_클래식),0.018918,0.260626,1.320393
10,((I)카페라떼_클래식),((I)아메리카노_클래식),0.01429,0.259587,1.315127
11,((I)아메리카노_클래식),((I)카페라떼_클래식),0.01429,0.072398,1.315127
32,((H)카페라떼_클래식),((H)아메리카노_클래식),0.008931,0.201097,1.981367
33,((H)아메리카노_클래식),((H)카페라떼_클래식),0.008931,0.088,1.981367
34,((I)아메리카노_클래식),((I)플랫화이트_클래식),0.00885,0.044838,1.07227
35,((I)플랫화이트_클래식),((I)아메리카노_클래식),0.00885,0.21165,1.07227
42,((H)아메리카노_쥬시),((H)아메리카노_클래식),0.006658,0.208651,2.055801
43,((H)아메리카노_클래식),((H)아메리카노_쥬시),0.006658,0.0656,2.055801


In [38]:

## Basic + dessert ('베이직+디저트')
association_df.loc[
    association_df['cat'].isin(select_zip['베이직+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift
4,((I)아메리카노_클래식),(브라우니),0.022491,0.113945,1.022101
5,(브라우니),((I)아메리카노_클래식),0.022491,0.201748,1.022101
12,(브라우니),((H)아메리카노_클래식),0.01291,0.115805,1.141002
13,((H)아메리카노_클래식),(브라우니),0.01291,0.1272,1.141002
50,((H)카페라떼_클래식),(바스크_치즈케이크),0.006008,0.135283,1.128064
51,(바스크_치즈케이크),((H)카페라떼_클래식),0.006008,0.050102,1.128064
56,((H)카페라떼_클래식),(브라우니),0.005602,0.126143,1.131517
57,(브라우니),((H)카페라떼_클래식),0.005602,0.050255,1.131517
60,(잠봉뵈르),((H)아메리카노_클래식),0.005359,0.121771,1.199787
61,((H)아메리카노_클래식),(잠봉뵈르),0.005359,0.0528,1.199787


In [39]:

## Beverage ('비버리지')
# NOTE(review): that list is empty in select_zip, so this is always empty.
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]


Unnamed: 0,antecedents,consequents,support,confidence,lift


In [40]:

## Beverage + dessert ('비버리지+디저트')
association_df.loc[
    association_df['cat'].isin(select_zip['비버리지+디저트']),
    ['antecedents', 'consequents', 'support', 'confidence', 'lift'],
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
46,(브라우니),((I)얼그레이밀크티),0.006496,0.058267,1.311903
47,((I)얼그레이밀크티),(브라우니),0.006496,0.146252,1.311903
52,((I)얼그레이밀크티),(바스크_치즈케이크),0.005765,0.129799,1.082331
53,(바스크_치즈케이크),((I)얼그레이밀크티),0.005765,0.04807,1.082331
