In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from os.path import join
import matplotlib.pyplot as plt
import datetime as dt
import re


## Local Load

In [2]:
path = join(os.getcwd(), "data")
offline_raw = pd.read_excel(join(path, "9. offline_total.xlsx"))

## Colab Load

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# # 데이터 불러오기
# offline_df = pd.read_excel('/content/drive/MyDrive/9. offline_total.xlsx')

# plt.rcParams['font.family'] = 'AppleGothic' # 폰트 변경
# plt.rcParams['axes.unicode_minus'] = False # 축 값 마이너스 깨짐 해결

In [113]:
def convert_to_nan(data):
    
    df = data.copy()
    
    columns = df.columns.to_list()
    for col in columns:
        df[col] = df[col].apply(lambda x: np.nan if x == "-" else x)
    
    return df


def drop_columns(data):
    
    df = data.copy()
    
    rm_columns = ["온라인 스토어", "사용 포인트", "적립 포인트", "사용 선불권", "배달팁(매출 포함x)", "결제메모", "주문 채널"]
    df = df.drop(rm_columns, axis = 1)
    
    return df


def date_conversion(data):
    
    df = data.copy()
    
    df["결제일시"] = df.loc[:, "결제일"] + " " + df.loc[:, "결제시간"]
    df["결제일시"] = pd.to_datetime(df["결제일시"])
    df["year"] = df["결제일시"].apply(lambda x: x.year)
    df["month"] = df["결제일시"].apply(lambda x: x.month)
    df["day"] = df["결제일시"].apply(lambda x: x.day)
    df["hour"] = df["결제일시"].apply(lambda x: x.hour)
    df["day_name"] = df["결제일시"].apply(lambda x: x.day_name())
    df["year_month"] = pd.to_datetime(df["결제일"]).dt.strftime("%Y-%m")
    
    return df


def add_weekend(data):
    
    df = data.copy()
    
    # 평일, 주말 구분
    # 0 = 평일, 1 = 주말
    df["is_weekend"] = df["day_name"].apply(lambda x: 1 if (x == "Sunday") | (x == "Saturday") else 0)
    
    return df


def add_season(data):
    
    df = data.copy()
    
    # 계절 추가하기
    # 봄(3~5월) = 1, 여름(6~8월) = 2, 가을(9~11월0) = 3, 겨울(12~2월) = 4
    seasons = [1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1]
    season_dict = dict(zip(range(1,13), seasons))
    df["season"] = df["month"].map(season_dict)

    return df


def add_holiday(data):
    
    df = data.copy()
    
    conditionlist = [
        (df['결제일'] == '2022-03-01') | (df['결제일'] == '2022-05-05') | (df['결제일'] == '2022-05-08') |
        (df['결제일'] == '2022-06-06') | (df['결제일'] == '2022-08-15') | (df['결제일'] == '2022-09-09') |
        (df['결제일'] == '2022-09-10') | (df['결제일'] == '2022-09-11') | (df['결제일'] == '2022-10-09') |
        (df['결제일'] == '2022-10-03') | (df['결제일'] == '2022-12-25') | 
        (df['결제일'] == '2023-01-01') | (df['결제일'] == '2023-01-21') | (df['결제일'] == '2023-01-22') |
        (df['결제일'] == '2023-01-23') | (df['결제일'] == '2023-03-01') | (df['결제일'] == '2023-05-05') |
        (df['결제일'] == '2023-05-26') | (df['결제일'] == '2023-06-06')]

    choicelist = [1]
    df['is_holiday'] = np.select(conditionlist, choicelist, default= 0)
    df["weekend_n_holiday"] = df["is_weekend"] + df["is_holiday"]
    
    return df

def drop_row(data):
    
    df = data.copy()
    df["상품명"] = df["상품명"].apply(lambda x: re.sub(r"\s", "", x))
    
    drop_lst = ['야외',
                '포장',
                '무료시음권', 
                '캐리어',
                '종이백',
                '포크',
                '⚪️',
                '⚪', # 위 emoji 와 별개
                '일회용컵',
               ]

    custom_lst = ['덜달게',
                  '1샷추가', 
                  '오틀리', 
                  '연하게', 
                  '시럽', 
                  '얼음적게',
                  '오트사이드', 
                  '물적게', 
                  '바닐라시럽', 
                  '2샷추가',
                  '얼음X',
                  '샷추가',
                 ]

    idx = df.loc[df["상품명"].str.contains("|".join(drop_lst)),"상품명"].index
    df = df.drop(idx, axis = 0).reset_index(drop = True)
    
    idx = df.loc[df["상품명"].str.contains("|".join(custom_lst)),"상품명"].index
    df.loc[idx, "카테고리"] = "커스텀"
    
    return df


In [35]:
def preprocess_productname(data):
    tmp = data.copy()
    tmp["카테고리"] = tmp["카테고리"].apply(lambda x: re.sub(r"\s", "", x))
    pattern = r'\s*_\s*'
    
    
    # 카테고리 - basic_ice
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"플랫_", "플랫화이트_", x))
    tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].apply(lambda x : re.sub(r"템플", "I", x))

    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"][tmp.loc[tmp["카테고리"] == "Basic_ice", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
        
    # 카테고리 - basic
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "Basic", "상품명"] = tmp.loc[tmp["카테고리"] == "Basic", "상품명"].apply(lambda x : re.sub(r"\s|\(H\)", "", x))
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    idx = tmp.loc[tmp["카테고리"] == "Basic", "상품명"][tmp.loc[tmp["카테고리"] == "Basic", "상품명"].str.contains("아메리카노|카페라떼|플랫화이트|카푸치노|바닐라라떼")].index
    tmp.loc[idx, "상품명"] = tmp.loc[idx, "상품명"].apply(lambda x: "(H)"+x)
    
    
    # 카테고리 - 시그니처
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"아이스텐라", "아이스텐저린라떼", x))
    tmp.loc[tmp["카테고리"] == "시그니처", "상품명"] = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].apply(lambda x : re.sub(r"유자아메리카노|아이스유자아메리카노", "아이스유자아메리카노", x))
    
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "시그니처", "상품명"][tmp.loc[tmp["카테고리"] == "시그니처", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
        
    # 카테고리 - beverage
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x : re.sub(r"\s", "", x))
    
    rename_dict = {"차가운어린이우유": "(I)어린이우유",
                   "따뜻한어린이우유": "(H)어린이우유",
                   
                   "얼그레이밀크티": "(H)얼그레이밀크티",
                   
                   "제주유기농귤피주스" : "(I)제주유기농귤피주스",
                   "문경선암리사과주스" : "(I)문경선암리사과주스",
                   "제주유기농감귤주스" : "(I)제주유기농감귤주스",
                   "어린이감귤주스" : "(I)어린이감귤주스",
                   
                   "시나몬플럼" : "(H)시나몬플럼",
                   "트로피칼루이보스" : "(H)트로피칼루이보스",
                   "카모마일" : "(H)카모마일"
                  }
    
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"] = tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].apply(lambda x: rename_dict[x] if x in rename_dict.keys() else x)
    tmp.loc[tmp["카테고리"] == "비버리지", "상품명"].value_counts()
    
    
    # 카테고리 - 디저트
    tmp.loc[tmp["카테고리"] == "디저트", "상품명"] = tmp.loc[tmp["카테고리"] == "디저트", "상품명"].apply(lambda x : re.sub("\s", r'_', x))
    
    
    # 카테고리 - 블랜딩원두
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"] = tmp.loc[tmp["카테고리"] == "블렌딩원두", "상품명"].apply(lambda x : re.sub("\s", r'_', x))
    
    
    # 카테고리 = 세트
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub("\s", '', x))
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x : re.sub("Set.", "", x))
    
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "세트", "상품명"][tmp.loc[tmp["카테고리"] == "세트", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"] = tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"].apply(lambda x : "(H)" + x)
    tmp.loc[tmp["카테고리"] == "세트", "상품명"] = tmp.loc[tmp["카테고리"] == "세트", "상품명"].apply(lambda x: "Set_" + x)
    
    
    # 카테고리 - 드립백/캡슐
    tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"] = tmp.loc[tmp["카테고리"] == "드립백/캡슐", "상품명"].apply(lambda x : re.sub(r"\s", r'_', x))
    
    
    # 카테고리 - 에스프레소
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub(pattern, r'_', x))
    tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"] = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].apply(lambda x : re.sub("\s", r'_', x))
    
    beans_lst = ["클래식_", "쥬시_", "싱글_", "디카프_", "스페셜_", "샘플_"]
    for bean in beans_lst:
        idx = tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"][tmp.loc[tmp["카테고리"] == "에스프레소", "상품명"].str.contains(bean)].index
        tmp.loc[idx,"상품명"] = tmp.loc[idx,"상품명"].apply(lambda x: x[len(bean):] + "_" + bean[:-1])
        
    return tmp

In [66]:
pd.options.display.max_columns = None

offline_raw = pd.read_excel(join(path, "9. offline_total.xlsx"))
offline_df = convert_to_nan(offline_raw)

offline_df = drop_columns(offline_df)
offline_df = date_conversion(offline_df)
offline_df = add_weekend(offline_df)
offline_df = add_season(offline_df)
offline_df = add_holiday(offline_df)
offline_df = preprocess_productname(offline_df)
# offline_df = drop_row(offline_df)

offline_df

  tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"] = tmp.loc[(tmp["카테고리"] == "세트") & (~tmp["상품명"].str.contains("(I)")), "상품명"].apply(lambda x : "(H)" + x)


Unnamed: 0,결제일,결제시간,결제내역,합계,상품별 할인,결제 할인,카드 결제,현금 결제,간편 결제,기타 결제,환불,환불 일시,카테고리,상품명,옵션,수량,상품별 단가,상품별 합계,결제일시,year,month,day,hour,day_name,year_month,is_weekend,season,is_holiday,weekend_n_holiday
0,2022-02-07,19:28:53,샘플 캐모마일,,,,,,,,4500.0,2022-02-07 19:29:37,에스프레소,캐모마일_샘플,,1,4500,,2022-02-07 19:28:53,2022,2,7,19,Monday,2022-02,0,1,0,0
1,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(I)콜롬비아로꼬소르베,,1,10500,10500.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0
2,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)니카라과COE#1,,1,12000,12000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0
3,2022-02-10,10:03:28,(H) 니카라과 COE#1 외 2건,32500.0,,,32500.0,,,,,,핸드드립,(H)과테레드파카마라,,1,10000,10000.0,2022-02-10 10:03:28,2022,2,10,10,Thursday,2022-02,0,1,0,0
4,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,시그니처,아이스텐저린라떼,,1,7000,7000.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145005,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,드립백/캡슐,드립백_클래식,,1,18000,18000.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0
145006,2023-05-31,17:21:24,드립백 쥬시 외 2건,36100.0,,,36100.0,,,,,,포장,종이백,,1,100,100.0,2023-05-31 17:21:24,2023,5,31,17,Wednesday,2023-05,0,2,0,0
145007,2023-05-31,17:22:16,(KCW) 기념 뱃지,6000.0,,,6000.0,,,,,,MD,(KCW)기념뱃지,,1,6000,6000.0,2023-05-31 17:22:16,2023,5,31,17,Wednesday,2023-05,0,2,0,0
145008,2023-05-31,17:24:37,(I) 오미자 에이드 외 1건,14000.0,,,14000.0,,,,,,비버리지,(I)오미자에이드,,1,7000,7000.0,2023-05-31 17:24:37,2023,5,31,17,Wednesday,2023-05,0,2,0,0


In [229]:
menu_lst = ['시그니처', 
            'Basic_ice', 
            '디저트', 
            '에스프레소', 
            'Basic', 
            # '비버리지', 
            # '핸드드립',
            # '드립백/캡슐',
            # '싱글원두',
            # '블렌딩원두',
            # '커스텀', 
            "세트",
           ]
menu_df = offline_df.loc[offline_df["카테고리"].str.contains("|".join(menu_lst)),:].reset_index(drop = True)
menu_df

Unnamed: 0,결제일,결제시간,결제내역,합계,상품별 할인,결제 할인,카드 결제,현금 결제,간편 결제,기타 결제,환불,환불 일시,카테고리,상품명,옵션,수량,상품별 단가,상품별 합계,결제일시,year,month,day,hour,day_name,year_month,is_weekend,season,is_holiday,weekend_n_holiday
0,2022-02-07,19:28:53,샘플 캐모마일,,,,,,,,4500.0,2022-02-07 19:29:37,에스프레소,캐모마일_샘플,,1,4500,,2022-02-07 19:28:53,2022,2,7,19,Monday,2022-02,0,1,0,0
1,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,시그니처,아이스텐저린라떼,,1,7000,7000.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0
2,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,에스프레소,슈퍼클린_클래식,,1,6500,6500.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0
3,2022-02-10,10:13:57,아이스 텐저린 라떼 외 2건,20000.0,,,20000.0,,,,,,에스프레소,슈퍼클린_쥬시,,1,6500,6500.0,2022-02-10 10:13:57,2022,2,10,10,Thursday,2022-02,0,1,0,0
4,2022-02-10,10:15:59,아이스 텐저린 라떼,7000.0,,,7000.0,,,,,,시그니처,아이스텐저린라떼,,1,7000,7000.0,2022-02-10 10:15:59,2022,2,10,10,Thursday,2022-02,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101940,2023-05-31,16:53:23,싱글_(I) 아메리카노 외 1건,13000.0,,,13000.0,,,,,,Basic_ice,(I)아메리카노_싱글,,1,7000,7000.0,2023-05-31 16:53:23,2023,5,31,16,Wednesday,2023-05,0,2,0,0
101941,2023-05-31,16:53:23,싱글_(I) 아메리카노 외 1건,13000.0,,,13000.0,,,,,,Basic,(H)아메리카노_클래식,,1,6000,6000.0,2023-05-31 16:53:23,2023,5,31,16,Wednesday,2023-05,0,2,0,0
101942,2023-05-31,17:04:46,쥬시_아이스 텐라 외 1건,14000.0,,,14000.0,,,,,,시그니처,아이스텐저린라떼_쥬시,,1,7000,7000.0,2023-05-31 17:04:46,2023,5,31,17,Wednesday,2023-05,0,2,0,0
101943,2023-05-31,17:04:46,쥬시_아이스 텐라 외 1건,14000.0,,,14000.0,,,,,,시그니처,아이스유자아메리카노_쥬시,,1,7000,7000.0,2023-05-31 17:04:46,2023,5,31,17,Wednesday,2023-05,0,2,0,0


In [318]:
# records = menu_df.groupby("결제일시")["상품명"].value_counts().to_frame().unstack("상품명").reset_index(drop = True)
# # records.columns = records.columns.droplevel()
# # records.droplevel("상품명", axis = 1)

# records = records.droplevel(None, axis = 1)
# records

In [182]:
from matplotlib.colors import LinearSegmentedColormap
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules


In [311]:
records = menu_df[["결제일시", "상품명"]]
records = records.groupby("결제일시").agg({"상품명" : lambda x : list(x)}).reset_index(drop = True)
records["상품명"]

0                                        [캐모마일_샘플]
1                    [아이스텐저린라떼, 슈퍼클린_클래식, 슈퍼클린_쥬시]
2                                       [아이스텐저린라떼]
3        [(H)플랫화이트_클래식, (H)카페라떼_클래식, (H)아메리카노_클래식]
4                                       [아이스텐저린라떼]
                           ...                    
54632                               [아이스텐저린라떼_디카프]
54633                     [(H)플랫화이트_디카프, 에스프레소_쥬시]
54634                  [(I)아메리카노_싱글, (H)아메리카노_클래식]
54635                 [아이스텐저린라떼_쥬시, 아이스유자아메리카노_쥬시]
54636                                  [카페_루이지_쥬시]
Name: 상품명, Length: 54637, dtype: object

In [312]:
te = TransactionEncoder()
te_ary = te.fit_transform(records["상품명"])
te_df = pd.DataFrame(te_ary, columns= te.columns_)
te_df

Unnamed: 0,(H)바닐라라떼_디카프,(H)바닐라라떼_싱글,(H)바닐라라떼_쥬시,(H)바닐라라떼_클래식,(H)아메리카노_디카프,(H)아메리카노_싱글,(H)아메리카노_쥬시,(H)아메리카노_클래식,(H)카페라떼_디카프,(H)카페라떼_싱글,(H)카페라떼_쥬시,(H)카페라떼_클래식,(H)카푸치노_디카프,(H)카푸치노_싱글,(H)카푸치노_쥬시,(H)카푸치노_클래식,(H)플랫화이트_디카프,(H)플랫화이트_싱글,(H)플랫화이트_쥬시,(H)플랫화이트_클래식,(I)바닐라라떼_디카프,(I)바닐라라떼_싱글,(I)바닐라라떼_쥬시,(I)바닐라라떼_클래식,(I)아메리카노_디카프,(I)아메리카노_싱글,(I)아메리카노_쥬시,(I)아메리카노_클래식,(I)카페라떼_디카프,(I)카페라떼_싱글,(I)카페라떼_쥬시,(I)카페라떼_클래식,(I)플랫화이트_디카프,(I)플랫화이트_싱글,(I)플랫화이트_쥬시,(I)플랫화이트_클래식,Set_(H)아메리카노_디카프,Set_(H)아메리카노_싱글,Set_(H)아메리카노_쥬시,Set_(H)아메리카노_클래식,Set_(H)카페라떼_디카프,Set_(H)카페라떼_싱글,Set_(H)카페라떼_쥬시,Set_(H)카페라떼_클래식,Set_(I)아메리카노_디카프,Set_(I)아메리카노_싱글,Set_(I)아메리카노_쥬시,Set_(I)아메리카노_클래식,Set_(I)카페라떼_싱글,Set_(I)카페라떼_쥬시,Set_(I)카페라떼_클래식,레몬_휘낭시에,무화과_휘낭시에,미미MIMI,바스크_치즈케이크,브라우니,슈퍼클린_스페셜,슈퍼클린_싱글,슈퍼클린_쥬시,슈퍼클린_클래식,시나몬_휘낭시에,아이스유자아메리카노,아이스유자아메리카노_디카프,아이스유자아메리카노_쥬시,아이스텐저린라떼,아이스텐저린라떼_디카프,아이스텐저린라떼_쥬시,에스프레소_싱글,에스프레소_쥬시,에스프레소_클래식,에스프레소_프레도_쥬시,에스프레소_프레도_클래식,에스프레소_플래터_쥬시,에스프레소_플래터_클래식,잠봉뵈르,카페_루이지,카페_루이지_쥬시,캐모마일_샘플,텐저린카푸치노,텐저린카푸치노_디카프,텐저린카푸치노_쥬시,플레인_휘낭시에,헤이즐넛_휘낭시에
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54632,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
54633,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
54634,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
54635,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [316]:
itemset = apriori(te_df,
                  min_support=0.005, 
                  max_len=5, 
                  use_colnames=True, 
                  verbose=1,
                 )
itemset['length'] = itemset['itemsets'].map(lambda x: len(x))
itemset = itemset.sort_values(by = 'support',ascending=False)
itemset

Processing 8 combinations | Sampling itemset size 4 32


Unnamed: 0,support,itemsets,length
31,0.293574,(아이스텐저린라떼),1
36,0.167707,(텐저린카푸치노),1
28,0.164980,(아이스유자아메리카노),1
14,0.150008,((I)아메리카노_클래식),1
26,0.087578,(슈퍼클린_클래식),1
...,...,...,...
92,0.005381,"(플레인_휘낭시에, 텐저린카푸치노)",2
76,0.005235,"(슈퍼클린_쥬시, 카페_루이지)",2
94,0.005125,"(아이스텐저린라떼, 브라우니, 아이스유자아메리카노)",3
93,0.005052,"(바스크_치즈케이크, 아이스텐저린라떼, 아이스유자아메리카노)",3


In [317]:
from mlxtend.frequent_patterns import association_rules
association_rules(itemset, metric="confidence", min_threshold=0.1) 

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(아이스텐저린라떼),(아이스유자아메리카노),0.293574,0.16498,0.065468,0.223005,1.351711,0.017035,1.074679,0.368329
1,(아이스유자아메리카노),(아이스텐저린라떼),0.16498,0.293574,0.065468,0.396827,1.351711,0.017035,1.171183,0.311606
2,(아이스텐저린라떼),(텐저린카푸치노),0.293574,0.167707,0.039039,0.13298,0.792931,-0.010195,0.959947,-0.269896
3,(텐저린카푸치노),(아이스텐저린라떼),0.167707,0.293574,0.039039,0.232784,0.792931,-0.010195,0.920766,-0.238828
4,(아이스텐저린라떼),((I)아메리카노_클래식),0.293574,0.150008,0.030968,0.105486,0.703203,-0.01307,0.950228,-0.374008
5,((I)아메리카노_클래식),(아이스텐저린라떼),0.150008,0.293574,0.030968,0.206442,0.703203,-0.01307,0.890201,-0.331797
6,(아이스유자아메리카노),(텐저린카푸치노),0.16498,0.167707,0.023812,0.144331,0.860615,-0.003857,0.972681,-0.16245
7,(텐저린카푸치노),(아이스유자아메리카노),0.167707,0.16498,0.023812,0.141984,0.860615,-0.003857,0.973199,-0.162896
8,(바스크_치즈케이크),(아이스텐저린라떼),0.080129,0.293574,0.021744,0.271357,0.924322,-0.00178,0.969509,-0.081732
9,(브라우니),(아이스텐저린라떼),0.075077,0.293574,0.019803,0.263774,0.898492,-0.002237,0.959523,-0.108851
