In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# 필요한 데이터를 불러오기
df = pd.read_csv('data/retail_data.csv')

# 결제 건별로 로우를 재정비
basket_df = df.groupby('OrderID')['ProdName'].apply(list).reset_index()

# TransactionEncoder를 활용해 데이터를 One-Hot Encoding
te = TransactionEncoder()
te_result = te.fit_transform(basket_df['ProdName'])

# 위의 결과물을 DataFrame에 담아 te_df라는 이름으로 저장
te_df = pd.DataFrame(te_result, columns=te.columns_) 
te_df.head()


Unnamed: 0,4 PURPLE FLOCK DINNER CANDLES,SET 2 TEA TOWELS I LOVE LONDON,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,...,"WRAP, BILLBOARD FONTS DESIGN",YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YELLOW GIANT GARDEN THERMOMETER,YELLOW SHARK HELICOPTER,YOU'RE CONFUSING ME METAL SIGN,YULETIDE IMAGES GIFT WRAP SET,ZINC FINISH 15CM PLANTER POTS,ZINC METAL HEART DECORATION,ZINC WILLIE WINKIE CANDLE STICK
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [3]:
from mlxtend.frequent_patterns import fpgrowth

# mlxtend 라이브러리의 fpgrowth()로 FP-Growth 알고리즘 구현
# 최소 지지도를 0.06으로 설정
frequent_itemsets = fpgrowth(te_df, min_support=0.06, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.137037,(WHITE HANGING HEART T-LIGHT HOLDER)
1,0.103704,(RED WOOLLY HOTTIE WHITE HEART.)
2,0.096296,(SET 7 BABUSHKA NESTING BOXES)
3,0.085185,(KNITTED UNION FLAG HOT WATER BOTTLE)
4,0.066667,(CREAM CUPID HEARTS COAT HANGER)
...,...,...
56,0.062963,"(GLASS STAR FROSTED T-LIGHT HOLDER, RED WOOLLY..."
57,0.062963,"(WHITE HANGING HEART T-LIGHT HOLDER, KNITTED U..."
58,0.062963,"(HAND WARMER RED POLKA DOT, HAND WARMER UNION ..."
59,0.062963,"(WHITE HANGING HEART T-LIGHT HOLDER, WOOD 2 DR..."


In [4]:
from mlxtend.frequent_patterns import association_rules

# 연관규칙을 추출하고 주요 지표를 바탕으로 규칙을 평가하는 과정은 Apriori 알고리즘을 적용할 때와 동일
# mlxtend 라이브러리의 association_rules() 함수 활용. 신뢰도가 0.8 이상인 규칙으로 조건을 걸음
association_rules(frequent_itemsets, metric='confidence', min_threshold=0.8)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,"(RED WOOLLY HOTTIE WHITE HEART., WHITE HANGING...",(SET 7 BABUSHKA NESTING BOXES),0.070370,0.096296,0.062963,0.894737,9.291498,1.0,0.056187,8.585185,0.959925,0.607143,0.883520,0.774291
1,"(RED WOOLLY HOTTIE WHITE HEART., SET 7 BABUSHK...",(WHITE HANGING HEART T-LIGHT HOLDER),0.062963,0.137037,0.062963,1.000000,7.297297,1.0,0.054335,inf,0.920949,0.459459,1.000000,0.729730
2,"(WHITE HANGING HEART T-LIGHT HOLDER, SET 7 BAB...",(RED WOOLLY HOTTIE WHITE HEART.),0.062963,0.103704,0.062963,1.000000,9.642857,1.0,0.056433,inf,0.956522,0.607143,1.000000,0.803571
3,(KNITTED UNION FLAG HOT WATER BOTTLE),(RED WOOLLY HOTTIE WHITE HEART.),0.085185,0.103704,0.074074,0.869565,8.385093,1.0,0.065240,6.871605,0.962753,0.645161,0.854474,0.791925
4,(KNITTED UNION FLAG HOT WATER BOTTLE),(WHITE HANGING HEART T-LIGHT HOLDER),0.085185,0.137037,0.070370,0.826087,6.028202,1.0,0.058697,4.962037,0.911784,0.463415,0.798470,0.669800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,(WHITE METAL LANTERN),"(GLASS STAR FROSTED T-LIGHT HOLDER, RED WOOLLY...",0.066667,0.062963,0.062963,0.944444,15.000000,1.0,0.058765,16.866667,1.000000,0.944444,0.940711,0.972222
155,(GLASS STAR FROSTED T-LIGHT HOLDER),"(RED WOOLLY HOTTIE WHITE HEART., WHITE METAL L...",0.066667,0.062963,0.062963,0.944444,15.000000,1.0,0.058765,16.866667,1.000000,0.944444,0.940711,0.972222
156,(HAND WARMER RED POLKA DOT),(HAND WARMER UNION JACK),0.066667,0.118519,0.062963,0.944444,7.968750,1.0,0.055062,15.866667,0.936975,0.515152,0.936975,0.737847
157,(WOOD 2 DRAWER CABINET WHITE FINISH),(WHITE HANGING HEART T-LIGHT HOLDER),0.066667,0.137037,0.062963,0.944444,6.891892,1.0,0.053827,15.533333,0.915966,0.447368,0.935622,0.701952
