In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Toy basket data: every inner list is one transaction (the items bought together).
ds_raw = [
    ['소주', '콜라', '맥주'],
    ['소주', '콜라', '와인'],
    ['소주', '주스'],
    ['콜라', '맥주'],
    ['소주', '콜라', '맥주', '와인'],
    ['주스'],
]

ds_raw

[['소주', '콜라', '맥주'],
 ['소주', '콜라', '와인'],
 ['소주', '주스'],
 ['콜라', '맥주'],
 ['소주', '콜라', '맥주', '와인'],
 ['주스']]

In [3]:
# One-hot encode the baskets: one row per transaction, one boolean column per item.
enc = TransactionEncoder()
ds_raw_enc = enc.fit_transform(ds_raw)
df_asso = pd.DataFrame(data=ds_raw_enc, columns=enc.columns_)
df_asso.head(n=5)

Unnamed: 0,맥주,소주,와인,주스,콜라
0,True,True,False,False,True
1,False,True,True,False,True
2,False,True,False,True,False
3,True,False,False,False,True
4,True,True,True,False,True


In [7]:
# Minimum support: the smallest fraction of transactions an itemset must appear in
# to be reported as frequent.
min_support = 0.5

# Pass the variable instead of repeating the literal so the threshold lives in one place.
# use_colnames=True reports itemsets by item name instead of column index.
df_freq = apriori(df_asso, min_support=min_support, use_colnames=True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.5,(맥주)
1,0.667,(소주)
2,0.667,(콜라)
3,0.5,"(콜라, 맥주)"
4,0.5,"(콜라, 소주)"


In [9]:
# Derive association rules from the frequent itemsets, keeping only rules whose
# confidence is at least 0.5.
df_asso_rule = association_rules(
    df_freq,
    metric='confidence',
    min_threshold=0.5,
)
df_asso_rule.round(decimals=3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(콜라),(맥주),0.667,0.5,0.5,0.75,1.5,0.167,2.0
1,(맥주),(콜라),0.5,0.667,0.5,1.0,1.5,0.167,inf
2,(콜라),(소주),0.667,0.667,0.5,0.75,1.125,0.056,1.333
3,(소주),(콜라),0.667,0.667,0.5,0.75,1.125,0.056,1.333


---

## 실습

In [16]:
# Load the purchase log (columns ID and PRODUCT) and preview the first 20 rows.
df_raw = pd.read_csv('./data/상품구매.csv')
df_raw.head(n=20)

Unnamed: 0,ID,PRODUCT
0,C-11,BAGUETTE
1,C-11,HERRING
2,C-11,AVOCADO
3,C-11,ARTICHOKE
4,C-11,HEINEKEN
5,C-11,APPLES
6,C-11,CORNED BEEF
7,C-12,HERRING
8,C-12,CORNED BEEF
9,C-12,APPLES


In [12]:
# Build one transaction (basket) per distinct ID: the alphabetically sorted list of
# PRODUCT values recorded for that ID, with baskets ordered by ID.
#
# A single groupby pass replaces the earlier per-ID boolean filter, which rescanned
# the whole frame once for every ID. groupby(sort=True) yields groups in sorted key
# order, matching the previous explicit ID.sort().
ID = []
list_association = []
for id_value, products in df_raw.groupby('ID', sort=True)['PRODUCT']:
    ID.append(id_value)
    list_association.append(sorted(products))

for row in list_association:
    print(row)

['APPLES', 'ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'CORNED BEEF', 'HEINEKEN', 'HERRING']
['APPLES', 'CORNED BEEF', 'HEINEKEN', 'HERRING', 'OLIVES', 'SARDINES', 'STEAK']
['APPLES', 'AVOCADO', 'BAGUETTE', 'ICE CREAM', 'PEPPERS', 'SARDINES', 'STEAK']
['APPLES', 'COKE', 'CORNED BEEF', 'HAM', 'HERRING', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'BOURBON', 'COKE', 'HAM', 'ICE CREAM', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'COKE', 'HEINEKEN', 'HERRING', 'TURKEY']
['APPLES', 'CHICKEN', 'COKE', 'CORNED BEEF', 'HEINEKEN', 'ICE CREAM', 'SARDINES']
['BAGUETTE', 'BOURBON', 'CRACKERS', 'HEINEKEN', 'OLIVES', 'PEPPERS', 'SODA']
['BOURBON', 'CRACKERS', 'HEINEKEN', 'HERRING', 'OLIVES', 'SODA', 'STEAK']
['APPLES', 'BAGUETTE', 'CORNED BEEF', 'HAM', 'HERRING', 'OLIVES', 'TURKEY']
['ARTICHOKE', 'AVOCADO', 'BAGUETTE', 'BOURBON', 'CORNED BEEF', 'HEINEKEN', 'HERRING']
['ARTICHOKE', 'BOURBON', 'CRACKERS', 'HEINEKEN', 'OLIVES', 'SODA', 'STEAK']
['BOURBON', 'CORNED BEEF', 'CRACKERS', 'HEINEKEN', 'HERRING', 

In [13]:
# One-hot encode the per-ID baskets: one row per basket, one boolean column per product.
enc = TransactionEncoder()
ds_raw_enc = enc.fit_transform(list_association)
df_asso = pd.DataFrame(data=ds_raw_enc, columns=enc.columns_)
df_asso.head(n=5)

Unnamed: 0,APPLES,ARTICHOKE,AVOCADO,BAGUETTE,BOURBON,CHICKEN,COKE,CORNED BEEF,CRACKERS,HAM,HEINEKEN,HERRING,ICE CREAM,OLIVES,PEPPERS,SARDINES,SODA,STEAK,TURKEY
0,True,True,True,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False
1,True,False,False,False,False,False,False,True,False,False,True,True,False,True,False,True,False,True,False
2,True,False,True,True,False,False,False,False,False,False,False,False,True,False,True,True,False,True,False
3,True,False,False,False,False,False,True,True,False,True,False,True,False,True,False,False,False,False,True
4,False,True,False,False,True,False,True,False,False,True,False,False,True,True,False,False,False,False,True


In [14]:
# Minimum support: an itemset must appear in at least half of the baskets to be kept.
min_support = 0.5

# Pass the variable instead of repeating the literal so the threshold lives in one place.
# use_colnames=True reports itemsets by product name instead of column index.
df_freq = apriori(df_asso, min_support=min_support, use_colnames=True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.55,(BOURBON)
1,0.5,(COKE)
2,0.6,(HEINEKEN)
3,0.65,(OLIVES)
4,0.5,"(BOURBON, OLIVES)"


In [15]:
# Turn the frequent itemsets into rules; only rules with confidence >= 0.5 are kept.
df_asso_rule = association_rules(
    df_freq,
    metric='confidence',
    min_threshold=0.5,
)
df_asso_rule.round(decimals=3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BOURBON),(OLIVES),0.55,0.65,0.5,0.909,1.399,0.142,3.85
1,(OLIVES),(BOURBON),0.65,0.55,0.5,0.769,1.399,0.142,1.95


In [17]:
# Scratch arithmetic: 40 out of 500 expressed as a fraction.
print(40 / 500)

0.08


In [18]:
# Scratch arithmetic: complement of the 0.08 fraction.
print(1 - 0.08)

0.92


In [19]:
# Scratch arithmetic: 27 out of 40 as a fraction (0.675).
# A stray `ds_raw` token had been pasted after the literal 40, which made this cell
# a SyntaxError; the intended expression is plain 27 / 40.
# NOTE(review): presumably a confidence-style ratio for the exercise; confirm.
print(27 / 40)

0.675


In [22]:
# Scratch arithmetic: 5 out of 460 expressed as a fraction.
print(5 / 460)

0.010869565217391304


In [24]:
# Second toy dataset: five transactions, each an inner list of purchased items.
ds_raw2 = [
    ['우유', '라면'],
    ['라면'],
    ['아이스크림', '우유'],
    ['과자', '라면'],
    ['아이스크림'],
]

ds_raw2

[['우유', '라면'], ['라면'], ['아이스크림', '우유'], ['과자', '라면'], ['아이스크림']]

In [25]:
# One-hot encode the second toy dataset: one row per transaction, one boolean column per item.
enc = TransactionEncoder()
ds_raw_enc = enc.fit_transform(ds_raw2)
df_asso = pd.DataFrame(data=ds_raw_enc, columns=enc.columns_)
df_asso.head(n=5)

Unnamed: 0,과자,라면,아이스크림,우유
0,False,True,False,True
1,False,True,False,False
2,False,False,True,True
3,True,True,False,False
4,False,False,True,False


In [31]:
# Minimum support for this run. With five transactions, 0.2 keeps every itemset that
# occurs in at least one of them.
# The variable used to read 0.5 while the call hard-coded 0.2; the threshold is now
# defined once and passed through so the two cannot disagree.
min_support = 0.2

# use_colnames=True reports itemsets by item name instead of column index.
df_freq = apriori(df_asso, min_support=min_support, use_colnames=True)
df_freq.round(3)

Unnamed: 0,support,itemsets
0,0.2,(과자)
1,0.6,(라면)
2,0.4,(아이스크림)
3,0.4,(우유)
4,0.2,"(라면, 과자)"
5,0.2,"(라면, 우유)"
6,0.2,"(우유, 아이스크림)"


In [32]:
# Generate rules from the frequent itemsets; rules with confidence below 0.2 are dropped.
df_asso_rule = association_rules(
    df_freq,
    metric='confidence',
    min_threshold=0.2,
)
df_asso_rule.round(decimals=3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(라면),(과자),0.6,0.2,0.2,0.333,1.667,0.08,1.2
1,(과자),(라면),0.2,0.6,0.2,1.0,1.667,0.08,inf
2,(라면),(우유),0.6,0.4,0.2,0.333,0.833,-0.04,0.9
3,(우유),(라면),0.4,0.6,0.2,0.5,0.833,-0.04,0.8
4,(우유),(아이스크림),0.4,0.4,0.2,0.5,1.25,0.04,1.2
5,(아이스크림),(우유),0.4,0.4,0.2,0.5,1.25,0.04,1.2
