In [1]:
import os
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from method import Apriori, FPgrowth, gen_association_rules, write_association_rules

In [2]:
# Load the raw basket file. It has no header row, so header=None makes pandas
# treat every line as data; baskets shorter than the widest row are padded
# with NaN.
csv_path = os.path.join('../data', 'Market_Basket_Optimisation.csv')
data = pd.read_csv(csv_path, header=None)
# Trim stray whitespace around item names, one column at a time
# (the NaN padding passes through unchanged).
data = data.apply(lambda column: column.str.strip())
print(len(data))
data.head(3)

7501


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,


In [3]:
# data preprocess
# Build one transaction per CSV row: drop the NaN padding and collapse
# repeated items.  Two deliberate choices:
#   * pd.DataFrame(data) accepts both the DataFrame produced by the load cell
#     and the list of lists this cell leaves behind, so re-running the cell no
#     longer fails with "'list' object has no attribute 'apply'".
#   * sorted() fixes the item order inside every transaction; iterating a set
#     of strings can differ between kernel sessions (hash randomisation).
data = [
    sorted({item for item in row if pd.notna(item)})
    for row in pd.DataFrame(data).to_numpy()
]

# Encode the transactions as an item-indicator table for Apriori
# (one column per item name in te.columns_, values cast to int).
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
ap_data = pd.DataFrame(te_data, columns=te.columns_).astype(int)

# FP-growth input is built from the same transactions by the project's helper.
fp_data = FPgrowth.preprocess_data(data)

In [4]:
# Mining thresholds shared by every mining / rule-generation cell.
# min_support is handed to both Apriori and FPgrowth; min_confidence is handed
# to gen_association_rules.
# NOTE(review): both look like fractions in [0, 1] (the support column in the
# outputs is fractional) — confirm against the `method` module.
min_support = 0.01
min_confidence = 0.1

In [5]:
# Apriori frequent itemsets
ap = Apriori(ap_data, min_support)
# max_length=20 equals the number of item columns in the raw CSV (columns
# 0..19), i.e. the widest basket — presumably "no effective size cap";
# TODO confirm against Apriori.get_frequent_itemsets.
# The chain below adds the itemset size and orders by (size ascending,
# support descending) without mutating the frame returned by the miner.
df_ap = (
    ap.get_frequent_itemsets(max_length=20)
    .assign(length=lambda frame: frame['itemsets'].map(len))
    .sort_values(by=['length', 'support'], ascending=[True, False], ignore_index=True)
)
df_ap

Unnamed: 0,support,itemsets,length
0,0.238368,(mineral water),1
1,0.179709,(eggs),1
2,0.174110,(spaghetti),1
3,0.170911,(french fries),1
4,0.163845,(chocolate),1
...,...,...,...
252,0.010932,"(mineral water, ground beef, chocolate)",3
253,0.010532,"(spaghetti, eggs, chocolate)",3
254,0.010265,"(olive oil, spaghetti, mineral water)",3
255,0.010132,"(eggs, mineral water, ground beef)",3


In [6]:
# FP-growth frequent itemsets
fp = FPgrowth(fp_data, min_support)
# Add the itemset size and order by (size ascending, support descending) so
# the table lines up row-for-row with the Apriori result; done as a
# non-mutating chain so the frame returned by the miner is left untouched.
df_fp = (
    fp.get_frequent_itemsets()
    .assign(length=lambda frame: frame['itemsets'].map(len))
    .sort_values(by=['length', 'support'], ascending=[True, False], ignore_index=True)
)
df_fp

Unnamed: 0,support,itemsets,length
0,0.238368,(mineral water),1
1,0.179709,(eggs),1
2,0.174110,(spaghetti),1
3,0.170911,(french fries),1
4,0.163845,(chocolate),1
...,...,...,...
252,0.010932,"(mineral water, ground beef, chocolate)",3
253,0.010532,"(spaghetti, eggs, chocolate)",3
254,0.010265,"(olive oil, spaghetti, mineral water)",3
255,0.010132,"(mineral water, eggs, ground beef)",3


In [7]:
# Apriori association rules: generated from the Apriori itemsets (df_ap),
# with min_confidence passed as the threshold.
df_rule_ap = gen_association_rules(df_ap, min_confidence)
df_rule_ap

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(spaghetti),(mineral water),0.17411,0.238368,0.059725,0.343032,1.439085,0.018223,1.159314
1,(mineral water),(spaghetti),0.238368,0.17411,0.059725,0.250559,1.439085,0.018223,1.102008
2,(mineral water),(chocolate),0.238368,0.163845,0.05266,0.220917,1.348332,0.013604,1.073256
3,(chocolate),(mineral water),0.163845,0.238368,0.05266,0.3214,1.348332,0.013604,1.122357
4,(mineral water),(eggs),0.238368,0.179709,0.050927,0.213647,1.188845,0.00809,1.043158
...,...,...,...,...,...,...,...,...,...
315,"(eggs, ground beef)",(mineral water),0.019997,0.238368,0.010132,0.506667,2.125563,0.005365,1.543848
316,"(mineral water, ground beef)",(eggs),0.040928,0.179709,0.010132,0.247557,1.377541,0.002777,1.09017
317,"(french fries, spaghetti)",(mineral water),0.027596,0.238368,0.010132,0.36715,1.540263,0.003554,1.203494
318,"(french fries, mineral water)",(spaghetti),0.033729,0.17411,0.010132,0.300395,1.725318,0.004259,1.180509


In [8]:
# FP-growth association rules: generated from the FP-growth itemsets (df_fp),
# with min_confidence passed as the threshold.
df_rule_fp = gen_association_rules(df_fp, min_confidence)
df_rule_fp

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(spaghetti),(mineral water),0.17411,0.238368,0.059725,0.343032,1.439085,0.018223,1.159314
1,(mineral water),(spaghetti),0.238368,0.17411,0.059725,0.250559,1.439085,0.018223,1.102008
2,(mineral water),(chocolate),0.238368,0.163845,0.05266,0.220917,1.348332,0.013604,1.073256
3,(chocolate),(mineral water),0.163845,0.238368,0.05266,0.3214,1.348332,0.013604,1.122357
4,(mineral water),(eggs),0.238368,0.179709,0.050927,0.213647,1.188845,0.00809,1.043158
...,...,...,...,...,...,...,...,...,...
315,"(mineral water, ground beef)",(eggs),0.040928,0.179709,0.010132,0.247557,1.377541,0.002777,1.09017
316,"(eggs, ground beef)",(mineral water),0.019997,0.238368,0.010132,0.506667,2.125563,0.005365,1.543848
317,"(french fries, spaghetti)",(mineral water),0.027596,0.238368,0.010132,0.36715,1.540263,0.003554,1.203494
318,"(french fries, mineral water)",(spaghetti),0.033729,0.17411,0.010132,0.300395,1.725318,0.004259,1.180509
