In [1]:
import os
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from method import Apriori, FPgrowth, gen_association_rules, trans_ibm_data, write_association_rules

In [2]:
# Load data: read the whitespace-delimited integer table from disk, then
# convert it with the project helper into the collection used by later cells.
ibm_data = np.loadtxt('../data/ibm-own.txt', dtype=int)
# NOTE(review): presumably yields one entry per transaction — confirm in method.py
data = trans_ibm_data(ibm_data)
# Last expression: the number of entries in `data` is shown as the cell output.
len(data)

9858

In [3]:
# Data preprocessing: build one input representation per algorithm.

# Learn the item vocabulary first, then encode every transaction against it.
te = TransactionEncoder()
te.fit(data)
te_data = te.transform(data)

# Apriori input: integer-typed frame with one column per item seen during fit.
ap_data = pd.DataFrame(data=te_data, columns=te.columns_).astype(int)

# FP-growth input: produced by the FPgrowth class's own preprocessing helper.
fp_data = FPgrowth.preprocess_data(data)

In [4]:
# Mining thresholds, defined once and reused by every cell below.
min_support = 0.01  # passed to both Apriori and FPgrowth when mining frequent itemsets
min_confidence = 0.1  # passed to gen_association_rules for both rule tables

In [5]:
# Apriori frequent itemsets
ap = Apriori(ap_data, min_support)

# Build the display frame as one non-mutating chain: the frame returned by
# get_frequent_itemsets is left untouched, so re-running this cell (or any
# other consumer of that frame) never sees a half-modified table.
df_ap = (
    ap.get_frequent_itemsets(max_length=10)
    # number of items in each itemset; used as the primary sort key
    .assign(length=lambda frame: frame['itemsets'].apply(len))
    # shortest itemsets first; within one length, highest support first
    .sort_values(by=['length', 'support'], ascending=[True, False], ignore_index=True)
)
df_ap

Unnamed: 0,support,itemsets,length
0,0.076486,(607),1
1,0.075877,(553),1
2,0.073341,(238),1
3,0.062995,(592),1
4,0.062995,(973),1
...,...,...,...
1204,0.012376,"(877, 913, 402, 83, 471, 510, 607)",7
1205,0.012376,"(429, 877, 913, 83, 471, 510, 607)",7
1206,0.011767,"(902, 649, 879, 49, 915, 890, 988)",7
1207,0.010753,"(69, 520, 236, 883, 117, 924, 959)",7


In [6]:
# FP-growth frequent itemsets
fp = FPgrowth(fp_data, min_support)

# Build the display frame as one non-mutating chain: the frame returned by
# get_frequent_itemsets is left untouched, so re-running this cell (or any
# other consumer of that frame) never sees a half-modified table.
df_fp = (
    fp.get_frequent_itemsets()
    # number of items in each itemset; used as the primary sort key
    .assign(length=lambda frame: frame['itemsets'].apply(len))
    # shortest itemsets first; within one length, highest support first
    .sort_values(by=['length', 'support'], ascending=[True, False], ignore_index=True)
)
df_fp

Unnamed: 0,support,itemsets,length
0,0.076486,(607),1
1,0.075877,(553),1
2,0.073341,(238),1
3,0.062995,(592),1
4,0.062995,(973),1
...,...,...,...
1204,0.012376,"(913, 402, 83, 471, 877, 607, 429)",7
1205,0.012376,"(913, 402, 83, 471, 877, 510, 607)",7
1206,0.011767,"(49, 915, 902, 649, 890, 988, 879)",7
1207,0.010753,"(883, 69, 117, 520, 924, 236, 959)",7


In [7]:
# Apriori association rules: generate rules from the Apriori frequent
# itemsets (df_ap), passing min_confidence as the threshold.
df_rule_ap = gen_association_rules(df_ap, min_confidence)
df_rule_ap

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(607),(471),0.076486,0.060154,0.029621,0.387268,6.437921,0.02502,1.533861
1,(471),(607),0.060154,0.076486,0.029621,0.492411,6.437921,0.02502,1.819414
2,(915),(879),0.036316,0.037229,0.018056,0.497207,13.355487,0.016704,1.914845
3,(879),(915),0.037229,0.036316,0.018056,0.485014,13.355487,0.016704,1.871281
4,(238),(607),0.073341,0.076486,0.016738,0.228216,2.983755,0.011128,1.196596
...,...,...,...,...,...,...,...,...,...
12603,"(877, 429, 913, 402, 471, 510, 607)",(83),0.012477,0.026273,0.012376,0.99187,37.752331,0.012048,119.768397
12604,"(877, 429, 913, 83, 471, 510, 607)",(402),0.012376,0.029519,0.012376,1.0,33.876289,0.01201,970480827.754108
12605,"(877, 429, 402, 83, 471, 510, 607)",(913),0.012781,0.030534,0.012376,0.968254,31.711122,0.011985,30.538191
12606,"(429, 913, 402, 83, 471, 510, 607)",(877),0.012477,0.024041,0.012376,0.99187,41.256766,0.012076,120.042895


In [8]:
# FP-growth association rules: generate rules from the FP-growth frequent
# itemsets (df_fp), passing min_confidence as the threshold.
df_rule_fp = gen_association_rules(df_fp, min_confidence)
df_rule_fp

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(607),(471),0.076486,0.060154,0.029621,0.387268,6.437921,0.02502,1.533861
1,(471),(607),0.060154,0.076486,0.029621,0.492411,6.437921,0.02502,1.819414
2,(915),(879),0.036316,0.037229,0.018056,0.497207,13.355487,0.016704,1.914845
3,(879),(915),0.037229,0.036316,0.018056,0.485014,13.355487,0.016704,1.871281
4,(238),(607),0.073341,0.076486,0.016738,0.228216,2.983755,0.011128,1.196596
...,...,...,...,...,...,...,...,...,...
12603,"(877, 429, 913, 402, 471, 510, 607)",(83),0.012477,0.026273,0.012376,0.99187,37.752331,0.012048,119.768397
12604,"(877, 429, 913, 83, 471, 510, 607)",(402),0.012376,0.029519,0.012376,1.0,33.876289,0.01201,970480827.754108
12605,"(877, 429, 402, 83, 471, 510, 607)",(913),0.012781,0.030534,0.012376,0.968254,31.711122,0.011985,30.538191
12606,"(877, 913, 402, 83, 471, 510, 607)",(429),0.012376,0.021201,0.012376,1.0,47.167464,0.012113,978799162.356347
