# Association rule mining

In [28]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

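Mlxtend is a Python library of useful tools for day-to-day data science tasks. This notebook uses the `apriori` and `association_rules` functions from its `frequent_patterns` module.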

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [26]:
# Transactions table: the preview below shows one row per transaction and one
# 0/1 column per product category.
# NOTE(review): machine-specific absolute path — point this at your own copy of the CSV.
csv_path = "/Users/ola/spbpu/supermarket_short.csv"
data = pd.read_csv(csv_path)

# Preview the first ten transactions.
data.head(10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4622,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4623,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4624,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4625,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Find frequent itemsets and rules

In [37]:
# mlxtend's apriori is designed for a boolean one-hot frame; passing integer
# 0/1 columns still works but emits a DeprecationWarning and takes a slower
# code path. Validate before casting, because astype(bool) would otherwise
# silently turn any non-zero value into True.
if not data.isin([0, 1]).all().all():
    raise ValueError("data must be one-hot encoded: only 0/1 values are allowed")
basket = data.astype(bool)

# Keep itemsets whose support is at least 0.1; use_colnames=True reports
# itemsets by column name rather than by column position.
frequent_itemsets = apriori(basket, min_support=0.1, use_colnames=True)

# Derive rules from the frequent itemsets, keeping those with confidence >= 0.4.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4)

### Add antecedent length column

In [None]:
# Number of items on the left-hand side of each rule, so rules can later be
# filtered by antecedent size.
rules["antecedent_len"] = rules["antecedents"].map(len)

### Filter rules

In [41]:
# Keep rules with at least two antecedent items, confidence above 0.75 and
# support above 0.3; the resulting frame is the cell's displayed output.
rules.loc[
    rules["antecedent_len"].ge(2)
    & rules["confidence"].gt(0.75)
    & rules["support"].gt(0.3)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
666,"(baking needs, biscuits)",(bread and cake),0.381241,0.719689,0.314675,0.825397,1.14688,0.0403,1.605419,2
703,"(baking needs, frozen foods)",(bread and cake),0.396585,0.719689,0.320942,0.809264,1.124464,0.035524,1.469632,2
751,"(milk-cream, baking needs)",(bread and cake),0.412146,0.719689,0.341474,0.828526,1.151229,0.044857,1.63472,2
774,"(fruit, baking needs)",(bread and cake),0.410633,0.719689,0.338016,0.823158,1.143769,0.042488,1.585093,2
779,"(vegetables, baking needs)",(bread and cake),0.421223,0.719689,0.342771,0.813751,1.130698,0.039621,1.505032,2
958,"(frozen foods, biscuits)",(bread and cake),0.391182,0.719689,0.326345,0.834254,1.159187,0.044816,1.691211,2
1002,"(milk-cream, biscuits)",(bread and cake),0.381889,0.719689,0.320942,0.840407,1.167737,0.046101,1.756418,2
1024,"(fruit, biscuits)",(bread and cake),0.397018,0.719689,0.333045,0.838868,1.165598,0.047316,1.739634,2
1029,"(vegetables, biscuits)",(bread and cake),0.381241,0.719689,0.321375,0.842971,1.171299,0.047,1.785087,2
1472,"(milk-cream, frozen foods)",(bread and cake),0.39464,0.719689,0.327642,0.83023,1.153596,0.043624,1.651123,2
