# Association rule mining

In [2]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks. This notebook uses its `frequent_patterns` module (`apriori` and `association_rules`).

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [3]:
# Load the supermarket CSV into a DataFrame and preview its first ten rows.
data = pd.read_csv("./supermarket_short.csv")
data.head(n=10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [4]:
# apriori expects a one-hot encoded table. The preview above shows 0/1 integers;
# mlxtend prefers boolean columns (newer releases warn on non-bool frames and
# process them more slowly), so cast at the call site. `data` itself is left
# unchanged. min_support=0.1 is the support threshold passed to apriori;
# use_colnames=True reports column names instead of column indices.
frequent_itemsets = apriori(data.astype(bool), min_support=0.1, use_colnames=True)

In [9]:
# Derive association rules from the frequent itemsets, using lift as the
# evaluation metric with a minimum threshold of 1.2.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2,
)

### Add antecedent length column

In [10]:
# Record the number of items in each rule's antecedent set.
rules["antecedent_len"] = rules["antecedents"].apply(len)

### Filter rules

In [29]:
# Keep rules that pass all three thresholds, then order them by confidence
# (ties broken by conviction), highest first.
(
    rules
    .loc[
        lambda r: r["antecedent_len"].ge(1)
        & r["confidence"].gt(0.65)
        & r["support"].gt(0.29)
    ]
    .sort_values(by=["confidence", "conviction"], ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
4026,"(vegetables, biscuits)",(fruit),0.381241,0.640156,0.303436,0.795918,1.24332,0.059383,1.763238,2
2236,"(baking needs, fruit)",(vegetables),0.410633,0.639939,0.321807,0.783684,1.224622,0.059026,1.664513,2
6199,"(frozen foods, fruit)",(vegetables),0.402204,0.639939,0.313594,0.779688,1.218378,0.056208,1.634323,2
1046,"(vegetables, bread and cake)",(fruit),0.49665,0.640156,0.387076,0.779373,1.217475,0.069143,1.631011,2
7078,"(vegetables, milk-cream)",(fruit),0.437649,0.640156,0.339529,0.775802,1.211897,0.059366,1.605033,2
6198,"(vegetables, frozen foods)",(fruit),0.406743,0.640156,0.313594,0.770988,1.204376,0.053215,1.571293,2
7079,"(milk-cream, fruit)",(vegetables),0.440458,0.639939,0.339529,0.770854,1.204573,0.057662,1.571313,2
1047,"(fruit, bread and cake)",(vegetables),0.502485,0.639939,0.387076,0.770323,1.203743,0.065516,1.567679,2
330,"(margarine, bread and cake)",(baking needs),0.395721,0.604063,0.293495,0.741671,1.227804,0.054454,1.532686,2
412,"(bread and cake, biscuits)",(frozen foods),0.450184,0.587206,0.326345,0.724916,1.234518,0.061995,1.500612,2
