# Association rule mining

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks. This notebook uses its `apriori` and `association_rules` functions.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Load data

In [2]:
# Read the supermarket basket table into a DataFrame.
# NOTE(review): the path is absolute and points at the filesystem root — confirm
# this matches where the CSV lives in the target environment.
data = pd.read_csv(filepath_or_buffer="/supermarket_short.csv")

# Preview the first ten rows.
data.head(n=10)

Unnamed: 0,grocery misc,baby needs,bread and cake,baking needs,coupons,juice-sat-cord-ms,tea,biscuits,canned fish-meat,canned fruit,...,750ml white nz,750ml red nz,750ml white imp,750ml red imp,sparkling nz,sparkling imp,brew kits/accesry,port and sherry,ctrled label wine,non host support
0,0,1,1,1,0,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,1,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,1,1,0,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,1,1,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,0,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0


### Find frequent itemsets and rules

In [3]:
# mlxtend's apriori expects a one-hot encoded frame and emits a DeprecationWarning
# when the columns are not of bool dtype, so check the 0/1 encoding explicitly and
# convert to bool before mining (the mined itemsets are unchanged by the cast).
if not data.isin([0, 1]).all().all():
    raise ValueError("apriori needs a one-hot encoded frame containing only 0/1 or True/False values")
basket = data.astype(bool)

# Frequent itemsets of at most 2 items with support of at least 0.1.
frequent_itemsets = apriori(basket, min_support=0.1, use_colnames=True, max_len=2)

# Derive rules and keep those whose conviction reaches the 1.1 threshold.
rules = association_rules(frequent_itemsets, metric="conviction", min_threshold=1.1)

### Add antecedent length column

In [4]:
# Store the number of items in each rule's antecedent as a new column.
rules["antecedent_len"] = rules["antecedents"].apply(len)

### Filter rules

In [5]:
# Select four specific rules by their (antecedent -> consequent) pairs:
#   tissues-paper prd -> baking needs / biscuits / frozen foods
#   baking needs      -> tissues-paper prd
antecedents = rules['antecedents']
consequents = rules['consequents']

# Element-wise equality of each itemset against a set literal.
from_tissues = antecedents == {'tissues-paper prd'}
from_baking = antecedents == {'baking needs'}
to_baking = consequents == {'baking needs'}
to_biscuits = consequents == {'biscuits'}
to_frozen = consequents == {'frozen foods'}
to_tissues = consequents == {'tissues-paper prd'}

# The shared antecedent test is factored out of the three tissue rules;
# the combined mask is logically the same as OR-ing the four pairwise tests.
selected = (from_tissues & (to_baking | to_biscuits | to_frozen)) | (from_baking & to_tissues)

rules[selected]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
70,(baking needs),(tissues-paper prd),0.604063,0.485628,0.339961,0.562791,1.158893,0.046611,1.17649,1
71,(tissues-paper prd),(baking needs),0.485628,0.604063,0.339961,0.700045,1.158893,0.046611,1.319985,1
158,(tissues-paper prd),(biscuits),0.485628,0.563,0.314026,0.64664,1.148562,0.040618,1.2367,1
356,(tissues-paper prd),(frozen foods),0.485628,0.587206,0.325265,0.669782,1.140626,0.040101,1.250066,1
