# Association rules: Stationery case study

In [78]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

  and should_run_async(code)


## Exploration

In [79]:
dataset = pd.read_csv('Stationary.csv')
dataset.head()

  and should_run_async(code)


Unnamed: 0,CUSTOMER_ID,BOUGHT_ID,Quantity
0,1,0,1
1,1,4,1
2,1,5,1
3,1,6,1
4,1,7,1


## Pre-processing

### create stockname dict for mapping

In [80]:
stockname = pd.read_csv('STOCKNAME.csv')
stockname
# create dict key is itemname value is code
stockname = stockname.set_index('ITEMNAME')['CODE'].to_dict()
stockname

  and should_run_async(code)


{'tape': 1,
 'pencil': 2,
 'book': 3,
 'ruler': 4,
 'disket': 5,
 'eraser': 6,
 'pen': 7,
 'paper': 8,
 'liquidpaper': 9,
 'ink': 0}

### group by customer id

In [81]:
# group by customer_id
dataset = dataset.groupby('CUSTOMER_ID')['BOUGHT_ID'].apply(list).reset_index()
dataset

  and should_run_async(code)


Unnamed: 0,CUSTOMER_ID,BOUGHT_ID
0,1,"[0, 4, 5, 6, 7, 8, 9]"
1,2,"[0, 1, 3, 4, 5, 6, 7, 8, 9]"
2,3,"[2, 3, 5, 6, 7, 9]"
3,4,"[0, 1, 3, 4, 5, 6, 7, 8, 9]"
4,5,"[3, 4, 5, 6, 7, 8, 9]"
...,...,...
285,286,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]"
286,287,"[0, 3, 4, 6, 7, 8, 9]"
287,288,"[0, 1, 3, 4, 5, 6, 8, 9]"
288,289,"[0, 3, 5, 6, 7, 8, 9]"


### put list of bought id into each stockname

In [82]:
dataset[['tape','pencil','book','ruler','disket','eraser','pen','paper','liquidpaper','ink']] = np.False_
for i in ['tape','pencil','book','ruler','disket','eraser','pen','paper','liquidpaper','ink']:
  dataset[i] = dataset['BOUGHT_ID'].map(lambda x: stockname[i] in x)
dataset = dataset.drop('CUSTOMER_ID', axis=1)
dataset = dataset.drop('BOUGHT_ID', axis=1)
dataset

  and should_run_async(code)


Unnamed: 0,tape,pencil,book,ruler,disket,eraser,pen,paper,liquidpaper,ink
0,False,False,False,True,True,True,True,True,True,True
1,True,False,True,True,True,True,True,True,True,True
2,False,True,True,False,True,True,True,False,True,False
3,True,False,True,True,True,True,True,True,True,True
4,False,False,True,True,True,True,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...
285,True,True,True,True,True,True,True,True,True,True
286,False,False,True,True,False,True,True,True,True,True
287,True,False,True,True,True,True,False,True,True,True
288,False,False,True,False,True,True,True,True,True,True


## Construct model using Association Rules

### Apply FP-growth to determine all the frequent itemsets. Set minimum support value to 80%

In [83]:
frequent_itemsets=apriori(dataset,min_support=0.8,use_colnames=True)
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.948276,(book)
1,0.944828,(paper)
2,0.924138,(liquidpaper)
3,0.810345,(ink)
4,0.896552,"(paper, book)"
5,0.882759,"(liquidpaper, book)"
6,0.872414,"(paper, liquidpaper)"
7,0.834483,"(paper, liquidpaper, book)"


### Apply “Create Association Rules” operators. Set confidence value to 90%

In [84]:
my_rules = association_rules(frequent_itemsets,metric="confidence",min_threshold=0.9)
my_rules

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(paper),(book),0.944828,0.948276,0.896552,0.948905,1.000664,0.000595,1.012315,0.012019
1,(book),(paper),0.948276,0.944828,0.896552,0.945455,1.000664,0.000595,1.011494,0.012821
2,(liquidpaper),(book),0.924138,0.948276,0.882759,0.955224,1.007327,0.006421,1.155172,0.095881
3,(book),(liquidpaper),0.948276,0.924138,0.882759,0.930909,1.007327,0.006421,1.098004,0.140625
4,(paper),(liquidpaper),0.944828,0.924138,0.872414,0.923358,0.999156,-0.000737,0.989819,-0.015085
5,(liquidpaper),(paper),0.924138,0.944828,0.872414,0.94403,0.999156,-0.000737,0.985747,-0.011016
6,"(paper, liquidpaper)",(book),0.872414,0.948276,0.834483,0.956522,1.008696,0.007194,1.189655,0.067568
7,"(paper, book)",(liquidpaper),0.896552,0.924138,0.834483,0.930769,1.007176,0.005945,1.095785,0.068871
8,"(liquidpaper, book)",(paper),0.882759,0.944828,0.834483,0.945312,1.000513,0.000428,1.008867,0.004375
9,(liquidpaper),"(paper, book)",0.924138,0.896552,0.834483,0.902985,1.007176,0.005945,1.066313,0.093914


## Model deployment using Association Rules

### Answer the following questions

- List all the frequent itemsets that have been found (with their support)?

In [87]:
my_rules[['antecedents','consequents']]

  and should_run_async(code)


Unnamed: 0,antecedents,consequents
0,(paper),(book)
1,(book),(paper)
2,(liquidpaper),(book)
3,(book),(liquidpaper)
4,(paper),(liquidpaper)
5,(liquidpaper),(paper)
6,"(paper, liquidpaper)",(book)
7,"(paper, book)",(liquidpaper)
8,"(liquidpaper, book)",(paper)
9,(liquidpaper),"(paper, book)"


- What is the maximal frequent itemset found? Give its support? List all the strong association
rules that can be generated from it.

In [90]:
my_rules.iloc[6]

  and should_run_async(code)


Unnamed: 0,6
antecedents,"(paper, liquidpaper)"
consequents,(book)
antecedent support,0.872414
consequent support,0.948276
support,0.834483
confidence,0.956522
lift,1.008696
leverage,0.007194
conviction,1.189655
zhangs_metric,0.067568


ค่า support 83% และมีค่า confidence 95 %

all combinations

In [110]:
# create combination of my_rules.iloc[6]
from itertools import combinations
x = my_rules.iloc[6]['antecedents'] | my_rules.iloc[6]['consequents']
for j in range(1,len(x)+1):
  for i in list(combinations(x,j)):
    y = x.copy()
    print(i,"->",tuple(y.difference(i)))

('paper',) -> ('liquidpaper', 'book')
('liquidpaper',) -> ('paper', 'book')
('book',) -> ('paper', 'liquidpaper')
('paper', 'liquidpaper') -> ('book',)
('paper', 'book') -> ('liquidpaper',)
('liquidpaper', 'book') -> ('paper',)
('paper', 'liquidpaper', 'book') -> ()


  and should_run_async(code)


- According to the rule {liquidpaper} => {book,paper},explain its meaning
  - คนที่ซื้อ liquid paper จะซื้อ book paper ด้วย ถ้ามี support 83 %
- What items are expected to be bought by customers who buy liquidpaper?
  - book, paper