In [10]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules

### 1. Data loading

In [3]:
# Read the book dataset from a path relative to the working directory
# and preview its first rows.
data = pd.read_csv('Datasets/book.csv')
data.head()

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0


### 2. Data analysis

In [4]:
# Dimensions of the loaded table as (rows, columns).
data.shape

(2000, 11)

In [5]:
# Storage type of each column.
data.dtypes

ChildBks     int64
YouthBks     int64
CookBks      int64
DoItYBks     int64
RefBks       int64
ArtBks       int64
GeogBks      int64
ItalCook     int64
ItalAtlas    int64
ItalArt      int64
Florence     int64
dtype: object

In [6]:
# Count of missing values in each column.
data.isna().sum()

ChildBks     0
YouthBks     0
CookBks      0
DoItYBks     0
RefBks       0
ArtBks       0
GeogBks      0
ItalCook     0
ItalAtlas    0
ItalArt      0
Florence     0
dtype: int64

#### The data appears to be one-hot encoded: each column is an integer flag with no missing values

### Applying the base Apriori algorithm to get a DataFrame of itemsets and their support

In [11]:
# Mine frequent itemsets that appear in at least 50% of the rows.
# The 0/1 integer table is cast to bool first: mlxtend documents bool input as
# the supported form and recent releases emit a DeprecationWarning for other
# dtypes. The mined itemsets and supports are unchanged by the cast.
# use_colnames is left at its default, so itemsets are reported by column position.
apriori(df=data.astype(bool), min_support=0.50)

Unnamed: 0,support,itemsets


#### Decreasing the support threshold

In [14]:
# Mine frequent itemsets that appear in at least 20% of the rows.
# Cast the 0/1 integers to bool, the input form mlxtend documents as supported
# (non-bool frames trigger a DeprecationWarning in recent releases); the
# resulting supports are identical.
# Itemsets are reported by column position because use_colnames is not set.
apriori(df=data.astype(bool), min_support=0.20)

Unnamed: 0,support,itemsets
0,0.423,(0)
1,0.2475,(1)
2,0.431,(2)
3,0.282,(3)
4,0.2145,(4)
5,0.241,(5)
6,0.276,(6)
7,0.256,"(0, 2)"


In [19]:
# Mine frequent itemsets that appear in at least 10% of the rows.
# Cast the 0/1 integers to bool, the input form mlxtend documents as supported
# (non-bool frames trigger a DeprecationWarning in recent releases); the
# resulting supports are identical.
# Itemsets are reported by column position because use_colnames is not set.
apriori(df=data.astype(bool), min_support=0.10)

Unnamed: 0,support,itemsets
0,0.423,(0)
1,0.2475,(1)
2,0.431,(2)
3,0.282,(3)
4,0.2145,(4)
5,0.241,(5)
6,0.276,(6)
7,0.1135,(7)
8,0.1085,(10)
9,0.165,"(0, 1)"


In [33]:
# Frequent itemsets at a 7% support threshold, labelled with column names
# (use_colnames=True) so the itemsets and the rules derived from them are readable.
# The 0/1 integer table is cast to bool because mlxtend documents bool input as
# the supported form and warns on other dtypes in recent releases; the mined
# itemsets and supports are unchanged by the cast.
apri_df = apriori(df=data.astype(bool), min_support=0.07, use_colnames=True)
print(apri_df.shape)
apri_df.tail(10)

(66, 2)


Unnamed: 0,support,itemsets
56,0.089,"(CookBks, RefBks, GeogBks)"
57,0.1035,"(CookBks, ArtBks, GeogBks)"
58,0.073,"(ArtBks, DoItYBks, GeogBks)"
59,0.082,"(CookBks, YouthBks, DoItYBks, ChildBks)"
60,0.083,"(CookBks, YouthBks, ChildBks, GeogBks)"
61,0.0745,"(CookBks, RefBks, DoItYBks, ChildBks)"
62,0.082,"(CookBks, ArtBks, DoItYBks, ChildBks)"
63,0.089,"(CookBks, DoItYBks, ChildBks, GeogBks)"
64,0.0785,"(CookBks, RefBks, ChildBks, GeogBks)"
65,0.0835,"(CookBks, ArtBks, ChildBks, GeogBks)"


### Changing `max_len`

In [28]:
# Number of frequent itemsets at 7% support when itemset length is capped at 30.
# Cast the 0/1 integers to bool, the input form mlxtend documents as supported
# (non-bool frames trigger a DeprecationWarning in recent releases); the
# result is identical.
# NOTE(review): the table has only 11 item columns, so a cap of 30 cannot
# exclude any itemset; use a smaller max_len to see the parameter take effect.
apriori(df=data.astype(bool), min_support=0.07, max_len=30).shape


(66, 2)

In [29]:
# Number of frequent itemsets at 7% support when itemset length is capped at 70.
# Cast the 0/1 integers to bool, the input form mlxtend documents as supported
# (non-bool frames trigger a DeprecationWarning in recent releases); the
# result is identical.
# NOTE(review): the table has only 11 item columns, so a cap of 70 cannot
# exclude any itemset; use a smaller max_len to see the parameter take effect.
apriori(df=data.astype(bool), min_support=0.07, max_len=70).shape

(66, 2)

#### Applying association rules to the DataFrame of itemsets and supports produced by the Apriori algorithm to find the strongest associations

**Association rules without hyperparameter tuning**

In [36]:
# Derive association rules from the frequent itemsets, keeping the rules
# whose confidence reaches the 0.8 threshold.
associ_df = association_rules(
    apri_df,
    metric='confidence',
    min_threshold=0.8,
)
print(associ_df.shape)
associ_df.head()

(33, 9)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ItalCook),(CookBks),0.1135,0.431,0.1135,1.0,2.320186,0.064582,inf
1,"(YouthBks, DoItYBks)",(ChildBks),0.1155,0.423,0.095,0.822511,1.94447,0.046143,3.250902
2,"(YouthBks, RefBks)",(ChildBks),0.0965,0.423,0.083,0.860104,2.033342,0.042181,4.124481
3,"(YouthBks, GeogBks)",(ChildBks),0.1205,0.423,0.099,0.821577,1.942262,0.048029,3.233884
4,"(CookBks, RefBks)",(ChildBks),0.1525,0.423,0.1225,0.803279,1.899004,0.057993,2.933083


**Decreasing the threshold**

In [37]:
# Count the rules that remain when the confidence threshold is relaxed to 0.5.
association_rules(
    apri_df,
    metric='confidence',
    min_threshold=0.5,
).shape

(133, 9)

**The number of association rules increases as the confidence threshold is lowered**

In [38]:
# Count the rules that remain when the confidence threshold is relaxed to 0.4.
association_rules(
    apri_df,
    metric='confidence',
    min_threshold=0.4,
).shape

(190, 9)

**Let us change the metric to lift**

In [39]:
# Count the rules selected by lift instead of confidence, with a threshold of 0.8.
association_rules(
    apri_df,
    metric='lift',
    min_threshold=0.8,
).shape

(306, 9)

In [47]:
# Count the rules whose lift reaches a threshold of 1.
association_rules(
    apri_df,
    metric='lift',
    min_threshold=1,
).shape

(306, 9)

In [46]:
# Count the rules whose lift reaches a stricter threshold of 2.5.
association_rules(
    apri_df,
    metric='lift',
    min_threshold=2.5,
).shape

(42, 9)