In [13]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules

## 1.Import data

In [3]:
# Read the movie dataset from a path relative to the current working directory.
data = pd.read_csv('Datasets/my_movies.csv')
# Preview the first rows: text columns V1-V5 sit alongside one 0/1 column per movie title.
data.head()

Unnamed: 0,V1,V2,V3,V4,V5,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,Sixth Sense,LOTR1,Harry Potter1,Green Mile,LOTR2,1,0,1,1,0,1,0,0,0,1
1,Gladiator,Patriot,Braveheart,,,0,1,0,0,1,0,0,0,1,0
2,LOTR1,LOTR2,,,,0,0,1,0,0,1,0,0,0,0
3,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
4,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0


## 2.Data Analysis

In [4]:
# (rows, columns) of the loaded frame.
data.shape

(10, 15)

In [5]:
# Per-column dtypes: separates the text columns (object) from the numeric movie columns (int64).
data.dtypes

V1               object
V2               object
V3               object
V4               object
V5               object
Sixth Sense       int64
Gladiator         int64
LOTR1             int64
Harry Potter1     int64
Patriot           int64
LOTR2             int64
Harry Potter2     int64
LOTR              int64
Braveheart        int64
Green Mile        int64
dtype: object

In [6]:
# Count of missing values in each column.
data.isna().sum()

V1               0
V2               0
V3               3
V4               8
V5               9
Sixth Sense      0
Gladiator        0
LOTR1            0
Harry Potter1    0
Patriot          0
LOTR2            0
Harry Potter2    0
LOTR             0
Braveheart       0
Green Mile       0
dtype: int64

## 3.Data preprocessing

In [7]:
# Show the first rows again as a reference point before the columns are reshaped.
data.head()

Unnamed: 0,V1,V2,V3,V4,V5,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,Sixth Sense,LOTR1,Harry Potter1,Green Mile,LOTR2,1,0,1,1,0,1,0,0,0,1
1,Gladiator,Patriot,Braveheart,,,0,1,0,0,1,0,0,0,1,0
2,LOTR1,LOTR2,,,,0,0,1,0,0,1,0,0,0,0
3,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
4,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0


In [8]:
# Distinct values that occur in the V1 column.
data['V1'].unique()

array(['Sixth Sense', 'Gladiator', 'LOTR1', 'Harry Potter1'], dtype=object)

In [12]:
# Drop the text columns V1-V5 so that only the per-movie 0/1 columns remain;
# this is the frame handed to apriori in the cells below.
data2 = data.drop(columns=['V1', 'V2', 'V3', 'V4', 'V5'])
data2.head()

Unnamed: 0,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,1,0,1,1,0,1,0,0,0,1
1,0,1,0,0,1,0,0,0,1,0
2,0,0,1,0,0,1,0,0,0,0
3,1,1,0,0,1,0,0,0,0,0
4,1,1,0,0,1,0,0,0,0,0


## Applying the base Apriori algorithm to get a DataFrame of itemsets and their support

In [14]:
# Baseline run: frequent itemsets with at least 50% support. Because use_colnames is
# not set, itemsets are reported as column positions of data2, not movie titles.
# The int64 0/1 columns are cast to bool: mlxtend's apriori expects a boolean one-hot
# frame and warns that non-bool input is deprecated and slower. Supports are unchanged.
apriori(df=data2.astype(bool), min_support=0.50)

Unnamed: 0,support,itemsets
0,0.6,(0)
1,0.7,(1)
2,0.6,(4)
3,0.5,"(0, 1)"
4,0.6,"(1, 4)"


In [16]:
# Lower the minimum support to 20% to surface less frequent itemsets (still reported
# as column positions of data2, since use_colnames is not set).
# The int64 0/1 columns are cast to bool: mlxtend's apriori expects a boolean one-hot
# frame and warns that non-bool input is deprecated and slower. Supports are unchanged.
apriori(df=data2.astype(bool), min_support=0.20)

Unnamed: 0,support,itemsets
0,0.6,(0)
1,0.7,(1)
2,0.2,(2)
3,0.2,(3)
4,0.6,(4)
5,0.2,(5)
6,0.2,(9)
7,0.5,"(0, 1)"
8,0.4,"(0, 4)"
9,0.2,"(0, 9)"


In [25]:
# Frequent itemsets at 10% minimum support, labelled with movie titles
# (use_colnames=True) instead of column positions. The result is stored in apri_df
# because the association-rule cells below consume it.
# The int64 0/1 columns are cast to bool: mlxtend's apriori expects a boolean one-hot
# frame and warns that non-bool input is deprecated and slower. Supports are unchanged.
apri_df = apriori(df=data2.astype(bool), min_support=0.10, use_colnames=True)
print(apri_df.shape)
apri_df

(53, 2)


Unnamed: 0,support,itemsets
0,0.6,(Sixth Sense)
1,0.7,(Gladiator)
2,0.2,(LOTR1)
3,0.2,(Harry Potter1)
4,0.6,(Patriot)
5,0.2,(LOTR2)
6,0.1,(Harry Potter2)
7,0.1,(LOTR)
8,0.1,(Braveheart)
9,0.2,(Green Mile)


## Applying association rules to the Apriori output (itemsets and their support) to find the strongest associations
### Association rules without hyperparameter tuning

In [27]:
# Derive association rules from the frequent itemsets in apri_df, keeping only
# rules whose confidence is at least 0.8, then show the shape and the rules.
associ_df = association_rules(
    df=apri_df,
    metric='confidence',
    min_threshold=0.8,
    support_only=False,
)
print(associ_df.shape)
associ_df

(128, 9)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.8
1,(LOTR),(Sixth Sense),0.1,0.6,0.1,1.000000,1.666667,0.04,inf
2,(Green Mile),(Sixth Sense),0.2,0.6,0.2,1.000000,1.666667,0.08,inf
3,(Gladiator),(Patriot),0.7,0.6,0.6,0.857143,1.428571,0.18,2.8
4,(Patriot),(Gladiator),0.6,0.7,0.6,1.000000,1.428571,0.18,inf
...,...,...,...,...,...,...,...,...,...
123,"(Sixth Sense, Harry Potter1)","(LOTR1, Green Mile, LOTR2)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
124,"(Sixth Sense, LOTR2)","(LOTR1, Green Mile, Harry Potter1)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
125,"(Green Mile, Harry Potter1)","(LOTR1, Sixth Sense, LOTR2)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
126,"(LOTR2, Harry Potter1)","(LOTR1, Sixth Sense, Green Mile)",0.1,0.1,0.1,1.000000,10.000000,0.09,inf


**Decreasing the threshold**

In [28]:
# Same rule mining with the confidence cut-off relaxed to 0.5; only the
# (rows, columns) shape of the result is shown.
association_rules(
    df=apri_df,
    metric='confidence',
    min_threshold=0.5,
    support_only=False,
).shape

(216, 9)

**The number of association rules increases.**

In [29]:
# Relax the confidence cut-off further to 0.4 and report the shape of the result.
association_rules(
    df=apri_df,
    metric='confidence',
    min_threshold=0.4,
    support_only=False,
).shape

(216, 9)

**Let us change the metric.**

In [30]:
# Filter rules on lift instead of confidence: keep rules with lift >= 0.8
# and report the shape of the result.
association_rules(
    df=apri_df,
    metric='lift',
    min_threshold=0.8,
    support_only=False,
).shape

(246, 9)

In [31]:
# Keep only rules with lift >= 1 and report the shape of the result.
association_rules(
    df=apri_df,
    metric='lift',
    min_threshold=1,
    support_only=False,
).shape

(238, 9)

In [32]:
# Tighten the lift cut-off to 2.5 and report the shape of the result.
association_rules(
    df=apri_df,
    metric='lift',
    min_threshold=2.5,
    support_only=False,
).shape

(170, 9)