# CODECLAUSE DATA SCIENCE INTERNSHIP

#### ALLOCATED PROJECT 2: MARKET BASKET ANALYSIS IN PYTHON USING APRIORI ALGORITHM

##### By : TULIKA ROY

In [1]:
import pandas as pd
import mlxtend
from mlxtend.frequent_patterns import apriori, association_rules

#### Dataset

In [4]:
dataset = [ ['Milk','Eggs','Bread','Butter' ],
           ['Milk','Cookies','Butter'],
           ['Milk','Eggs','Cookies','Bread','Butter'],
           ['Cookies','Eggs','Bread'],
           ['Milk','Eggs','Bread'],
           ['Milk','Bread','Butter'],
           ['Eggs','Butter'],
           ['Cookies','Bread','Butter'],
           ['Cookies','Bread','Butter','Eggs']]
           
           

#### Transforming the dataset into a one-hot encoded format

In [6]:
from mlxtend.preprocessing import TransactionEncoder

In [8]:
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary,columns = te.columns_)

In [9]:
df

Unnamed: 0,Bread,Butter,Cookies,Eggs,Milk
0,True,True,False,True,True
1,False,True,True,False,True
2,True,True,True,True,True
3,True,False,True,True,False
4,True,False,False,True,True
5,True,True,False,False,True
6,False,True,False,True,False
7,True,True,True,False,False
8,True,True,True,True,False


#### Applying the apriori algorithm to find the frequent itemset

In [10]:
frequent_itemset = apriori(df,min_support=0.3,use_colnames=True)

#### Generating the association rule

In [14]:
rules = association_rules(frequent_itemset,metric='confidence',min_threshold=0.5)

#### Sort the association rule by confidence in descending order

In [15]:
rules = rules.sort_values(by=['confidence'],ascending=False)

#### Printing the frequent itemsets

In [17]:
print('Frequent Itemsets:')
print(frequent_itemset)

Frequent Itemsets:
     support                  itemsets
0   0.777778                   (Bread)
1   0.777778                  (Butter)
2   0.555556                 (Cookies)
3   0.666667                    (Eggs)
4   0.555556                    (Milk)
5   0.555556           (Butter, Bread)
6   0.444444          (Bread, Cookies)
7   0.555556             (Bread, Eggs)
8   0.444444             (Bread, Milk)
9   0.444444         (Butter, Cookies)
10  0.444444            (Butter, Eggs)
11  0.444444            (Butter, Milk)
12  0.333333           (Eggs, Cookies)
13  0.333333              (Eggs, Milk)
14  0.333333  (Butter, Bread, Cookies)
15  0.333333     (Butter, Bread, Eggs)
16  0.333333     (Butter, Bread, Milk)
17  0.333333    (Bread, Eggs, Cookies)
18  0.333333       (Bread, Eggs, Milk)


#### Printing the association rules

In [18]:
print("\nAssociation Rules:")
print(rules)


Association Rules:
          antecedents       consequents  antecedent support  \
37       (Eggs, Milk)           (Bread)            0.333333   
32    (Eggs, Cookies)           (Bread)            0.333333   
5              (Eggs)           (Bread)            0.666667   
3           (Cookies)           (Bread)            0.555556   
7              (Milk)           (Bread)            0.555556   
9           (Cookies)          (Butter)            0.555556   
13             (Milk)          (Butter)            0.555556   
20   (Bread, Cookies)          (Butter)            0.444444   
19  (Butter, Cookies)           (Bread)            0.444444   
36      (Bread, Milk)            (Eggs)            0.444444   
31   (Bread, Cookies)            (Eggs)            0.444444   
28      (Bread, Milk)          (Butter)            0.444444   
27     (Butter, Milk)           (Bread)            0.444444   
23     (Butter, Eggs)           (Bread)            0.444444   
1             (Bread)          (But

##### Print additional details of the associative rules

In [20]:
print("\nAssociative Rules Details:")
for i, rule in rules.iterrows():
    antecedents = ','.join(list(rule['antecedents']))
    consequents = ','.join(list(rule['consequents']))
    support = rule['support']
    confidence = rule['confidence']
    lift = rule['lift']
    leverage = rule['leverage']
    conviction = rule['conviction']
    print(f"Rule {i+1}:")
    print(f"Antecedents {antecedents}")

    print(f"Consequents {consequents}")
    print(f"support {support:.3f}")
    print(f"Confidence {confidence:.3f}")
    print(f"Lift {lift:.3f}")
    print(f"Leverage {leverage:.3f}")
    print(f"Conviction {conviction:.3f}")
    print("\n")
    
    
    
    
    


Associative Rules Details:
Rule 38:
Antecedents Eggs,Milk
Consequents Bread
support 0.333
Confidence 1.000
Lift 1.286
Leverage 0.074
Conviction inf


Rule 33:
Antecedents Eggs,Cookies
Consequents Bread
support 0.333
Confidence 1.000
Lift 1.286
Leverage 0.074
Conviction inf


Rule 6:
Antecedents Eggs
Consequents Bread
support 0.556
Confidence 0.833
Lift 1.071
Leverage 0.037
Conviction 1.333


Rule 4:
Antecedents Cookies
Consequents Bread
support 0.444
Confidence 0.800
Lift 1.029
Leverage 0.012
Conviction 1.111


Rule 8:
Antecedents Milk
Consequents Bread
support 0.444
Confidence 0.800
Lift 1.029
Leverage 0.012
Conviction 1.111


Rule 10:
Antecedents Cookies
Consequents Butter
support 0.444
Confidence 0.800
Lift 1.029
Leverage 0.012
Conviction 1.111


Rule 14:
Antecedents Milk
Consequents Butter
support 0.444
Confidence 0.800
Lift 1.029
Leverage 0.012
Conviction 1.111


Rule 21:
Antecedents Bread,Cookies
Consequents Butter
support 0.333
Confidence 0.750
Lift 0.964
Leverage -0.012
Convic