# Association Rule mining 

### Installing dependencies

In [1]:
%pip install apyori

Note: you may need to restart the kernel to use updated packages.


### Restart and run all cells

### Reading the dataset

In [2]:
import pandas as pd
df = pd.read_csv('store_data.csv')
df.head()

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


In [3]:
df = pd.read_csv('store_data.csv',header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [4]:
df.shape

(7501, 20)

### Preprocessing data for ARM

In [5]:
records = []
for i in range(df.shape[0]):
    records.append([str(df.values[i,j]) for j in range(df.shape[1])])

In [6]:
print(records[0:5])

[['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil'], ['burgers', 'meatballs', 'eggs', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan'], ['chutney', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan'], ['turkey', 'avocado', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan'], ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']]


### Applying Apriori Algorithm

![image.png](attachment:image.png)

In [7]:
from apyori import apriori
association_rules = apriori(records, min_support=0.0045, min_confidence=0.2, min_lift=3, min_length=2)
association_results = list(association_rules)

### Total rules mined

In [8]:
print(len(association_results))

48


In [9]:
print(association_results[0])

RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])


In [10]:
for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")

Rule: chicken -> light cream
Support: 0.004532728969470737
Confidence: 0.29059829059829057
Lift: 4.84395061728395
Rule: mushroom cream sauce -> escalope
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049
Rule: pasta -> escalope
Support: 0.005865884548726837
Confidence: 0.3728813559322034
Lift: 4.700811850163794
Rule: herb & pepper -> ground beef
Support: 0.015997866951073192
Confidence: 0.3234501347708895
Lift: 3.2919938411349285
Rule: tomato sauce -> ground beef
Support: 0.005332622317024397
Confidence: 0.3773584905660377
Lift: 3.840659481324083
Rule: whole wheat pasta -> olive oil
Support: 0.007998933475536596
Confidence: 0.2714932126696833
Lift: 4.122410097642296
Rule: shrimp -> pasta
Support: 0.005065991201173177
Confidence: 0.3220338983050847
Lift: 4.506672147735896
Rule: chicken -> nan
Support: 0.004532728969470737
Confidence: 0.29059829059829057
Lift: 4.84395061728395
Rule: shrimp -> frozen vegetables
Support: 0.005332622317024397
Confidence: 0.

### Creating dataframe for above results

In [11]:
rule = []
support = []
confidence = []
lift = []
for item in association_results:
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    r = items[0] + " -> " + items[1]
    rule.append(r)

    #second index of the inner list
    s = item[1]
    support.append(s)

    #third index of the list located at 0th
    #of the third index of the inner list
    c = item[2][0][2]
    confidence.append(c)

    l = item[2][0][3]
    lift.append(l)

### Creating Dictionary for dataframe

In [12]:
dct = {'Rule':rule,
       'Support':support,
       'Confidence':confidence,
       'Lift':lift}

In [13]:
df_rules = pd.DataFrame(dct)
df_rules

Unnamed: 0,Rule,Support,Confidence,Lift
0,chicken -> light cream,0.004533,0.290598,4.843951
1,mushroom cream sauce -> escalope,0.005733,0.300699,3.790833
2,pasta -> escalope,0.005866,0.372881,4.700812
3,herb & pepper -> ground beef,0.015998,0.32345,3.291994
4,tomato sauce -> ground beef,0.005333,0.377358,3.840659
5,whole wheat pasta -> olive oil,0.007999,0.271493,4.12241
6,shrimp -> pasta,0.005066,0.322034,4.506672
7,chicken -> nan,0.004533,0.290598,4.843951
8,shrimp -> frozen vegetables,0.005333,0.232558,3.254512
9,cooking oil -> ground beef,0.004799,0.571429,3.281995


### Save dataframe to csv

In [17]:
df_rules.to_csv('AssociationRulesOutput.csv',index=False)