# Market Basket Optimisation using Apriori

In [48]:
# make sure you have "apyori" package installed.
# to install it run:
! pip install apyori



## Importing libraries

In [49]:
import pandas as pd

## Data Preprocessing

In [50]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)
print(dataset.shape)
dataset.head()

(7501, 20)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [51]:
# Looks like we have a lot of null values in the dataset.

# lets fill them
dataset.fillna(0, inplace=True)
print(dataset.isna().sum().sum())

0


In [52]:
# for using aprori , need to convert data in list format.
# transaction = [['apple','almonds'],['apple'],['banana','apple']]...

transactions = []
for i in range(dataset.shape[0]):
    transactions.append([str(dataset.values[i, j]) for j in range(dataset.shape[1]) if str(dataset.values[i, j]) != '0'])

## Training Apriori model on the dataset

In [53]:
from apyori import apriori
rules = apriori(transactions=transactions, min_support=0.003, min_confidence=0.2, min_lift=3, min_length=2)

# min_support = 0.003 -> means selecting items with min support of 0.3%
# min_confidance = 0.2 -> means min confidance of 20% 
# min_lift = 3  
# min_length = 2 -> means no. of items in the transaction should be 2

## Displaying the results

In [54]:
results = list(rules)
print(results)

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]), RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]), RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]), RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.24

## Putting results well orgainised into a dataframe

In [55]:
def inspect(results):
    lhs = [tuple(result[2][0][0]) for result in results]
    rhs = [tuple(result[2][0][1]) for result in results]
    supports = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts = [result[2][0][3] for result in results]
    return(list(zip(lhs, rhs, supports, confidences, lifts)))

resultsDf = pd.DataFrame(inspect(results), columns=['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

## Displaying the results (Non Sorted)

In [56]:
print(resultsDf)

                      Left Hand Side             Right Hand Side   Support  \
0                     (light cream,)                  (chicken,)  0.004533   
1            (mushroom cream sauce,)                 (escalope,)  0.005733   
2                           (pasta,)                 (escalope,)  0.005866   
3                   (fromage blanc,)                    (honey,)  0.003333   
4                   (herb & pepper,)              (ground beef,)  0.015998   
..                               ...                         ...       ...   
75          (olive oil, ground beef)  (spaghetti, mineral water)  0.003066   
76           (pancakes, ground beef)  (spaghetti, mineral water)  0.003066   
77           (tomatoes, ground beef)  (spaghetti, mineral water)  0.003066   
78  (spaghetti, milk, mineral water)                (olive oil,)  0.003333   
79                  (tomatoes, milk)  (spaghetti, mineral water)  0.003333   

    Confidence      Lift  
0     0.290598  4.843951  
1     0.3

## Displaying the results (Sorted by descending lift)

In [57]:
print(resultsDf.nlargest(n=70, columns='Lift'))

                                 Left Hand Side           Right Hand Side  \
70                    (frozen vegetables, soup)     (milk, mineral water)   
69               (frozen vegetables, olive oil)     (milk, mineral water)   
52           (mineral water, whole wheat pasta)              (olive oil,)   
44                              (tomato sauce,)  (spaghetti, ground beef)   
3                              (fromage blanc,)                  (honey,)   
..                                          ...                       ...   
29                             (tomatoes, milk)      (frozen vegetables,)   
27                    (frozen vegetables, milk)              (olive oil,)   
48                            (spaghetti, milk)              (olive oil,)   
58  (frozen vegetables, chocolate, ground beef)              (spaghetti,)   
67       (frozen vegetables, milk, ground beef)              (spaghetti,)   

     Support  Confidence      Lift  
70  0.003066    0.383333  7.987176  
6

In [58]:
resultsDf.to_csv('Market_Basket_Optimisation_Results.csv')