In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.lib import type_check
import seaborn as sns
from itertools import combinations
  
# Input file and row limit are named so they can be tuned in one place.
# Only the first N_ROWS rows of the CSV are loaded; set N_ROWS = None to read
# the whole file.
DATA_PATH = "bread_basket.csv"
N_ROWS = 300

df = pd.read_csv(DATA_PATH, nrows=N_ROWS)
df.head(10)

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend
5,3,Cookies,30-10-2016 10:07,morning,weekend
6,4,Muffin,30-10-2016 10:08,morning,weekend
7,5,Coffee,30-10-2016 10:13,morning,weekend
8,5,Pastry,30-10-2016 10:13,morning,weekend
9,5,Bread,30-10-2016 10:13,morning,weekend


In [None]:
# Group purchased items by transaction id, in row order:
#   {transaction_id: [item, item, ...]}
# Repeated items inside one transaction are kept as-is.
dictTrans = {}
# Walk the two columns in lockstep rather than label-indexing df[col][i] for
# every row: this avoids a Series lookup per access and does not depend on the
# frame carrying a default 0..n-1 index.
for transaction_id, item in zip(df['Transaction'], df['Item']):
    dictTrans.setdefault(transaction_id, []).append(item)
print(dictTrans)

{1: ['Bread'], 2: ['Scandinavian', 'Scandinavian'], 3: ['Hot chocolate', 'Jam', 'Cookies'], 4: ['Muffin'], 5: ['Coffee', 'Pastry', 'Bread'], 6: ['Medialuna', 'Pastry', 'Muffin'], 7: ['Medialuna', 'Pastry', 'Coffee', 'Tea'], 8: ['Pastry', 'Bread'], 9: ['Bread', 'Muffin'], 10: ['Scandinavian', 'Medialuna'], 11: ['Bread', 'Medialuna', 'Bread'], 12: ['Jam', 'Coffee', 'Tartine', 'Pastry', 'Tea'], 13: ['Basket', 'Bread', 'Coffee'], 14: ['Bread', 'Medialuna', 'Pastry'], 15: ['Mineral water', 'Scandinavian'], 16: ['Bread', 'Medialuna', 'Coffee'], 17: ['Hot chocolate'], 18: ['Farm House'], 19: ['Farm House', 'Bread'], 20: ['Bread', 'Medialuna'], 21: ['Coffee', 'Coffee', 'Medialuna', 'Bread'], 22: ['Jam'], 23: ['Scandinavian', 'Muffin'], 24: ['Bread'], 25: ['Scandinavian'], 26: ['Fudge'], 27: ['Scandinavian'], 28: ['Coffee', 'Bread'], 29: ['Bread', 'Jam'], 30: ['Bread'], 31: ['Basket'], 32: ['Scandinavian', 'Muffin'], 33: ['Coffee'], 34: ['Coffee', 'Muffin'], 35: ['Muffin', 'Scandinavian'], 36: 

In [None]:
def loadDataSet():
    """Return the item lists stored in the module-level ``dictTrans``.

    The result is the dictionary's values view: one list of items per
    transaction id.
    """
    transactions = dictTrans.values()
    return transactions

In [None]:
# Fetch the per-transaction item lists and show them as the cell output.
dataSet = loadDataSet()
dataSet

In [None]:
def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a collection of transactions.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list with one single-item frozenset per distinct item, ordered by
        item. frozenset is used so each candidate can serve as a dict key.
    """
    # dict.fromkeys de-duplicates with a hash lookup per item (the previous
    # `[item] in C1` test rescanned the whole list for every item) while
    # keeping first-seen order as the input to the sort.
    unique_items = dict.fromkeys(
        item for transaction in dataSet for item in transaction
    )
    # Wrap each item in a list so a string item becomes one element rather
    # than being split into characters.
    return [frozenset([item]) for item in sorted(unique_items)]

In [None]:
def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions; each must work as the argument
            of ``frozenset.issubset``.
        Ck: collection of candidate itemsets (frozensets), iterated once per
            transaction.
        minSupport: minimum fraction of transactions a candidate must occur in.

    Returns:
        (frequent, supportData): ``frequent`` lists the candidates whose
        support is >= minSupport; ``supportData`` maps every candidate that
        occurred in at least one transaction to its support.
    """
    hits = {}
    for transaction in D:
        for candidate in Ck:
            if candidate.issubset(transaction):
                hits[candidate] = hits.get(candidate, 0) + 1

    total = float(len(D))
    supportData = {candidate: count / total for candidate, count in hits.items()}

    # Candidates are reported newest-first, i.e. in reverse order of their
    # first occurrence during the scan.
    frequent = [candidate for candidate, support in supportData.items()
                if support >= minSupport]
    frequent.reverse()
    return frequent, supportData

In [None]:
# One set per transaction, so candidate itemsets can be tested with issubset.
D = [set(transaction) for transaction in dataSet]

In [None]:
def aprioriGen(Lk, k): #creates Ck
    """Generate candidate k-itemsets (Ck) by joining (k-1)-itemsets.

    Two itemsets are joined when their first k-2 items, taken in sorted
    order, are identical; the candidate is the union of the pair.

    Args:
        Lk: list of frozensets, expected to all have k-1 mutually comparable
            items.
        k: size of the candidates to create.

    Returns:
        List of frozenset candidates, in the order the joined pairs are met.
    """
    # Sort BEFORE slicing. A set has no defined iteration order, so slicing
    # list(itemset) first compared arbitrary items instead of the smallest
    # k-2; that could skip valid candidates and emit the same candidate more
    # than once (which then gets counted more than once downstream).
    prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk):
            if prefixes[i] == prefixes[j]: #if first k-2 elements are equal
                retList.append(Lk[i] | Lk[j]) #set union
    return retList

In [None]:
def apriori(dataSet, minSupport = 0.01):
    """Find all frequent itemsets in ``dataSet`` level by level.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.
        minSupport: minimum fraction of transactions an itemset must occur in
            to be kept.

    Returns:
        (L, supportData): ``L[i]`` is the list of frequent itemsets found at
        level i+1 (as frozensets); the last entry of ``L`` is always an empty
        list, which is what ends the search. ``supportData`` maps every
        counted candidate to its support, including those below minSupport.
    """
    # Materialise the transactions once. dataSet used to be walked twice
    # (once to build C1, once to build D), so a one-shot iterable such as a
    # generator was exhausted by the first pass and D silently came out empty.
    D = [set(transaction) for transaction in dataSet]
    C1 = createC1(D)
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while L[k-2]:
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)#scan DB to get Lk
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

In [None]:
# Mine the transactions with apriori's default minSupport. L is the list of
# frequent-itemset lists and suppData maps counted itemsets to their support.
dataset = loadDataSet()
L,suppData = apriori(dataset)

In [None]:
# L[0] came from the 1-item candidates and each later entry from candidates
# one item larger, so L[2] shows the frequent 3-item sets.
L[2]

[frozenset({'Coffee', 'Juice', 'Pastry'}),
 frozenset({'Coffee', 'Frittata', 'Tea'}),
 frozenset({'Bread', 'Coffee', 'Medialuna'}),
 frozenset({'Coffee', 'Tartine', 'Tea'}),
 frozenset({'Coffee', 'Pastry', 'Tea'}),
 frozenset({'Coffee', 'Medialuna', 'Pastry'}),
 frozenset({'Bread', 'Coffee', 'Pastry'})]

In [None]:
def generateRules(L, supportData, minConf=0.7):  #supportData is a dict coming from scanD
    """Derive association rules from frequent itemsets.

    Args:
        L: list of lists of frozensets as returned by ``apriori``; ``L[i]``
            holds the frequent itemsets with i+1 items.
        supportData: dict mapping itemset (frozenset) -> support.
        minConf: minimum confidence a rule must reach to be reported.

    Returns:
        List of (antecedent, consequent, confidence) tuples. Each reported
        rule is also printed by ``calcConf``.
    """
    bigRuleList = []
    for i in range(1, len(L)):#only get the sets with two or more items
        for freqSet in L[i]:
            # Single-item consequents. A consequent is skipped when the
            # support of its antecedent was never recorded, since the rule
            # could not be scored (the lookup would raise KeyError).
            H1 = [frozenset([item]) for item in freqSet
                  if freqSet - frozenset([item]) in supportData]
            # Always score the single-item consequents. Previously itemsets
            # with three or more items went straight to rulesFromConseq,
            # which only scores the merged (two-item and larger) consequents,
            # so rules such as {A, B} --> {C} were never produced.
            H1 = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
            # Confidence can only fall as the consequent grows, so larger
            # consequents are built from the surviving ones; at least two are
            # needed to merge.
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

In [None]:
def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    """Score candidate consequents of ``freqSet`` and record the confident rules.

    For each consequent in ``H`` the rule (freqSet - consequent) --> consequent
    gets confidence support(freqSet) / support(freqSet - consequent). Rules
    reaching ``minConf`` are printed and appended to ``brl`` as
    (antecedent, consequent, confidence) tuples.

    Args:
        freqSet: the itemset the rules are derived from.
        H: iterable of candidate consequents (frozensets).
        supportData: dict mapping itemset -> support; must contain freqSet and
            every antecedent, otherwise KeyError is raised.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        List of the consequents whose rule met ``minConf``.
    """
    survivors = []
    for consequent in H:
        antecedent = freqSet - consequent
        confidence = supportData[freqSet] / supportData[antecedent]
        if confidence >= minConf:
            print (antecedent,'-->',consequent,'conf:',confidence)
            brl.append((antecedent, consequent, confidence))
            survivors.append(consequent)
    return survivors

In [None]:
def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Grow rule consequents for ``freqSet`` by repeatedly merging them.

    Starting from the consequents in ``H`` (all taken to have the size of
    ``H[0]``), consequents one item larger are generated with ``aprioriGen``
    and scored with ``calcConf``, which appends accepted rules to ``brl``.
    The process repeats on the accepted consequents while more than one
    remains and freqSet is still large enough to leave a non-empty
    antecedent. ``H`` itself is not scored here. Returns None.
    """
    consequents = H
    while True:
        size = len(consequents[0])
        if len(freqSet) <= size + 1:
            # A larger consequent would leave nothing for the antecedent.
            return
        merged = aprioriGen(consequents, size + 1)
        consequents = calcConf(freqSet, merged, supportData, brl, minConf)
        if len(consequents) <= 1:
            # At least two consequents are needed for another merge.
            return

In [None]:
# Generate rules with a confidence threshold of 0.3 instead of the 0.7 default;
# accepted rules are printed as they are found.
rules= generateRules(L,suppData, minConf=0.3)

frozenset({'Soup'}) --> frozenset({'Hearty & Seasonal'}) conf: 0.4
frozenset({'Soup'}) --> frozenset({'Coffee'}) conf: 0.4
frozenset({'Cake'}) --> frozenset({'Coffee'}) conf: 0.8333333333333333
frozenset({'Mineral water'}) --> frozenset({'Hearty & Seasonal'}) conf: 0.6666666666666666
frozenset({'Cookies'}) --> frozenset({'Coffee'}) conf: 0.4285714285714286
frozenset({'Victorian Sponge'}) --> frozenset({'Tea'}) conf: 0.6666666666666666
frozenset({'Soup'}) --> frozenset({'Tea'}) conf: 0.4
frozenset({'Frittata'}) --> frozenset({'Coffee'}) conf: 0.6666666666666666
frozenset({'Frittata'}) --> frozenset({'Tea'}) conf: 0.3333333333333333
frozenset({'Hearty & Seasonal'}) --> frozenset({'Coffee'}) conf: 0.375
frozenset({'Frittata'}) --> frozenset({'Hearty & Seasonal'}) conf: 0.3333333333333333
frozenset({'Victorian Sponge'}) --> frozenset({'Bread'}) conf: 0.6666666666666666
frozenset({'Juice'}) --> frozenset({'Coffee'}) conf: 0.6249999999999999
frozenset({'Muffin'}) --> frozenset({'Coffee'}) co