In [1]:
def load_dataset():
    """Return the sample market-basket transactions.

    Each inner list is one transaction, given as product-name strings.
    The list is rebuilt on every call, so callers receive an independent
    copy each time.
    """
    transactions = [
        ['laptop', 'earphones', 'laptop bag', 'mouse', 'pendrive'],
        ['mobile', 'cover', 'earphones', 'pendrive'],
        ['laptop', 'laptop bag', 'cover', 'pendrive', 'screenguard'],
        ['earphones', 'mouse', 'pendrive', 'cover', 'screenguard'],
        ['laptop bag', 'cover', 'pendrive', 'laptop', 'screenguard'],
        ['laptop', 'laptop bag', 'mobile', 'earphones', 'pendrive'],
        ['laptop', 'laptop bag', 'mouse', 'earphones', 'pendrive'],
        # NOTE: 'laptop bag' appears twice in this transaction.
        ['mobile', 'laptop bag', 'laptop', 'laptop bag'],
        ['laptop', 'laptop bag', 'earphones', 'screenguard'],
        ['laptop', 'laptop bag', 'earphones', 'pendrive'],
        ['earphones', 'laptop', 'laptop bag', 'cover', 'mouse'],
        ['laptop', 'laptop bag', 'mobile', 'pendrive'],
        ['pendrive', 'mobile', 'cover', 'laptop', 'mouse'],
        ['laptop', 'laptop bag', 'mouse', 'pendrive', 'earphones'],
        ['earphones', 'laptop', 'mobile', 'laptop bag', 'screenguard'],
    ]
    return transactions

In [2]:
from numpy import *

In [3]:
# Load the sample transactions into `d` for a first look at the data.
d=load_dataset()

In [4]:
# Display the transactions held in `d`.
d

[['laptop', 'earphones', 'laptop bag', 'mouse', 'pendrive'],
 ['mobile', 'cover', 'earphones', 'pendrive'],
 ['laptop', 'laptop bag', 'cover', 'pendrive', 'screenguard'],
 ['earphones', 'mouse', 'pendrive', 'cover', 'screenguard'],
 ['laptop bag', 'cover', 'pendrive', 'laptop', 'screenguard'],
 ['laptop', 'laptop bag', 'mobile', 'earphones', 'pendrive'],
 ['laptop', 'laptop bag', 'mouse', 'earphones', 'pendrive'],
 ['mobile', 'laptop bag', 'laptop', 'laptop bag'],
 ['laptop', 'laptop bag', 'earphones', 'screenguard'],
 ['laptop', 'laptop bag', 'earphones', 'pendrive'],
 ['earphones', 'laptop', 'laptop bag', 'cover', 'mouse'],
 ['laptop', 'laptop bag', 'mobile', 'pendrive'],
 ['pendrive', 'mobile', 'cover', 'laptop', 'mouse'],
 ['laptop', 'laptop bag', 'mouse', 'pendrive', 'earphones'],
 ['earphones', 'laptop', 'mobile', 'laptop bag', 'screenguard']]

In [5]:
def create_C1(dataset):
    """Create the list of candidate item sets of size one.

    Parameters
    ----------
    dataset : iterable of iterables
        Transactions. Items must be hashable and mutually orderable.

    Returns
    -------
    list of frozenset
        One single-item frozenset per distinct item, ordered by item.
    """
    # A set de-duplicates in O(1) per item instead of rescanning a growing list.
    unique_items = {item for transaction in dataset for item in transaction}
    # frozenset (not set) keeps each candidate hashable.
    return [frozenset([item]) for item in sorted(unique_items)]

In [6]:
# Try create_C1 on a fresh copy of the sample data; the bare call on the
# last line makes the notebook display the resulting candidate list.
X=load_dataset()
create_C1(X)

[frozenset({'cover'}),
 frozenset({'earphones'}),
 frozenset({'laptop'}),
 frozenset({'laptop bag'}),
 frozenset({'mobile'}),
 frozenset({'mouse'}),
 frozenset({'pendrive'}),
 frozenset({'screenguard'})]

In [7]:
def scanD(dataset,candidates,min_support):
    """Return the candidates that meet a minimum support level.

    Parameters
    ----------
    dataset : iterable of iterables
        Transactions. Read exactly once, so a one-shot iterable such as
        ``map(set, data)`` is accepted as well as a list.
    candidates : iterable of frozenset
        Candidate item sets. A candidate that appears more than once is
        counted only once, so its support can never exceed 1.0.
    min_support : float
        Minimum fraction of transactions that must contain a candidate.

    Returns
    -------
    retlist : list of frozenset
        Candidates whose support is >= ``min_support``, in reverse order of
        when each was first found in a transaction.
    support_data : dict
        Maps every candidate found in at least one transaction to its
        support. Candidates that never occur are not recorded.
    """
    # Materialise once: lets len() work for lazy inputs and avoids rebuilding
    # a set for every subset test.
    transactions = [set(tid) for tid in dataset]
    # dict.fromkeys drops repeated candidates while keeping first-seen order.
    unique_candidates = list(dict.fromkeys(candidates))

    counts = {}
    for tid in transactions:
        for can in unique_candidates:
            if can.issubset(tid):
                counts[can] = counts.get(can, 0) + 1

    # counts is non-empty only if there was at least one transaction, so the
    # division below cannot hit zero.
    num_transactions = float(len(transactions))
    retlist = []
    support_data = {}
    for can, count in counts.items():
        support = count / num_transactions
        if support >= min_support:
            retlist.append(can)
        support_data[can] = support
    # Same ordering as repeatedly inserting at the front, without the
    # quadratic cost.
    retlist.reverse()
    return retlist,support_data

In [8]:
# Reload the sample transactions under the name `dataset`, which the
# following cells pass to create_C1 and scanD, and display them.
dataset=load_dataset()
dataset

[['laptop', 'earphones', 'laptop bag', 'mouse', 'pendrive'],
 ['mobile', 'cover', 'earphones', 'pendrive'],
 ['laptop', 'laptop bag', 'cover', 'pendrive', 'screenguard'],
 ['earphones', 'mouse', 'pendrive', 'cover', 'screenguard'],
 ['laptop bag', 'cover', 'pendrive', 'laptop', 'screenguard'],
 ['laptop', 'laptop bag', 'mobile', 'earphones', 'pendrive'],
 ['laptop', 'laptop bag', 'mouse', 'earphones', 'pendrive'],
 ['mobile', 'laptop bag', 'laptop', 'laptop bag'],
 ['laptop', 'laptop bag', 'earphones', 'screenguard'],
 ['laptop', 'laptop bag', 'earphones', 'pendrive'],
 ['earphones', 'laptop', 'laptop bag', 'cover', 'mouse'],
 ['laptop', 'laptop bag', 'mobile', 'pendrive'],
 ['pendrive', 'mobile', 'cover', 'laptop', 'mouse'],
 ['laptop', 'laptop bag', 'mouse', 'pendrive', 'earphones'],
 ['earphones', 'laptop', 'mobile', 'laptop bag', 'screenguard']]

In [9]:
# Candidate 1-item sets for `dataset`; the bare `c1` displays them.
c1=create_C1(dataset)
c1

[frozenset({'cover'}),
 frozenset({'earphones'}),
 frozenset({'laptop'}),
 frozenset({'laptop bag'}),
 frozenset({'mobile'}),
 frozenset({'mouse'}),
 frozenset({'pendrive'}),
 frozenset({'screenguard'})]

In [10]:
# Frequent 1-item sets (L1) and per-candidate support (SD) at min_support=0.5.
# A notebook cell only displays its last expression, so a bare `L1` before
# `SD` would show nothing; L1 is displayed in its own cell instead.
L1,SD=scanD(dataset,c1,0.5)
SD

{frozenset({'earphones'}): 0.6666666666666666,
 frozenset({'laptop'}): 0.8666666666666667,
 frozenset({'laptop bag'}): 0.8,
 frozenset({'mouse'}): 0.4,
 frozenset({'pendrive'}): 0.7333333333333333,
 frozenset({'cover'}): 0.4,
 frozenset({'mobile'}): 0.4,
 frozenset({'screenguard'}): 0.3333333333333333}

In [11]:
# Display the 1-item sets that met min_support=0.5 in the scanD call above.
L1

[frozenset({'pendrive'}),
 frozenset({'laptop bag'}),
 frozenset({'laptop'}),
 frozenset({'earphones'})]

In [12]:
# Display the support value recorded for each 1-item candidate.
SD

{frozenset({'earphones'}): 0.6666666666666666,
 frozenset({'laptop'}): 0.8666666666666667,
 frozenset({'laptop bag'}): 0.8,
 frozenset({'mouse'}): 0.4,
 frozenset({'pendrive'}): 0.7333333333333333,
 frozenset({'cover'}): 0.4,
 frozenset({'mobile'}): 0.4,
 frozenset({'screenguard'}): 0.3333333333333333}

In [13]:
def aprioriGen(Lk,k):
    """Create the candidate k-item sets (Ck) from the item sets in ``Lk``.

    Two item sets are joined (set union) when their first ``k-2`` items,
    taken in sorted order, are identical. For ``k == 2`` the prefix is
    empty, so every pair of sets in ``Lk`` is joined.

    Parameters
    ----------
    Lk : sequence of frozenset
        Item sets of size ``k-1``; items must be mutually orderable.
    k : int
        Size of the candidate item sets to generate.

    Returns
    -------
    list of frozenset
        Candidate item sets, in the order the pairs (i, j), i < j, are visited.
    """
    # Sort BEFORE slicing. Set iteration order is arbitrary (and varies between
    # runs for strings), so slicing the unsorted list compares arbitrary items
    # and can both miss valid joins and emit the same candidate twice.
    prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk):
            if prefixes[i] == prefixes[j]:  # first k-2 items are equal
                retList.append(Lk[i] | Lk[j])
    return retList

In [14]:
# Walk through one candidate-generation step by hand:
# take the 1-item sets that reach min_support=0.5 and join them into
# 2-item candidates with aprioriGen.
L1,supportData=scanD(dataset,c1,0.5)
L=[L1]
k=2
# "+++++" is just a visual marker in front of the frequent 1-item sets.
print("+++++",L[0])
# L[k-2] is L[0] here, i.e. the 1-item sets found above.
Ck=aprioriGen(L[k-2],k)
print(Ck)

+++++ [frozenset({'pendrive'}), frozenset({'laptop bag'}), frozenset({'laptop'}), frozenset({'earphones'})]
[frozenset({'pendrive', 'laptop bag'}), frozenset({'pendrive', 'laptop'}), frozenset({'earphones', 'pendrive'}), frozenset({'laptop', 'laptop bag'}), frozenset({'earphones', 'laptop bag'}), frozenset({'earphones', 'laptop'})]


In [15]:
def apriori(dataset,minSupport=0.40):
    """Find the item sets whose support is at least ``minSupport``.

    Starts from the single-item candidates and grows them one item at a
    time, stopping at the first size for which no candidate qualifies.

    Returns
    -------
    L : list of list of frozenset
        ``L[i]`` holds the qualifying item sets produced at step ``i``
        (single items first). Whenever at least one single item qualifies,
        the last entry is the empty list that ended the search.
    supportData : dict
        Support values collected from every scanD pass.
    """
    single_item_candidates = create_C1(dataset)
    # Convert each transaction to a set once; every pass below reuses them.
    transactions = [set(transaction) for transaction in dataset]
    frequent_singles, supportData = scanD(transactions, single_item_candidates, minSupport)
    levels = [frequent_singles]
    size = 2
    # levels[-1] is always the most recently found level.
    while levels[-1]:
        candidates = aprioriGen(levels[-1], size)
        frequent, level_support = scanD(transactions, candidates, minSupport)
        supportData.update(level_support)
        levels.append(frequent)
        size += 1
    return levels, supportData

In [16]:
# Run the full search with the default minSupport (0.40).
# NOTE: this rebinds `L` and `SD`, which earlier cells assigned from the
# manual walk-through and the scanD call.
L,SD=apriori(load_dataset())

In [17]:
# Display the item sets returned by apriori, one inner list per level.
L

[[frozenset({'mobile'}),
  frozenset({'cover'}),
  frozenset({'pendrive'}),
  frozenset({'mouse'}),
  frozenset({'laptop bag'}),
  frozenset({'laptop'}),
  frozenset({'earphones'})],
 [frozenset({'earphones', 'laptop'}),
  frozenset({'earphones', 'laptop bag'}),
  frozenset({'laptop', 'laptop bag'}),
  frozenset({'earphones', 'pendrive'}),
  frozenset({'laptop', 'pendrive'}),
  frozenset({'laptop bag', 'pendrive'})],
 [frozenset({'laptop', 'laptop bag', 'pendrive'}),
  frozenset({'earphones', 'laptop', 'laptop bag'})],
 []]

In [18]:
# Display the support values collected by apriori across all levels.
SD

{frozenset({'earphones'}): 0.6666666666666666,
 frozenset({'laptop'}): 0.8666666666666667,
 frozenset({'laptop bag'}): 0.8,
 frozenset({'mouse'}): 0.4,
 frozenset({'pendrive'}): 0.7333333333333333,
 frozenset({'cover'}): 0.4,
 frozenset({'mobile'}): 0.4,
 frozenset({'screenguard'}): 0.3333333333333333,
 frozenset({'mouse', 'pendrive'}): 0.3333333333333333,
 frozenset({'laptop bag', 'pendrive'}): 0.5333333333333333,
 frozenset({'laptop', 'pendrive'}): 0.6,
 frozenset({'earphones', 'pendrive'}): 0.4666666666666667,
 frozenset({'laptop bag', 'mouse'}): 0.26666666666666666,
 frozenset({'laptop', 'mouse'}): 0.3333333333333333,
 frozenset({'earphones', 'mouse'}): 0.3333333333333333,
 frozenset({'laptop', 'laptop bag'}): 0.8,
 frozenset({'earphones', 'laptop bag'}): 0.5333333333333333,
 frozenset({'earphones', 'laptop'}): 0.5333333333333333,
 frozenset({'cover', 'mobile'}): 0.13333333333333333,
 frozenset({'mobile', 'pendrive'}): 0.26666666666666666,
 frozenset({'earphones', 'mobile'}): 0.2,
