In [1]:
import numpy as np
from apriori_algo import *

# SETTINGS 

In [2]:
path = 'data_algo.txt'
order = ['I' + str(i) for i in range(1,6)]
min_support = 2/9

In [3]:
transactions = load_transactions(path,order)
num_trans = len(transactions)
transactions

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3']]

## INITIALIZATION - Create L1 - First part of the loop

In [4]:
# C - candidate itemsets
# L - frequent itemsets

C = {}
L = {}
itemset_size = 1
discarded = {itemset_size: []}
C.update({itemset_size : [ [f] for f in order]})

In [5]:
supp_count_L = {}
f, sup, new_discarded = get_frequent(C[itemset_size], transactions, min_support, discarded)
discarded.update({itemset_size : new_discarded})
L.update({itemset_size :f})
supp_count_L.update({itemset_size : sup})

## The rest of the loop

In [6]:
print(f'Table C{1}: \n')
print_table(C[1], [count_occurences(it, transactions) for it in C[1]])
print(f'Table L{1}: \n')
print_table(L[1], [count_occurences(it, transactions) for it in C[1]])
print('---------------------------------------------------------------')

k = itemset_size + 1
convergence = False
while not convergence:
    C.update({ k : join_set_itemsets(L[k-1], order) })
    
    print(f'Table C{k}: \n')
    print_table(C[k], [count_occurences(it, transactions) for it in C[k]])
    
    f, sup, new_discarded = get_frequent(C[k], transactions, min_support, discarded)
    
    discarded.update({k : new_discarded})
    L.update({k : f})
    supp_count_L.update({k : sup})
    
    if len(L[k]) == 0:
        convergence = True
    
    else:
        print(f'Table L{k}: \n')
        print_table(L[k], [count_occurences(it, transactions) for it in C[k]])
        print('---------------------------------------------------------------')
    
    k+=1

Table C1: 

Itemsets | Frequency
['I1'] : 6
['I2'] : 7
['I3'] : 6
['I4'] : 2
['I5'] : 2



Table L1: 

Itemsets | Frequency
['I1'] : 6
['I2'] : 7
['I3'] : 6
['I4'] : 2
['I5'] : 2



---------------------------------------------------------------
Table C2: 

Itemsets | Frequency
['I1', 'I2'] : 4
['I1', 'I3'] : 4
['I1', 'I4'] : 1
['I1', 'I5'] : 2
['I2', 'I3'] : 4
['I2', 'I4'] : 2
['I2', 'I5'] : 2
['I3', 'I4'] : 0
['I3', 'I5'] : 1
['I4', 'I5'] : 0



Table L2: 

Itemsets | Frequency
['I1', 'I2'] : 4
['I1', 'I3'] : 4
['I1', 'I5'] : 1
['I2', 'I3'] : 2
['I2', 'I4'] : 4
['I2', 'I5'] : 2



---------------------------------------------------------------
Table C3: 

Itemsets | Frequency
['I1', 'I2', 'I3'] : 2
['I1', 'I2', 'I5'] : 2
['I1', 'I3', 'I5'] : 1
['I2', 'I3', 'I4'] : 0
['I2', 'I3', 'I5'] : 1
['I2', 'I4', 'I5'] : 0



Table L3: 

Itemsets | Frequency
['I1', 'I2', 'I3'] : 2
['I1', 'I2', 'I5'] : 2



---------------------------------------------------------------
Table C4: 

Itemsets | Fre