Algoritmo Apriori

In [1]:
class TransactionDataset:
    def __init__(self, transactions):
        self.transactions = transactions

    def get_transactions(self):
        return self.transactions


class Apriori:
    def __init__(self, transaction_dataset, minsupport):
        self.minsupport = minsupport
        self.C1 = {}
        self.L = []
        self.k = 2
        self.transaction_dataset = transaction_dataset

    def fit(self):
        """Find frequent itemsets in the given transaction dataset"""
        self._scan_transactions()
        self.L1 = self._apriori_prune(self.C1)
        self.L = self.L1.copy()
        print('====================================')
        print('Frequent 1-itemset is', self.L1)
        print('====================================')
        while self.L:
            C = self._apriori_count_subset(self.L)
            frequent_itemset = self._apriori_prune(C)
            if not frequent_itemset:
                break
            print('====================================')
            print(f'Frequent {self.k}-itemset is', frequent_itemset)
            print('====================================')
            self.L = self._apriori_gen(frequent_itemset)
            self.k += 1

    def _scan_transactions(self):
        """Scan the transaction dataset and generate C1"""
        for transaction in self.transaction_dataset.get_transactions():
            for item in transaction:
                if item in self.C1:
                    self.C1[item] += 1
                else:
                    self.C1[item] = 1
        self.L1 = self._apriori_prune(self.C1)

    def _apriori_gen(self, itemset):
        """Generate new candidate itemsets by joining existing ones"""
        candidates = []
        for i in range(len(itemset)):
            for j in range(i + 1, len(itemset)):
                element = itemset[i]
                element1 = itemset[j]
                if element[0:(len(element) - 1)] == element1[0:(len(element1) - 1)]:
                    unionset = element[0:(len(element) - 1)] + element1[len(element1) - 1] + element[
                        len(element) - 1]  # Combine (k-1)-Itemset to k-Itemset
                    unionset = ''.join(sorted(unionset))  # Sort itemset by dict order
                    candidates.append(unionset)
        return candidates

    def _apriori_prune(self, itemset):
        """Prune infrequent itemsets"""
        frequent_itemset = []
        for item in itemset:
            if itemset[item] >= self.minsupport:
                frequent_itemset.append(item)
        return sorted(frequent_itemset)

    def _apriori_count_subset(self, itemset):
        """Count the support of candidate itemsets"""
        Lk = {}
        transactions = self.transaction_dataset.get_transactions()
        for transaction in transactions:
            for i in range(len(itemset)):
                key = itemset[i]
                if key not in Lk:
                    Lk[key] = 0
                flag = True
                for k in key:
                    if k not in transaction:
                        flag = False
                if flag:
                    Lk[key] += 1
        return Lk

import csv
def main():
    transactions = []
    with open('../../datasets/transactions.csv', 'r') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            transactions.append(row)

    transaction_dataset = TransactionDataset(transactions)
    apriori = Apriori(transaction_dataset, minsupport=3)
    apriori.fit()


main()


Frequent 1-itemset is ['a', 'b', 'c', 'f', 'm', 'p']
Frequent 2-itemset is ['a', 'b', 'c', 'f', 'm', 'p']
Frequent 3-itemset is ['ac', 'af', 'am', 'cf', 'cm', 'cp', 'fm']
Frequent 4-itemset is ['acf', 'acm', 'afm', 'cfm']
Frequent 5-itemset is ['acfm']
