In [1]:
class Apriori:
    def __init__(self, transactions, minimumsupport):
        self.transactions = transactions
        self.minimumsupport = minimumsupport
        self.freqitemset_support= {}

    def supportcalculations(self, itemset):
        #Calculate the support of an itemset of the dataset
        cal = sum(1 for transaction in self.transactions if itemset.issubset(transaction))
        return cal

    def size1_itemset(self):
        #Generate 1-itemsets
        counter = {}
        for transaction in self.transactions:
            for item in transaction:
                if item in counter:
                    counter[item] += 1
                else:
                    counter[item] = 1
        return counter

    def frequentitemsets_fk1_f1(self, fk_minus_1, f1):
        #Generate candidates using Fk-1 x F1 method
        candidate_itemsets = set()
        for itemset in fk_minus_1:
            for new in f1:
                newcandidate = itemset.union({new})
                if len(newcandidate) == len(itemset) + 1:
                    candidate_itemsets.add(frozenset(newcandidate))
        return candidate_itemsets

    def frequentitemsets_fk1_fk1(self, fk_minus_1):
        #Generate candidates using Fk-1 x Fk-1 method
        candidate_itemsets = set()
        for ctr in range(len(fk_minus_1)):
            for ctr2 in range(ctr+1, len(fk_minus_1)):
                newcandidate = fk_minus_1[ctr].union(fk_minus_1[ctr2])
                if len(newcandidate) == len(fk_minus_1[ctr]) + 1:
                    candidate_itemsets.add(frozenset(newcandidate))
        return candidate_itemsets

    def run(self, method):
        # Initialization
        candidate_1itemsets = self.size1_itemset()
        self.freqitemset_support = {frozenset({item}): support for item, support in candidate_1itemsets.items() if support >= self.minimumsupport}
        
        curr_freqitemsets = list(self.freqitemset_support.keys())
        Candidatestotal = 0

        while curr_freqitemsets:
            if method == "F1":
                newcandidates = self.frequentitemsets_fk1_f1(curr_freqitemsets, candidate_1itemsets.keys())
            elif method == "Fk-1":
                newcandidates = self.frequentitemsets_fk1_fk1(curr_freqitemsets)
            else:
                raise ValueError("Invalid method!")

            Candidatestotal += len(newcandidates)
            
            curr_freqitemsets = []
            for candidate in newcandidates:
                support = self.supportcalculations(candidate)
                if support >= self.minimumsupport:
                    self.freqitemset_support[frozenset(candidate)] = support
                    curr_freqitemsets.append(candidate)

        return self.freqitemset_support, Candidatestotal

class AssociationRules:
    def __init__(self, freqitemset_support):
        self.freqitemset_support = freqitemset_support

    def supportcalculations(self, itemset):
        #Retrieve the support of an itemset
        return self.freqitemset_support.get(itemset, 0)
    
    def confidencecalculations(self, antecedent, consequent):
        #Calculate the confidence of a rule.
        combined_support = self.supportcalculations(antecedent.union(consequent))
        antecedent_support = self.supportcalculations(antecedent)
        if antecedent_support == 0:
            return 0
        return combined_support / antecedent_support

    def find_association_rules(self, minimumconfidence=0.7):
        #Generate association rules from the frequent itemsets
        associationrules = []
        for itemset in self.freqitemset_support.keys():
            if len(itemset) > 1:
                for item in itemset:
                    antecedent = itemset - {item}
                    consequent = {item}
                    confidence = self.confidencecalculations(antecedent, consequent)
                    if confidence >= minimumconfidence:
                        associationrules.append((antecedent, consequent, confidence))
        return associationrules

    def display_rules(self, minimumconfidence=0.7):
        #Display the generated association rules
        associationrules = self.find_association_rules(minimumconfidence=minimumconfidence)
        for antecedent, consequent, confidence in associationrules:
            print(f"{antecedent} => {consequent} (Confidence: {confidence:.2f})")


sampledata = [
    {"Milk", "Beer", "Diapers"},
    {"Bread", "Butter", "Milk"},
    {"Milk", "Diapers", "Cookies"},
    {"Bread", "Butter", "Cookies"},
    {"Beer", "Cookies", "Diapers"},
    {"Milk", "Diapers", "Bread", "Butter"},
    {"Bread", "Butter", "Diapers"},
    {"Beer", "Diapers"},
    {"Milk", "Diapers", "Bread", "Butter"},
    {"Beer", "Cookies"}
]


minimumsupport = 2  

apriori = Apriori(sampledata, minimumsupport)
freqitemset_support = apriori.run(method="F1")


print("Using Fk-1 x F1 Method:")
freqitemsets_supportf1, total_f1 = apriori.run(method="F1")
print("Frequent Itemsets of the data with Support:")
for itemset, support in freqitemsets_supportf1.items():
    print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered of the data: {total_f1}\n")

print("Using Fk-1 x Fk-1 Method:")
freqitemsets_supportfk1, totalfk1 = apriori.run(method="Fk-1")
print("Frequent Itemsets of the data with Support:")
for itemset, support in freqitemsets_supportfk1.items():
    print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered of the data: {totalfk1}\n")


savingsobserved = total_f1 - totalfk1
if savingsobserved > 0:
    print(f"The Fk-1 x F1 method generated {savingsobserved} more candidate itemsets than the Fk-1 x Fk-1 method.")
elif savingsobserved < 0:
    print(f"The Fk-1 x F1 method generated {-savingsobserved} fewer candidate itemsets than the Fk-1 x Fk-1 method.")
else:
    print("Both methods generated the same number of candidate itemsets.")


Using Fk-1 x F1 Method:
Frequent Itemsets of the data with Support:
frozenset({'Diapers'}): 7
frozenset({'Milk'}): 5
frozenset({'Beer'}): 4
frozenset({'Butter'}): 5
frozenset({'Bread'}): 5
frozenset({'Cookies'}): 4
frozenset({'Butter', 'Bread'}): 5
frozenset({'Butter', 'Milk'}): 3
frozenset({'Milk', 'Bread'}): 3
frozenset({'Butter', 'Diapers'}): 3
frozenset({'Cookies', 'Diapers'}): 2
frozenset({'Bread', 'Diapers'}): 3
frozenset({'Beer', 'Diapers'}): 3
frozenset({'Cookies', 'Beer'}): 2
frozenset({'Milk', 'Diapers'}): 4
frozenset({'Butter', 'Milk', 'Diapers'}): 2
frozenset({'Milk', 'Bread', 'Diapers'}): 2
frozenset({'Butter', 'Milk', 'Bread'}): 3
frozenset({'Butter', 'Bread', 'Diapers'}): 3
frozenset({'Butter', 'Milk', 'Bread', 'Diapers'}): 2
Total Candidate Itemsets Considered of the data: 46

Using Fk-1 x Fk-1 Method:
Frequent Itemsets of the data with Support:
frozenset({'Diapers'}): 7
frozenset({'Milk'}): 5
frozenset({'Beer'}): 4
frozenset({'Butter'}): 5
frozenset({'Bread'}): 5
froze

In [2]:
import pandas as pd

file_path = 'groceries.csv' 
newdf = pd.read_csv(file_path)

if 'Item(s)' in newdf.columns:
    newdf = newdf.drop(columns=['Item(s)'])
transactions = [set(row.dropna().tolist()) for _, row in newdf.iterrows()]

print(transactions)

[{'semi-finished bread', 'margarine', 'citrus fruit', 'ready soups'}, {'tropical fruit', 'yogurt', 'coffee'}, {'whole milk'}, {'meat spreads', 'pip fruit', 'cream cheese', 'yogurt'}, {'condensed milk', 'whole milk', 'other vegetables', 'long life bakery product'}, {'whole milk', 'rice', 'abrasive cleaner', 'butter', 'yogurt'}, {'rolls/buns'}, {'liquor (appetizer)', 'bottled beer', 'rolls/buns', 'other vegetables', 'UHT-milk'}, {'potted plants'}, {'whole milk', 'cereals'}, {'tropical fruit', 'bottled water', 'chocolate', 'white bread', 'other vegetables'}, {'curd', 'tropical fruit', 'whole milk', 'dishes', 'bottled water', 'flour', 'butter', 'yogurt', 'citrus fruit'}, {'beef'}, {'soda', 'rolls/buns', 'frankfurter'}, {'tropical fruit', 'chicken'}, {'sugar', 'newspapers', 'fruit/vegetable juice', 'butter'}, {'fruit/vegetable juice'}, {'packaged fruit/vegetables'}, {'chocolate'}, {'specialty bar'}, {'other vegetables'}, {'pastry', 'butter milk'}, {'whole milk'}, {'tropical fruit', 'cream c

In [3]:
apriori = Apriori(transactions[:500], minimumsupport=2)
frequent_itemsets_with_support = apriori.run(method="F1")


print("Using Fk-1 x F1 Method:")
freqitemsets_supportf1, total_f1 = apriori.run(method="F1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_f1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {total_f1}\n")

print("Using Fk-1 x Fk-1 Method:")
freqitemsets_supportfk1, totalfk1 = apriori.run(method="Fk-1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_fk_minus_1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {totalfk1}\n")

# Discuss observed savings
observedsavings = total_f1 - totalfk1
if observedsavings > 0:
    print(f"The Fk-1 x F1 method generated {observedsavings} more candidate itemsets than the Fk-1 x Fk-1 method.")
elif observedsavings < 0:
    print(f"The Fk-1 x F1 method generated {-observedsavings} fewer candidate itemsets than the Fk-1 x Fk-1 method.")
else:
    print("Both methods generated the same number of candidate itemsets.")

association_rules_generator = AssociationRules(freqitemsets_supportf1)

print("Association Rules with Minimum Confidence 0.2 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.2)

association_rules_generator = AssociationRules(freqitemsets_supportfk1)

print("Association Rules with Minimum Confidence 0.2 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.2)

Using Fk-1 x F1 Method:
Total Candidate Itemsets Considered: 459891

Using Fk-1 x Fk-1 Method:
Total Candidate Itemsets Considered: 44403

The Fk-1 x F1 method generated 415488 more candidate itemsets than the Fk-1 x Fk-1 method.
Association Rules with Minimum Confidence 0.2 Using Fk-1 x F1 Method:
frozenset({'canned vegetables'}) => {'rolls/buns'} (Confidence: 0.67)
frozenset({'spices'}) => {'sausage'} (Confidence: 0.40)
frozenset({'waffles'}) => {'domestic eggs'} (Confidence: 0.20)
frozenset({'softener'}) => {'soda'} (Confidence: 0.67)
frozenset({'grapes'}) => {'root vegetables'} (Confidence: 0.29)
frozenset({'frozen dessert'}) => {'tropical fruit'} (Confidence: 0.33)
frozenset({'flour'}) => {'curd'} (Confidence: 0.25)
frozenset({'canned fruit'}) => {'whole milk'} (Confidence: 1.00)
frozenset({'butter milk'}) => {'yogurt'} (Confidence: 0.29)
frozenset({'specialty fat'}) => {'frozen vegetables'} (Confidence: 0.67)
frozenset({'pickled vegetables'}) => {'curd'} (Confidence: 0.25)
frozen

In [4]:
apriori = Apriori(transactions[:500], minimumsupport=4)
frequent_itemsets_with_support = apriori.run(method="F1")


print("Using Fk-1 x F1 Method:")
freqitemsets_supportf1, total_f1 = apriori.run(method="F1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_f1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {total_f1}\n")

print("Using Fk-1 x Fk-1 Method:")
freqitemsets_supportfk1, totalfk1 = apriori.run(method="Fk-1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_fk_minus_1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {totalfk1}\n")

# Discuss observed savings
observedsavings = total_f1 - totalfk1
if observedsavings > 0:
    print(f"The Fk-1 x F1 method generated {observedsavings} more candidate itemsets than the Fk-1 x Fk-1 method.")
elif observedsavings < 0:
    print(f"The Fk-1 x F1 method generated {-observedsavings} fewer candidate itemsets than the Fk-1 x Fk-1 method.")
else:
    print("Both methods generated the same number of candidate itemsets.")

association_rules_generator = AssociationRules(freqitemsets_supportf1)

print("Association Rules with Minimum Confidence 0.5 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.5)

association_rules_generator = AssociationRules(freqitemsets_supportfk1)

print("Association Rules with Minimum Confidence 0.5 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.5)

Using Fk-1 x F1 Method:
Total Candidate Itemsets Considered: 68318

Using Fk-1 x Fk-1 Method:
Total Candidate Itemsets Considered: 10054

The Fk-1 x F1 method generated 58264 more candidate itemsets than the Fk-1 x Fk-1 method.
Association Rules with Minimum Confidence 0.5 Using Fk-1 x F1 Method:
frozenset({'frozen dessert'}) => {'whole milk'} (Confidence: 0.83)
frozenset({'flour'}) => {'root vegetables'} (Confidence: 0.50)
frozenset({'abrasive cleaner'}) => {'whole milk'} (Confidence: 1.00)
frozenset({'chewing gum'}) => {'rolls/buns'} (Confidence: 0.56)
frozenset({'domestic eggs'}) => {'whole milk'} (Confidence: 0.56)
frozenset({'cake bar'}) => {'curd'} (Confidence: 0.57)
frozenset({'soft cheese'}) => {'whole milk'} (Confidence: 0.67)
frozenset({'pip fruit'}) => {'whole milk'} (Confidence: 0.50)
frozenset({'sliced cheese'}) => {'rolls/buns'} (Confidence: 0.73)
frozenset({'cake bar'}) => {'shopping bags'} (Confidence: 0.57)
frozenset({'sugar'}) => {'whole milk'} (Confidence: 0.50)
froz

In [5]:
apriori = Apriori(transactions[:500], minimumsupport=5)
frequent_itemsets_with_support = apriori.run(method="F1")


print("Using Fk-1 x F1 Method:")
freqitemsets_supportf1, total_f1 = apriori.run(method="F1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_f1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {total_f1}\n")

print("Using Fk-1 x Fk-1 Method:")
freqitemsets_supportfk1, totalfk1 = apriori.run(method="Fk-1")
# print("Frequent Itemsets with Support:")
# for itemset, support in frequent_itemsets_with_support_fk_minus_1.items():
#     print(f"{itemset}: {support}")
print(f"Total Candidate Itemsets Considered: {totalfk1}\n")

# Discuss observed savings
observedsavings = total_f1 - totalfk1
if observedsavings > 0:
    print(f"The Fk-1 x F1 method generated {observedsavings} more candidate itemsets than the Fk-1 x Fk-1 method.")
elif observedsavings < 0:
    print(f"The Fk-1 x F1 method generated {-observedsavings} fewer candidate itemsets than the Fk-1 x Fk-1 method.")
else:
    print("Both methods generated the same number of candidate itemsets.")

association_rules_generator = AssociationRules(freqitemsets_supportf1)

print("Association Rules with Minimum Confidence 0.3 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.3)

association_rules_generator = AssociationRules(freqitemsets_supportfk1)

print("Association Rules with Minimum Confidence 0.3 Using Fk-1 x F1 Method:")
association_rules_generator.display_rules(minimumconfidence=0.3)

Using Fk-1 x F1 Method:
Total Candidate Itemsets Considered: 43592

Using Fk-1 x Fk-1 Method:
Total Candidate Itemsets Considered: 6415

The Fk-1 x F1 method generated 37177 more candidate itemsets than the Fk-1 x Fk-1 method.
Association Rules with Minimum Confidence 0.3 Using Fk-1 x F1 Method:
frozenset({'hygiene articles'}) => {'rolls/buns'} (Confidence: 0.46)
frozenset({'ham'}) => {'whipped/sour cream'} (Confidence: 0.42)
frozenset({'frozen dessert'}) => {'whole milk'} (Confidence: 0.83)
frozenset({'frankfurter'}) => {'rolls/buns'} (Confidence: 0.46)
frozenset({'sugar'}) => {'rolls/buns'} (Confidence: 0.40)
frozenset({'pork'}) => {'whole milk'} (Confidence: 0.30)
frozenset({'butter milk'}) => {'rolls/buns'} (Confidence: 0.35)
frozenset({'root vegetables'}) => {'whole milk'} (Confidence: 0.44)
frozenset({'butter'}) => {'rolls/buns'} (Confidence: 0.30)
frozenset({'curd'}) => {'rolls/buns'} (Confidence: 0.32)
frozenset({'other vegetables'}) => {'whole milk'} (Confidence: 0.36)
frozens