In [3]:
import pandas as pd
import itertools

In [4]:
def findFrequentItemsets(transactions, itemsets, minSupport):
    """Return the candidate itemsets whose support reaches ``minSupport``.

    Parameters
    ----------
    transactions : sequence of iterables
        Every transaction is an iterable of hashable items. Must support
        ``len()``.
    itemsets : dict
        Candidate itemsets as keys. A key is either a single item given as a
        ``str`` or an iterable (e.g. tuple) of items. As a side effect each
        value is overwritten with the number of transactions that contain
        the candidate.
    minSupport : float
        Minimum fraction (0..1) of transactions that must contain a
        candidate for it to be reported.

    Returns
    -------
    dict
        Maps each frequent candidate (same key object as in ``itemsets``) to
        its support, i.e. ``count / len(transactions)``. Empty when there are
        no transactions.
    """
    totalTransactions = len(transactions)
    if totalTransactions == 0:
        # No data means no support can be computed; avoid dividing by zero.
        return {}

    # Convert every transaction to a set once instead of once per candidate.
    transactionSets = [set(transaction) for transaction in transactions]

    frequentItemsets = {}
    for itemset in itemsets:
        if isinstance(itemset, str):
            # A bare string is a single item, not an iterable of characters.
            count = sum(1 for items in transactionSets if itemset in items)
        else:
            required = set(itemset)
            count = sum(1 for items in transactionSets if required.issubset(items))

        # Store the count rather than adding to the previous value, so calling
        # the function again with the same dict does not inflate the support.
        itemsets[itemset] = count

        support = count / totalTransactions
        if support >= minSupport:
            frequentItemsets[itemset] = support
    return frequentItemsets

In [14]:
class AssociationRule:
    """A rule "a implies b" together with its confidence.

    Attributes
    ----------
    a : str or collection
        Antecedent: a single item given as a string, or a collection of items.
    b : str or collection
        Consequent, in the same form as ``a``.
    confidence : float
        Confidence value attached to the rule.
    """

    def __init__(self, a, b, confidence):
        self.a = a
        self.b = b
        self.confidence = confidence

    @staticmethod
    def _formatSide(side):
        """Render one side of the rule for display.

        A string is shown as-is, a one-element collection is shown as that
        element alone, and anything else uses its normal ``str()`` form.
        """
        if isinstance(side, str):
            return side
        if len(side) == 1:
            # str() keeps non-string items (e.g. ints) from breaking the
            # concatenation; next(iter(...)) also works for sets.
            return str(next(iter(side)))
        return str(side)

    def __str__(self):
        return (
            f'{self._formatSide(self.a)} implies {self._formatSide(self.b)}'
            f' with {self.confidence} confidence'
        )

    def __repr__(self):
        return f'AssociationRule({self.a!r}, {self.b!r}, {self.confidence!r})'

In [15]:
def generateAssociationRules(frequentItemsets, minConfidence):
    """Build association rules from frequent itemsets.

    For every multi-item itemset, each non-empty proper subset is tried as
    the antecedent and the remaining items form the consequent. The rule is
    kept when ``support(itemset) / support(antecedent) >= minConfidence``.

    Parameters
    ----------
    frequentItemsets : dict
        Maps an itemset to its support. A key is either a single item given
        as a ``str`` or an iterable (e.g. tuple) of items.
    minConfidence : float
        Minimum confidence (0..1) for a rule to be returned.

    Returns
    -------
    set of AssociationRule
        A single-item antecedent is stored as the bare item; a multi-item
        antecedent is stored as a tuple. The consequent is always a tuple
        that keeps the item order of the source itemset.
    """
    # Index supports by frozenset so a subset can be looked up regardless of
    # the order in which its items appear in the original key.
    supportByItems = {}
    for key, support in frequentItemsets.items():
        members = frozenset([key]) if isinstance(key, str) else frozenset(key)
        supportByItems[members] = support

    associationRules = set()
    for itemSet, itemSetSupport in frequentItemsets.items():
        if isinstance(itemSet, str):
            # A single item cannot be split into antecedent and consequent.
            continue
        for antecedentSize in range(1, len(itemSet)):
            for antecedent in itertools.combinations(itemSet, antecedentSize):
                antecedentSupport = supportByItems.get(frozenset(antecedent))
                if not antecedentSupport:
                    # Support unknown or zero: confidence is undefined.
                    continue
                confidence = itemSetSupport / antecedentSupport
                if confidence >= minConfidence:
                    consequent = tuple(item for item in itemSet if item not in antecedent)
                    a = antecedent[0] if antecedentSize == 1 else antecedent
                    associationRules.add(AssociationRule(a, consequent, confidence))
    return associationRules

In [7]:
def bruteForce(transactions, minSupport, minConfidence):
    """Mine association rules by testing every k-item combination.

    ``transactions`` should be a 2 dimensional array of all the transactions.
    ``minSupport`` and ``minConfidence`` should be given as decimals, not
    percentages.

    Candidate size starts at 1 and grows by one each round; the search stops
    at the first size for which no candidate is frequent. The collected
    frequent itemsets are then handed to ``generateAssociationRules``, whose
    result is returned.
    """
    # Every distinct item that appears in any transaction.
    items = {item for transaction in transactions for item in transaction}

    frequentSets = {}
    size = 1
    candidates = dict.fromkeys(items, 0)
    while True:
        found = findFrequentItemsets(transactions, candidates, minSupport)
        if not found:
            break
        frequentSets.update(found)
        size += 1
        # Next round: all combinations of the full item universe at the new size.
        candidates = dict.fromkeys(set(itertools.combinations(items, size)), 0)

    # Do the confidence equations for all the combinations and return the rules.
    return generateAssociationRules(frequentSets, minConfidence)

In [16]:
# Demo input: one inner list of item names per transaction.
sampleTransactions = [
    ['Basketball', 'Basketball Shoes', 'Gatorade Bottle'],
    ['Swim Cap', 'Swim Goggles'],
    ['Running Shoes', 'Electrolyte Gels', 'Gatorade Bottle'],
    ['Golf Balls', 'Golf Shoes'],
    ['Protein Powder', 'Electrolyte Gels'],
    ['Basketball', 'Gatorade Bottle'],
    ['Running Shoes', 'Gatorade Bottle'],
    ['Swim Cap', 'Swim Goggles'],
    ['Swim Cap', 'Gatorade Bottle'],
    ['Swim Goggles', 'Gatorade Bottle'],
    ['Basketball Shoes', 'Gatorade Bottle'],
    ['Golf Shoes', 'Golf Balls'],
    ['Running Shoes', 'Protein Powder', 'Electrolyte Gels'],
    ['Basketball', 'Basketball Shoes'],
    ['Running Shoes', 'Electrolyte Gels'],
    ['Swim Cap', 'Swim Goggles', 'Gatorade Bottle'],
    ['Golf Shoes', 'Golf Balls'],
    ['Protein Powder', 'Gatorade Bottle'],
    ['Basketball', 'Protein Powder'],
    ['Running Shoes', 'Electrolyte Gels'],
]

# Thresholds are fractions, not percentages.
sampleMinSupport = 0.05
sampleMinConfidence = 0.25

i = bruteForce(sampleTransactions, sampleMinSupport, sampleMinConfidence)

# Print one rule per line using AssociationRule.__str__.
for rule in i:
    print(rule)

Running Shoes implies Electrolyte Gels with 0.8 confidence
Basketball implies Gatorade Bottle with 0.5 confidence
Protein Powder implies Gatorade Bottle with 0.25 confidence
Running Shoes implies Gatorade Bottle with 0.4 confidence
Swim Goggles implies Gatorade Bottle with 0.5 confidence
Swim Cap implies Gatorade Bottle with 0.5 confidence
Protein Powder implies ('Running Shoes', 'Electrolyte Gels') with 0.25 confidence
Protein Powder implies Electrolyte Gels with 0.5 confidence
Swim Goggles implies ('Gatorade Bottle', 'Swim Cap') with 0.25 confidence
Swim Cap implies ('Swim Goggles', 'Gatorade Bottle') with 0.25 confidence
Basketball Shoes implies ('Basketball', 'Gatorade Bottle') with 0.33333333333333337 confidence
Protein Powder implies Basketball with 0.25 confidence
Basketball implies ('Gatorade Bottle', 'Basketball Shoes') with 0.25 confidence
Basketball implies Protein Powder with 0.25 confidence
Golf Shoes implies Golf Balls with 1.0 confidence
Electrolyte Gels implies Protein 