In [1]:
from collections import defaultdict

# Simplified transaction dataset
transactions = [
    ['B', 'E'],             # U100
    ['D'],                  # U200
    ['X'],                  # U300
    ['W', 'O'],             # U400
    ['B', 'E', 'D'],        # U500
    ['B', 'D', 'X'],        # U600
    ['E', 'X'],             # U700
    ['X', 'W'],             # U800
    ['B', 'E', 'W', 'O'],   # U900
    ['D', 'W', 'O']         # U1000
]

In [2]:
class SimpleApriori:
    def __init__(self, min_support=0.2, min_confidence=0.5):
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.frequent_itemsets = {}
        self.transaction_count = len(transactions)
        self.rules = []

    def _get_support(self, itemset):
        """Calculate the support for an itemset"""
        count = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))
        return count / self.transaction_count

    def find_frequent_itemsets(self):
        """Find frequent itemsets"""
        # Initial pass to get 1-itemsets
        C1 = defaultdict(int)
        for transaction in transactions:
            for item in transaction:
                C1[item] += 1
        
        F1 = {tuple([item]): count for item, count in C1.items() 
              if count / self.transaction_count >= self.min_support}
        self.frequent_itemsets[1] = F1

        print(f"Frequent 1-itemsets: {F1}")

        k = 2
        while True:
            # Generate candidate k-itemsets from F(k-1)
            Fk_1 = list(self.frequent_itemsets[k-1].keys())
            Ck = defaultdict(int)
            for i in range(len(Fk_1)):
                for j in range(i+1, len(Fk_1)):
                    candidate = tuple(sorted(set(Fk_1[i]) | set(Fk_1[j])))
                    if len(candidate) == k:
                        Ck[candidate] = 0

            # Calculate support for each candidate
            for transaction in transactions:
                transaction_set = set(transaction)
                for candidate in Ck:
                    if set(candidate).issubset(transaction_set):
                        Ck[candidate] += 1

            # Filter candidates by min_support
            Fk = {candidate: count for candidate, count in Ck.items() 
                  if count / self.transaction_count >= self.min_support}
            
            if not Fk:
                break

            self.frequent_itemsets[k] = Fk
            print(f"Frequent {k}-itemsets: {Fk}")
            k += 1

        return self.frequent_itemsets

    def generate_rules(self):
        """Generate association rules from frequent itemsets"""
        print("\nGenerating Association Rules...")
        for k, itemsets in self.frequent_itemsets.items():
            if k < 2:  # No rules for 1-itemsets
                continue
            for itemset, count in itemsets.items():
                itemset_support = count / self.transaction_count
                subsets = self._get_subsets(itemset)

                for antecedent in subsets:
                    consequent = tuple(set(itemset) - set(antecedent))
                    if not consequent:
                        continue

                    antecedent_support = self._get_support(antecedent)
                    confidence = itemset_support / antecedent_support

                    if confidence >= self.min_confidence:
                        consequent_support = self._get_support(consequent)
                        lift = confidence / consequent_support
                        self.rules.append({
                            'antecedent': antecedent,
                            'consequent': consequent,
                            'support': itemset_support,
                            'confidence': confidence,
                            'lift': lift
                        })

        print(f"Total Rules Generated: {len(self.rules)}")
        return self.rules

    def _get_subsets(self, itemset):
        """Generate all non-empty subsets of an itemset"""
        subsets = []
        itemset = list(itemset)
        for i in range(1, 1 << len(itemset)):  # Binary combinations
            subset = [itemset[j] for j in range(len(itemset)) if (i & (1 << j))]
            if subset and len(subset) < len(itemset):
                subsets.append(tuple(subset))
        return subsets


In [3]:
# Run the SimpleApriori algorithm
apriori = SimpleApriori(min_support=0.2, min_confidence=0.5)  # Minimum support of 20%, confidence 50%
frequent_itemsets = apriori.find_frequent_itemsets()


Frequent 1-itemsets: {('B',): 4, ('E',): 4, ('D',): 4, ('X',): 4, ('W',): 4, ('O',): 3}
Frequent 2-itemsets: {('B', 'E'): 3, ('B', 'D'): 2, ('O', 'W'): 3}


In [4]:
# Generate association rules
rules = apriori.generate_rules()


Generating Association Rules...
Total Rules Generated: 6


In [5]:
# Print frequent itemsets
print("\nFrequent Itemsets:")
for k, itemsets in frequent_itemsets.items():
    print(f"{k}-itemsets:")
    for items, count in itemsets.items():
        support = count / apriori.transaction_count
        print(f"  {items}: Support = {support:.2f}")


Frequent Itemsets:
1-itemsets:
  ('B',): Support = 0.40
  ('E',): Support = 0.40
  ('D',): Support = 0.40
  ('X',): Support = 0.40
  ('W',): Support = 0.40
  ('O',): Support = 0.30
2-itemsets:
  ('B', 'E'): Support = 0.30
  ('B', 'D'): Support = 0.20
  ('O', 'W'): Support = 0.30


In [6]:
# Print top rules by confidence
print("\nTop Association Rules:")
sorted_rules = sorted(rules, key=lambda x: x['confidence'], reverse=True)
for idx, rule in enumerate(sorted_rules[:5]):
    antecedent = ', '.join(rule['antecedent'])
    consequent = ', '.join(rule['consequent'])
    print(f"Rule {idx + 1}: {{{antecedent}}} => {{{consequent}}}")
    print(f"   Support: {rule['support']:.2f}, Confidence: {rule['confidence']:.2f}, Lift: {rule['lift']:.2f}")


Top Association Rules:
Rule 1: {O} => {W}
   Support: 0.30, Confidence: 1.00, Lift: 2.50
Rule 2: {B} => {E}
   Support: 0.30, Confidence: 0.75, Lift: 1.87
Rule 3: {E} => {B}
   Support: 0.30, Confidence: 0.75, Lift: 1.87
Rule 4: {W} => {O}
   Support: 0.30, Confidence: 0.75, Lift: 2.50
Rule 5: {B} => {D}
   Support: 0.20, Confidence: 0.50, Lift: 1.25
