Two programs to implement:
1. Apply the Apriori algorithm using `apriori` from the mlxtend library.
2. Implement the Apriori algorithm manually.

In [2]:
pip install mlxtend


Collecting mlxtend
  Obtaining dependency information for mlxtend from https://files.pythonhosted.org/packages/1c/07/512f6a780239ad6ce06ce2aa7b4067583f5ddcfc7703a964a082c706a070/mlxtend-0.23.1-py3-none-any.whl.metadata
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.4 MB 1.7 MB/s eta 0:00:01
   ---- ----------------------------------- 0.2/1.4 MB 1.6 MB/s eta 0:00:01
   ----- ---------------------------------- 0.2/1.4 MB 1.4 MB/s eta 0:00:01
   ------- -------------------------------- 0.3/1.4 MB 1.6 MB/s eta 0:00:01
   --------- ------------------------------ 0.4/1.4 MB 1.6 MB/s eta 0:00:01
   ------------ --------------------------- 0.5/1.4 MB 1.6 MB/s eta 0:00:01
   --------------- ------------------------ 0.6/1.4 MB 1.7 MB/s eta 0:00:01
   ---------------- ----------------------- 0.6/1.4 MB 1.7 MB/s

In [6]:
# Import the library implementation under an alias: this notebook later defines
# its own function named `apriori`, and a bare `apriori` here would resolve to
# whichever definition was executed last (breaking this cell on re-run).
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd

# Sample transaction dataset
data = {'Transaction_ID': [1, 2, 3, 4, 5],
        'Items': [['bread', 'milk'],
                  ['bread', 'diaper', 'beer', 'eggs'],
                  ['milk', 'diaper', 'beer', 'cola'],
                  ['bread', 'milk', 'diaper', 'beer'],
                  ['bread', 'milk', 'diaper', 'cola']]}
df = pd.DataFrame(data)

# One-hot encode the baskets: one row per transaction, one boolean column per item.
# explode() yields one row per (transaction, item) while keeping the transaction
# index; any() then collapses those rows back to a single boolean row per
# transaction. A transaction with no items stays in the frame as an all-False
# row, so it still counts in the support denominator.
encoded_df = pd.get_dummies(df['Items'].explode()).groupby(level=0).any()

# Itemsets present in at least 50% of the transactions.
frequent_itemsets = mlxtend_apriori(encoded_df, min_support=0.5, use_colnames=True)

# Rules derived from those itemsets, keeping confidence >= 0.7.
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.7)

print("Frequent Itemsets:")
print(frequent_itemsets)
print("\nAssociation Rules:")
rules


Frequent Itemsets:
   support         itemsets
0      0.6           (beer)
1      0.8          (bread)
2      0.8         (diaper)
3      0.8           (milk)
4      0.6   (diaper, beer)
5      0.6  (bread, diaper)
6      0.6    (milk, bread)
7      0.6   (milk, diaper)

Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(diaper),(beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0
1,(beer),(diaper),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
2,(bread),(diaper),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
3,(diaper),(bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
4,(milk),(bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
5,(bread),(milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
6,(milk),(diaper),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
7,(diaper),(milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25


In [7]:
# Display the one-hot encoded transaction matrix built in the previous cell.
encoded_df

Unnamed: 0,beer,bread,cola,diaper,eggs,milk
0,False,True,False,False,False,True
1,True,True,False,True,True,False
2,True,False,True,True,False,True
3,True,True,False,True,False,True
4,False,True,True,True,False,True


In [8]:
def generate_candidates(prev_candidates, k):
    """Generate candidate itemsets of size k from previous itemsets of size k-1.

    Two (k-1)-itemsets are joined when they agree on their first k-2 items
    (the classic Apriori prefix join).

    Args:
        prev_candidates: iterable of itemsets (tuples or other iterables of
            mutually comparable items). Itemsets whose size is not k-1 are
            ignored, so callers may pass an accumulated list of all sizes.
            Items inside an itemset do not have to be pre-sorted.
        k: size of the candidates to produce.

    Returns:
        A list of unique, internally sorted tuples of length k, in
        lexicographic order. The order is deterministic so that results are
        reproducible across kernel restarts (set iteration order is not).
    """
    # Keep only the (k-1)-itemsets, normalise each to a sorted tuple and
    # de-duplicate. Sorting the collection groups equal prefixes together.
    base = sorted({tuple(sorted(itemset)) for itemset in prev_candidates
                   if len(itemset) == k - 1})

    candidates = []
    for i, first in enumerate(base):
        for second in base[i + 1:]:
            if first[:-1] != second[:-1]:
                # base is sorted, so itemsets sharing a prefix are contiguous:
                # once the prefix differs, no later itemset can match `first`.
                break
            # first < second with an equal prefix implies first[-1] < second[-1],
            # so the concatenation is already a sorted k-tuple.
            candidates.append(first + second[-1:])
    return candidates

def generate_frequent_itemsets(data, min_support):
    """Generate frequent itemsets from the input data.

    Args:
        data: iterable of transactions, each an iterable of hashable,
            mutually comparable items (e.g. a list of sets of strings).
        min_support: minimum fraction of transactions (0..1) that must
            contain an itemset for it to be considered frequent.

    Returns:
        A list of frequent itemsets as sorted tuples, grouped by size
        (1-itemsets first) and lexicographically ordered within each size.
        Returns an empty list when there are no transactions.
    """
    # Materialise once as sets: allows list/tuple transactions, makes the
    # subset test cheap, and lets `data` be any iterable.
    transactions = [set(transaction) for transaction in data]
    n_transactions = len(transactions)
    if n_transactions == 0:
        # No transactions: nothing can be frequent (and support is undefined).
        return []

    # Step 1: Initialize frequent itemsets and candidate 1-itemsets.
    # Sorted so the result does not depend on set iteration order.
    frequent_itemsets = []
    candidate_itemsets = sorted((item,) for item in set().union(*transactions))

    # Step 2: Generate frequent itemsets level by level
    k = 1
    while candidate_itemsets:
        # Step 2a: Count support and keep candidates that meet minimum support
        candidate_counts = {candidate: 0 for candidate in candidate_itemsets}
        for transaction in transactions:
            for candidate in candidate_itemsets:
                if transaction.issuperset(candidate):
                    candidate_counts[candidate] += 1
        # Compare the support *fraction* with min_support. Comparing
        # count >= min_support * n can wrongly reject an itemset that sits
        # exactly on the threshold (e.g. 0.07 * 100 == 7.000000000000001).
        current_level = sorted(itemset for itemset, count in candidate_counts.items()
                               if count / n_transactions >= min_support)
        frequent_itemsets.extend(current_level)

        # Step 2b: Generate candidate itemsets of size k+1 from the itemsets
        # found at this level only (earlier levels cannot contribute).
        k += 1
        candidate_itemsets = generate_candidates(current_level, k)

    return frequent_itemsets

# Example usage: mine the five sample baskets with a 50% support threshold.
data = [set(basket) for basket in (
    ("bread", "milk"),
    ("bread", "diaper", "beer", "eggs"),
    ("milk", "diaper", "beer", "cola"),
    ("bread", "milk", "diaper", "beer"),
    ("bread", "milk", "diaper", "cola"),
)]
min_support = 0.5
frequent_itemsets = generate_frequent_itemsets(data, min_support)

# Print one itemset per line under a header.
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)


Frequent Itemsets:
('beer',)
('bread',)
('milk',)
('diaper',)
('bread', 'milk')
('bread', 'diaper')
('diaper', 'milk')
('beer', 'diaper')


**Apriori Function**

In [9]:
def apriori(itemSetList, minSup, minConf):
    """Run the level-wise Apriori search and derive association rules.

    Args:
        itemSetList: the transactions to mine; passed unchanged to the helper
            functions that build the initial candidates and count support.
        minSup: minimum support threshold forwarded to getAboveMinSup.
        minConf: minimum confidence threshold forwarded to associationRule.

    Returns:
        A pair (frequentByLevel, rules):
        - frequentByLevel maps itemset size k (starting at 1) to the frequent
          itemsets that getAboveMinSup returned for that size;
        - rules is the list produced by associationRule, sorted ascending by
          the element at index 2 of each rule (presumably the confidence --
          confirm against associationRule).
    """
    initialCandidates = getItemSetFromList(itemSetList)
    # Frequent itemsets found so far, keyed by itemset size
    frequentByLevel = dict()
    # Support counts for every counted itemset, filled in by getAboveMinSup
    supportCounts = defaultdict(int)

    frequentLevel = getAboveMinSup(initialCandidates, itemSetList, minSup, supportCounts)
    size = 1

    # Grow itemsets one item at a time until a level yields nothing frequent
    while frequentLevel:
        frequentByLevel[size] = frequentLevel
        size += 1
        # Join the current level with itself to get candidates of the next size
        joined = getUnion(frequentLevel, size)
        # Discard candidates that have a subset missing from the current level
        survivors = pruning(joined, frequentLevel, size - 1)
        # Count support of the survivors against the transactions
        frequentLevel = getAboveMinSup(survivors, itemSetList, minSup, supportCounts)

    rules = associationRule(frequentByLevel, supportCounts, minConf)
    rules.sort(key=lambda rule: rule[2])

    return frequentByLevel, rules

In [10]:
def getUnion(itemSet, length):
    """Self-join itemSet: return every pairwise union that has exactly `length` items.

    Each element of itemSet is combined with every element (itself included)
    via its own ``union`` method; unions of any other size are discarded.
    The result is a set, so duplicate unions collapse to one entry.
    """
    joined = set()
    for left in itemSet:
        for right in itemSet:
            merged = left.union(right)
            if len(merged) == length:
                joined.add(merged)
    return joined

In [11]:
def pruning(candidateSet, prevFreqSet, length):
    """Remove candidates that have a `length`-sized subset missing from prevFreqSet.

    Works on a copy of candidateSet, so the argument itself is left untouched.
    Returns that copy with every failing candidate removed.
    """
    def hasMissingSubset(candidate):
        # True as soon as one `length`-item combination is absent from prevFreqSet
        return any(frozenset(part) not in prevFreqSet
                   for part in combinations(candidate, length))

    survivors = candidateSet.copy()
    rejected = [candidate for candidate in candidateSet if hasMissingSubset(candidate)]
    for candidate in rejected:
        survivors.remove(candidate)
    return survivors

In [12]:
def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):
    """Count support of each candidate and return those meeting minSup.

    Args:
        itemSet: candidate itemsets; each must provide ``issubset``.
        itemSetList: the transactions each candidate is tested against.
        minSup: minimum support as a fraction of len(itemSetList).
        globalItemSetWithSup: mapping updated in place -- incremented by one
            for every transaction that contains a candidate.

    Returns:
        The set of candidates whose support fraction is >= minSup. Candidates
        contained in no transaction are never returned (and never touched in
        globalItemSetWithSup).
    """
    matchCounts = defaultdict(int)

    for candidate in itemSet:
        for transaction in itemSetList:
            if not candidate.issubset(transaction):
                continue
            globalItemSetWithSup[candidate] += 1
            matchCounts[candidate] += 1

    # Keep the candidates whose share of matching transactions reaches minSup
    return {
        candidate
        for candidate, hits in matchCounts.items()
        if float(hits / len(itemSetList)) >= minSup
    }