In [8]:
import pandas as pd
import numpy as np
from itertools import combinations

# 1. Simulate Transaction Data
# Seed the legacy global NumPy RNG so every run draws the same baskets.
np.random.seed(42)
items = ['Bread', 'Milk', 'Eggs', 'Butter', 'Cheese', 'Yogurt', 'Apples', 'Bananas']

# Ten baskets, each holding 2-5 distinct items. The basket size is drawn
# first (it is evaluated as an argument) and the items are drawn second,
# so the RNG is consumed in a fixed size-then-items order per basket.
transactions = [
    np.random.choice(
        items,
        size=np.random.randint(2, 6),
        replace=False,
    ).tolist()
    for _ in range(10)
]

# Tabular view of the baskets, labelled T1..Tn, for display.
transaction_df = pd.DataFrame({
    'Transaction': [f'T{number}' for number in range(1, len(transactions) + 1)],
    'Items': transactions,
})
print("Simulated Transactions:", transaction_df, sep="\n")

# 2. One-Hot Encoding
def encode_transactions(transactions, items):
    """Convert transactions into a one-hot encoded DataFrame.

    Args:
        transactions: Iterable of transactions, each an iterable of item
            labels.
        items: Item labels; each becomes a column, in this order.

    Returns:
        A DataFrame with one row per transaction. A cell is 1 when the
        transaction contains that column's item and 0 otherwise. A label
        that appears in a transaction but not in ``items`` is added as an
        extra key for that row only.
    """
    indicator_rows = [
        # Start from an all-zero row, then overwrite the purchased items
        # with 1; merging keeps the column order given by ``items``.
        {**dict.fromkeys(items, 0), **dict.fromkeys(basket, 1)}
        for basket in transactions
    ]
    return pd.DataFrame(indicator_rows)

# Encode the simulated baskets and show the resulting indicator table.
one_hot_df = encode_transactions(transactions, items)
print("\nOne-Hot Encoded DataFrame:", one_hot_df, sep="\n")

# 3. Custom Apriori Algorithm
def get_frequent_itemsets(df, min_support=0.3):
    """Find all itemsets whose support is at least ``min_support``.

    Support of an itemset is the fraction of rows in which every one of
    its columns is set.

    Args:
        df: One-hot encoded transactions (one row per transaction, one
            0/1 column per item).
        min_support: Minimum support, as a fraction of the number of rows,
            for an itemset to be kept.

    Returns:
        A DataFrame with an ``itemsets`` column (Python sets of column
        labels) and a ``support`` column, listing single items first and
        then larger itemsets in order of increasing size. The frame is
        empty (but still has both columns) when nothing is frequent or
        ``df`` has no rows.
    """
    n_transactions = len(df)
    if n_transactions == 0:
        # Support is undefined without transactions; report "nothing
        # frequent" instead of dividing by zero.
        return pd.DataFrame({'itemsets': [], 'support': []})

    # Start with single items
    itemsets = {frozenset([item]): df[item].sum() / n_transactions for item in df.columns}
    frequent_itemsets = {k: v for k, v in itemsets.items() if v >= min_support}

    # Generate itemsets of increasing size
    k = 2
    while True:
        new_itemsets = {}
        # Many pairs of known frequent itemsets produce the same union;
        # remember which candidates were already scored so each one is
        # evaluated against the data only once per level.
        evaluated = set()
        for first, second in combinations(list(frequent_itemsets), 2):
            union = first | second
            if len(union) != k or union in evaluated:
                continue
            evaluated.add(union)
            # A row supports the itemset only when all of its columns are
            # 1, i.e. when the row-wise minimum over those columns is 1.
            support = np.sum(df[list(union)].min(axis=1)) / n_transactions
            if support >= min_support:
                new_itemsets[union] = support
        if not new_itemsets:
            # No frequent itemset of size k, so none of a larger size can
            # be built from the known ones either.
            break
        frequent_itemsets.update(new_itemsets)
        k += 1

    # Convert to DataFrame
    frequent_itemsets_df = pd.DataFrame({
        'itemsets': [set(itemset) for itemset in frequent_itemsets],
        'support': list(frequent_itemsets.values())
    })
    return frequent_itemsets_df

# Mine the encoded baskets for itemsets present in at least 30% of them.
frequent_itemsets = get_frequent_itemsets(one_hot_df, min_support=0.3)
print("\nFrequent Itemsets (min_support=0.3):", frequent_itemsets, sep="\n")

# 4. Generate Association Rules
def generate_association_rules(frequent_itemsets, df, min_confidence=0.7):
    """Derive ``antecedent -> consequent`` rules from frequent itemsets.

    Every itemset with at least two items is split in all possible ways
    into a non-empty antecedent and a non-empty consequent. A rule is kept
    when ``support(itemset) / support(antecedent)`` is at least
    ``min_confidence``.

    Args:
        frequent_itemsets: DataFrame with an ``itemsets`` column (sets of
            column labels of ``df``) and a ``support`` column.
        df: One-hot encoded transactions used to measure antecedent
            support.
        min_confidence: Minimum confidence for a rule to be reported.

    Returns:
        A DataFrame with the columns ``antecedents``, ``consequents``,
        ``support`` and ``confidence``. The columns are present even when
        no rule qualifies. Within one itemset, rules are listed in a
        reproducible order that follows the column order of ``df``.
    """
    columns = ['antecedents', 'consequents', 'support', 'confidence']
    rules = []
    n_transactions = len(df)
    # Position of each item in ``df``; used to enumerate antecedents in a
    # stable order instead of relying on set iteration order, which varies
    # between interpreter runs for string labels.
    column_position = {column: index for index, column in enumerate(df.columns)}
    # The same antecedent shows up in several itemsets; measure it once.
    antecedent_support = {}

    for _, row in frequent_itemsets.iterrows():
        itemset = frozenset(row['itemsets'])
        support_itemset = row['support']
        if len(itemset) < 2 or n_transactions == 0:
            # A rule needs a non-empty antecedent and consequent, and
            # support cannot be measured without transactions.
            continue

        ordered_items = sorted(itemset, key=column_position.__getitem__)

        # Generate all possible non-empty proper subsets as antecedents
        for size in range(1, len(ordered_items)):
            for combo in combinations(ordered_items, size):
                antecedent = frozenset(combo)
                consequent = itemset - antecedent

                # Support of the antecedent: share of rows in which all of
                # its columns are 1 (row-wise minimum equals 1).
                if antecedent not in antecedent_support:
                    antecedent_support[antecedent] = (
                        np.sum(df[list(antecedent)].min(axis=1)) / n_transactions
                    )
                support_antecedent = antecedent_support[antecedent]
                if support_antecedent == 0:
                    # Confidence is undefined for an antecedent that never occurs.
                    continue

                confidence = support_itemset / support_antecedent
                if confidence >= min_confidence:
                    rules.append({
                        'antecedents': set(antecedent),
                        'consequents': set(consequent),
                        'support': support_itemset,
                        'confidence': confidence
                    })

    # Passing ``columns`` keeps the schema identical whether or not any
    # rule was found.
    return pd.DataFrame(rules, columns=columns)

# Derive rules with at least 70% confidence and preview the first two.
rules = generate_association_rules(frequent_itemsets, one_hot_df, min_confidence=0.7)
print("\nAssociation Rules (min_confidence=0.7):", rules.head(2), sep="\n")

# 5. Explanation of one rule
if rules.empty:
    print("\nNo association rules found with the specified confidence threshold.")
else:
    # Describe the first reported rule in plain language.
    first_rule = rules.iloc[0]
    antecedents = ', '.join(first_rule['antecedents'])
    consequents = ', '.join(first_rule['consequents'])
    confidence = first_rule['confidence']
    explanation = (
        f"This rule means that when {antecedents} is purchased, there is a {confidence*100:.1f}% chance that {consequents} will also be purchased. "
        f"In everyday terms, if a customer buys {antecedents}, they are highly likely to buy {consequents} as well, "
        f"suggesting a strong shopping pattern, possibly because these items are often used together (e.g., in a recipe)."
    )
    print(f"\nExplanation of Rule: {antecedents} -> {consequents}")
    print(explanation)

Simulated Transactions:
  Transaction                                   Items
0          T1          [Bananas, Milk, Yogurt, Bread]
1          T2           [Bread, Milk, Yogurt, Butter]
2          T3                          [Cheese, Eggs]
3          T4  [Bananas, Eggs, Apples, Bread, Butter]
4          T5          [Bread, Bananas, Eggs, Cheese]
5          T6                   [Milk, Cheese, Bread]
6          T7                        [Yogurt, Butter]
7          T8                         [Eggs, Bananas]
8          T9                    [Milk, Cheese, Eggs]
9         T10                 [Bananas, Eggs, Cheese]

One-Hot Encoded DataFrame:
   Bread  Milk  Eggs  Butter  Cheese  Yogurt  Apples  Bananas
0      1     1     0       0       0       1       0        1
1      1     1     0       1       0       1       0        0
2      0     0     1       0       1       0       0        0
3      1     0     1       1       0       0       1        1
4      1     0     1       0       1       0