## Import Required Libraries

In [1]:
import pandas as pd
import itertools
import os
import time

## List Available CSV Datasets

In [2]:
def get_csv_files(folder_path):
    """Return the names of the candidate dataset CSVs directly inside ``folder_path``.

    Files whose names start with ``frequent_itemsets_`` or ``rules_`` are
    left out (presumably result files written by this project -- confirm).
    Only regular files are returned, and the list is sorted: ``os.listdir``
    gives entries in arbitrary order, and the numbered selection menu built
    from this list must be stable between runs and machines.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).

    Returns
    -------
    list[str]
        Sorted file names (not full paths) that end in ``.csv``.
    """
    excluded_prefixes = ("frequent_itemsets_", "rules_")
    return sorted(
        name
        for name in os.listdir(folder_path)
        if name.endswith(".csv")
        and not name.startswith(excluded_prefixes)
        # Skip directories (or other non-files) that merely carry a .csv suffix.
        and os.path.isfile(os.path.join(folder_path, name))
    )

# Scan the current working directory and show a 1-based menu of the datasets
# found there; the numbers printed here are what the selection cell expects.
folder_path = os.getcwd()
csv_files = get_csv_files(folder_path)

if csv_files:
    print("Available datasets:")
    menu_lines = (f"{number}. {name}" for number, name in enumerate(csv_files, start=1))
    for line in menu_lines:
        print(line)
else:
    print("No CSV files found!")

Available datasets:
1. amazon.csv
2. bestbuy.csv
3. generic.csv
4. kmart.csv
5. nike.csv


## Dataset Selection

In [3]:
# Ask for a menu number until a valid one is given, so that `dataset` and
# `file_path` are always freshly defined when this cell finishes. (Printing a
# message and carrying on would leave them undefined, or silently stale from
# an earlier run, and the loading cell would then fail or read the wrong file.)
if not csv_files:
    # Nothing to choose from: fail loudly instead of prompting forever.
    raise FileNotFoundError(f"No CSV files found in {folder_path}.")

while True:
    raw_choice = input("\nSelect a dataset by number: ")
    try:
        choice = int(raw_choice)
    except ValueError:
        choice = None  # not an integer at all
    if choice is not None and 1 <= choice <= len(csv_files):
        break
    print(f"Invalid input. Please enter a number between 1 and {len(csv_files)}.")

# The menu is 1-based, the list is 0-based.
dataset = csv_files[choice - 1]
file_path = os.path.join(folder_path, dataset)
print(f"Selected dataset: {dataset}")


Select a dataset by number:  4


Selected dataset: kmart.csv


## Load Selected Dataset

In [4]:
# Read the chosen CSV: try UTF-8 first, then fall back to Windows-1252
# (skipping malformed rows) when the file is not valid UTF-8.
try:
    df = pd.read_csv(file_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(file_path, encoding='cp1252', on_bad_lines='skip')

# Tolerate stray whitespace around header names; str() guards against
# non-string column labels.
df.columns = [str(c).strip() for c in df.columns]

if 'Items' not in df.columns:
    raise ValueError("The selected CSV must contain a column named 'Items'.")

# Each 'Items' cell is a comma-separated list, optionally written like a Python
# list literal ("['a', 'b']"); brackets and single quotes are stripped before
# splitting. Missing cells are replaced by '' first: otherwise str(NaN) would
# yield a bogus item named 'nan'. Such rows become empty transactions, the
# same way a literal "[]" cell does.
transactions = []
for raw_items in df['Items'].fillna(''):
    cleaned = str(raw_items).replace('[', '').replace(']', '').replace("'", '')
    transactions.append([item.strip() for item in cleaned.split(',') if item.strip()])
print(f"Loaded {len(transactions)} transactions successfully!")

Loaded 20 transactions successfully!


## Input Minimum Support & Confidence

In [5]:
def get_valid_float(prompt):
    """Keep prompting until the user types a number in the interval (0, 1].

    Text that ``float()`` cannot parse, and numbers outside the interval, each
    print their own hint and trigger another prompt.

    Parameters
    ----------
    prompt : str
        Text passed to ``input()`` on every attempt.

    Returns
    -------
    float
        The first entered value ``v`` satisfying ``0 < v <= 1``.
    """
    while True:
        try:
            candidate = float(input(prompt))
        except ValueError:
            print("Invalid input. Please enter a number.")
        else:
            # Chained comparison on purpose: it is False for NaN, so "nan" is rejected.
            if 0 < candidate <= 1:
                return candidate
            print("Please enter a number between 0 and 1 (exclusive of 0).")

# Ask for the two mining thresholds (support first, then confidence) and echo
# them back so the chosen values are recorded in the cell output.
support_prompt = "Enter minimum support (e.g., 0.2 for 20%): "
confidence_prompt = "Enter minimum confidence (e.g., 0.6 for 60%): "
min_support = get_valid_float(support_prompt)
min_confidence = get_valid_float(confidence_prompt)

print(f"Support: {min_support}, Confidence: {min_confidence}")

Enter minimum support (e.g., 0.2 for 20%):  0.3
Enter minimum confidence (e.g., 0.6 for 60%):  0.5


Support: 0.3, Confidence: 0.5


## Brute Force Algorithm

In [6]:
import itertools
import time

# Section banner for the brute-force run; the leading "\n" leaves a blank line above it.
print("\nRunning Brute Force Algorithm ...")
def get_frequent_itemsets(transactions, min_support):
    """Find all frequent itemsets by brute-force enumeration, level by level.

    For k = 1, 2, ... every k-combination of the distinct items is counted
    against every transaction. Enumeration stops at the first k for which no
    combination reaches ``min_support``.

    Parameters
    ----------
    transactions : iterable of iterables
        Each transaction is a collection of hashable, mutually sortable items.
    min_support : float
        Minimum fraction of transactions that must contain an itemset.

    Returns
    -------
    dict[int, dict[tuple, dict]]
        ``{k: {itemset: {"support": float, "count": int}}}`` where each
        itemset is a tuple of items in sorted order. Empty when nothing is
        frequent (including when there are no transactions).
    """
    # Build each transaction's item set once instead of once per candidate.
    transaction_sets = [set(t) for t in transactions]
    num_transactions = len(transaction_sets)
    items = sorted(set().union(*transaction_sets))

    all_frequent = {}
    k = 1
    while True:
        frequent = {}
        # combinations() is consumed lazily; no need to materialise every candidate.
        for candidate in itertools.combinations(items, k):
            candidate_set = set(candidate)
            count = sum(1 for t_set in transaction_sets if candidate_set <= t_set)
            # Safe: a candidate can only exist when there is at least one transaction.
            support = count / num_transactions
            if support >= min_support:
                frequent[candidate] = {"support": support, "count": count}
        if not frequent:
            break
        all_frequent[k] = frequent
        k += 1

    return all_frequent

def generate_rules(frequent_itemsets, min_confidence, transactions):
    """Derive association rules ``lhs -> rhs`` from frequent itemsets.

    Every frequent itemset with at least two items is split into each possible
    non-empty antecedent (``lhs``) and the remaining items (``rhs``). A rule
    is kept when its confidence, i.e. count(itemset) / count(lhs), reaches
    ``min_confidence``.

    Parameters
    ----------
    frequent_itemsets : dict[int, dict[tuple, dict]]
        Output shaped like ``get_frequent_itemsets``: per level, a mapping from
        itemset tuple to ``{"support": float, "count": int}``.
    min_confidence : float
        Minimum confidence for a rule to be returned.
    transactions : iterable of iterables
        The transactions the itemset counts were computed from; used to count
        how many transactions contain each antecedent.

    Returns
    -------
    list[dict]
        One dict per rule with keys ``lhs``, ``rhs``, ``support``,
        ``confidence`` and ``count``. ``support`` and ``count`` are those of
        the full itemset.
    """
    # Build each transaction's item set once; it is reused for every antecedent.
    transaction_sets = [set(t) for t in transactions]
    # The same antecedent shows up under many itemsets, so remember its count.
    lhs_counts = {}

    rules = []
    flat_itemsets = {item: val for level in frequent_itemsets.values() for item, val in level.items()}

    for itemset, metrics in flat_itemsets.items():
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for lhs in itertools.combinations(itemset, size):
                lhs_key = frozenset(lhs)
                if lhs_key not in lhs_counts:
                    lhs_counts[lhs_key] = sum(1 for t_set in transaction_sets if lhs_key <= t_set)
                lhs_count = lhs_counts[lhs_key]
                if lhs_count == 0:
                    continue
                rhs = tuple(sorted(set(itemset) - lhs_key))
                # Use the integer counts directly: dividing two already-rounded
                # supports can land a hair off the exact ratio
                # (0.1 / 0.3 != 1 / 3) and misclassify a rule at the threshold.
                confidence = metrics["count"] / lhs_count
                if confidence >= min_confidence:
                    rules.append({
                        "lhs": lhs,
                        "rhs": rhs,
                        "support": metrics["support"],
                        "confidence": confidence,
                        "count": metrics["count"]
                    })
    return rules
    
# Time only the mining work (itemsets + rules); the reporting below is not timed.
start_total = time.perf_counter()
frequent_itemsets = get_frequent_itemsets(transactions, min_support)
rules = generate_rules(frequent_itemsets, min_confidence, transactions)
end_total = time.perf_counter()
brute_force_time = end_total - start_total

# Report itemsets by size (ascending), highest support first within a size.
total_itemsets = 0
for k in sorted(frequent_itemsets):
    level = frequent_itemsets[k]
    total_itemsets += len(level)
    print(f"\n=== {k}-itemsets ===")
    ranked_level = sorted(level.items(), key=lambda pair: pair[1]["support"], reverse=True)
    for itemset, metrics in ranked_level:
        print(f"{itemset}: support={metrics['support']:.2f}, count={metrics['count']}")

# Rules are listed from most to least confident.
print("\n=== Association Rules ===")
ranked_rules = sorted(rules, key=lambda entry: entry["confidence"], reverse=True)
for rule in ranked_rules:
    print(f"{rule['lhs']} -> {rule['rhs']} "
          f"(support={rule['support']:.2f}, confidence={rule['confidence']:.2f}, count={rule['count']})")

print("\n=== Brute Force Summary ===")
print(f"Total frequent itemsets: {total_itemsets}")
print(f"Total association rules: {len(rules)}")
print(f"Total execution time: {brute_force_time:.6f} seconds")


Running Brute Force Algorithm ...

=== 1-itemsets ===
('Kids Bedding',): support=0.60, count=12
('Bed Skirts',): support=0.55, count=11
('Shams',): support=0.55, count=11
('Decorative Pillows',): support=0.50, count=10
('Sheets',): support=0.50, count=10
('Quilts',): support=0.40, count=8
('Bedding Collections',): support=0.35, count=7
('Bedspreads',): support=0.35, count=7
('Embroidered Bedspread',): support=0.30, count=6

=== 2-itemsets ===
('Bed Skirts', 'Kids Bedding'): support=0.50, count=10
('Kids Bedding', 'Sheets'): support=0.50, count=10
('Bed Skirts', 'Shams'): support=0.45, count=9
('Bed Skirts', 'Sheets'): support=0.45, count=9
('Kids Bedding', 'Shams'): support=0.45, count=9
('Bed Skirts', 'Bedspreads'): support=0.35, count=7
('Bedspreads', 'Kids Bedding'): support=0.35, count=7
('Bedspreads', 'Sheets'): support=0.35, count=7
('Shams', 'Sheets'): support=0.35, count=7
('Bedding Collections', 'Kids Bedding'): support=0.30, count=6
('Decorative Pillows', 'Quilts'): support=0.30, count=6

## Apriori Algorithm

In [7]:
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd
import time

print("\nRunning Apriori Algorithm ...")

start_total = time.perf_counter()

# One-hot encode the transactions: one row per transaction, one boolean column
# per distinct item (the input layout mlxtend's apriori works on).
all_items = sorted(set(item for trans in transactions for item in trans))
df = pd.DataFrame([{item: (item in trans) for item in all_items} for trans in transactions])
total_transactions = len(df)

frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
# Round before casting: support * N is a float product that can land just
# below the true integer (0.29 * 100 == 28.999999999999996), and a bare
# astype(int) would truncate it to the wrong count.
frequent_itemsets["count"] = (
    (frequent_itemsets["support"].astype(float) * total_transactions).round().astype(int)
)

if frequent_itemsets.empty:
    # No frequent itemsets means no rules. Skip association_rules here: as far
    # as I know mlxtend raises ValueError on an empty input table -- confirm
    # against the installed version.
    rules = pd.DataFrame(columns=["antecedents", "consequents", "support", "confidence", "count"])
else:
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
    rules["count"] = (rules["support"].astype(float) * total_transactions).round().astype(int)

end_total = time.perf_counter()
apriori_time = end_total - start_total

total_itemsets = len(frequent_itemsets)
# Compute the itemset sizes once and reuse them for grouping the printout.
itemset_sizes = frequent_itemsets["itemsets"].apply(len)
for k in sorted(itemset_sizes.unique()):
    print(f"\n=== {k}-itemsets ===")
    subset = frequent_itemsets[itemset_sizes == k]
    for _, row in subset.iterrows():
        print(f"{tuple(row['itemsets'])}: support={row['support']:.2f}, count={row['count']}")
    print()

print("\n=== Association Rules ===")
for _, r in rules.iterrows():
    print(f"{tuple(r['antecedents'])} -> {tuple(r['consequents'])} "
          f"(support={r['support']:.2f}, confidence={r['confidence']:.2f}, count={r['count']})")

print("\n=== Apriori Summary ===")
print(f"Total frequent itemsets: {total_itemsets}")
print(f"Total association rules: {len(rules)}")
print(f"Total execution time: {apriori_time:.6f} seconds")


Running Apriori Algorithm ...

=== 1-itemsets ===
('Bed Skirts',): support=0.55, count=11
('Bedding Collections',): support=0.35, count=7
('Bedspreads',): support=0.35, count=7
('Decorative Pillows',): support=0.50, count=10
('Embroidered Bedspread',): support=0.30, count=6
('Kids Bedding',): support=0.60, count=12
('Quilts',): support=0.40, count=8
('Shams',): support=0.55, count=11
('Sheets',): support=0.50, count=10


=== 2-itemsets ===
('Bedspreads', 'Bed Skirts'): support=0.35, count=7
('Kids Bedding', 'Bed Skirts'): support=0.50, count=10
('Bed Skirts', 'Shams'): support=0.45, count=9
('Bed Skirts', 'Sheets'): support=0.45, count=9
('Kids Bedding', 'Bedding Collections'): support=0.30, count=6
('Kids Bedding', 'Bedspreads'): support=0.35, count=7
('Bedspreads', 'Sheets'): support=0.35, count=7
('Decorative Pillows', 'Quilts'): support=0.30, count=6
('Kids Bedding', 'Shams'): support=0.45, count=9
('Kids Bedding', 'Sheets'): support=0.50, count=10
('Shams', 'Sheets'): support=0.35, count=7

## FP-Growth Algorithm

In [8]:
from mlxtend.frequent_patterns import fpgrowth, association_rules
import pandas as pd
import time

print("\nRunning FP-Growth Algorithm ...")

start_total = time.perf_counter()

# One-hot encode the transactions: one row per transaction, one boolean column
# per distinct item (the input layout mlxtend's fpgrowth works on).
all_items = sorted(set(item for trans in transactions for item in trans))
df = pd.DataFrame([{item: (item in trans) for item in all_items} for trans in transactions])
total_transactions = len(df)

frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)
# Round before casting: support * N is a float product that can land just
# below the true integer (0.29 * 100 == 28.999999999999996), and a bare
# astype(int) would truncate it to the wrong count. astype(float) keeps this
# working even if the support column comes back with object dtype.
frequent_itemsets["count"] = (
    (frequent_itemsets["support"].astype(float) * total_transactions).round().astype(int)
)

if frequent_itemsets.empty:
    # No frequent itemsets means no rules. Skip association_rules here: as far
    # as I know mlxtend raises ValueError on an empty input table -- confirm
    # against the installed version.
    rules = pd.DataFrame(columns=["antecedents", "consequents", "support", "confidence", "count"])
else:
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
    rules["count"] = (rules["support"].astype(float) * total_transactions).round().astype(int)

end_total = time.perf_counter()
fpgrowth_time = end_total - start_total

total_itemsets = len(frequent_itemsets)
# Compute the itemset sizes once and reuse them for grouping the printout.
itemset_sizes = frequent_itemsets["itemsets"].apply(len)
for k in sorted(itemset_sizes.unique()):
    print(f"\n=== {k}-itemsets ===")
    subset = frequent_itemsets[itemset_sizes == k]
    for _, row in subset.iterrows():
        print(f"{tuple(row['itemsets'])}: support={row['support']:.2f}, count={row['count']}")
    print()

print("\n=== Association Rules ===")
for _, r in rules.iterrows():
    print(f"{tuple(r['antecedents'])} -> {tuple(r['consequents'])} "
          f"(support={r['support']:.2f}, confidence={r['confidence']:.2f}, count={r['count']})")

print("\n=== FP-Growth Summary ===")
print(f"Total frequent itemsets: {total_itemsets}")
print(f"Total association rules: {len(rules)}")
print(f"Total execution time: {fpgrowth_time:.6f} seconds")


Running FP-Growth Algorithm ...

=== 1-itemsets ===
('Decorative Pillows',): support=0.50, count=10
('Quilts',): support=0.40, count=8
('Embroidered Bedspread',): support=0.30, count=6
('Kids Bedding',): support=0.60, count=12
('Shams',): support=0.55, count=11
('Bed Skirts',): support=0.55, count=11
('Sheets',): support=0.50, count=10
('Bedspreads',): support=0.35, count=7
('Bedding Collections',): support=0.35, count=7


=== 2-itemsets ===
('Decorative Pillows', 'Quilts'): support=0.30, count=6
('Kids Bedding', 'Shams'): support=0.45, count=9
('Kids Bedding', 'Bed Skirts'): support=0.50, count=10
('Bed Skirts', 'Shams'): support=0.45, count=9
('Kids Bedding', 'Sheets'): support=0.50, count=10
('Bed Skirts', 'Sheets'): support=0.45, count=9
('Shams', 'Sheets'): support=0.35, count=7
('Bedspreads', 'Sheets'): support=0.35, count=7
('Bedspreads', 'Bed Skirts'): support=0.35, count=7
('Kids Bedding', 'Bedspreads'): support=0.35, count=7
('Kids Bedding', 'Bedding Collections'): support=0.30, count=6

## Timing Comparison

In [9]:
from tabulate import tabulate
# Gather the wall-clock timings measured by the three algorithm cells, keyed
# by display name; dict insertion order sets the row order of the table.
algorithm_timings = {
    "Brute Force": brute_force_time,
    "Apriori": apriori_time,
    "FP-Growth": fpgrowth_time,
}
# Format the seconds as fixed 6-decimal strings before handing them to tabulate.
timing_data = [[label, f"{seconds:.6f}"] for label, seconds in algorithm_timings.items()]
table = tabulate(timing_data, headers=["Algorithm", "Execution Time (s)"], tablefmt="grid")
print("\n=== Timing Comparison of Algorithms ===\n")
print(table)


=== Timing Comparison of Algorithms ===

+-------------+----------------------+
| Algorithm   |   Execution Time (s) |
+=============+======================+
| Brute Force |             0.005006 |
+-------------+----------------------+
| Apriori     |             0.026791 |
+-------------+----------------------+
| FP-Growth   |             0.017132 |
+-------------+----------------------+
