In [1]:
import itertools
import time
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pyfpgrowth

# Function to read CSV file and convert it into a list of transactions
def read_transactions(file_path):
    transactions = []
    with open(file_path, 'r') as file:
        csv_reader = pd.read_csv(file)
        for index, row in csv_reader.iterrows():
            transactions.append(row['Transaction'].split(', '))
    return transactions




# Brute force method to find frequent itemsets
def brute_force(transactions, support_threshold):
    items = set(item for transaction in transactions for item in transaction)
    itemsets = []
    for i in range(1, len(items) + 1):
        itemsets.extend(itertools.combinations(items, i))
    frequent_itemsets = {}
    for itemset in itemsets:
        frequency = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))
        if frequency / len(transactions) >= support_threshold:
            frequent_itemsets[itemset] = frequency
    return frequent_itemsets

# Function to convert list of transactions into the right format for MLxtend
def convert_to_mlxtend_format(transactions):
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    return df

# Function to run Apriori algorithm and find frequent itemsets
def run_apriori(transactions, support_threshold):
    df = convert_to_mlxtend_format(transactions)
    frequent_itemsets = apriori(df, min_support=support_threshold, use_colnames=True)
    return frequent_itemsets

# Function to run FP-Growth algorithm and find frequent itemsets
def run_fpgrowth(transactions, support_threshold):
    patterns = pyfpgrowth.find_frequent_patterns(transactions, support_threshold * len(transactions))
    frequent_itemsets = pd.DataFrame(list(patterns.items()), columns=['itemsets', 'support'])
    frequent_itemsets['itemsets'] = frequent_itemsets['itemsets'].apply(frozenset)
    return frequent_itemsets

# Function to generate association rules from frequent itemsets
def generate_rules(frequent_itemsets, transactions, confidence_threshold):
    df = convert_to_mlxtend_format(transactions)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold,support_only=True)
    rules = rules[['antecedents', 'consequents', 'support', 'confidence']]
    return rules

# Function to compare results and performance time of all three algorithms
def compare_algorithms(file_path, support_threshold, confidence_threshold):
    transactions = read_transactions(file_path)
    start_time = time.time()
    brute_force_itemsets = brute_force(transactions, support_threshold)
    brute_force_time = time.time() - start_time
    start_time = time.time()
    apriori_itemsets = run_apriori(transactions, support_threshold)
    apriori_rules = generate_rules(apriori_itemsets, transactions, confidence_threshold)
    apriori_time = time.time() - start_time
    start_time = time.time()
    fpgrowth_itemsets = run_fpgrowth(transactions, support_threshold)
    fpgrowth_rules = generate_rules(fpgrowth_itemsets, transactions, confidence_threshold)
    fpgrowth_time = time.time() - start_time
    print(f"Brute Force Method: {len(brute_force_itemsets)} frequent itemsets found in {brute_force_time:.5f} seconds.")
    print(f"Apriori Algorithm: {len(apriori_itemsets)} frequent itemsets and {len(apriori_rules)} rules found in {apriori_time:.5f} seconds.")
    print(f"FP-Growth Algorithm: {len(fpgrowth_itemsets)} frequent itemsets and {len(fpgrowth_rules)} rules found in {fpgrowth_time:.5f} seconds.")
    return brute_force_itemsets, apriori_rules, fpgrowth_rules


In [2]:
# Main function to orchestrate the analysis
def main():
    support_threshold = float(input("Enter the support threshold (as a fraction): "))
    confidence_threshold = float(input("Enter the confidence threshold (as a fraction): "))

    dataset_options = {
        'amazon': 'Amazon.csv',
        'best_buy': 'Best_Buy.csv',
        'generic': 'Generic.csv',
        'k_mart': 'K-Mart.csv',
        'nike': 'Nike.csv'
    }
    selected_dataset = input(f"Select a dataset ({', '.join(dataset_options.keys())}): ").lower()

    if selected_dataset not in dataset_options:
        print("Invalid dataset selection. Exiting.")
        return

    file_path = dataset_options[selected_dataset]

    algorithm_options = ['apriori', 'fpgrowth', 'bruteforce']
    selected_algorithm = input(f"Select an algorithm ({', '.join(algorithm_options)}): ").lower()

    if selected_algorithm not in algorithm_options:
        print("Invalid algorithm selection. Exiting.")
        return

    print(f"\nProcessing {file_path}")

    if selected_algorithm == 'bruteforce':
        brute_force_itemsets, _, _ = compare_algorithms(file_path, support_threshold, confidence_threshold)
        print(f"Number of rules generated by Brute Force: {len(brute_force_itemsets)}")
    else:
        _, apriori_rules, fpgrowth_rules = compare_algorithms(file_path, support_threshold, confidence_threshold)
        print(f"Number of rules generated by {selected_algorithm.capitalize()}: {len(apriori_rules if selected_algorithm == 'apriori' else fpgrowth_rules)}")

if __name__ == "__main__":
    main()


Enter the support threshold (as a fraction): 0.2
Enter the confidence threshold (as a fraction): 0.4
Select a dataset (amazon, best_buy, generic, k_mart, nike): nike
Select an algorithm (apriori, fpgrowth, bruteforce): fpgrowth

Processing Nike.csv
Brute Force Method: 439 frequent itemsets found in 0.08944 seconds.
Apriori Algorithm: 439 frequent itemsets and 64 rules found in 0.01919 seconds.
FP-Growth Algorithm: 429 frequent itemsets and 9566 rules found in 0.03675 seconds.
Number of rules generated by Fpgrowth: 9566
