# Selecting the Dataset by user input

In [None]:
import time
import pandas as pd
from math import comb
from itertools import combinations
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [None]:
def load_and_display_dataset(choice):
    dataset_paths = {
        1: '/content/amazon.csv',
        2: '/content/bestbuy.csv',
        3: '/content/kmart.csv',
        4: '/content/nike.csv',
        5: '/content/generic.csv'
    }

    try:
        if choice in dataset_paths:
            df = pd.read_csv(dataset_paths[choice])
            return df
        else:
            print("Invalid choice. Please select a number between 1 and 5.")
            return None
    except FileNotFoundError:
        print(f"File not found for choice {choice}. Please check the file path and try again.")
        return None

try:
    choice = int(input("Please, Select your Dataset for \n 1 Amazon.\n 2 BestBuy.\n 3 K-Mart.\n 4 Nike.\n 5 Generic. \n"))
    df = load_and_display_dataset(choice)
    if df is not None:
        print(df)
except ValueError:
    print("Please enter a valid integer.")


  and should_run_async(code)


Please, Select your Dataset for 
 1 Amazon.
 2 BestBuy.
 3 K-Mart.
 4 Nike.
 5 Generic. 
1
   Transaction ID                                        Transaction
0          Trans1  A Beginner's Guide,  Java: The Complete Refere...
1          Trans2  A Beginner's Guide,  Java: The Complete Refere...
2          Trans3  A Beginner's Guide,  Java: The Complete Refere...
3          Trans4  Android Programming: The Big Nerd Ranch,  Head...
4          Trans5  Android Programming: The Big Nerd Ranch,  Begi...
5          Trans6  A Beginner's Guide,  Android Programming: The ...
6          Trans7  A Beginner's Guide,  Head First Java 2nd Editi...
7          Trans8  Java: The Complete Reference,  Java For Dummie...
8          Trans9  Java For Dummies,  Android Programming: The Bi...
9         Trans10  Beginning Programming with Java,  Java 8 Pocke...
10        Trans11  A Beginner's Guide,  Java: The Complete Refere...
11        Trans12  A Beginner's Guide,  Java: The Complete Refere...
12        Tr

# User inputs minimum support and confidence

In [None]:
min_sup = input("Please, input your Min. Support \n")
min_sup = float(min_sup)
min_con = input("Please, input your Min. confidence \n")
min_con = float(min_con)


  and should_run_async(code)


Please, input your Min. Support 
0.1
Please, input your Min. confidence 
0.1


# Brute-Forced Apriori

In [None]:
# Preprocess the 'Transaction' column by splitting the string into a list of items
df['Transaction'] = df['Transaction'].apply(lambda x: x.split(','))

# Extract unique transactions and items for preprocessing
unique_transactions = df['Transaction ID'].unique()
transaction_items = df['Transaction'].tolist()

# Since every transaction is unique, we can directly use the transaction_items for analysis
transactions = transaction_items

# frequent items
def frequent_items(new_patterns, current_items):
    items_in_patterns = set(item for pattern in new_patterns for item in pattern)
    return [item for item in current_items if item in items_in_patterns]

# frequent patterns
def find_frequent_patterns(transactions, min_support):
    unique_items = set(item for sublist in transactions for item in sublist)
    pattern_size = 1
    frequent_patterns = []
    frequent_patterns_count = []
    current_frequent_items = list(unique_items)
    while current_frequent_items:
        potential_patterns = combinations(current_frequent_items, pattern_size)
        new_frequent_patterns = []
        for pattern in list(potential_patterns):
            count = sum(1 for transaction in transactions if set(pattern).issubset(set(transaction)))
            if count >= min_support * len(transactions):
                new_frequent_patterns.append(pattern)
                frequent_patterns_count.append(count)
        frequent_patterns.extend(new_frequent_patterns)
        pattern_size += 1
        current_frequent_items = frequent_items(new_frequent_patterns, current_frequent_items)
    return frequent_patterns, frequent_patterns_count

def generate_association_rules(frequent_patterns, frequent_patterns_count, transactions, min_confidence):
    rules_with_confidence = []
    for pattern, pattern_count in zip(frequent_patterns, frequent_patterns_count):
        if len(pattern) > 1:
            sub_patterns = [sub_pattern for i in range(1, len(pattern))
                            for sub_pattern in combinations(pattern, i)]
            for sub_pattern in sub_patterns:
                sub_pattern_count = sum(1 for transaction in transactions if set(sub_pattern).issubset(set(transaction)))
                if sub_pattern_count > 0:  # Avoid division by zero
                    confidence = pattern_count / sub_pattern_count
                    if confidence >= min_confidence:
                        consequence = set(pattern) - set(sub_pattern)
                        rules_with_confidence.append(((tuple(sub_pattern), tuple(consequence)), confidence))
    return rules_with_confidence

def format_rules_for_printing(rules_with_confidence):
    formatted_rules = []
    for (antecedent, consequent), confidence in rules_with_confidence:
        rule_string = f"{antecedent} ---> {consequent} with confidence = {confidence:.2f}"
        formatted_rules.append(rule_string)
    return formatted_rules
# Start timing
start_time = time.time()
# Find frequent patterns and rules
frequent_patterns, frequent_patterns_count = find_frequent_patterns(transactions, min_sup)
rules_with_confidence = generate_association_rules(frequent_patterns, frequent_patterns_count, transactions, min_con)
# End timing
end_time = time.time()
bruteapriori_runtime = end_time - start_time

formatted_rules = format_rules_for_printing(rules_with_confidence)

# Function to print frequent patterns and association rules
def print_frequent_patterns_and_rules(frequent_patterns, frequent_patterns_count, transactions, min_confidence,formatted_rules):
    print("Frequent patterns:\n")
    for pattern, count in zip(frequent_patterns, frequent_patterns_count):
        print(f"{pattern}, support: {count/len(transactions):.2f}")
    print('\nAssociation rules:')
    for rule in formatted_rules:
        print(rule)

# Print the frequent patterns and rules
print_frequent_patterns_and_rules(frequent_patterns, frequent_patterns_count, transactions, min_con,formatted_rules)

print(f"Brute-forced Apriori runtime: {bruteapriori_runtime} seconds")


Frequent patterns:

("A Beginner's Guide",), support: 0.55
('Java For Dummies',), support: 0.15
('  Head First Java 2nd Edition',), support: 0.20
('  Java For Dummies',), support: 0.50
('  Head First Java 2nd Edition ',), support: 0.15
('  Java 8 Pocket Guide',), support: 0.20
('  HTML and CSS: Design and Build Websites',), support: 0.10
('  Beginning Programming with Java',), support: 0.25
('  Java: The Complete Reference',), support: 0.45
('Android Programming: The Big Nerd Ranch',), support: 0.15
('  Android Programming: The Big Nerd Ranch',), support: 0.50
("A Beginner's Guide", '  Head First Java 2nd Edition'), support: 0.10
("A Beginner's Guide", '  Java For Dummies'), support: 0.45
("A Beginner's Guide", '  HTML and CSS: Design and Build Websites'), support: 0.10
("A Beginner's Guide", '  Java: The Complete Reference'), support: 0.45
("A Beginner's Guide", '  Android Programming: The Big Nerd Ranch'), support: 0.30
('Java For Dummies', '  Android Programming: The Big Nerd Ranch'

  and should_run_async(code)


# Validating with python package of Apriori

In [None]:
# Initialize TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Start timing
start_time = time.time()

# Apriori algorithm
frequent_itemsets = apriori(df_encoded, min_support=min_sup, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_con)

# End timing
end_time = time.time()
apriori_runtime = end_time - start_time

# Function to display output similar to the brute-force method
def display_output_like_brute_force(frequent_itemsets, rules):
    print("Frequent patterns:\n")
    for index, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']}")

    print("\nAssociation rules:")
    for index, row in rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

display_output_like_brute_force(frequent_itemsets, rules)

print(f"Apriori runtime: {apriori_runtime} seconds")


Frequent patterns:

['  Android Programming: The Big Nerd Ranch'], support: 0.5
['  Beginning Programming with Java'], support: 0.25
['  HTML and CSS: Design and Build Websites'], support: 0.1
['  Head First Java 2nd Edition'], support: 0.2
['  Head First Java 2nd Edition '], support: 0.15
['  Java 8 Pocket Guide'], support: 0.2
['  Java For Dummies'], support: 0.5
['  Java: The Complete Reference'], support: 0.45
["A Beginner's Guide"], support: 0.55
['Android Programming: The Big Nerd Ranch'], support: 0.15
['Java For Dummies'], support: 0.15
['  Head First Java 2nd Edition', '  Android Programming: The Big Nerd Ranch'], support: 0.15
['  Java For Dummies', '  Android Programming: The Big Nerd Ranch'], support: 0.3
['  Java: The Complete Reference', '  Android Programming: The Big Nerd Ranch'], support: 0.25
["A Beginner's Guide", '  Android Programming: The Big Nerd Ranch'], support: 0.3
['Java For Dummies', '  Android Programming: The Big Nerd Ranch'], support: 0.15
['  Head First 

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


# Validating with python package of FP Grrowth

In [None]:
# Start timing
start_time = time.time()

# Find frequent itemsets with the fpgrowth algorithm
frequent_itemsets_fp = fpgrowth(df_encoded, min_support=0.1, use_colnames=True)

# Generate association rules
rules_fp = association_rules(frequent_itemsets_fp, metric="confidence", min_threshold=0.1)

# End timing
end_time = time.time()
fpgrowth_runtime = end_time - start_time

# Function to display output similar to the brute-force method
def display_output_like_brute_force(frequent_itemsets, rules):
    print("Frequent patterns:\n")
    for index, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']}")

    print("\nAssociation rules:")
    for index, row in rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

# Display the formatted output
display_output_like_brute_force(frequent_itemsets_fp, rules_fp)

print(f"FP-Growth runtime: {fpgrowth_runtime} seconds")

Frequent patterns:

["A Beginner's Guide"], support: 0.55
['  Java For Dummies'], support: 0.5
['  Android Programming: The Big Nerd Ranch'], support: 0.5
['  Java: The Complete Reference'], support: 0.45
['  Head First Java 2nd Edition'], support: 0.2
['  Beginning Programming with Java'], support: 0.25
['Android Programming: The Big Nerd Ranch'], support: 0.15
['  Head First Java 2nd Edition '], support: 0.15
['  Java 8 Pocket Guide'], support: 0.2
['Java For Dummies'], support: 0.15
['  HTML and CSS: Design and Build Websites'], support: 0.1
["A Beginner's Guide", '  Java For Dummies'], support: 0.45
['  Java For Dummies', '  Android Programming: The Big Nerd Ranch'], support: 0.3
["A Beginner's Guide", '  Android Programming: The Big Nerd Ranch'], support: 0.3
["A Beginner's Guide", '  Java For Dummies', '  Android Programming: The Big Nerd Ranch'], support: 0.25
['  Java For Dummies', '  Java: The Complete Reference'], support: 0.45
["A Beginner's Guide", '  Java: The Complete Ref

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [None]:
data = {
    "Algorithm": ["BruteApriori", "Apriori", "FPGrowth"],
    "Runtime": [bruteapriori_runtime, apriori_runtime, fpgrowth_runtime]
}

df = pd.DataFrame(data)
df_sorted = df.sort_values(by="Runtime", ascending=True)
print(df_sorted)


      Algorithm   Runtime
0  BruteApriori  0.003513
2      FPGrowth  0.013688
1       Apriori  0.016238


  and should_run_async(code)
