In [1]:
# ! pip install prettytable

In [2]:
import time
from prettytable import PrettyTable

In [3]:
def generate_candidates(itemset, length):
    """Join pairs of itemsets into candidate itemsets of a given size.

    Every unordered pair of entries in ``itemset`` is merged; the merged
    itemset is kept when it contains exactly ``length`` distinct items.

    Args:
        itemset: Sequence of itemsets, each an iterable of hashable items.
        length: Number of distinct items every returned candidate must have.

    Returns:
        A list of candidates (each a list of items). Each distinct
        combination of items appears exactly once, and both the candidates
        and the items inside them are in first-seen order, so the result is
        deterministic from run to run.
    """
    candidates = []
    seen = set()  # frozensets of the combinations already emitted
    for i in range(len(itemset)):
        for j in range(i + 1, len(itemset)):
            # dict.fromkeys gives an order-preserving union, unlike set(),
            # whose iteration order depends on the items' hashes.
            merged = list(dict.fromkeys([*itemset[i], *itemset[j]]))
            if len(merged) != length:
                continue
            # Several different pairs can merge to the same combination
            # (e.g. {1,2}+{1,3} and {1,2}+{2,3}); keep only the first.
            key = frozenset(merged)
            if key not in seen:
                seen.add(key)
                candidates.append(merged)
    return candidates

In [4]:
def get_support(transactions, itemset):
    """Count how many transactions contain every item of ``itemset``.

    Args:
        transactions: Sized collection of transactions, each an iterable of
            hashable items.
        itemset: Iterable of hashable items to look for.

    Returns:
        A tuple ``(count, percentage)``: the absolute number of transactions
        that contain all items of ``itemset``, and that number as a
        percentage of ``len(transactions)``. When ``transactions`` is empty
        the result is ``(0, 0.0)`` instead of a division-by-zero error.
    """
    total = len(transactions)
    if total == 0:
        return 0, 0.0

    wanted = set(itemset)  # built once rather than once per transaction
    count = sum(1 for transaction in transactions if wanted.issubset(transaction))
    return count, count / total * 100

In [5]:
def apriori(transactions, min_support_percentage):
    """Mine frequent itemsets level by level, printing progress as it goes.

    Starting from the single items found in ``transactions``, each level's
    candidates are scored with ``get_support``; those whose support
    percentage is at least ``min_support_percentage`` are kept and fed to
    ``generate_candidates`` to build the next level. The loop stops when a
    level has no candidates or no frequent itemsets.

    For every level the candidates and a table of the frequent itemsets are
    printed, followed by a summary with the total count and elapsed time.

    Args:
        transactions: Sized collection of transactions, each an iterable of
            hashable, mutually sortable items.
        min_support_percentage: Minimum support, as a percentage (0-100) of
            the number of transactions, for an itemset to be kept.

    Returns:
        A list with one dict per level that produced frequent itemsets. Each
        dict maps a sorted tuple of items to ``(absolute_count, percentage)``.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Level-1 candidates: each distinct item, in first-seen order. The set
    # makes the membership test O(1) instead of scanning the list each time.
    seen_items = set()
    items = []
    for transaction in transactions:
        for item in transaction:
            if item not in seen_items:
                seen_items.add(item)
                items.append([item])

    k = 1
    frequent_itemsets = []
    total_frequent_count = 0

    while items:
        print(f"\nLevel {k} Candidate Itemsets:")
        print(f"Candidates: {items}")

        support_count = {}
        evaluated = set()
        for item in items:
            key = tuple(sorted(item))
            # The same combination can appear more than once among the
            # candidates; scanning the transactions again would only
            # reproduce the same numbers.
            if key in evaluated:
                continue
            evaluated.add(key)
            abs_count, support = get_support(transactions, item)
            if support >= min_support_percentage:
                support_count[key] = (abs_count, support)

        if not support_count:
            break

        frequent_itemsets.append(support_count)
        total_frequent_count += len(support_count)

        table = PrettyTable()
        table.field_names = ["Itemset", "Absolute Support", "Support (%)"]
        for itemset, (abs_count, support) in support_count.items():
            table.add_row([list(itemset), abs_count, f"{support:.2f}"])

        print(f"\nLevel {k} Frequent Itemsets:")

        print(table)

        # Only this level's frequent itemsets seed the next level.
        items = generate_candidates([list(frequent) for frequent in support_count], k + 1)
        k += 1

    end_time = time.perf_counter()
    print("\n=== Summary ===")
    print(f"Total frequent itemsets found: {total_frequent_count}")
    print(f"Computation time: {end_time - start_time:.2f} seconds")

    return frequent_itemsets

In [6]:
# Read one transaction per line of a.txt; items are whitespace-separated tokens.
transactions = []
with open('a.txt', 'r') as file:
    for line in file:
        transaction = line.split()
        # Skip blank lines: an empty transaction contains no items but would
        # still be counted in len(transactions), lowering every itemset's
        # support percentage.
        if transaction:
            transactions.append(transaction)

In [7]:
# Sanity check: report how many entries the `transactions` list holds.
print(f"Total number of transactions: {len(transactions)}")

Total number of transactions: 9


In [8]:
# Set the minimum support here, as a relative value (percentage of all transactions).
min_support = 20
frequent_sets = apriori(transactions, min_support)

# Print one table per itemset size, listing each frequent itemset with its
# absolute and relative support.
for level, level_itemsets in enumerate(frequent_sets, start=1):
    print(f"\nFrequent {level}-itemsets:")
    table = PrettyTable()
    table.field_names = ["Itemset", "Absolute Support", "Support (%)"]
    for itemset, stats in level_itemsets.items():
        abs_count, support = stats
        row = [list(itemset), abs_count, f"{support:.2f}"]
        table.add_row(row)
    print(table)


Level 1 Candidate Itemsets:
Candidates: [['1'], ['2'], ['5'], ['4'], ['3']]

Level 1 Frequent Itemsets:
+---------+------------------+-------------+
| Itemset | Absolute Support | Support (%) |
+---------+------------------+-------------+
|  ['1']  |        6         |    66.67    |
|  ['2']  |        7         |    77.78    |
|  ['5']  |        2         |    22.22    |
|  ['4']  |        2         |    22.22    |
|  ['3']  |        6         |    66.67    |
+---------+------------------+-------------+

Level 2 Candidate Itemsets:
Candidates: [['1', '2'], ['1', '5'], ['1', '4'], ['1', '3'], ['2', '5'], ['4', '2'], ['2', '3'], ['4', '5'], ['3', '5'], ['4', '3']]

Level 2 Frequent Itemsets:
+------------+------------------+-------------+
|  Itemset   | Absolute Support | Support (%) |
+------------+------------------+-------------+
| ['1', '2'] |        4         |    44.44    |
| ['1', '5'] |        2         |    22.22    |
| ['1', '3'] |        4         |    44.44    |
| ['2', '5']