In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

def run_eclat(data, min_support=0.05, max_length=10):
    """
    Mines frequent itemsets from a DataFrame of transaction data and returns a DataFrame of the itemsets
    that satisfy the minimum support and maximum length criteria.

    Each row of ``data`` is treated as one transaction and each non-missing cell as one item (converted
    with ``str``). Missing cells (NaN/None), e.g. the padding that appears when basket rows have different
    lengths, are skipped so they are not counted as an item named "nan".

    Note: despite the function's name, the mining step is delegated to mlxtend's ``apriori``; no separate
    ECLAT implementation is used here.

    Parameters:
    data (pandas.DataFrame): The DataFrame of transaction data to analyze (one transaction per row).
    min_support (float): The minimum support threshold for frequent itemset mining, a number between
                         0 and 1 (an int such as 1 is accepted). Default is 0.05.
    max_length (int): The maximum length of the frequent itemsets. Default is 10.

    Returns:
    pandas.DataFrame: A DataFrame with columns 'Itemset' and 'Support' holding the frequent itemsets that
                      satisfy the specified criteria. If ``data`` contains no items at all (no rows, no
                      columns, or only missing cells) an empty DataFrame with those columns is returned.

    Raises:
    TypeError: If the data parameter is not a pandas DataFrame.
    ValueError: If the min_support parameter is not a number between 0 and 1, or if the
                max_length parameter is not a positive integer.
    """

    # Validate input parameters
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data parameter must be a pandas DataFrame")

    # bool is a subclass of int, so it is rejected explicitly; plain ints (e.g. 1) are valid thresholds.
    if (
        isinstance(min_support, bool)
        or not isinstance(min_support, (int, float))
        or not 0 <= min_support <= 1
    ):
        raise ValueError("min_support parameter must be a float between 0 and 1")

    if isinstance(max_length, bool) or not isinstance(max_length, int) or max_length <= 0:
        raise ValueError("max_length parameter must be a positive integer")

    # Convert data to a list of transactions. The cell values and the "is present" mask are each
    # materialised once (instead of once per cell) and missing cells are dropped.
    values = data.to_numpy()
    present = data.notna().to_numpy()
    transactions = [
        [str(item) for item, keep in zip(row, row_mask) if keep]
        for row, row_mask in zip(values, present)
    ]

    # Nothing to mine: skip the encoder/miner and hand back an empty, correctly-shaped result.
    if not any(transactions):
        return pd.DataFrame({
            'Itemset': pd.Series(dtype=object),
            'Support': pd.Series(dtype=float)
        })

    # Convert transactions to a binary-encoded DataFrame (one boolean column per distinct item)
    te = TransactionEncoder()
    te_ary = te.fit_transform(transactions)
    df_transactions = pd.DataFrame(te_ary, columns=te.columns_)

    # Mine frequent itemsets (mlxtend's apriori does the actual work)
    frequent_itemsets = apriori(df_transactions, min_support=min_support, max_len=max_length, use_colnames=True)

    # Organize results into a DataFrame
    results = pd.DataFrame({
        'Itemset': frequent_itemsets['itemsets'],
        'Support': frequent_itemsets['support']
    })

    return results


In [20]:
# Read the basket file with header=None so the first line is kept as data, not used as column names.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv(
    '/home/young78703/Data_Science_Project/data/Market_Basket_Optimisation.csv',
    header=None,
)

# Keep the call as the cell's last expression so the notebook renders the returned table.
run_eclat(
    df,
    min_support=0.05,
    max_length=5,
)

Unnamed: 0,Itemset,Support
0,(burgers),0.087188
1,(cake),0.081056
2,(chicken),0.059992
3,(chocolate),0.163845
4,(cookies),0.080389
5,(cooking oil),0.05106
6,(eggs),0.179709
7,(escalope),0.079323
8,(french fries),0.170911
9,(frozen smoothie),0.063325
