In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


In [2]:
def load_data(filepath):
    df = pd.read_excel(filepath)
    print("Dataset Loaded Successfully")
    print(df.head())
    return df

In [3]:
# Function to preprocess the data
def preprocess_data(df):
    #"""Convert dataset into a format suitable for the Apriori algorithm"""
    # Split Product_Names and Quantities_Sold into lists for each transaction
    transactions = []
    for _, row in df.iterrows():
        products = row['Product_Names'].split(', ')
        quantities = list(map(int, row['Quantities_Sold'].split(', ')))
        for product, quantity in zip(products, quantities):
            if quantity > 0:
                transactions.append((row['Invoice_ID'], product, quantity))

    # Convert the transactions into a DataFrame
    transaction_df = pd.DataFrame(transactions, columns=['Invoice_ID', 'Item', 'Quantity'])

    # Pivot to get basket format
    basket = transaction_df.pivot_table(index='Invoice_ID', columns='Item', values='Quantity', fill_value=0)
    basket = basket.applymap(lambda x: 1 if x > 0 else 0)
    print("Data Preprocessing Complete")
    return basket

In [4]:
# Function to apply the Apriori algorithm
def apply_apriori(basket, min_support=0.05):
    #"""Apply Apriori algorithm to find frequent itemsets"""
    frequent_itemsets = apriori(basket, min_support=min_support, use_colnames=True)
    print("Apriori Algorithm Applied Successfully")
    return frequent_itemsets

In [5]:
# Function to generate and display association rules
def generate_rules(frequent_itemsets, metric='lift', min_threshold=1.0):
    #"""Generate and display association rules"""
    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
    print("Association Rules Generated")
    return rules


In [6]:
# Main execution
if __name__ == "__main__":
    filepath = r"C:\Users\asus\Downloads\SD.xlsx"
    df = load_data(filepath)
    basket = preprocess_data(df)
    frequent_itemsets = apply_apriori(basket, min_support=0.05)
    print(frequent_itemsets)
    
    frequent_itemsets.to_csv("A-Priori", index=False)

Dataset Loaded Successfully
  Invoice_ID           Date_Time Customer_ID  \
0  INV347874 2022-03-21 05:00:00    CUST5385   
1  INV567275 2022-02-18 03:00:00    CUST3686   
2  INV286043 2022-02-07 22:00:00    CUST2151   
3  INV924732 2022-03-12 05:00:00    CUST8245   
4  INV595003 2022-01-06 10:00:00    CUST4048   

                                         Product_IDs  \
0  PROD74, PROD33, PROD44, PROD51, PROD67, PROD30...   
1             PROD100, PROD70, PROD3, PROD73, PROD52   
2     PROD52, PROD54, PROD94, PROD86, PROD50, PROD18   
3  PROD29, PROD31, PROD46, PROD86, PROD14, PROD75...   
4     PROD94, PROD89, PROD34, PROD82, PROD43, PROD83   

                                       Product_Names  \
0  Shampoo, Yogurt, Chips, Carrot, Apple, Soda, Milk   
1                Carrot, Yogurt, Apple, Juice, Chips   
2      Juice, Carrot, Soda, Yogurt, Cookies, Shampoo   
3  Apple, Juice, Carrot, Yogurt, Milk, Cookies, S...   
4        Apple, Shampoo, Chips, Cookies, Juice, Soda   

         

  basket = basket.applymap(lambda x: 1 if x > 0 else 0)
