In [86]:
import tkinter as tk
import customtkinter as ctk
from tkinter import messagebox
from tkinter import filedialog
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as snb
import pandas as pd
from fpdf import FPDF
import os
import mysql.connector
import warnings

In [7]:
pip install mysql-connector-python

Note: you may need to restart the kernel to use updated packages.


In [68]:
# Preview the sample transactions file; header=None keeps the first line as data.
store_data = pd.read_csv("store.csv", header=None)
display(store_data.iloc[:5])
print(store_data.shape)

Unnamed: 0,0
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"


(20, 1)


In [104]:

def load_dataset(file_path, header=None):
    """Read a transactions CSV into a DataFrame.

    The file is read without a header row by default, so the first line is
    kept as a transaction instead of being consumed as column names.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    header : int, list of int, "infer" or None, default None
        Passed straight to ``pandas.read_csv``. Use ``header=0`` for files
        whose first line really is a header.

    Returns
    -------
    pandas.DataFrame
        The parsed file.
    """
    print(f"Loading dataset from: {file_path}")
    return pd.read_csv(file_path, header=header)

# Function to preprocess the data
def convert_to_boolean(df):
    """One-hot encode comma-separated transactions into a boolean DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds one transaction per row, written as a
        comma-separated string of item names.

    Returns
    -------
    pandas.DataFrame
        One row per non-missing transaction and one ``bool`` column per
        distinct item (columns sorted by item name). A cell is True when the
        item occurs in that transaction.

    Notes
    -----
    Rows whose first column is missing are skipped. Item names are stripped
    of surrounding whitespace and empty names (e.g. from "A,,B") are ignored.
    """
    # Missing cells cannot be split, so drop them before tokenising.
    raw_transactions = df.iloc[:, 0].dropna().astype(str)

    transactions = []
    for line in raw_transactions:
        cleaned = (token.strip() for token in line.split(','))
        transactions.append([token for token in cleaned if token])

    # Sorted so the column order does not depend on set iteration order.
    unique_items = sorted({item for basket in transactions for item in basket})
    item_to_idx = {item: idx for idx, item in enumerate(unique_items)}

    matrix = []
    for basket in transactions:
        row = [False] * len(unique_items)
        for item in basket:
            row[item_to_idx[item]] = True
        matrix.append(row)

    # True/False values match the function name instead of 0/1 integers.
    return pd.DataFrame(matrix, columns=unique_items, dtype=bool)

# Function to perform Apriori analysis
def perform_apriori(df, min_support, min_confidence):
    """Mine association rules from a one-hot encoded transaction frame.

    Frequent itemsets are found with ``apriori`` at ``min_support`` and then
    turned into rules with ``association_rules``, keeping those whose
    confidence reaches ``min_confidence``. The rules frame is returned as-is.
    """
    print(f"Performing Apriori analysis with min_support={min_support}, min_confidence={min_confidence}...")

    # DeprecationWarnings raised while mining are silenced for this call only.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        itemsets = apriori(df, min_support=min_support, use_colnames=True)

    return association_rules(itemsets, metric="confidence", min_threshold=min_confidence)

# Function to store results in MySQL
def store_results_in_mysql(rules):
    """Insert every association rule into the ``association_rules`` table.

    Connection settings are read from the environment variables
    ``MYSQL_USER``, ``MYSQL_PASSWORD``, ``MYSQL_HOST`` and ``MYSQL_DATABASE``.
    Each falls back to the value that used to be hard-coded here
    (root / root / 127.0.0.1 / apriori).

    Parameters
    ----------
    rules : pandas.DataFrame
        Rules frame with ``antecedents`` and ``consequents`` (iterables of
        item names) plus ``support``, ``confidence`` and ``lift`` columns.

    MySQL errors are reported on stdout and the transaction is rolled back;
    they are not re-raised.
    """
    print("Storing results in MySQL...")
    insert_sql = (
        "INSERT INTO association_rules (antecedent, consequent, support, confidence, lift) "
        "VALUES (%s, %s, %s, %s, %s)"
    )

    # Pre-bind both handles so cleanup is safe even when connect() itself fails.
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(
            user=os.environ.get('MYSQL_USER', 'root'),
            password=os.environ.get('MYSQL_PASSWORD', 'root'),
            host=os.environ.get('MYSQL_HOST', '127.0.0.1'),
            database=os.environ.get('MYSQL_DATABASE', 'apriori'),
        )
        cursor = conn.cursor()

        for _, row in rules.iterrows():
            # Sorted so the stored text does not depend on set iteration order.
            antecedents = ', '.join(sorted(row['antecedents']))
            consequents = ', '.join(sorted(row['consequents']))
            # Plain floats keep the driver from having to convert numpy scalars.
            cursor.execute(
                insert_sql,
                (antecedents, consequents,
                 float(row['support']), float(row['confidence']), float(row['lift'])),
            )

        conn.commit()
        print(f"Stored {len(rules)} rules successfully.")
    except mysql.connector.Error as err:
        print(f"Error storing rules in MySQL: {err}")
        if conn is not None:
            conn.rollback()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

def _format_itemset(itemset):
    """Render an itemset as 'A, B'; a value that is already a string is returned unchanged."""
    if isinstance(itemset, str):
        return itemset
    return ', '.join(sorted(str(item) for item in itemset))


def _add_top_rules_chart(pdf, table, metric, xlabel, title):
    """Embed a horizontal bar chart of the 10 rules with the largest `metric` into `pdf`."""
    import tempfile

    top = table.nlargest(10, metric)
    rule_labels = top['antecedents'] + ' -> ' + top['consequents']

    # A unique temp file avoids clobbering files in the working directory.
    handle, plot_path = tempfile.mkstemp(suffix='.png')
    os.close(handle)
    try:
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            ax.barh(rule_labels, top[metric], color='skyblue')
            ax.set_xlabel(xlabel)
            ax.set_ylabel('Rules')
            ax.set_title(title)
            ax.invert_yaxis()  # largest value at the top
            fig.savefig(plot_path, bbox_inches='tight')
        finally:
            plt.close(fig)

        pdf.image(plot_path, x=15, y=None, w=180)
        pdf.ln(60)  # Add space after the chart
    finally:
        # Remove the temporary image even if plotting or embedding failed.
        os.remove(plot_path)


def generate_top_confidence_pdf(rules, filename='top_confidence_rules.pdf'):
    """Write a PDF report of the strongest association rules.

    The report lists the 10 rules with the highest confidence (with their
    confidence, support and lift), followed by one bar chart of the top 10
    rules by confidence and one of the top 10 rules by lift.

    Parameters
    ----------
    rules : pandas.DataFrame
        Rules frame with ``antecedents``, ``consequents``, ``confidence``,
        ``support`` and ``lift`` columns. It is not modified.
    filename : str, default 'top_confidence_rules.pdf'
        Path of the PDF to write.
    """
    # Work on a copy so the caller's itemsets are not replaced by strings.
    table = rules.copy()
    table['antecedents'] = table['antecedents'].apply(_format_itemset)
    table['consequents'] = table['consequents'].apply(_format_itemset)

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(200, 10, txt="Top 10 Association Rules by Confidence", ln=True, align='C')
    pdf.ln(10)  # Add a line break

    pdf.set_font('Arial', '', 12)

    # With no rules there is nothing to rank or plot; emit a short report instead.
    if table.empty:
        pdf.cell(0, 10, txt="No association rules were found.", ln=True)
        pdf.output(filename)
        return

    top_10_confidence = table.nlargest(10, 'confidence').reset_index(drop=True)
    for index, row in top_10_confidence.iterrows():
        # The itemsets are already formatted text here; joining them again
        # would split them into single characters.
        antecedents = row['antecedents']
        consequents = row['consequents']
        confidence = row['confidence']
        support = row['support']
        lift = row['lift']

        pdf.multi_cell(0, 10, txt=f"{index + 1}. Rule : {antecedents} -> {consequents}\nConfidence: {confidence:.2f}\nSupport: {support:.3f}\nLift: {lift:.2f}\n", align='L')
        pdf.ln(5)  # Add a small space between rules

    _add_top_rules_chart(pdf, table, 'confidence', 'Confidence', 'Top 10 Rules by Confidence')
    _add_top_rules_chart(pdf, table, 'lift', 'lift', 'Top 10 Rules by lift')

    pdf.output(filename)

# Function to handle the button click
def on_analyze_click():
    """Run the full analysis for a user-selected file and report the outcome.

    Asks for a CSV file, validates the support/confidence thresholds typed
    into the entry widgets, mines the rules, stores them in MySQL and writes
    the PDF report. Every outcome is shown in a message box.
    """
    file_path = filedialog.askopenfilename()
    if not file_path:
        print("No file selected.")
        messagebox.showerror("File Error", "Please select a valid file.")
        return

    print("File selected:", file_path)
    try:
        # Retrieve and convert min_support and min_confidence from entries
        min_support = float(entry_support.get())
        min_confidence = float(entry_confidence.get())
        print(f"min_support: {min_support}, min_confidence: {min_confidence}")

        # Both thresholds are fractions in (0, 1]. Chained comparisons also
        # reject NaN, which would slip through "<= 0 or > 1" style checks.
        if not (0 < min_support <= 1) or not (0 < min_confidence <= 1):
            raise ValueError("Support and confidence values must be greater than 0 and at most 1.")

        df = load_dataset(file_path)
        df_pivot = convert_to_boolean(df)

        rules = perform_apriori(df_pivot, min_support, min_confidence)
        print(f"Found {len(rules)} rules")
        print(rules)  # Print rules for debugging

        # Nothing to store or plot: tell the user instead of writing an empty report.
        if len(rules) == 0:
            messagebox.showinfo("No Rules", "Analysis complete. No rules met the given thresholds.")
            return

        store_results_in_mysql(rules)
        generate_top_confidence_pdf(rules)

        messagebox.showinfo("Success", f"Analysis complete. Found {len(rules)} rules.")
    except ValueError as e:
        print("Error:", e)
        messagebox.showerror("Input Error", str(e))
    except Exception as e:
        # Any other failure (unreadable file, report generation, ...) would
        # otherwise only reach the console; surface it in the GUI.
        print("Error:", e)
        messagebox.showerror("Analysis Error", str(e))
        
# Main application window: a Tk root that hosts the CustomTkinter widgets
root = tk.Tk()
root.title("Market Basket Analyzer Application")
root.geometry("800x800")

# CustomTkinter appearance and color theme
ctk.set_appearance_mode("System")
ctk.set_default_color_theme("blue")

# Container frame that fills the window
frame = ctk.CTkFrame(master=root)
frame.pack(padx=20, pady=20, fill="both", expand=True)

# Title label; its text color is cycled through `colors` by the animation
label = ctk.CTkLabel(
    master=frame,
    text="Market Basket Analyzer",
    font=("Helvetica", 24),
    text_color="orange",
)
label.pack(pady=10)

# Palette used for the title animation, and the position of the next color
colors = [
    "#E6E6FA", "#D8BFD8", "#DDA0DD",
    "#EE82EE", "#DA70D6", "#BA55D3",
    "#9932CC", "#9400D3", "#8A2BE2",
    "#800080", "#9370DB", "#7B68EE",
    "#6A5ACD", "#483D8B", "#4B0082",
]
color_index = 0

# Function to animate the text color
def animate_text_color():
    """Recolor the title label with the next palette entry and reschedule itself in 500 ms."""
    global color_index
    next_color = colors[color_index]
    label.configure(text_color=next_color)
    # Advance and wrap around at the end of the palette.
    color_index = (color_index + 1) % len(colors)
    root.after(500, animate_text_color)

# Kick off the repeating title-color animation
animate_text_color()


def _labelled_entry(prompt):
    """Pack a caption label followed by a text entry into `frame`; return (label, entry)."""
    caption = ctk.CTkLabel(master=frame, text=prompt)
    caption.pack(pady=5)
    field = ctk.CTkEntry(master=frame)
    field.pack(pady=5)
    return caption, field


# Threshold inputs read by on_analyze_click
label_support, entry_support = _labelled_entry("Enter Minimum Support:")
label_confidence, entry_confidence = _labelled_entry("Enter Minimum Confidence:")

# Button that starts the analysis
button_analyze = ctk.CTkButton(master=frame, text="Analyze", command=on_analyze_click)
button_analyze.pack(pady=20)

# Hand control to the Tk event loop (blocks until the window is closed)
root.mainloop()