In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
warnings.filterwarnings('ignore')

# --- 1. Load and Pre-process Data (a) ---
# Read the raw order lines. The rest of the script relies on the
# 'TransactionNo' and 'Items' columns of this file.
try:
    df = pd.read_csv("Oder3.csv")
except FileNotFoundError:
    print("Error: Oder3.csv not found.")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module / the interactive shell and is not meant
    # for use in programs. A non-zero status also signals the failure.
    raise SystemExit(1)

# Data Cleaning
# Drop rows with a missing item first: NaN is truthy and is not equal to
# 'NONE', so it would pass both filters below and end up inside the
# transaction lists as a float mixed in with the item strings.
df = df.dropna(subset=['Items']).copy()
# Cast to str before stripping so that non-string cells are cleaned as text
# rather than being turned into NaN by the .str accessor.
df['Items'] = df['Items'].astype(str).str.strip()
# Remove 'NONE' and empty items
df = df[~df['Items'].isin(['', 'NONE'])]

# --- 2. Generate the List of Transactions (b) ---
# Collect the items of every transaction number into one Python list, giving
# a list of item lists (one inner list per transaction).
items_by_transaction = df.groupby('TransactionNo')['Items']
transactions = items_by_transaction.apply(list).tolist()

# One-hot encode the transactions: the encoder learns the item vocabulary in
# fit() and transform() produces one boolean column per learned item.
te = TransactionEncoder()
te.fit(transactions)
encoded_rows = te.transform(transactions)
df_oht = pd.DataFrame(encoded_rows, columns=te.columns_)

# --- 3. Train Apriori Algorithm (c) ---
# Thresholds for the mining step.
min_support = 0.01  # 1% support
min_confidence = 0.25  # 25% confidence

# Frequent itemsets first, then the rules derived from them, keeping only
# rules whose confidence reaches the threshold.
frequent_itemsets = apriori(
    df_oht,
    min_support=min_support,
    use_colnames=True,
)
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=min_confidence,
)

# Order the rules from highest to lowest lift and renumber the rows so that
# position 0 is the strongest rule.
rules_sorted = rules.sort_values(by=['lift'], ascending=False)
rules_sorted = rules_sorted.reset_index(drop=True)

# Console report: rule count plus the five highest-lift rules.
report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("--- Apriori Analysis Results (Top 5 Rules by Lift) ---")
print(f"Total Rules Found: {len(rules)}")
print(rules_sorted[report_columns].head())

# --- 4. Visualize the List of Rules (d) ---
# Bubble chart of the mined rules: support on the x-axis, confidence on the
# y-axis, with both marker size and colour driven by lift.
plt.figure(figsize=(10, 7))
bubble_style = dict(
    hue="lift",
    size="lift",
    sizes=(20, 400),
    palette="viridis",
    alpha=0.7,
)
sns.scatterplot(data=rules_sorted, x="support", y="confidence", **bubble_style)

# Titles and axis labels.
plt.title('Association Rules: Support vs Confidence (Size determined by Lift)')
plt.xlabel('Support (Frequency)')
plt.ylabel('Confidence (Predictive Power)')

# Dashed background grid; the legend is anchored outside the plot area so it
# does not cover the markers.
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(title='Lift', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()

# Concise Interpretation Summary
# Describe the first row of rules_sorted (the highest-lift rule, since the
# frame is sorted by lift in descending order). Nothing is printed when no
# rules were found.
if not rules_sorted.empty:
    top_rule = rules_sorted.iloc[0]
    top_lift = top_rule['lift']

    # Choose the label from the lift value instead of always claiming a
    # strong association: lift > 1 means the items co-occur more often than
    # independence would predict, lift < 1 means less often, and a lift of
    # exactly 1 means the rule carries no information.
    if top_lift > 1:
        strength = "Highly associated"
    elif top_lift < 1:
        strength = "Negatively associated"
    else:
        strength = "No association"

    print("\n--- Interpretation of Top Rule ---")
    print(f"IF {list(top_rule['antecedents'])} THEN {list(top_rule['consequents'])}")
    print(f"Lift: {top_lift:.2f} ({strength})")

ModuleNotFoundError: No module named 'mlxtend'