In [1]:
import pandas as pd
import os
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import networkx as nx
# 加载数据集,可自行下载相关数据集https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/groceries.csv
path = os.path.join('..', 'data', '7武汉顶单货品数据.csv')
data = pd.read_csv(path, header=None)
len(data)

  data = pd.read_csv(path, header=None)


1048575

In [2]:
# 数据预处理
transactions = []
for i in range(len(data)):
    transactions.append([item for item in data.iloc[i,:] if str(item) != 'nan'])
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [None]:
# 挖掘频繁项集
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)
# 生成关联规则
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
# 分析结果
print("前10条关联规则：")
print(rules.sort_values(['lift', 'confidence'], ascending=[False, False]).head(10)[
    ['antecedents', 'consequents', 'support', 'confidence', 'lift', 'conviction']])

In [None]:
# 可视化
plt.figure(figsize=(14, 8))
G = nx.DiGraph()
# 添加节点和边
top_rules = rules.sort_values('lift', ascending=False).head(15)
for _, rule in top_rules.iterrows():
    G.add_edge(", ".join(rule['antecedents']),
               ", ".join(rule['consequents']),
               weight=rule['lift'],
               confidence=rule['confidence'])
pos = nx.spring_layout(G, k=0.5, seed=42)
node_sizes = [2000 * df[list(node.split(', '))].all(axis=1).mean() for node in G.nodes()]
nx.draw_networkx_nodes(G, pos, node_size=node_sizes, alpha=0.7, node_color='skyblue')
nx.draw_networkx_edges(G, pos, width=[d['weight']*0.3 for (_, _, d) in G.edges(data=True)],
                       edge_color='gray', alpha=0.6, arrowstyle='->', arrowsize=15)
nx.draw_networkx_labels(G, pos, font_size=9, font_family='sans-serif')
# 添加边标签（lift值）
edge_labels = {(u, v): f"Lift: {d['weight']:.2f}\nConf: {d['confidence']:.2f}"
               for u, v, d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
plt.title("Top 15 Association Rules by Lift", fontsize=15)
plt.axis('off')
plt.tight_layout()
plt.show()