In [1]:
from sklearn import datasets  
  
# Load the iris dataset; keep feature columns 1 and 2 (the slice 1:3) as inputs
# and the class labels as the target.
iris = datasets.load_iris()
X = iris.data[:, 1:3]
y = iris.target
  
from sklearn import model_selection  
from sklearn.linear_model import LogisticRegression  
from sklearn.neighbors import KNeighborsClassifier  
from sklearn.naive_bayes import GaussianNB   
from sklearn.ensemble import RandomForestClassifier  
from mlxtend.classifier import StackingClassifier  
import numpy as np  
  
# Base learners whose predictions feed the stacked model.
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()

# Meta-learner that is fitted on the base learners' outputs.
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                          meta_classifier=lr)

# Each base learner and the stacked model, paired with its display label.
candidates = [
    ('KNN', clf1),
    ('Random Forest', clf2),
    ('Naive Bayes', clf3),
    ('StackingClassifier', sclf),
]

print('3-fold cross validation:\n')
for label, clf in candidates:
    # Score every model with the same 3-fold accuracy protocol.
    scores = model_selection.cross_val_score(
        estimator=clf, X=X, y=y, cv=3, scoring='accuracy'
    )
    summary = (scores.mean(), scores.std(), label)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % summary)

3-fold cross validation:

Accuracy: 0.91 (+/- 0.01) [KNN]
Accuracy: 0.95 (+/- 0.01) [Random Forest]
Accuracy: 0.91 (+/- 0.02) [Naive Bayes]
Accuracy: 0.95 (+/- 0.02) [StackingClassifier]


In [2]:
import pandas as  pd
from mlxtend.preprocessing import TransactionEncoder   #类似文本分析库，将数据库项转为数字编码

# Sample purchase records: one inner list of course titles per transaction.
shopping_records = [
    ['linux操作系统实践', '线性代数', 'python基础', '机器学习实训', 'spark大数据分析'],
    ['线性代数', 'python进阶', 'spark大数据分析', '机器学习实训', 'git教程'],
    ['linux操作系统实践', 'git教程', 'python进阶', '机器学习实训'],
    ['linux操作系统实践', 'scikit-learn 机器学习', '操作系统', 'python进阶', 'spark大数据分析'],
    ['scikit-learn 机器学习', '线性代数', 'python进阶', '计算机网络', '机器学习实训'],
]

# Load the records into a DataFrame so they can be cleaned and processed.
# The rows are ragged, so shorter rows are padded with missing values.
shopping_df = pd.DataFrame(shopping_records)


def deal(data):
    """Return the non-missing entries of a Series as a plain Python list."""
    kept = data.dropna()
    return kept.tolist()


# Convert back to a list of transactions, one list per DataFrame row,
# with the padding removed again by `deal`.
df_arr = [deal(row) for _, row in shopping_df.iterrows()]



In [3]:
from mlxtend.preprocessing import TransactionEncoder    # 可以用这种方法来转换为bool值，也可以用函数转换为0、1
 
# One-hot encode the transactions: learn the set of distinct items first,
# then turn every transaction into one row with a column per item.
te = TransactionEncoder()
te.fit(df_arr)
df_data = te.transform(df_arr)

# Wrap the encoded array in a DataFrame labelled with the learned item names.
data = pd.DataFrame(data=df_data, columns=te.columns_)

In [4]:
from mlxtend.frequent_patterns import apriori
 
# Mine every itemset whose support is at least 0.3.
# use_colnames=True reports itemsets by item name; with the default (False)
# mlxtend reports them by column index instead.
frequent_itemsets = apriori(data, min_support=0.3, use_colnames=True)

# Order the frequent itemsets from highest to lowest support.
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

In [5]:
from mlxtend.frequent_patterns import association_rules
 
# Derive rules from the frequent itemsets, keeping those with confidence >= 0.9.
# `metric` accepts other measures as well; presumably any metric column of the
# returned table can be used here -- confirm against the mlxtend documentation.
association_rule = association_rules(
    frequent_itemsets, metric='confidence', min_threshold=0.9
)

# Order the rules from highest to lowest leverage before printing them.
association_rule = association_rule.sort_values(by='leverage', ascending=False)
print(association_rule)

            antecedents         consequents  antecedent support  \
4               (git教程)  (python进阶, 机器学习实训)                 0.4   
9  (spark大数据分析, 机器学习实训)              (线性代数)                 0.4   
0                (线性代数)            (机器学习实训)                 0.6   
1      (python进阶, 线性代数)            (机器学习实训)                 0.4   
2     (python进阶, git教程)            (机器学习实训)                 0.4   
3       (机器学习实训, git教程)          (python进阶)                 0.4   
5   (scikit-learn 机器学习)          (python进阶)                 0.4   
6               (git教程)            (机器学习实训)                 0.4   
7               (git教程)          (python进阶)                 0.4   
8    (spark大数据分析, 线性代数)            (机器学习实训)                 0.4   

   consequent support  support  confidence      lift  leverage  conviction  
4                 0.6      0.4         1.0  1.666667      0.16         inf  
9                 0.6      0.4         1.0  1.666667      0.16         inf  
0                 0.8      0.6 

In [6]:
from mlxtend.frequent_patterns import apriori, fpgrowth
import datetime 

def _mine_rules_timed(miner, transactions, min_support=0.4, min_confidence=0.9):
    """Run one frequent-itemset miner end to end and print how long it took.

    Prints the current timestamp twice (formatted, then the default repr),
    mines the frequent itemsets, derives the association rules, prints the
    rules, and finally prints the elapsed time.

    Parameters
    ----------
    miner : callable
        Frequent-itemset function called as
        ``miner(transactions, min_support=..., use_colnames=True)``;
        ``apriori`` and ``fpgrowth`` are the ones used below.
    transactions : DataFrame
        One-hot encoded transactions handed straight to ``miner``.
    min_support : float
        Minimum support forwarded to ``miner``.
    min_confidence : float
        Minimum confidence a rule must reach to be kept.

    Returns
    -------
    tuple
        ``(itemsets, rules)``: the itemsets sorted by descending support and
        the rules sorted by descending leverage.
    """
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print(datetime.datetime.today())

    start = datetime.datetime.now()
    itemsets = miner(transactions, min_support=min_support, use_colnames=True)
    itemsets = itemsets.sort_values(by='support', ascending=False)
    rules = association_rules(itemsets, metric='confidence',
                              min_threshold=min_confidence)
    rules = rules.sort_values(by='leverage', ascending=False)
    # Printing stays inside the timed region so that the measurement covers
    # the same work as before.
    print(rules)
    end = datetime.datetime.now()

    # timedelta.seconds is only the whole-seconds component, so a sub-second
    # run would always be reported as 0; total_seconds() keeps the fraction.
    print("程序运行时间：" + str((end - start).total_seconds()) + "秒")
    return itemsets, rules


# Same data and thresholds for both algorithms, so only the miner differs.
frequent_itemsets1, association_rule1 = _mine_rules_timed(apriori, data)
frequent_itemsets2, association_rule2 = _mine_rules_timed(fpgrowth, data)

2020-04-13 18:28:40
2020-04-13 18:28:40.177321
            antecedents         consequents  antecedent support  \
4               (git教程)  (python进阶, 机器学习实训)                 0.4   
9  (spark大数据分析, 机器学习实训)              (线性代数)                 0.4   
0                (线性代数)            (机器学习实训)                 0.6   
1      (python进阶, 线性代数)            (机器学习实训)                 0.4   
2     (python进阶, git教程)            (机器学习实训)                 0.4   
3       (机器学习实训, git教程)          (python进阶)                 0.4   
5   (scikit-learn 机器学习)          (python进阶)                 0.4   
6               (git教程)            (机器学习实训)                 0.4   
7               (git教程)          (python进阶)                 0.4   
8    (spark大数据分析, 线性代数)            (机器学习实训)                 0.4   

   consequent support  support  confidence      lift  leverage  conviction  
4                 0.6      0.4         1.0  1.666667      0.16         inf  
9                 0.6      0.4         1.0  1.666667      0.1