In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import BaggingClassifier

#importing DCS techniques from DESlib
from deslib.dcs.ola import OLA
from deslib.dcs.a_priori import APriori
from deslib.dcs.mcb import MCB

#import DES techniques from DESlib
from deslib.des.des_p import DESP
from deslib.des.knora_u import KNORAU
from deslib.des.knora_e import KNORAE
from deslib.des.meta_des import METADES

#### 处理数据集
分为三部分，训练集、模型动态选择集、测试集

In [4]:
data = load_breast_cancer()
X = data.data
y = data.target
# split the data into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Scale the variables to have 0 mean and unit variance
scalar = StandardScaler() # 很关键
X_train = scalar.fit_transform(X_train) 
X_test = scalar.transform(X_test)  # 用训练数据tranform测试数据

# Split the data into training and DSEL for DS techniques
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.5)

#### 训练基础分类器集合

In [5]:
# 标签类别需要满足从0到L-1（L为类别总数），不满足条件的可以借助LabelEncoder
model = CalibratedClassifierCV(Perceptron(max_iter=10))
# Train a pool of 10 classifiers
pool_classifiers = BaggingClassifier(model, n_estimators=10)
pool_classifiers.fit(X_train, y_train)

BaggingClassifier(base_estimator=CalibratedClassifierCV(base_estimator=Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=10, n_iter=None, n_jobs=1, penalty=None, random_state=0,
      shuffle=True, tol=None, verbose=0, warm_start=False),
            cv=3, method='sigmoid'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

#### 构建DS模型
唯一参数是基础训练器集合

In [6]:
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)
apriori = APriori(pool_classifiers)

# DES techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

#### DS模型拟合数据

In [7]:
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)
apriori.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

<deslib.des.meta_des.METADES at 0x1a666786828>

#### 预估准确率

In [8]:
print('Classification accuracy OLA: ', ola.score(X_test, y_test))
print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
print('Classification accuracy KNORA-Union: ', knorau.score(X_test, y_test))
print('Classification accuracy KNORA-Eliminate: ', kne.score(X_test, y_test))
print('Classification accuracy DESP: ', desp.score(X_test, y_test))
print('Classification accuracy METADES: ', meta.score(X_test, y_test))
print('Classification accuracy MCB: ', mcb.score(X_test, y_test))

Classification accuracy OLA:  0.9473684210526315
Classification accuracy A priori:  0.9473684210526315
Classification accuracy KNORA-Union:  0.9736842105263158
Classification accuracy KNORA-Eliminate:  0.9649122807017544
Classification accuracy DESP:  0.9736842105263158
Classification accuracy METADES:  0.9736842105263158
Classification accuracy MCB:  0.9649122807017544


#### 轻微调参-0-
以METADES为例

In [9]:
# meta2 = METADES(pool_classifiers,k=5) #指定k时出现ValueError: shapes (110,17) and (21,2) not aligned
meta2 = METADES(pool_classifiers, Hc=0.8, k=5, mode='hybrid')
meta2.fit(X_dsel, y_dsel)
print('Classification accuracy METADES: ', meta2.score(X_test, y_test))

ValueError: shapes (80,17) and (21,2) not aligned: 17 (dim 1) != 21 (dim 0)

In [33]:
fire_mcb = MCB(pool_classifiers, DFP=True, safe_k=7)
fire_mcb.fit(X_dsel,y_dsel)
print(fire_mcb.score(X_test,y_test))
print(mcb.score(X_test,y_test))

0.9736842105263158
0.956140350877193


In [34]:
import platform; print(platform.platform())
import sys; print("Python", sys.version)
import numpy; print("NumPy", numpy.__version__)
import scipy; print("SciPy", scipy.__version__)
import sklearn; print("Scikit-Learn", sklearn.__version__)

Windows-10-10.0.17134-SP0
Python 3.6.5 |Anaconda, Inc.| (default, Mar 29 2018, 13:32:41) [MSC v.1900 64 bit (AMD64)]
NumPy 1.14.5
SciPy 1.1.0
Scikit-Learn 0.19.1


In [1]:
from deslib.dcs.ola import OLA
from deslib.dcs.mcb import MCB
from deslib.des.des_p import DESP
from deslib.des.knora_u import KNORAU
from deslib.des.meta_des import METADES
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Example of a des techniques
from deslib.des.knora_e import KNORAE

# Generate a classification dataset
data = load_breast_cancer()
X = data.data
y = data.target
# split the data into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
RF = RandomForestClassifier()
RF.fit(X_train, y_train)

X_train, X_dsel, y_train, y_dsel = train_test_split(X, y, test_size=0.5)

# Training a random forest to be used as the pool of classifiers. We set the maximum depth of the tree so that it
# can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Initialize a DS technique. Here we specify the size of the region of competence (5 neighbors)
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)
meta = METADES(pool_classifiers)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Classification accuracy RF: ', RF.score(X_test, y_test))
print('Evaluating DS techniques:')
print('Classification accuracy KNORAU: ', knorau.score(X_test, y_test))
print('Classification accuracy KNORA-Eliminate: ', kne.score(X_test, y_test))
print('Classification accuracy DESP: ', desp.score(X_test, y_test))
print('Classification accuracy OLA: ', ola.score(X_test, y_test))
print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
print('Classification accuracy META-DES: ', meta.score(X_test, y_test))


Classification accuracy RF:  0.9440559440559441
Evaluating DS techniques:
Classification accuracy KNORAU:  0.993006993006993
Classification accuracy KNORA-Eliminate:  1.0
Classification accuracy DESP:  0.986013986013986
Classification accuracy OLA:  0.9790209790209791
Classification accuracy MCB:  0.986013986013986
Classification accuracy META-DES:  0.993006993006993


In [12]:
data = load_breast_cancer()
X = data.data
y = data.target
# split the data into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
RF = RandomForestClassifier()
RF.fit(X_train, y_train)

X_train, X_dsel, y_train, y_dsel = train_test_split(X, y, test_size=0.50)

# Training a random forest to be used as the pool of classifiers. We set the maximum depth of the tree so that it
# can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Initialize a DS technique. Here we specify the size of the region of competence (5 neighbors)
knorau = KNORAU(pool_classifiers,k=5)
kne = KNORAE(pool_classifiers, k=5)
desp = DESP(pool_classifiers, k=5)
ola = OLA(pool_classifiers, k=5)
mcb = MCB(pool_classifiers, k=5)
meta = METADES(pool_classifiers)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Classification accuracy RF: ', RF.score(X_test, y_test))
print('Evaluating DS techniques:')
print('Classification accuracy KNORAU: ', knorau.score(X_test, y_test))
print('Classification accuracy KNORA-Eliminate: ', kne.score(X_test, y_test))
print('Classification accuracy DESP: ', desp.score(X_test, y_test))
print('Classification accuracy OLA: ', ola.score(X_test, y_test))
print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
print('Classification accuracy META-DES: ', meta.score(X_test, y_test))

Classification accuracy RF:  0.8881118881118881
Evaluating DS techniques:
Classification accuracy KNORAU:  0.9440559440559441
Classification accuracy KNORA-Eliminate:  0.993006993006993
Classification accuracy DESP:  0.9440559440559441
Classification accuracy OLA:  0.951048951048951
Classification accuracy MCB:  0.9790209790209791
Classification accuracy META-DES:  0.993006993006993


#### 不同的base learner
结果都很糟糕
最好的步骤是：
- 先用gridsearch，cv找单个模型的最好参数
- 训练几个表现不错的base learner之后再用stacking方法

In [26]:
'''
In this example we show that the framework can also be used using different classifier models in the pool
of classifiers. Such pool of classifiers are called Heterogeneous.
Here we consider a pool of classifiers composed of a Gaussian Naive Bayes, Perceptron, k-NN, Decision tree
Linear SVM and Gaussian SVM
'''

# Importing dynamic selection techniques:
from deslib.dcs.a_posteriori import APosteriori
from deslib.dcs.mcb import MCB
from deslib.dcs.lca import LCA
from deslib.des.probabilistic import RRC
from deslib.des.knop import KNOP
from deslib.des.knora_e import KNORAE

# Base classifier models:
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.calibration import CalibratedClassifierCV

# Importing dataset
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler



# Generate a classification dataset
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
# Scale the variables to have 0 mean and unit variance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Split the data into training and DSEL for DS techniques
X_train, X_dsel, y_train, y_dsel = train_test_split(X, y, test_size=0.5)

# 基础学习器都太弱了，不是全0就是全1
# STACKING有用的基础是，base learner表现好而不同
model_perceptron = CalibratedClassifierCV(Perceptron(max_iter=100)).fit(X_train, y_train)
model_linear_svm = CalibratedClassifierCV(LinearSVC()).fit(X_train, y_train)
model_svc = SVC(probability=True).fit(X_train, y_train)
model_bayes = GaussianNB().fit(X_train, y_train)
model_tree = DecisionTreeClassifier().fit(X_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
pool_classifiers = [model_perceptron, model_linear_svm, model_svc, model_bayes, model_tree, model_knn]

# Initializing the DS techniques
knop = KNOP(pool_classifiers)
rrc = RRC(pool_classifiers)
lca = LCA(pool_classifiers)
mcb = MCB(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)

# Fitting the techniques
knop.fit(X_dsel, y_dsel)
rrc.fit(X_dsel, y_dsel)
lca.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)
aposteriori.fit(X_dsel, y_dsel)
# print("base learners' score:")
# print('bayes:',model_bayes.score(X_test,y_test))
# print('knn:',model_knn.score(X_test,y_test))
# print('Linear SVM:',model_linear_svm.score(X_test,y_test))
# print('perceptron:',model_perceptron.score(X_test,y_test))
# print('decision tree:',model_tree.score(X_test,y_test))
# print('SVC:',model_svc.score(X_test,y_test))


# Calculate classification accuracy of each technique
# print('Evaluating DS techniques:')
# print('Classification accuracy KNOP: ', knop.score(X_test, y_test))
# print('Classification accuracy RRC: ', rrc.score(X_test, y_test))
# print('Classification accuracy LCA: ', lca.score(X_test, y_test))
# print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
# print('Classification accuracy A posteriori: ', aposteriori.score(X_test, y_test))

model_linear_svm.predict(X_test)

  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [20]:
X_test[0].shape

(30,)

In [27]:
# model_perceptron, model_linear_svm, model_svc, model_bayes, model_tree, model_knn
# 试一下Voting
from sklearn.ensemble import VotingClassifier
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
model_perceptron = CalibratedClassifierCV(Perceptron(max_iter=100)).fit(X_train, y_train)
model_linear_svm = CalibratedClassifierCV(LinearSVC()).fit(X_train, y_train)
model_svc = SVC(probability=True).fit(X_train, y_train)
model_bayes = GaussianNB().fit(X_train, y_train)
model_tree = DecisionTreeClassifier().fit(X_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

voting_clf = VotingClassifier( estimators=[('p', model_perceptron), 
                                           ('svm', model_linear_svm), 
                                           ('svc', model_svc),
                                           ('bayes',model_bayes),
                                           ('decision tree',model_tree),
                                           ('knn',model_knn)
                                          ], voting='hard')
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test,y_test)

  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  TEP_minus_T1P = P * (T * E - T1)
  if diff:


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])