In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs

np.random.seed(42)

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes',labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

PROJECT_ROOT_DIR="."
CHAPTER_ID="svm"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import pandas as pd

file_path="D:/study/ml/数据/ae信号/dfnc_sheet3.xlsx"
df=pd.read_excel(file_path)

In [6]:
y=df['BQ']
df2=df.drop(['BQ','LL','LL1','LL2','LL3','LL4'],axis=1)
df2=np.array(df2)

In [7]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
X=df2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(max_depth=3,max_features=1,n_estimators=100,min_samples_split=6, n_jobs=-1, random_state=42)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred_rf))

0.9937106918238994


In [9]:
from sklearn.model_selection import KFold

kf = KFold(5, True, 42)

X, Y = X_train, y_train
# for train_index, test_index in kf.split(X):
#     print('训练集:{}'.format(train_index))
#     print('测试集:{}'.format(test_index))
 



In [10]:
from sklearn.model_selection import cross_val_score

socres=cross_val_score(rnd_clf,X_train,y_train,cv=kf)

In [11]:
def display_scores(scores):
    print("scores:",scores)
    print("mean:",scores.mean())
    print("strandard deviation:",socres.std())
    
display_scores(socres)

scores: [1. 1. 1. 1. 1.]
mean: 1.0
strandard deviation: 0.0


In [12]:
rnd_clf.score(X_test,y_test)

0.9937106918238994

In [13]:
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV,cross_val_score

clf = RandomForestClassifier(n_estimators=100)

# 给定参数搜索范围：list or distribution
param_dist = {"max_depth": [3, 20],                     #给定list
              "max_features": sp_randint(1,11),          #给定distribution
              "min_samples_split": sp_randint(2, 11),     #给定distribution
              "bootstrap": [True, False],                 #给定list
              "criterion": ["gini", "entropy"],
              "n_estimators":[1,100]}           #给定list

# 用RandomSearch+CV选取超参数
n_iter_search = 20
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search, cv=5, iid=False)
random_search.fit(X_train, y_train)

Traceback (most recent call last):
  File "D:\study\anaconda\envs\Test\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "D:\study\anaconda\envs\Test\lib\site-packages\sklearn\ensemble\_forest.py", line 386, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "D:\study\anaconda\envs\Test\lib\site-packages\joblib\parallel.py", line 1048, in __call__
    if self.dispatch_one_batch(iterator):
  File "D:\study\anaconda\envs\Test\lib\site-packages\joblib\parallel.py", line 866, in dispatch_one_batch
    self._dispatch(tasks)
  File "D:\study\anaconda\envs\Test\lib\site-packages\joblib\parallel.py", line 784, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "D:\study\anaconda\envs\Test\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "D:\study\anaconda\envs\Test\lib\site-packages\jo

RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(), iid=False,
                   n_iter=20,
                   param_distributions={'bootstrap': [True, False],
                                        'criterion': ['gini', 'entropy'],
                                        'max_depth': [3, 20],
                                        'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002331B3FB340>,
                                        'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002334C80C1F0>,
                                        'n_estimators': [1, 100]})

In [14]:

best_estimator = random_search.best_estimator_
print(best_estimator)
#输出最优训练器的精度
print(random_search.best_score_)
print(random_search.score(X_test,y_test))

RandomForestClassifier(bootstrap=False, max_depth=20, max_features=2,
                       min_samples_split=10)
1.0
0.9937106918238994


In [18]:
random_search.predict([[1,2]])

array([0], dtype=int64)