## <center> 贝叶斯优化 </center>

### 一.高斯过程

In [30]:
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=999)

clf = GaussianNB()
#拟合数据
clf.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [31]:
clf.score(X_test, y_test)

0.9333333333333333

### 二. sklearn 贝叶斯优化 使用案例

In [24]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
import numpy as np

import warnings
warnings.filterwarnings("ignore")

# 产生随机分类数据集，10个特征， 2个类别
x, y = make_classification(n_samples=1000,n_features=10,n_classes=2)

In [2]:
rf = RandomForestClassifier()
print(np.mean(cross_val_score(rf, x, y, cv=20, scoring='roc_auc')))

0.988395


In [3]:
def rf_cv(n_estimators, min_samples_split, max_features, max_depth):
    val = cross_val_score(
        RandomForestClassifier(n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999), # float
            max_depth=int(max_depth),
            random_state=2
        ),
        x, y, scoring='roc_auc', cv=5
    ).mean()
    return val

In [21]:
rf_bo = BayesianOptimization(
        rf_cv,
        {'n_estimators': [10, 100],
        'min_samples_split': [2, 10],
        'max_features': [0.1, 0.5],
        'max_depth': [5, 10]
    }
    )

In [22]:
rf_bo.maximize()

|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.9885  [0m | [0m 8.997   [0m | [0m 0.2599  [0m | [0m 8.029   [0m | [0m 56.84   [0m |
| [0m 2       [0m | [0m 0.9847  [0m | [0m 5.119   [0m | [0m 0.1971  [0m | [0m 7.079   [0m | [0m 30.91   [0m |
| [0m 3       [0m | [0m 0.9881  [0m | [0m 7.042   [0m | [0m 0.108   [0m | [0m 9.42    [0m | [0m 96.77   [0m |
| [0m 4       [0m | [0m 0.9866  [0m | [0m 7.069   [0m | [0m 0.1798  [0m | [0m 3.799   [0m | [0m 67.93   [0m |
| [0m 5       [0m | [0m 0.9874  [0m | [0m 6.132   [0m | [0m 0.1444  [0m | [0m 7.82    [0m | [0m 43.33   [0m |
| [95m 6       [0m | [95m 0.991   [0m | [95m 9.209   [0m | [95m 0.339   [0m | [95m 2.502   [0m | [95m 99.88   [0m |
| [0m 7       [0m | [0m 0.991   [0m | [0m 9.555   [0m | [0m 0.3138  [0m | [0m 2.119   [0m | [0m 99.76  

In [6]:
rf_bo.max

{'target': 0.9917291809180918,
 'params': {'max_depth': 7.805137623288971,
  'max_features': 0.3695655852991595,
  'min_samples_split': 2.9263805056874252,
  'n_estimators': 249.57319688350591}}

In [8]:
for i, res in enumerate(rf_bo.res):
    print("Iteration {}: \n\t{}".format(i, res))

Iteration 0: 
	{'target': 0.9889771647164716, 'params': {'max_depth': 6.893182598099855, 'max_features': 0.5991968134251776, 'min_samples_split': 18.36382269893527, 'n_estimators': 102.47549659456831}}
Iteration 1: 
	{'target': 0.9870093309330933, 'params': {'max_depth': 8.394267888570939, 'max_features': 0.9600138277027127, 'min_samples_split': 6.089463692975234, 'n_estimators': 10.514162943770726}}
Iteration 2: 
	{'target': 0.9901545424542455, 'params': {'max_depth': 7.149097161768745, 'max_features': 0.4711225567635189, 'min_samples_split': 2.130241382853619, 'n_estimators': 249.7795342945158}}
Iteration 3: 
	{'target': 0.9896797589758975, 'params': {'max_depth': 12.196698592769122, 'max_features': 0.7101357468886657, 'min_samples_split': 24.069009748820644, 'n_estimators': 249.60070230186835}}
Iteration 4: 
	{'target': 0.9917291809180918, 'params': {'max_depth': 7.805137623288971, 'max_features': 0.3695655852991595, 'min_samples_split': 2.9263805056874252, 'n_estimators': 249.57319

In [16]:
rf_bo.probe(
    params={'n_estimators': 10,
        'min_samples_split': 2,
        'max_features': 0.5,
        'max_depth': 5
    },
    lazy=True
)

In [17]:
rf_bo.maximize(init_points=0, n_iter=0)

|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
| [0m 15      [0m | [0m 0.9871  [0m | [0m 5.0     [0m | [0m 0.5     [0m | [0m 2.0     [0m | [0m 7.0     [0m |
| [0m 16      [0m | [0m 0.9871  [0m | [0m 5.0     [0m | [0m 0.5     [0m | [0m 2.0     [0m | [0m 7.0     [0m |
| [0m 17      [0m | [0m 0.9871  [0m | [0m 5.0     [0m | [0m 0.5     [0m | [0m 2.0     [0m | [0m 7.0     [0m |


### 三. 深度学习 贝叶斯优化使用案例

In [32]:
from NFtorch.DeepLearning import Regression
import torch
from torch import nn
from sklearn import datasets

boston = datasets.load_boston()

X, y = boston.data, boston.target

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=19)

from sklearn.preprocessing import StandardScaler

standard = StandardScaler()
standard.fit(X_train)
X_train_standard = standard.transform(X_train)
X_test_standard = standard.transform(X_test)

In [43]:
class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)

        self.layer2 = nn.ReLU()

        self.layer3 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

In [44]:
num_input = X_train_standard.shape[1]
num_hidden = 32
num_output = 1

In [45]:
net = module_net(num_input, num_hidden, num_output)

In [46]:
from sklearn.metrics import r2_score

# 计算 prediction
def calculate_pred(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train, X_test, y_test)
    pred = model.predict(X_test)
    return pred

# 构造符合 贝叶斯优化 的函数
def bayes_reg(dropout, weight_decay, X_train, y_train, X_test, y_test):

    # 自定义参数, 这里采用的是 “学习率衰减” 的方法，如传入list，为梯度衰减，反之为正常lr。
    reg = Regression(net, learning_rate=[1e-3, 1e-5, 1e-7], 
                     dropout=dropout, weight_decay=weight_decay,
                     epoch=2000, batch_size=128)
    
    r2 = r2_score(y_test, calculate_pred(reg, X_train, y_train, X_test, y_test))
    return r2

def optimize_reg(X_train, y_train, X_test, y_test):
    """Apply Bayesian Optimization to Random Forest parameters."""
    def reg_bayes(dropout, weight_decay):
        """Wrapper of RandomForest cross validation.

        Notice how we ensure n_estimators and min_samples_split are casted
        to integer before we pass them along. Moreover, to avoid max_features
        taking values outside the (0, 1) range, we also ensure it is capped
        accordingly.
        """
        return bayes_reg(
            dropout=dropout,
            weight_decay=weight_decay,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
        )

    optimizer = BayesianOptimization(
        f=reg_bayes,
        pbounds={
            "dropout": (0, 0.5),
            "weight_decay": (1e-6, 1e-7),
        },
        random_state=1234,
        verbose=2
    )
    optimizer.maximize(n_iter=3, init_points=2)

    print("Final result:", optimizer.max)

In [47]:
optimize_reg(X_train_standard, y_train, X_test_standard, y_test)

|   iter    |  target   |  dropout  | weight... |
-------------------------------------------------
Now we're using the CPU
The error changes within 0.01
Training... epoch: 1090, loss: 1.5363640785217285
[1;35m Testing... epoch: 1090, loss: 28.518508911132812 [0m!
Now learning rate is : 1e-05
The error changes within 0.001
Training... epoch: 1110, loss: 1.529030442237854
[1;35m Testing... epoch: 1110, loss: 28.51602554321289 [0m!
Now learning rate is : 1e-07
The error changes within 0.0001
Training... epoch: 1130, loss: 1.5289645195007324
[1;35m Testing... epoch: 1130, loss: 28.516002655029297 [0m!
The meaning of the loop is not big, stop!!
Training completed!!! Time consuming: 2.109017848968506
| [0m 1       [0m | [0m 0.7275  [0m | [0m 0.09576 [0m | [0m 4.401e-0[0m |
Now we're using the CPU
The error changes within 0.01
Training... epoch: 298, loss: 0.6897491812705994
[1;35m Testing... epoch: 298, loss: 23.770870208740234 [0m!
Now learning rate is : 1e-05
The error cha