In [1]:
import os
import random

import numpy as np
import pandas as pd
import sobol_seq
from platypus import NSGAII, Problem, Real
from scipy.optimize import minimize as scipyminimize
from tqdm import tqdm
# from pygmo import hypervolume

from GPmodel import GaussianProcess
from singlemes import MaxvalueEntropySearch

In [2]:
# Directory (relative to the notebook) that holds the sample CSV files
path = '../'

# Input dimensionality
d = 9

# Column names read from the sample CSVs: the d inputs and the objectives.
# Deriving the input names from d keeps them in step with the dimensionality.
inputs = ['x%d' % k for k in range(1, d + 1)]
functions = ['zdt2_y1', 'zdt2_y2']

# Fix the random seeds
seed = 0
np.random.seed(seed)
# NOTE(review): Platypus' NSGA-II appears to draw from Python's `random`
# module rather than NumPy, so seed it too for reproducible runs - confirm.
random.seed(seed)

# Bounds of X (shared by every input dimension)
bound = [0, 1]
Fun_bounds = [bound] * d

# Number of samples used when evaluating the acquisition function
sample_number = 32

# Number of samples used when picking new points
MC_sample = 2048

# Row 0 holds the lower bounds and row 1 the upper bounds, one column per input
bounds = np.array([[bound[0]] * d, [bound[1]] * d], dtype=float)

In [44]:
def opt(filename, batch_size, log, output):
    """Propose the next batch of input points using the ``mesmo_acq`` acquisition.

    Steps, in order:

    1. Read ``path + filename + '.csv'`` and rescale the input columns with
       the module-level ``bounds``.
    2. Fit one ``GaussianProcess`` per objective and wrap each one in a
       ``MaxvalueEntropySearch`` helper.
    3. ``sample_number`` times: re-sample the helpers, run NSGA-II on their
       ``f_regression`` outputs and record, per objective, the negated
       minimum found on the resulting front.
    4. Minimise ``mesmo_acq`` with L-BFGS-B from ``MC_sample`` random starts
       and keep the ``batch_size`` candidates with the lowest value.
    5. Pickle the intermediate state to ``'<log>.pkl'`` and write the
       selected points, mapped back through ``bounds``, to ``'<output>.csv'``.

    Parameters
    ----------
    filename : str
        Stem of the input CSV; it must contain the ``inputs`` and
        ``functions`` columns.
    batch_size : int
        Maximum number of points to propose.
    log : str
        Stem of the pickle file that receives the run state.
    output : str
        Stem of the CSV file that receives the proposed points.

    Returns
    -------
    None
        Results are only written to disk.
    """
    import pickle  # local import: the notebook's import cell does not provide it

    # Load the already evaluated samples
    df = pd.read_csv(path + filename + '.csv')

    initial_x = np.array(df[inputs])
    initial_y = np.array(df[functions])
    # Rescale the inputs with the configured lower/upper bounds
    initial_x = (initial_x - bounds[0]) / (bounds[1] - bounds[0])
    # initial_y = (initial_y - initial_y.mean(axis=0) )/initial_y.std(axis=0)

    n_obj = len(functions)

    # One surrogate per objective, all trained on the same inputs
    GPs = [GaussianProcess(d) for _ in range(n_obj)]
    for i in range(n_obj):
        for x_, y_ in zip(initial_x, initial_y):
            GPs[i].addSample(np.asarray(x_), y_[i])

    for gp in GPs:
        gp.fitModel()

    # Build each entropy-search helper once, after all GPs are fitted
    Multiplemes = []
    for gp in GPs:
        mes = MaxvalueEntropySearch(gp)
        mes.Sampling_RFM()
        Multiplemes.append(mes)

    max_samples = []
    for _ in range(sample_number):
        for mes in Multiplemes:
            mes.weigh_sampling()

        def CMO(xi):
            # Objective vector handed to NSGA-II: one value per helper
            xi = np.asarray(xi)
            return [Multiplemes[i].f_regression(xi)[0][0] for i in range(len(GPs))]

        problem = Problem(d, n_obj)
        problem.types[:] = Real(bound[0], bound[1])
        problem.function = CMO
        algorithm = NSGAII(problem)
        algorithm.run(1500)
        cheap_pareto_front = [list(solution.objectives) for solution in algorithm.result]
        # Picking the max over the Pareto front: best case per objective
        maxoffunctions = [-1 * min(f) for f in zip(*cheap_pareto_front)]
        max_samples.append(maxoffunctions)

    def mesmo_acq(x):
        # Average over the front samples of the summed per-objective
        # single_acq values.
        multi_obj_acq_total = 0
        for sample in max_samples:
            multi_obj_acq_sample = 0
            for i in range(n_obj):
                multi_obj_acq_sample = multi_obj_acq_sample + Multiplemes[i].single_acq(x, sample[i])
            multi_obj_acq_total = multi_obj_acq_total + multi_obj_acq_sample
        return multi_obj_acq_total / sample_number

    def _already_sampled(x):
        # Row-wise equality against the training inputs. `x in ndarray` must
        # not be used here: NumPy evaluates it as `(arr == x).any()`, which is
        # true as soon as a single coordinate matches (e.g. a coordinate that
        # L-BFGS-B pushed onto a bound) and would discard valid candidates.
        return any((x == known).all() for known in GPs[0].xValues)

    # Random search for an initial best point that is not a training input
    x_tries = np.random.uniform(bound[0], bound[1], size=(int(MC_sample / 32), d))
    y_tries = [mesmo_acq(x) for x in x_tries]
    sorted_indecies = np.argsort(y_tries)
    i = 0
    x_best = x_tries[sorted_indecies[i]]
    while _already_sampled(x_best):
        i = i + 1
        x_best = x_tries[sorted_indecies[i]]
    y_best = y_tries[sorted_indecies[i]]

    select_x = [x_best]
    select_y = [y_best]

    # L-BFGS-B acquisition optimisation from MC_sample random starting points
    x_seed = list(np.random.uniform(low=bound[0], high=bound[1], size=(MC_sample, d)))
    for x_try in tqdm(x_seed):
        result = scipyminimize(mesmo_acq, x0=np.asarray(x_try).reshape(-1),
                               method='L-BFGS-B', bounds=Fun_bounds)
        if not result.success:
            continue
        if not _already_sampled(result.x):
            select_x.append(result.x)
            select_y.append(result.fun)

    select_x = np.array(select_x)
    select_y = np.array(select_y)
    # Lowest acquisition values first.
    # NOTE(review): different starts may converge to the same optimum, so the
    # batch can contain (near-)duplicate points - consider de-duplicating.
    sorted_indecies = np.argsort(select_y)
    new_x = select_x[sorted_indecies][:batch_size]
    # Map the selected points back through the configured bounds
    new_ = new_x * (bounds[1] - bounds[0]) + bounds[0]

    state = {'df': df, 'd': d, 'functions': functions, 'seed': seed, 'bound': bound,
             'Fun_bounds': Fun_bounds, 'sample_number': sample_number, 'MC_sample': MC_sample,
             'initial_x': initial_x, 'initial_y': initial_y, 'GPs': GPs,
             'Multiplemes': Multiplemes, 'max_samples': max_samples, 'select_x': select_x,
             'select_y': select_y, 'new_x': new_}
    with open('%s.pkl' % log, 'wb') as f:
        pickle.dump(state, f)

    select_df = pd.DataFrame(new_, columns=inputs)
    select_df.to_csv('%s.csv' % output, index=False)

In [45]:
# Propose 4 points from ../test_zdt12_1.csv; writes log_2.pkl and select_2.csv
filename, batch_size = 'test_zdt12_1', 4
log, output = 'log_2', 'select_2'
opt(filename=filename, batch_size=batch_size, log=log, output=output)

100%|██████████████████████████████████████████████████████████████████████████████| 2048/2048 [06:42<00:00,  5.09it/s]


In [46]:
# Propose 4 points from ../test_zdt12_2.csv; writes log_3.pkl and select_3.csv
filename, batch_size = 'test_zdt12_2', 4
log, output = 'log_3', 'select_3'
opt(filename=filename, batch_size=batch_size, log=log, output=output)

100%|██████████████████████████████████████████████████████████████████████████████| 2048/2048 [06:38<00:00,  5.13it/s]


In [47]:
# Propose 4 points from ../test_zdt12_3.csv; writes log_4.pkl and select_4.csv
filename, batch_size = 'test_zdt12_3', 4
log, output = 'log_4', 'select_4'
opt(filename=filename, batch_size=batch_size, log=log, output=output)

100%|██████████████████████████████████████████████████████████████████████████████| 2048/2048 [06:39<00:00,  5.12it/s]
