In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

# import emukit
import GPy
# from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
# from emukit.core.initial_designs.random_design import RandomDesign
# from emukit.core.initial_designs.latin_design import LatinDesign
from GPy.models import GPRegression
from emukit.model_wrappers import GPyModelWrapper

# from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement, \
                                                      NegativeLowerConfidenceBound, \
                                                      MaxValueEntropySearch, \
                                                      ProbabilityOfImprovement
# from emukit.core.acquisition import IntegratedHyperParameterAcquisition

from rgpe import compute_rank_weights

In [2]:

import numpy as np
from scipy.spatial.distance import cdist
from emukit.core.initial_designs.latin_design import LatinDesign

def find_y_by_x(x_input, X_all, Y_all):
    """Look up the ``Y_all`` entries whose ``X_all`` row equals each query row.

    Args:
        x_input: Query point(s) as a NumPy array. A 1-D array is treated as a
            single query row.
        X_all: 2-D array of candidate rows, compared with ``==`` (exact
            equality, no tolerance).
        Y_all: Values indexed by row position of ``X_all``.

    Returns:
        ``np.array`` built from one list per query row; each list holds the
        ``Y_all`` values at every position where the ``X_all`` row matches.
    """
    # Promote a single 1-D query to a one-row 2-D array.
    queries = x_input.reshape(1, -1) if x_input.ndim == 1 else x_input

    per_query = []
    for query in queries:
        # True for every row of X_all that equals the query in all columns.
        row_hits = np.all(X_all == query, axis=1)
        per_query.append([Y_all[pos] for pos in np.flatnonzero(row_hits)])

    return np.array(per_query)

def x_normalizer(X, var_array):
    """Min-max scale every row of ``X`` using the ranges found in ``var_array``.

    Args:
        X: Iterable of rows; each row is an indexable sequence of numbers.
        var_array: Indexable collection where ``var_array[i]`` holds the
            values of feature ``i``; its min and max define the scaling range.

    Returns:
        A list of lists with ``(x - min) / (max - min)`` for every element.
        A feature whose min equals its max is scaled with a divisor of 1
        (so values equal to that constant map to 0) instead of dividing by
        zero.
    """
    # feature index -> (lower bound, span); filled on first use so that the
    # min/max of a column is computed once rather than once per element.
    bounds = {}

    def feature_bounds(i):
        if i not in bounds:
            column = var_array[i]
            lower = min(column)
            bounds[i] = (lower, max(column) - lower)
        return bounds[i]

    def max_min_scaler(value, lower, span):
        # Zero span means a constant feature: avoid 0/0 by dividing by 1.
        return (value - lower) / (span if span != 0 else 1)

    x_norm = []
    for x in X:
        x_norm.append([max_min_scaler(x[i], *feature_bounds(i))
                       for i in range(len(x))])

    return x_norm

def x_denormalizer(x_norm, var_array):
    """Map min-max scaled rows back to the original feature ranges.

    Args:
        x_norm: Iterable of scaled rows; each row is an indexable sequence.
        var_array: Indexable collection where ``var_array[i]`` holds the
            values of feature ``i``; its min and max define the range.

    Returns:
        A list of lists with ``x * (max - min) + min`` for every element.
    """
    restored_rows = []
    for scaled_row in x_norm:
        restored = []
        for col in range(len(scaled_row)):
            upper = max(var_array[col])
            lower = min(var_array[col])
            restored.append(scaled_row[col] * (upper - lower) + lower)
        restored_rows.append(restored)

    return restored_rows




# def get_closest_array(x_init, X_all):
#     # 计算 x_init 中每个点与 X_all 中所有点之间的距离
#     distances = cdist(x_init, X_all)   
#     # 找到每个点最近的索引
#     closest_indices = np.argmin(distances, axis=1)    
#     # 根据索引获取最近的点
#     closest_array = X_all[closest_indices]  
#     return closest_array

def get_closest_array(x_init, X_all):
    """Greedily match each query point to its nearest unused row of ``X_all``.

    Queries are processed in order; once a row of ``X_all`` has been chosen
    it is removed from the candidate pool, so no row is returned twice.
    ``X_all`` itself is not modified (a copy is used).

    Args:
        x_init: Iterable of query points.
        X_all: 2-D NumPy array of candidate points.

    Returns:
        ``np.array`` with one selected candidate row per query point.
    """
    remaining = X_all.copy()
    picked = []

    for query in x_init:
        # Distances from this single query to every remaining candidate.
        dists = cdist(np.atleast_2d(query), remaining)[0]
        nearest = int(np.argmin(dists))
        picked.append(remaining[nearest])
        # Drop the chosen candidate so later queries cannot reuse it.
        remaining = np.delete(remaining, nearest, axis=0)

    return np.array(picked)


# # 计算两个点之间的欧几里得距离
# def euclidean_distance(point1, point2):
#     return np.sqrt(np.sum((point1 - point2) ** 2))

# # 找到X_all中与x_init每个元素最近的点
# def get_closest_array(x_init, X_all):
#     closest_array = []
#     for point in x_init:
#         distances = np.array([euclidean_distance(point, x) for x in X_all])
#         closest_idx = distances.argmin()
#         closest_array.append(X_all[closest_idx])
#     return np.array(closest_array)

def create_latin_design(X_all):
    """Split ``X_all`` into per-feature columns and build a Latin design.

    Args:
        X_all: 2-D array whose columns are features.

    Returns:
        Tuple ``(x_array, design)`` where ``x_array`` is a list containing
        each column of ``X_all`` and ``design`` is a ``LatinDesign`` over a
        parameter space with one continuous parameter per column, named
        ``x1`` .. ``xN`` and created with the arguments ``0, 1``.
    """
    # Imported here because the notebook's top-level import of these names is
    # commented out; without it this function raises NameError when called.
    from emukit.core import ContinuousParameter, ParameterSpace

    num_columns = X_all.shape[1]
    x_array = [X_all[:, i] for i in range(num_columns)]

    parameter_space = ParameterSpace(
        [ContinuousParameter(f'x{i+1}', 0, 1) for i in range(num_columns)]
    )
    design = LatinDesign(parameter_space)

    return x_array, design



# 导入目标任务

In [3]:
import pandas as pd
import numpy as np
import os

# folder_path = r'C:\Users\13282\Desktop\papercode\data\Goldstein' 
# file_path = os.path.join(folder_path, 'goldstein_data.xlsx')

# NOTE(review): absolute, machine-specific path; the notebook will not run
# elsewhere without editing it.
folder_path = r'C:\Users\13282\Desktop\papercode\data\Alloy\matminner_processed' 
file_path = os.path.join(folder_path, 'Ti.xlsx')  # author's note: remember to negate y; to recover original values, undo the log transform

# Virtual_ti


# Load the target-task table from the Excel file.
df = pd.read_excel(file_path)

# Drop points that contain NaN (disabled here; the variables X / Y referenced
# below are not defined in this cell)
# nan_indices = np.isnan(Y)
# X = X[~nan_indices.flatten()]
# Y = Y[~nan_indices.flatten()]
print(df.shape)

# All columns except the last are features; the last column is the target,
# stored with its sign flipped.
X_all = df.iloc[:, :-1].values  
Y_all = - df.iloc[:, -1].values   

print(X_all.shape,Y_all.shape)

(88, 10)
(88, 9) (88,)


# x和y预处理

In [29]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Rescale the features with MinMaxScaler. X_all is rebound to the scaled
# array, so the unscaled values are no longer available under this name.
min_max_scaler = MinMaxScaler()
X_all = min_max_scaler.fit_transform(X_all)

# Remove points whose target value is NaN
nan_indices = np.isnan(Y_all)
true_indices = np.where(nan_indices)[0]  # row positions of the NaN targets
print("nan 的索引:", true_indices)

# Keep only the rows with a non-NaN target, in both X_all and Y_all.
X_all = X_all[~nan_indices.flatten()]
Y_all = Y_all[~nan_indices.flatten()]


nan 的索引: []
(88, 9) (88,)
-8.470101583882409 (array([6], dtype=int64),)


# 导入源模型

In [69]:

import pickle

# 定义要加载的模型名称列表

# file_name = "oth2" 
# model_names_to_load = ['base_model_oth2.pkl']
# folder_path = r'C:\Users\13282\Desktop\papercode\trasfer\base_model\Goldstein' 

# Fe  Co  Ni  others  Co_plus
# Pickle files of the source (base) models to load, in the order they will
# appear in `base_models`.
model_names_to_load = ['Fe.pkl','Ni.pkl','Co_plus.pkl',]
# NOTE(review): absolute, machine-specific path.
folder_path = r'C:\Users\13282\Desktop\papercode\trasfer\base_model\Alloy\matminer' 




# SECURITY NOTE(review): pickle.load executes arbitrary code embedded in the
# file; only load model files from a trusted source.
# `os` is used here but imported in an earlier cell.
loaded_models = {}
for model_name in model_names_to_load:
    model_filename = os.path.join(folder_path, model_name)
    with open(model_filename, 'rb') as f:
        loaded_models[model_name] = pickle.load(f)
# Flatten the dict into a list that follows the order of model_names_to_load.
base_models = []
for model_name in model_names_to_load:
    base_models.append(loaded_models[model_name])


# 模型预测

In [75]:
import numpy as np

# 针对最优值点计算同增同减的对数

def calculate_ratio(X_all, Y_all, model):
    """Fraction of points that the model places on the same side of the best point as the data.

    The reference point is the (first) minimum of ``Y_all``. For every point
    the true offset ``Y - Y_min`` is multiplied by the predicted offset
    ``pred - pred_at_min``; a strictly positive product means both offsets
    have the same sign. The reference point itself always yields 0 and is
    therefore never counted, although it is included in the denominator.

    Args:
        X_all: Inputs passed unchanged to ``model.predict``.
        Y_all: Target values, one per row of ``X_all``; flattened to 1-D.
        model: Object whose ``predict(X_all)`` returns a pair; the first item
            must be a 2-D array, of which the last column is used.

    Returns:
        The fraction ``count(product > 0) / len(Y_all)``, rounded to two
        decimals.

    Raises:
        ValueError: If ``Y_all`` is empty or contains only NaN
            (raised by ``np.nanargmin``).
    """
    Y_all = np.asarray(Y_all).ravel()

    # Use ONE reference index. Selecting every position equal to the minimum
    # (as np.where does) breaks the subtraction below when the minimum is tied.
    best_idx = np.nanargmin(Y_all)
    y_min = Y_all[best_idx]

    # Only the first item of the prediction pair is used.
    y_pred, _ = model.predict(X_all)
    y_pred = y_pred[:, -1]  # keep only the last prediction column

    truth = Y_all - y_min
    pred = y_pred - y_pred[best_idx]

    # Positive product <=> true and predicted offsets share a sign.
    c = truth * pred
    count = np.sum(c > 0)
    ratio = count / len(c)

    return round(ratio, 2)

# 计算所有点同增同减的对数

def roll_col(X: np.ndarray, shift: int) -> np.ndarray:
    """
    Rotate the columns of X to the right by shift.

    The result is the slice X[:, -shift:] followed by the slice X[:, :-shift].
    """
    trailing = X[:, -shift:]
    leading = X[:, :-shift]
    return np.concatenate((trailing, leading), axis=1)



def compute_ranking_loss(
    f_samps: np.ndarray,
    target_y: np.ndarray,
    target_model: bool,
) -> np.ndarray:
    """
    Count, per row of f_samps, the ordered pairs that are ranked differently
    from target_y.

    Each row of f_samps is compared against target_y by rotating the columns
    by every offset from 1 to len(target_y) - 1, so that each column meets
    every other column once. A pair adds 1 to the loss when "rotated
    prediction < reference" disagrees (XOR) with "rotated target < target".
    The reference is f_samps itself when target_model is False and the
    stacked targets when target_model is True.

    Returns an array with one loss value per row of f_samps.
    """
    n_samples = f_samps.shape[0]
    # One copy of the targets per sample row.
    y_stack = np.tile(target_y.reshape((-1, 1)), n_samples).transpose()

    # What the rotated predictions are compared against.
    reference = y_stack if target_model else f_samps

    rank_loss = np.zeros(n_samples)
    for offset in range(1, target_y.shape[0]):
        predicted_lower = roll_col(f_samps, offset) < reference
        truly_lower = roll_col(y_stack, offset) < y_stack
        rank_loss += np.sum(predicted_lower ^ truly_lower, axis=1)

    return rank_loss

def calculate_all_ratio(X_all, Y_all, model):
    """Fraction of ordered point pairs whose predicted order disagrees with the data.

    The model's predictions are passed to ``compute_ranking_loss`` as a single
    sample row of shape ``(1, n)`` so that the loss compares the predictions
    with one another. Passing them as ``(n, 1)`` would make the result
    independent of the model.

    Args:
        X_all: Inputs passed unchanged to ``model.predict``.
        Y_all: Target values, one per row of ``X_all``.
        model: Object whose ``predict(X_all)`` returns a pair; the first item
            must be a 2-D array, of which the last column is used.

    Returns:
        Tuple ``(ratio, count)``: ``count`` is the ranking loss (number of
        ordered pairs ranked differently by prediction and data) and ``ratio``
        is ``count / (n * (n - 1))`` rounded to two decimals, where ``n`` is
        the number of points. With fewer than two points the ratio is NaN.
    """
    y_pred, _ = model.predict(X_all)
    y_pred = y_pred[:, -1]  # keep only the last prediction column

    y_true = np.asarray(Y_all).reshape(-1)
    n_points = y_true.shape[0]

    # Shape (1, n): one sample row holding the prediction for every point.
    rank_loss = compute_ranking_loss(y_pred.reshape(1, -1), y_true, False)
    count = rank_loss[0]

    # Every ordered pair (a, b) with a != b is compared exactly once.
    all_count = n_points * (n_points - 1)
    ratio = count / all_count
    ratio = round(ratio, 2)

    return ratio, count
    

In [77]:
# Score every loaded source model against the target data with
# calculate_ratio and collect the results in load order.
ratios = []

for model in base_models:
    # calculate_ratio calls `.predict` on its third argument, so the object
    # stored in `model.model` must provide it.
    # NOTE(review): presumably the GPy model held by a GPyModelWrapper - confirm.
    ratio = calculate_ratio(X_all, Y_all, model.model)
#     ratio ,count = calculate_all_ratio(X_all, Y_all, model.model)
    
    ratios.append(ratio)
    

ratios_array = np.array(ratios)
print(ratios_array)

[0.6  0.55 0.56]
