## 准备工作

In [1]:
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
from tqdm import tqdm
from differential_evolution.DifferentialEvolution import DifferentialEvolution
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # SVC用于分类，SVR用于回归
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

### 实值映射到0-1

In [ ]:
# Decode an individual's real-valued genes into the indices of the selected instances.
def get_indices(x):
    """Return the positions of ``x`` whose value truncates to 1.

    Every gene is cast to ``int`` (which drops the fractional part); a
    truncated value of 1 means "select this instance".

    Args:
        x: NumPy array holding one individual's encoding.

    Returns:
        The first index array produced by ``np.nonzero`` for the "equals 1" mask.
    """
    truncated = x.astype(int)
    selected_mask = truncated == 1
    # np.nonzero yields one index array per axis; only the first one is returned.
    return np.nonzero(selected_mask)[0]


# Collect, for every class label, how many instances carry it and where they are.
def get_class_index(y):
    """Return the number of classes, per-class index arrays and per-class counts.

    Labels are treated as class ids ``0 .. y.max()``, so ``y.max() + 1`` is
    used as the number of classes.

    Args:
        y: 1-D NumPy array of non-negative integer labels.

    Returns:
        A tuple ``(num_class, classes, counts)`` where ``classes[k]`` is the
        array of positions in ``y`` whose label equals ``k`` and ``counts[k]``
        is the number of such positions.

    Raises:
        ValueError: if ``y`` is empty (from ``y.max()``) or contains negative
            labels (from ``np.bincount``).
    """
    num_class = y.max() + 1
    # np.bincount does the per-label counting in one vectorised pass;
    # minlength pins the length to the number of classes.
    counts = np.bincount(y, minlength=int(num_class))
    # np.where returns a tuple of index arrays; [0] takes the only axis of a 1-D y.
    classes = [np.where(y == label)[0] for label in range(num_class)]
    return num_class, classes, counts

### 获取实例子集

In [ ]:
# Build the selected subset (data and labels) from the given indices.
# `minimum` must not exceed the size of the smallest class in the full dataset.
def get_sub_dataset(indices, x, y, num_class, classes, minimum=10):
    """Return the instance subset chosen by ``indices``, topped up per class.

    After selecting ``x[indices]`` / ``y[indices]``, every class that has fewer
    than ``minimum`` instances in the subset is topped up with instances of
    that class drawn at random (without replacement) from those not selected.

    Args:
        indices: integer positions of the selected instances.
        x: 2-D data array, one row per instance.
        y: 1-D array of non-negative integer labels aligned with ``x``.
        num_class: number of classes.
        classes: ``classes[k]`` holds the positions in ``y`` of class ``k``.
        minimum: minimum number of instances required per class.

    Returns:
        A tuple ``(x_sub, y_sub)``; topped-up instances are appended after the
        originally selected ones.

    Raises:
        ValueError: if a class does not have enough unselected instances left
            to reach ``minimum``.
    """
    x_sub = x[indices, :]
    y_sub = y[indices]

    print("实例子集x：", x_sub)
    print("实例子集y：", y_sub)
    # Number of instances of each class inside the subset.
    counts_sub = np.bincount(y_sub, minlength=int(num_class))
    print("实例子集的数量：", counts_sub)
    # Make sure every class reaches the required minimum.
    for i in range(num_class):
        shortfall = minimum - counts_sub[i]
        if shortfall <= 0:
            continue
        # Candidates are this class's instances that are NOT already selected.
        # The exclusion must use the selected indices, not the label values.
        unselected_indices = np.setdiff1d(classes[i], indices)
        if unselected_indices.size < shortfall:
            raise ValueError(
                f"class {i} has only {unselected_indices.size} unselected instances, "
                f"cannot add {shortfall} to reach minimum={minimum}"
            )
        random_selecte_indices = np.random.choice(unselected_indices, size=shortfall,
                                                  replace=False)  # no duplicates
        print(f"类别{i}所要补充的个体数为{shortfall},索引为：", random_selecte_indices)
        # Append the extra instances (np.concatenate joins along axis 0 by default).
        x_sub = np.concatenate((x_sub, x[random_selecte_indices, :]))
        y_sub = np.concatenate((y_sub, y[random_selecte_indices]))
    return x_sub, y_sub

### 适应度函数

In [ ]:

# Fitness / objective function
def objective_function(xi, x, y, model):
    """Return the classification error rate obtained with the subset encoded by ``xi``.

    Args:
        xi: one individual's real-valued encoding.
        x: full data array.
        y: full label array.
        model: estimator exposing ``fit`` and ``predict``.

    Returns:
        ``1 - accuracy`` measured on the held-out 30% of the selected subset.
    """
    # Decode the individual: get_indices truncates each gene to an int and
    # keeps the positions equal to 1.
    selected = get_indices(xi)
    class_info = get_class_index(y)
    x_sub, y_sub = get_sub_dataset(selected, x, y, class_info[0], class_info[1], minimum=10)

    # Train on 70% of the subset and evaluate on the remaining 30% (fixed seed).
    split = train_test_split(x_sub, y_sub, test_size=0.3, random_state=42)
    x_train, x_test, y_train, y_test = split
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    # Error rate is the complement of the accuracy.
    return 1 - accuracy_score(y_test, predictions)


# Evaluate the fitness of every individual in the population.
def fitness(x, data=None, labels=None, model=None):
    """Return the objective value of each individual (row) of ``x``.

    ``objective_function`` needs the dataset, its labels and a model in
    addition to the individual, so they must be supplied here and are
    forwarded unchanged.

    Args:
        x: 2-D population array, one individual per row.
        data: data array forwarded to ``objective_function``.
        labels: label array forwarded to ``objective_function``.
        model: estimator forwarded to ``objective_function``.

    Returns:
        1-D float array with one fitness value per individual.

    Raises:
        TypeError: if ``data``, ``labels`` or ``model`` is not provided.
    """
    if data is None or labels is None or model is None:
        raise TypeError(
            "fitness() requires data, labels and model to evaluate objective_function"
        )
    result = np.empty(x.shape[0])  # fitness of each individual
    for i in range(x.shape[0]):
        result[i] = objective_function(x[i, :], data, labels, model)
    return result

### 加载数据集

In [None]:
# Read the .mat file
mat_data = sio.loadmat('../data/dataset/Australian.mat')
print(mat_data.keys())
# Extract the variables
x = mat_data['X']
y = mat_data['Y']
y = y[:, 0]  # keep only the first column so that y is 1-D
# Show the variable shapes (the second label previously claimed to show a dtype)
print("x的形状:", x.shape)
print("y的形状:", y.shape)
# Count the instances of each class; y.max() + 1 is the number of classes.
# np.bincount yields integer counts, matching get_class_index.
counts = np.bincount(y, minlength=int(y.max()) + 1)
print("每种类别的数量：", counts)

### 参数设定

In [9]:
# Population layout
NP, D = 50, 10  # number of individuals; number of variables in the objective function
# Search-space bounds
Left, Right = -20, 20  # left boundary; right boundary
# Evolution control
G = 8000  # maximum number of iterations
F = 0.5  # mutation operator
CR = 0.1  # crossover operator
Threshold = 1e-6  # threshold (how it is used is defined by DifferentialEvolution, not visible here)

In [10]:

# Fitness / objective function
def function(x):  # x has D entries (D = 10 in this notebook)
    """Return the sum of squares of ``x`` along its first axis.

    ``np.sum`` performs the reduction in native code instead of iterating the
    array element by element with the builtin ``sum``; ``axis=0`` keeps the
    same reduction axis the builtin used. Floating-point summation order may
    differ, so results can differ in the last bits.

    Args:
        x: NumPy array; a 1-D array gives a scalar result.

    Returns:
        The sum of the squared entries along axis 0.
    """
    return np.sum(x ** 2, axis=0)


# Evaluate the fitness of every individual in the population.
def fitness(x):
    """Return a float array holding ``function(row)`` for each row of ``x``.

    Args:
        x: 2-D population array, one individual per row.

    Returns:
        1-D float array of length ``x.shape[0]``.
    """
    population_size = x.shape[0]
    # Evaluate the individuals in order and collect the values into a float array.
    values = (function(x[k, :]) for k in range(population_size))
    return np.fromiter(values, dtype=float, count=population_size)


# Initial population: NP individuals with D variables each, drawn uniformly from [Left, Right)
x = Left + (Right - Left) * np.random.rand(NP, D)

# Bookkeeping for the evolution history
number_generations = G
per_generation_optimal_value = np.zeros(G)

x_fitness = fitness(x)

de = DifferentialEvolution(NP, D, G, CR, Threshold, F, Left, Right)

with tqdm(total=G, desc="DE") as pbar:
    for i in range(G):
        # Mutation (the "optimised" variant), then crossover, then boundary handling.
        # Per the original notes, boundary_process regenerates out-of-range values;
        # its implementation is not visible here.
        v = de.variation_optimize(x)
        c = de.cross(x, v)
        c = de.boundary_process(c)

        # Fitness of the trial population
        c_fitness = fitness(c)

        # Greedy selection: a trial individual replaces its parent only if strictly better.
        improved = c_fitness < x_fitness
        x[improved, :] = c[improved, :]
        x_fitness[improved] = c_fitness[improved]

        # Record the best individual of this generation.
        index = np.argmin(x_fitness)
        per_generation_optimal_value[i] = x_fitness[index]

        # Refresh the progress-bar details and advance it by one step.
        pbar.set_postfix({
            "当前迭代次数": i + 1,
            "最优个体索引": index,
            "目标函数最小值": x_fitness[index]
        })
        pbar.update(1)

DE: 100%|██████████| 8000/8000 [00:08<00:00, 893.38it/s, 当前迭代次数=8000, 最优个体索引=35, 目标函数最小值=1.27e-13]


## 测试

In [None]:
# Small hand-made dataset: 24 instances with 2 features each.
xx = np.array([
    [0.2, 1.2], [1.3, 2.4], [2.6, 3.5], [3.2, 4.2],
    [4.3, 5.4], [5.6, 6.5], [6.2, 7.2], [7.3, 8.4],
    [8.6, 9.5], [9.2, 10.2], [10.3, 2.4], [11.6, 3.5],
    [12.2, 2.2], [13.3, 2.4], [14.6, 3.5], [15.2, 2.2],
    [16.3, 2.4], [17.6, 3.5], [18.2, 2.2], [29.3, 2.4],
    [20.6, 3.5], [21.2, 2.2], [22.3, 2.4], [23.6, 3.5],
])
# Binary labels, one per instance.
yy = np.array([0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1])

print(xx.shape)
print(yy.shape)

# Random individual with one gene per instance, each drawn from [0, 2).
gene_low, gene_high = 0, 2
xi = np.random.rand(24) * (gene_high - gene_low) + gene_low
print("随机生成的个体：", xi)

# Decode the individual into the indices of the selected instances.
indexes = get_indices(xi)
print("个体对应的实例索引：", indexes)

# Per-class bookkeeping needed to top up under-represented classes.
num_class, classes, counts = get_class_index(yy)

x_sub, y_sub = get_sub_dataset(indexes, xx, yy, num_class, classes, minimum=10)

print("选择的实例子集x：", x_sub)
print("选择的实例子集y：", y_sub)
print("Over")