### 导包

In [3]:
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
from tqdm import tqdm
from differential_evolution.DifferentialEvolution import DifferentialEvolution
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # SVC用于分类，SVR用于回归
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

### 加载数据集

In [75]:
# Load the Australian dataset from its .mat file.
mat_data = sio.loadmat('../data/dataset/Australian.mat')
print(mat_data.keys())
# Extract the feature matrix and the labels. The labels are stored as a
# column, so only column 0 is kept to obtain a 1-D label vector.
x, y = mat_data['X'], mat_data['Y'][:, 0]
# Show basic information about both variables.
print("x的形状:", x.shape)
# NOTE(review): the label below reads "data type of y", but the value that is
# printed is the shape of y.
print("y的数据类型:", y.shape)

# Count the instances of each class. Labels are used directly as positions in
# `counts`, so y.max() + 1 slots are needed.
counts = np.zeros(y.max() + 1)
np.add.at(counts, y, 1)  # unbuffered add: repeated labels are all counted
print("每种类别的数量：", counts)

dict_keys(['__header__', '__version__', '__globals__', 'X', 'Y'])
x的形状: (690, 14)
y的数据类型: (690,)
每种类别的数量： [383. 307.]


In [76]:
NP = 50  # population size (number of individuals)
D = y.shape[0]  # number of variables in the objective function: one per entry of y
G = 100  # maximum number of iterations (generations)
CR = 0.1  # crossover operator parameter
Threshold = 1e-6  # threshold passed to DifferentialEvolution; NOTE(review): its exact role is not visible here
F = 0.5  # mutation operator parameter
Left = 0  # left (lower) bound of the sampling range
Right = 2  # right (upper) bound of the sampling range

### 适应度函数

In [81]:
# Random initial population, one individual per row (NP rows, D columns),
# with every gene drawn uniformly from [Left, Right).
init_population = Left + (Right - Left) * np.random.rand(NP, D)


# Map an individual's encoding to the indices of the instances it selects.
def get_indices(x):
    """Return the positions of ``x`` whose integer part equals 1.

    The genes are expected to lie in the range 0-2; converting them to int
    drops the fractional part, which turns the real-valued individual into a
    0-1 encoding. Positions that end up as 1 are the selected instances.

    Args:
        x: NumPy array holding one individual's real-valued encoding.

    Returns:
        The result of ``np.argwhere`` on the 0-1 encoding: one row per
        selected position (shape ``(k, 1)`` for a 1-D ``x``).
    """
    binary_code = x.astype(int)
    is_selected = binary_code == 1
    return np.argwhere(is_selected)


# Build the selected subset (data and labels) from the given indices.
def get_sub_dataset(indices, x, y):
    """Return the rows of ``x`` and the entries of ``y`` picked by ``indices``.

    The selection is done with a single fancy-indexing operation instead of a
    per-row Python loop. This also avoids assigning a size-1 array to a scalar
    slot, which happens when ``indices`` comes from ``np.argwhere`` (shape
    ``(k, 1)``) and which NumPy has deprecated.

    Args:
        indices: Integer positions of the selected instances, either as a
            ``(k, 1)`` array (the ``np.argwhere`` layout) or as a 1-D
            sequence of length ``k``. May be empty.
        x: 2-D data array with one instance per row.
        y: Label vector aligned with the rows of ``x``.

    Returns:
        A tuple ``(x_sub, y_sub)`` of float arrays with shapes
        ``(k, x.shape[1])`` and ``(k,)``.

    Raises:
        ValueError: If ``indices`` is neither 1-D nor a single-column 2-D
            array, or if the selected labels cannot be laid out as ``(k,)``.
        IndexError: If ``x`` is not 2-D or an index is out of range.
    """
    idx = np.asarray(indices)
    if idx.size == 0:
        # An empty selection is valid and yields empty outputs.
        idx = np.zeros(0, dtype=np.intp)
    elif idx.ndim == 2 and idx.shape[1] == 1:
        # np.argwhere on a 1-D array returns one column; flatten it.
        idx = idx[:, 0]
    elif idx.ndim != 1:
        raise ValueError(
            "indices must be 1-D or of shape (k, 1), got shape %s" % (idx.shape,)
        )

    count = idx.shape[0]
    # Outputs are float arrays, matching the previously pre-allocated buffers.
    x_sub = np.asarray(x[idx, :], dtype=float)
    y_sub = np.asarray(np.asarray(y)[idx], dtype=float).reshape(count)
    return x_sub, y_sub


# Minimum number of selected instances.
# NOTE(review): this function is unfinished -- the innermost loop below has no
# body yet, so the cell does not parse as written.
def minimum_quantity_limit(x_sub=None,y_sub=None,minimum=10):
    # Count the instances of each class; y.max() + 1 is the number of classes.
    # NOTE(review): this counts the module-level `y` (the full label vector),
    # not the `y_sub` argument -- confirm which one is intended.
    counts = np.zeros(y.max() + 1)
    for i in range(y.shape[0]):
        counts[y[i]] += 1
    # NOTE(review): no `break` or `return` is visible, so this loop has no exit yet.
    while 1==1:
        for i in range(counts.shape[0]):
            if counts[i] < minimum:
                # NOTE(review): `counts` is a float array (np.zeros default), so
                # `minimum-counts[i]` is a float and range() will reject it.
                for j in range(minimum-counts[i]):
                    


# Try the helpers on the first individual of the initial population and show
# the shapes of the selected indices and of the resulting subset.
indices = get_indices(init_population[0])
print(indices.shape)

x_sub, y_sub = get_sub_dataset(indices, x, y)

print(x_sub.shape, y_sub.shape, sep="\n")



(326, 1)
(326, 14)
(326,)


In [None]:
# Fitness function / objective function.
def function(x):
    """Return the sum of the squared entries of ``x``.

    This is currently a placeholder objective. The intended version (not
    implemented here) would first round the real-valued encoding in ``x`` to
    a 0-1 encoding and use that encoding to obtain the training subset.

    Args:
        x: NumPy array holding one individual's encoding.

    Returns:
        The built-in ``sum`` of ``x ** 2``, i.e. a scalar for a 1-D ``x``.
    """
    squared = x ** 2
    return sum(squared)


# Evaluate the fitness of a whole population.
def fitness(x):
    """Return the fitness of every individual in the population ``x``.

    Args:
        x: 2-D array with one individual per column.

    Returns:
        1-D float array of length ``x.shape[1]``; entry ``k`` is
        ``function`` applied to column ``k``.
    """
    n_individuals = x.shape[1]
    scores = [function(x[:, col]) for col in range(n_individuals)]
    return np.array(scores, dtype=float)

In [None]:

# Initial population drawn uniformly from [Left, Right), stored with one
# individual per column (D rows, NP columns), which is the layout fitness()
# iterates over.
# NOTE(review): this rebinds the name `x`, which earlier in the notebook holds
# the dataset feature matrix, and the layout differs from `init_population`,
# which is (NP, D).
x = np.random.rand(D, NP) * (Right - Left) + Left

# Bookkeeping for the evolution process: best fitness found in each generation.
# NOTE(review): `number_generations` is not used in the code visible here.
number_generations = G
per_generation_optimal_value = np.zeros(G)

# Fitness of the initial population, one value per individual.
x_fitness = fitness(x)

de = DifferentialEvolution(NP, D, G, CR, Threshold, F, Left, Right)

with tqdm(total=G, desc="DE") as pbar:
    for i in range(0, G):
        # v = variation(x, F)  # plain mutation (disabled)
        v = de.variation_optimize(x)  # mutation, optimized variant
        c = de.cross(x, v)  # crossover

        c = de.boundary_process(c)  # per the original note: regenerate values that leave the bounds

        c_fitness = fitness(c)  # fitness of the new trial population
        for m in range(0, x.shape[1]):
            if c_fitness[m] < x_fitness[m]:  # greedy selection: keep the trial only if it is strictly better
                x[:, m] = c[:, m]
                x_fitness[m] = c_fitness[m]
        index = np.argmin(x_fitness)  # column index of the best individual so far
        per_generation_optimal_value[i] = x_fitness[index]  # record the best fitness of this generation
        # Refresh the information shown next to the progress bar.
        pbar.set_postfix({
            "当前迭代次数": i + 1,
            "最优个体索引": index,
            "目标函数最小值": x_fitness[index]
        })
        # Advance the progress bar by one generation.
        pbar.update(1)