### 导包

In [15]:
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
from tqdm import tqdm
from differential_evolution.DifferentialEvolution import DifferentialEvolution
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # SVC用于分类，SVR用于回归
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

### 加载数据集

In [16]:
# 读取.mat文件
mat_data = sio.loadmat('../data/dataset/Australian.mat')
print(mat_data.keys())
# 提取变量
x = mat_data['X']
y = mat_data['Y']
y = y[:, 0]
# 显示变量信息
print("x的形状:", x.shape)
print("y的数据类型:", y.shape)
# 统计每个类别的个数，y.max()+1是类别的个数
counts = np.zeros(y.max() + 1)
for i in range(y.shape[0]):
    counts[y[i]] += 1
print("每种类别的数量：", counts)

dict_keys(['__header__', '__version__', '__globals__', 'X', 'Y'])
x的形状: (690, 14)
y的数据类型: (690,)
每种类别的数量： [383. 307.]


In [17]:
NP = 50  # 个体数目
D = y.shape[0]  # 目标函数中变量的个数
G = 100  # 最大迭代数
CR = 0.1  # 交叉算子
Threshold = 1e-6  # 阈值
F = 0.5  # 变异算子
Left = 0  # 左边界
Right = 2  # 右边界

### 适应度函数

In [21]:
init_population = np.random.rand(NP, D) * (Right - Left) + Left  # 在[Left,Right]之间产生随机数


# 根据种群个体的编码值，获得要选择的实例的索引
def get_indices(x):
    x = x.astype(int)  #数据范围在0-2之间，转化成int的同时会舍去小数部分，从而将个体映射到0-1编码
    max_indices = np.where(x == 1)
    return max_indices

# 得到分类、以及分类所对应的索引

def get_class_index(y):
    # 统计每个类别的个数，y.max()+1是类别的个数
    num_class = y.max() + 1
    counts = np.zeros(num_class)
    classes=[]
    for i in range(y.shape[0]):
        counts[y[i]] += 1
    for i in range(num_class):
        classes.append(np.where(y == i).tolist())
    return classes,counts

# 根据索引得到要选择的子集：数据和标签
def get_sub_dataset(indices, x, y,minimum=10):
    lenth = len(indices)
    x_sub = np.empty((lenth, x.shape[1]))
    y_sub = np.empty(lenth)

    for i in range(lenth):
        x_sub[i, :] = x[indices[i], :]
        y_sub[i] = y[indices[i]]
    
    counts = np.zeros(y.max() + 1)
    for i in range(y_sub.shape[0]):
        counts[y_sub[i]] += 1
    for i in range(y.max() + 1):
        if counts[i] < minimum:
            unselected = np.arange(0, x.shape[0], 1)-y_sub
            unselected_indices=np.random.choice(unselected,size=minimum-counts[i])
            add=x[unselected_indices,:]
            
            
                
    return x_sub, y_sub


# 最小的实例选择的数量
# def minimum_quantity_limit(x_sub=None,y_sub=None,minimum=10):
#     # 统计每个类别的个数，y.max()+1是类别的个数
    
                        


indices = get_indices(init_population[0, :])
print(indices)

x_sub, y_sub = get_sub_dataset(indices, x, y)

print(x_sub.shape)
print(y_sub.shape)



(array([  1,   2,   3,   5,   7,   8,   9,  10,  11,  12,  14,  15,  17,
        20,  21,  22,  25,  26,  32,  33,  35,  37,  38,  41,  43,  46,
        49,  50,  51,  55,  57,  59,  61,  63,  64,  66,  67,  68,  72,
        73,  74,  75,  76,  77,  79,  80,  81,  82,  84,  85,  88,  89,
        90,  93,  94,  99, 102, 103, 106, 107, 108, 111, 114, 117, 120,
       124, 125, 127, 128, 129, 132, 135, 140, 142, 144, 148, 151, 152,
       154, 155, 156, 157, 158, 159, 161, 164, 165, 168, 169, 171, 172,
       173, 177, 182, 183, 184, 185, 187, 189, 190, 192, 194, 195, 196,
       199, 204, 206, 210, 211, 214, 215, 216, 223, 224, 226, 227, 229,
       230, 234, 239, 240, 242, 243, 244, 245, 246, 248, 250, 251, 252,
       254, 256, 257, 260, 262, 267, 276, 280, 289, 293, 294, 295, 297,
       298, 299, 301, 302, 303, 307, 308, 309, 310, 311, 316, 318, 319,
       320, 321, 323, 335, 338, 343, 350, 355, 356, 357, 359, 360, 362,
       366, 368, 369, 373, 375, 381, 382, 383, 389, 390, 396, 3

ValueError: could not broadcast input array from shape (326,14) into shape (14,)

In [23]:
in2,c=get_class_index(np.array([1,0,2,1,2,3,2,1,2,1,0,0,0,2,2,1,3,3]))
print(in2)
print(c)

AttributeError: 'tuple' object has no attribute 'tolist'

In [None]:
# 适应度函数/目标函数
def function(x, ):
    # 先将x的实值编码四舍五入得到0-1编码，根据编码得到训练子集

    return sum(x ** 2)


# 求适应度
def fitness(x):
    result = np.empty(x.shape[1])  # 记录种群中个体的适应度
    # 计算每个个体的适应度
    for i in range(0, x.shape[1]):
        result[i] = function(x[:, i])
    return result

In [None]:

x = np.random.rand(D, NP) * (Right - Left) + Left  # 在[Left,Right]之间产生随机数

# 记录进化迭代的过程
number_generations = G
per_generation_optimal_value = np.zeros(G)

x_fitness = fitness(x)

de = DifferentialEvolution(NP, D, G, CR, Threshold, F, Left, Right)

with tqdm(total=G, desc="DE") as pbar:
    for i in range(0, G):
        #v=variation(x,F) # 变异
        v = de.variation_optimize(x)  # 变异的优化
        c = de.cross(x, v)  # 交叉

        c = de.boundary_process(c)  # 若超出边界则重新生成

        c_fitness = fitness(c)  # 对新的试验种群求适应度
        for m in range(0, x.shape[1]):
            if c_fitness[m] < x_fitness[m]:  # 贪婪地选择当前最优个体
                x[:, m] = c[:, m]
                x_fitness[m] = c_fitness[m]
        index = np.argmin(x_fitness)  # 记录最优个体索引
        per_generation_optimal_value[i] = x_fitness[index]  # 记录最优适应度
        # 动态更新进度条信息
        pbar.set_postfix({
            "当前迭代次数": i + 1,
            "最优个体索引": index,
            "目标函数最小值": x_fitness[index]
        })
        # 更新进度条
        pbar.update(1) 

In [4]:
a=np.array([1,2,3,4,5,6])
index=np.array([2,3])
b=a[index]
print(b)

[3 4]


In [6]:
result = np.concatenate(([[1,2,3],[1,1,1]], [[2,2,2]]), axis=0)
print(result)

[[1 2 3]
 [1 1 1]
 [2 2 2]]


In [8]:
import numpy as np

irregular_array = np.array([[1, 2, 3], [4, 5], [6]], dtype=object)

# 遍历不规则数组
for row in irregular_array:
    for element in row:
        print(element)


1
2
3
4
5
6


In [10]:
import numpy as np

# 创建一个不规则的二维数组
irregular_array = np.array([[1, 2, 3], [4, 5], [6]], dtype=object)

# 遍历不规则数组
for i, row in enumerate(irregular_array):
    print(f"Row {i}:")
    for j, element in enumerate(row):
        print(f"  Element {j}: {element}")


Row 0:
  Element 0: 1
  Element 1: 2
  Element 2: 3
Row 1:
  Element 0: 4
  Element 1: 5
Row 2:
  Element 0: 6
