## 数据的加载

In [None]:
from data_preprocess import data_loader, data_preprocess
# Directory and base name of the dataset file to load.
file_path = '../datasets/dat/'
datasetname = 'wisconsin'
X, y = data_loader(f'{file_path}{datasetname}.dat')

# Pass the loaded arrays to data_preprocess with a fixed random_state; the four
# results are bound as train/test features and labels.
X_train, X_test, y_train, y_test = data_preprocess(X, y, random_state=42)

In [None]:
from deap import algorithms

In [None]:
import numpy as np

def calculate_k_min_distances_mean(x, k):
    """
    Return the mean of the k smallest Euclidean distances to the data centroid.

    The centroid is the column-wise mean of ``x``. The distance from every row
    to that centroid is computed, and the ``k`` smallest distances are averaged.

    Parameters:
    x: array-like, feature matrix with one sample per row and one feature per column
    k: int, number of smallest distances to average (1 <= k <= number of rows)

    Returns:
    float: mean of the k smallest sample-to-centroid distances

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of samples
    """
    x = np.asarray(x)

    # 1. Centroid of the data (per-feature mean).
    center = np.mean(x, axis=0)

    # 2. Euclidean distance from every sample to the centroid (broadcast subtract).
    distances = np.sqrt(np.sum((x - center) ** 2, axis=1))

    n_samples = distances.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # 3. Partial sort: after partitioning on index k - 1 the first k entries are
    #    the k smallest values. Pivoting on k - 1 (not k) keeps k == n_samples
    #    within bounds.
    k_smallest_distances = np.partition(distances, k - 1)[:k]

    # 4. Average the selected distances.
    return np.mean(k_smallest_distances)

# 更简洁的版本
def calculate_k_min_distances_mean_compact(x, k):
    """
    Compact variant: mean of the k smallest sample-to-centroid distances.

    Parameters:
    x: array-like, feature matrix with one sample per row
    k: int, number of smallest distances to average (1 <= k <= number of rows)

    Returns:
    float: mean of the k smallest Euclidean distances to the column-wise mean

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of samples
    """
    x = np.asarray(x)
    center = np.mean(x, axis=0)
    distances = np.linalg.norm(x - center, axis=1)
    n_samples = distances.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )
    # Pivot on k - 1 so that the first k entries are the k smallest and
    # k == n_samples stays within bounds.
    return np.mean(np.partition(distances, k - 1)[:k])

# 测试函数
if __name__ == "__main__":
    # Seed the global RNG so the demo prints the same numbers on every run.
    np.random.seed(42)
    test_data = np.random.randn(20, 3)  # 20 samples with 3 features each

    k = 5
    result = calculate_k_min_distances_mean_compact(test_data, k)

    print(f"测试数据形状: {test_data.shape}")
    print(f"数据中心的坐标: {np.mean(test_data, axis=0)}")
    print(f"前{k}个最小欧式距离的平均值: {result:.4f}")

    # Cross-check with a full sort: the k leading entries of the sorted
    # distances must average to the same value as the partition-based result.
    center = test_data.mean(axis=0)
    distances = np.linalg.norm(test_data - center, axis=1)
    sorted_distances = distances.copy()
    sorted_distances.sort()
    manual_result = sorted_distances[:k].mean()

    print(f"手动验证结果: {manual_result:.4f}")
    print(f"结果是否一致: {np.isclose(result, manual_result)}")

In [10]:
import numpy as np

# 最小距离版本
def calculate_k_min_distances_mean_compact(x, k):
    """
    Return the mean of the k smallest Euclidean distances to the centroid.

    Parameters:
    x: array-like, feature matrix with one sample per row
    k: int, number of smallest distances to average (1 <= k <= number of rows)

    Returns:
    float: mean of the k smallest sample-to-centroid distances

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of samples
    """
    x = np.asarray(x)
    center = np.mean(x, axis=0)
    distances = np.linalg.norm(x - center, axis=1)
    n_samples = distances.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )
    # Pivot on k - 1: the first k entries are then the k smallest values, and
    # k == n_samples no longer indexes past the end of the array.
    return np.mean(np.partition(distances, k - 1)[:k])

# 最大距离版本
def calculate_k_max_distances_mean_compact(x, k):
    """
    Return the mean of the k largest Euclidean distances to the centroid.

    Parameters:
    x: array-like, feature matrix with one sample per row
    k: int, number of largest distances to average (1 <= k <= number of rows)

    Returns:
    float: mean of the k largest sample-to-centroid distances

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of samples
    """
    x = np.asarray(x)
    center = np.mean(x, axis=0)
    distances = np.linalg.norm(x - center, axis=1)
    n_samples = distances.shape[0]
    # Without this guard k == 0 would slice [-0:], i.e. the whole array, and
    # silently return the mean of all distances.
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )
    # Partitioning on index -k places the k largest values in the last k slots.
    return np.mean(np.partition(distances, -k)[-k:])

# 测试
if __name__ == "__main__":
    # The data below is fixed; the seed call only resets the global RNG state.
    np.random.seed(42)
    test_data = np.array([(1, 2), (5, 6), (6, 1)])
    k = 2

    # Evaluate the smallest-k and largest-k averages on the same three points.
    min_mean, max_mean = (
        metric(test_data, k)
        for metric in (
            calculate_k_min_distances_mean_compact,
            calculate_k_max_distances_mean_compact,
        )
    )

    print(f"前{k}个最小距离平均值: {min_mean:.4f}")
    print(f"前{k}个最大距离平均值: {max_mean:.4f}")

前2个最小距离平均值: 2.9954
前2个最大距离平均值: 3.1623


In [1]:
def calculate_statistics(individuals):
    """
    Compute the maximum of each of the attributes a, b, c and d.

    Parameters:
    individuals -- iterable of objects exposing attributes ``a``, ``b``, ``c``
                   and ``d`` (a list, tuple or generator); may be None

    Returns:
    tuple -- (max_a, max_b, max_c, max_d), or (None, None, None, None) when
             ``individuals`` is None or yields no items
    """
    if individuals is None:
        return None, None, None, None

    # Materialize once: a generator would otherwise be exhausted by the first
    # max() call, and an empty generator is truthy so an emptiness test on it
    # would not catch it.
    members = list(individuals)
    if not members:
        return None, None, None, None

    return tuple(
        max(getattr(member, attr) for member in members)
        for attr in ("a", "b", "c", "d")
    )


# 假设Individual类的定义
class Individual:
    """Simple record that stores four values as attributes a, b, c and d."""

    def __init__(self, a, b, c, d):
        # Store the constructor arguments unchanged under the same names.
        self.a, self.b, self.c, self.d = a, b, c, d

# 创建测试数据
# Sample population: each tuple holds the (a, b, c, d) values of one Individual.
individuals = [
    Individual(*values)
    for values in (
        (1, 5, 3, 8),
        (4, 2, 7, 1),
        (2, 9, 4, 6),
        (3, 1, 2, 9),
    )
]

# Per-attribute maxima over the sample population.
max_a, max_b, max_c, max_d = calculate_statistics(individuals)

print(f"最大a值: {max_a}")
print(f"最大b值: {max_b}")
print(f"最大c值: {max_c}")
print(f"最大d值: {max_d}")

最大a值: 4
最大b值: 9
最大c值: 7
最大d值: 9
