In [5]:
import numpy as np

def calculate_mapen_optimized(data, m, r):
    """
    Compute a multivariate (joint) approximate-entropy style statistic.

    Every run of ``m`` consecutive rows of ``data`` (all variables together)
    is treated as one pattern. Two patterns match when their Chebyshev
    distance (largest absolute difference over every lag and every variable)
    is at most ``r * np.std(data)``. The result is::

        log(phi_m) - log(phi_{m+1})

    where ``phi_d`` is the fraction of ordered pattern pairs of length ``d``
    that match, self-pairs excluded.

    Note: the match counts are pooled before taking the logarithm and
    self-matches are excluded, so the estimator is closer in form to sample
    entropy than to the classic per-template-averaged ApEn.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_variables)
        One row per time step, one column per variable. A 1-D sequence is
        accepted and treated as a single variable.
    m : int
        Embedding dimension (pattern length), ``m >= 1``. At least ``m + 2``
        samples are required so that both pattern lengths have two or more
        patterns to compare.
    r : float
        Tolerance as a multiple of the standard deviation. The standard
        deviation is taken over *all* entries of ``data`` at once, not per
        variable.

    Returns
    -------
    numpy.float64
        The entropy estimate. ``inf`` if no pair of length ``m + 1`` matches,
        ``nan`` if no pair of length ``m`` matches either.

    Raises
    ------
    ValueError
        If ``data`` is not 1-D/2-D, has no variables, ``m`` is not a positive
        integer, or there are fewer than ``m + 2`` samples.

    Notes
    -----
    Running time grows with ``n_samples**2 * m * n_variables``; extra memory
    is only ``O(n_samples * n_variables)`` because the embedded patterns are
    never materialised.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim == 1:
        data = data[:, np.newaxis]
    if data.ndim != 2 or data.shape[1] == 0:
        raise ValueError(
            "data must have shape (n_samples, n_variables) with n_variables >= 1"
        )
    if m != int(m) or m < 1:
        raise ValueError("m must be a positive integer")
    m = int(m)

    n_samples = data.shape[0]
    n_patterns_m1 = n_samples - m        # number of patterns of length m + 1
    n_patterns_m = n_patterns_m1 + 1     # number of patterns of length m
    if n_patterns_m1 < 2:
        raise ValueError(
            f"need at least m + 2 = {m + 2} samples, got {n_samples}"
        )

    # Tolerance: r times the standard deviation of all entries of data.
    r_threshold = r * np.std(data)

    count_m = 0
    count_m1 = 0
    for i in range(n_patterns_m):
        # Chebyshev distance from pattern i to every pattern of length m,
        # built as a running maximum over the lags so that no
        # (n_patterns, n_variables, m) array is ever allocated.
        dist = np.zeros(n_patterns_m)
        for k in range(m):
            lag_dist = np.abs(data[k:k + n_patterns_m] - data[i + k]).max(axis=1)
            np.maximum(dist, lag_dist, out=dist)
        count_m += np.count_nonzero(dist <= r_threshold) - 1  # drop self-match

        if i < n_patterns_m1:
            # A pattern of length m + 1 is the length-m pattern plus one more
            # row, so its distance is the length-m distance combined with the
            # distance at the extra lag.
            extra = np.abs(data[m:] - data[i + m]).max(axis=1)
            dist_m1 = np.maximum(dist[:n_patterns_m1], extra)
            count_m1 += np.count_nonzero(dist_m1 <= r_threshold) - 1

    phi_m = count_m / (n_patterns_m * (n_patterns_m - 1))
    phi_m1 = count_m1 / (n_patterns_m1 * (n_patterns_m1 - 1))

    # A zero match fraction makes the statistic undefined; return inf/nan
    # quietly instead of emitting runtime warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        mapen = np.log(phi_m) - np.log(phi_m1)
    return mapen


In [6]:
import pandas as pd

# Location of the merged CSV file for user23.
file_path = r"C:\Users\12694\Documents\ndss\merged_files\user23\user23_merged.csv"

# Read the file (first line is used as column labels), keeping at most the
# first 98000 data rows, then drop the first column. No rows are dropped.
data = pd.read_csv(file_path, nrows=98000).iloc[:, 1:]

# NumPy array holding the remaining columns.
array_output = data.values

# Display the array's shape together with its first five rows.
(array_output.shape, array_output[:5])


((98000, 14),
 array([[0.00000000e+00, 0.00000000e+00, 2.66000000e+02, 1.99000000e+02,
         2.11148347e-03, 3.87718805e-06, 6.05768127e-09, 1.57300631e-01,
         1.75250084e-01, 3.60655909e-01, 3.70856483e-01, 3.60660346e-01,
         3.70803039e-01, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.65000000e+02, 1.97000000e+02,
         2.92880340e-02, 3.07313378e-04, 2.74367766e-06, 3.38443408e-01,
         3.06895532e-01, 3.60792206e-01, 3.70647489e-01, 3.60661548e-01,
         3.70801154e-01, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.76000000e+02, 1.68000000e+02,
         6.60998881e-03, 6.47335042e-05, 5.39408621e-07, 2.99870935e-01,
         2.49127716e-01, 3.60658044e-01, 3.70811377e-01, 3.60660350e-01,
         3.70802646e-01, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.76000000e+02, 1.61000000e+02,
         1.80405227e-02, 1.89295532e-04, 1.69002054e-06, 3.20490658e-01,
         1.18083548e-01, 3.60487291e-01, 3.70764205e-01, 

In [7]:
# Inputs for the entropy computation.
# `data` is the full array built from the CSV (not a small random sample), so
# with this many rows and an embedding dimension of 500 the pairwise pattern
# comparison is very expensive.
data = array_output
m = 500  # embedding dimension (pattern length)
r = 0.2  # tolerance as a multiple of the standard deviation

# Compute the joint approximate entropy and report it.
mapen = calculate_mapen_optimized(data, m=m, r=r)
print(f"联合近似熵: {mapen}")

KeyboardInterrupt: 