数据读取

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.fft import fft  # 导入傅里叶变换函数

# 读取数据
data = pd.read_excel('data_T4.xlsx')

# 提取各个特征
magnetic_flux_density = data.iloc[:, 5:]  # 假设数据位于第5列到第1029列
temperate = data.iloc[:, 0]
frequency = data.iloc[:, 1]
y = data.iloc[:, 2]  # 磁芯损耗，w/m3
material_type = data.iloc[:, 3]
excitation_waveform = data.iloc[:, 4]


# 打印特征以验证
print("magnetic_flux_density:")
print(magnetic_flux_density.head())
print("temperate:")
print(temperate.head())
print("frequency:")
print(frequency.head())
print("y:")
print(y.head())
print("excitation_waveform:")
print(excitation_waveform.head())
print("material_type:")
print(material_type.head())

# 检查是否存在缺失值
missing_count = magnetic_flux_density.isnull().sum().sum()
print(f"缺失值总数：{missing_count}")

# 填补缺失值（以列的均值进行填补，也可以用其他值替换，如0）
magnetic_flux_density_filled = magnetic_flux_density.fillna(magnetic_flux_density.mean())

magnetic_flux_density:
   0（磁通密度B，T）         1         2         3         4         5         6  \
0    0.000213  0.000389  0.000566  0.000743  0.000919  0.001096  0.001272   
1   -0.000551 -0.000358 -0.000165  0.000028  0.000221  0.000413  0.000605   
2   -0.003780 -0.003564 -0.003349 -0.003134 -0.002919 -0.002704 -0.002488   
3   -0.000511 -0.000267 -0.000023  0.000222  0.000466  0.000711  0.000955   
4    0.000458  0.000732  0.001007  0.001281  0.001555  0.001830  0.002104   

          7         8         9  ...      1014      1015      1016      1017  \
0  0.001448  0.001624  0.001800  ... -0.001551 -0.001374 -0.001198 -0.001022   
1  0.000798  0.000991  0.001183  ... -0.002476 -0.002284 -0.002091 -0.001899   
2 -0.002273 -0.002057 -0.001841  ... -0.005927 -0.005713 -0.005499 -0.005285   
3  0.001199  0.001443  0.001687  ... -0.002950 -0.002707 -0.002463 -0.002220   
4  0.002378  0.002653  0.002927  ... -0.002290 -0.002016 -0.001741 -0.001466   

       1018      1019      1020  

提取波形的时域特征
上升沿和下降沿的斜率（Slope of Rising and Falling Edges）：通过计算波形的差分可以获取波形上升和下降沿的斜率，用于识别波形的变化速率。
零交点数（Number of Zero Crossings）：通过波形与零点的交点数来区分不同波形的周期性和对称性。

In [2]:
# 计算基本统计特征
mean_val = magnetic_flux_density_filled.mean(axis=1)  # 每行均值
std_val = magnetic_flux_density_filled.std(axis=1)    # 每行标准差
max_val = magnetic_flux_density_filled.max(axis=1)    # 每行最大值
min_val = magnetic_flux_density_filled.min(axis=1)    # 每行最小值
peak_to_peak = max_val - min_val                      # 每行峰峰值
skewness = magnetic_flux_density_filled.apply(lambda row: skew(row), axis=1)  # 每行偏度
kurt = magnetic_flux_density_filled.apply(lambda row: kurtosis(row), axis=1)  # 每行峰度

# 构建特征DataFrame
features = pd.DataFrame({
    'Mean': mean_val,
    'Std': std_val,
    'Max': max_val,
    'Min': min_val,
    'Peak-to-Peak': peak_to_peak,
    'Skewness': skewness,
    'Kurtosis': kurt
})

# 计算上升沿和下降沿的斜率
slope_rising = magnetic_flux_density.diff(axis=1).apply(lambda row: np.max(row), axis=1)
slope_falling = magnetic_flux_density.diff(axis=1).apply(lambda row: np.min(row), axis=1)

# 计算零交点数
zero_crossings = magnetic_flux_density.apply(lambda row: np.sum(np.diff(np.sign(row)) != 0), axis=1)

# 添加到特征表中
features['Slope_Rising'] = slope_rising
features['Slope_Falling'] = slope_falling
features['Zero_Crossings'] = zero_crossings

# 把温度、励磁波形和材料类型拼接到特征表中，并转换为one-hot编码
features['temperate'] = temperate
features['excitation_waveform'] = excitation_waveform
features['material_type'] = material_type
features = pd.get_dummies(features, columns=['temperate', 'excitation_waveform', 'material_type'])

# 频率数据也要加入特征表中
features['frequency'] = frequency
# 把magnetic_flux_density_filled也作为特征加入,该特征为2维,需多加几列
# for i in range(magnetic_flux_density_filled.shape[1]):
#     features[f'magnetic_flux_density_{i}'] = magnetic_flux_density_filled.iloc[:, i]

# 输出magnetic_flux_density、feature和label的shape
print(f"磁通密度数据的shape：{magnetic_flux_density_filled.shape}")
print(f"特征数据的shape：{features.shape}")
print(f"label的shape：{len(y)}")
print(features.head())

磁通密度数据的shape：(12400, 1024)
特征数据的shape：(12400, 22)
label的shape：12400
           Mean       Std       Max       Min  Peak-to-Peak  Skewness  \
0 -4.003908e-11  0.020410  0.028849 -0.028840      0.057689 -0.000627   
1 -2.539062e-11  0.022234  0.031419 -0.031427      0.062846 -0.001222   
2 -9.765699e-13  0.025119  0.035535 -0.035513      0.071047 -0.000494   
3 -5.859375e-11  0.028277  0.040015 -0.040025      0.080041 -0.001531   
4  4.101562e-11  0.031828  0.045028 -0.045085      0.090113 -0.002508   

   Kurtosis  Slope_Rising  Slope_Falling  Zero_Crossings  ...  temperate_70  \
0 -1.497912      0.000177      -0.000177               2  ...             0   
1 -1.497891      0.000193      -0.000193               2  ...             0   
2 -1.496391      0.000216      -0.000216               2  ...             0   
3 -1.494906      0.000245      -0.000243               2  ...             0   
4 -1.495133      0.000275      -0.000274               2  ...             0   

   temperate_90  e

通过上面这些特征，基于sklearn，训练机器学习模型

In [3]:
# 可视化和验证特征
from pyexpat import model
import matplotlib.pyplot as plt
import seaborn as sns

# # 可视化不同类别的特征分布（假设已经有标签列 'Label'）
# sns.pairplot(features, hue='励磁波形')  

# 梯度提升决策树回归，决策变量有离散值也有连续值
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# 引入神经网络回归
from sklearn.neural_network import MLPRegressor
# 引入支持向量机回归
from sklearn.svm import SVR

# 数据标准化和归一化（Normalization/Standardization）
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.2, random_state=42)

# 训练模型
# model = GradientBoostingRegressor(n_estimators=500, max_depth=5, random_state=42, n_jobs=-1)
# model = MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=1000, random_state=42)
# model = SVR(kernel='rbf')
# cpu多核训练
model = RandomForestRegressor(n_estimators=100, max_depth=5, n_jobs=-1, random_state=42, warm_start=True)
model.fit(X_train, y_train)

# 预测
y_pred = model.predict(X_test)

# 评估
mse = mean_squared_error(y_test, y_pred)
print(f"均方误差：{mse}")

r2 = r2_score(y_test, y_pred)
print(f"R2：{r2}")

mae = mean_absolute_error(y_test, y_pred)
print(f"平均绝对误差：{mae}")

# # 特征重要性
# feature_importance = model.feature_importances_
# feature_importance_df = pd.DataFrame({
#     'Feature': features.columns,
#     'Importance': feature_importance
# })
# feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
# print(feature_importance_df)




均方误差：10398794591.317295
R2：0.9227076951366451
平均绝对误差：50125.830070065735


In [4]:
# 对附件三中样本的磁芯损耗进行预测，把预测结果填入附件四（Excel表格）中第3列
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.fft import fft  # 导入傅里叶变换函数

# 读取数据
data_test = pd.read_excel('附件三（测试集）.xlsx')

# 提取各个特征
magnetic_flux_density = data_test.iloc[:, 5:]  # 假设数据位于第5列到第1029列
temperate = data_test.iloc[:, 1]
frequency = data_test.iloc[:, 2]
material_type = data_test.iloc[:, 3]
excitation_waveform = data_test.iloc[:, 4]


# 打印特征以验证
print("magnetic_flux_density:")
print(magnetic_flux_density.head())
print("temperate:")
print(temperate.head())
print("frequency:")
print(frequency.head())
print("excitation_waveform:")
print(excitation_waveform.head())
print("material_type:")
print(material_type.head())

# 检查是否存在缺失值
missing_count = magnetic_flux_density.isnull().sum().sum()
print(f"缺失值总数：{missing_count}")

# 填补缺失值（以列的均值进行填补，也可以用其他值替换，如0）
magnetic_flux_density_filled = magnetic_flux_density.fillna(magnetic_flux_density.mean())


magnetic_flux_density:
   0（磁通密度B，T）         1         2         3         4         5         6  \
0    0.002216  0.002597  0.002977  0.003358  0.003739  0.004119  0.004500   
1   -0.000436 -0.000051  0.000333  0.000716  0.001099  0.001482  0.001864   
2   -0.000474 -0.000042  0.000390  0.000822  0.001255  0.001688  0.002120   
3    0.001098  0.001482  0.001866  0.002249  0.002633  0.003017  0.003401   
4   -0.002549 -0.002062 -0.001576 -0.001090 -0.000603 -0.000117  0.000367   

          7         8         9  ...      1014      1015      1016      1017  \
0  0.004880  0.005261  0.005642  ... -0.001596 -0.001214 -0.000833 -0.000452   
1  0.002247  0.002630  0.003012  ... -0.004269 -0.003886 -0.003503 -0.003118   
2  0.002552  0.002985  0.003418  ... -0.004800 -0.004368 -0.003935 -0.003502   
3  0.003785  0.004169  0.004553  ... -0.002742 -0.002359 -0.001975 -0.001591   
4  0.000852  0.001339  0.001826  ... -0.007403 -0.006918 -0.006432 -0.005947   

       1018      1019      1020  

In [5]:
# 计算基本统计特征
mean_val = magnetic_flux_density_filled.mean(axis=1)  # 每行均值
std_val = magnetic_flux_density_filled.std(axis=1)    # 每行标准差
max_val = magnetic_flux_density_filled.max(axis=1)    # 每行最大值
min_val = magnetic_flux_density_filled.min(axis=1)    # 每行最小值
peak_to_peak = max_val - min_val                      # 每行峰峰值
skewness = magnetic_flux_density_filled.apply(lambda row: skew(row), axis=1)  # 每行偏度
kurt = magnetic_flux_density_filled.apply(lambda row: kurtosis(row), axis=1)  # 每行峰度

# 构建特征DataFrame
features_test = pd.DataFrame({
    'Mean': mean_val,
    'Std': std_val,
    'Max': max_val,
    'Min': min_val,
    'Peak-to-Peak': peak_to_peak,
    'Skewness': skewness,
    'Kurtosis': kurt
})
print(f"特征数据的shape：{features_test.shape}")
# 计算上升沿和下降沿的斜率
slope_rising = magnetic_flux_density.diff(axis=1).apply(lambda row: np.max(row), axis=1)
slope_falling = magnetic_flux_density.diff(axis=1).apply(lambda row: np.min(row), axis=1)
print(f"特征数据的shape：{features_test.shape}")
# 计算零交点数
zero_crossings = magnetic_flux_density.apply(lambda row: np.sum(np.diff(np.sign(row)) != 0), axis=1)

# 添加到特征表中
features_test['Slope_Rising'] = slope_rising
features_test['Slope_Falling'] = slope_falling
features_test['Zero_Crossings'] = zero_crossings

# 把温度、励磁波形和材料类型拼接到特征表中，并转换为one-hot编码
features_test['temperate'] = temperate
features_test['excitation_waveform'] = excitation_waveform
features_test['material_type'] = material_type
features_test = pd.get_dummies(features_test, columns=['temperate', 'excitation_waveform', 'material_type'])

# 频率数据也要加入特征表中
features_test['frequency'] = frequency

# 把magnetic_flux_density_filled也作为特征加入,该特征为2维,需多加几列
# magnetic_flux_density_filled = magnetic_flux_density_filled.T
# for i in range(magnetic_flux_density_filled.shape[1]):
#     features_test[f'magnetic_flux_density_{i}'] = magnetic_flux_density_filled.iloc[:, i]


# 输出magnetic_flux_density、feature和label的shape
print(f"磁通密度数据的shape：{magnetic_flux_density_filled.shape}")
print(f"特征数据的shape：{features_test.shape}")
print(f"label的shape：{len(y)}")

特征数据的shape：(400, 7)
特征数据的shape：(400, 7)
磁通密度数据的shape：(400, 1024)
特征数据的shape：(400, 22)
label的shape：12400


In [6]:
# 按样本序号填入相应的磁芯损耗预测结果，只保留小数点后1位；保存到Excel文件
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
features_test = scaler.fit_transform(features_test)
y_test_pred = model.predict(features_test)
# 四舍五入,不要用np
y_test_pred_round = pd.Series(y_test_pred).round(1)
# 输出预测结果在附件四（Excel表）.xlsx，只从第三行第三列开始写入，其余别覆盖
y_test_pred_round.to_excel('附件四（Excel表）.xlsx', startrow=2, startcol=2, header=False, index=False)


In [7]:
# 特别把附件三中样本序号为：16、76、98、126、168、230、271、338、348、379的磁芯损耗预测结果，以表格形式呈现在论文正文中。
# 由于这里的数据是从0开始的，所以需要减1

# 选取样本序号
sample_indices = [16, 76, 98, 126, 168, 230, 271, 338, 348, 379]
# 选取样本
sample_indices = np.array(sample_indices)
selected_samples = pd.DataFrame(y_test_pred_round).iloc[sample_indices - 1]
# 索引用sample_indices
selected_samples.index = sample_indices
# 修改列名
selected_samples.columns = ['磁芯损耗，w/m3']
# 输出
print(selected_samples)

# 保存到Excel
selected_samples.to_excel('特殊预测结果样本序号_T4.xlsx', index=False)

     磁芯损耗，w/m3
16     11373.8
76   1218975.2
98     37511.5
126    11373.8
168   169743.2
230    71412.3
271  1588188.4
338    33837.3
348   682773.6
379    11373.8
