### 从UCI加载数据，转化格式，转换为数字形式，并保存为mat数据 
X 为特征数据，形状为 (num_instances, num_features)
Y 为 label，形状为 (num_instances, 1)，即一个列向量

In [None]:

import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import pandas as pd
from ucimlrepo import fetch_ucirepo
from scipy.io import savemat

# Dataset: fetch UCI repository entry 76 (the encoded result is saved below
# as 'Nursery.mat').
uci_dataset = fetch_ucirepo(id=76)
data = uci_dataset.data
print(data.headers.tolist())  # column names, printed as a quick sanity check

# 1. Separate features and labels.
X = data.features.values
X_columns = data.features.columns.tolist()
y = data.targets.values

# 2. Encode every feature column, then the labels, as integer codes.
# The same encoder object is re-fitted for each column, so the per-column
# category mappings are not retained after this loop.
label_encoder = LabelEncoder()
X_encoded = np.copy(X)
for i in range(X.shape[1]):
    X_encoded[:, i] = label_encoder.fit_transform(X_encoded[:, i])
# LabelEncoder expects a 1-D array and emits a DataConversionWarning when it
# is handed a column vector, so flatten the targets explicitly.
# NOTE(review): assumes `data.targets` has a single column -- confirm before
# reusing this cell with a different dataset id.
y_encoded = label_encoder.fit_transform(y.ravel())

# 3. Drop every sample whose encoded label is 2.
# NOTE(review): the index 2 is hard-coded; presumably it is a class that is
# too rare to keep -- verify which original label it corresponds to.
indexs = np.where(y_encoded == 2)
X_encoded = np.delete(X_encoded, indexs[0], axis=0)
X_encoded = X_encoded.astype(float)
y_encoded = np.delete(y_encoded, indexs[0], axis=0)
# Re-encode so the remaining labels are consecutive integers starting at 0.
y_encoded = label_encoder.fit_transform(y_encoded)

# 4. Save to a .mat file: 'X' holds the features, 'Y' the labels reshaped
# into a column vector.
data_dict = {'X': X_encoded, 'Y': y_encoded.reshape(-1, 1)}
savemat('Nursery.mat', data_dict)

# Hold out 30% of the encoded samples as a test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.3,
    random_state=42,
)

# Feature standardization is intentionally left disabled in this cell:
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Build and fit an MLP with a single hidden layer of 20 units.
mlp = MLPClassifier(
    hidden_layer_sizes=(20,),
    max_iter=100,
    random_state=42,
)
mlp.fit(X_train, y_train)

# Hard class predictions and per-class probabilities for the test set.
y_pred = mlp.predict(X_test)
index_pred_proba = mlp.predict_proba(X_test)

# Report accuracy, the per-class report, and the confusion matrix.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print(index_pred_proba.shape)
# Trailing bare expression: displayed as the cell result in a notebook.
index_pred_proba

## numpy转mat的demo

In [None]:
import numpy as np
from scipy.io import savemat

# Example data: a 3x3 feature matrix and a single row of three labels.
X = np.array([
    [1.1, 2.2, 3.3],
    [4.4, 5.5, 6.6],
    [7.7, 8.8, 9.9],
])
y = np.array([[0, 1, 0]])
print(X.shape, y.shape, sep="\n")

# Turn the labels into a column vector before saving.
y = y.reshape(y.size, 1)

# Save both arrays into a .mat file, stored as a dict keyed by variable name.
data_dict = {'X': X, 'y': y}
savemat('dataset.mat', data_dict)

print("数据已保存为 dataset.mat 文件。")

from scipy.io import loadmat

# Read the file back and pull out the two arrays.
loaded_data = loadmat('dataset.mat')
XX, YY = loaded_data['X'], loaded_data['y']

# Show what was loaded, then the shape of the label column as a 1-D array.
print("加载的 X：\n", XX)
print("加载的 y：\n", YY)
print(YY[:, 0].shape)

### 测试保存的mat

In [4]:
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the dataset from 'Nursery.mat' (the 'X' and 'Y' entries of the file).
mat_data = sio.loadmat('Nursery.mat')
X = mat_data['X'].squeeze()  # features, with any size-1 axes removed
y = mat_data['Y'][:, 0]  # labels: first column of the stored 'Y' array
print(X.shape, y.shape, sep="\n")

# Hold out 30% of the samples as a test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Project helper; only `counts` is printed here.
classes, counts = get_classes_indexes_counts(y_test)
print(counts)

# Standardize features: fit the scaler on the training split only, then
# apply the same transform to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build and fit an MLP with two hidden layers (10 and 20 units).
mlp = MLPClassifier(
    hidden_layer_sizes=(10, 20),
    max_iter=1000,
    random_state=42,
)
mlp.fit(X_train, y_train)

# Hard class predictions and per-class probabilities for the test set.
y_pred = mlp.predict(X_test)
index_pred_proba = mlp.predict_proba(X_test)

# Report accuracy, the per-class report, and the confusion matrix.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print(index_pred_proba.shape)
# Trailing bare expression: displayed as the cell result in a notebook.
index_pred_proba

FileNotFoundError: [Errno 2] No such file or directory: 'Nursery1.mat'