### 从UCI加载数据，转化格式，转换为数字形式，并保存为mat数据 
X为特征数据：num_instances*features
Y为label：(num_instances,1)，是一个列向量

In [5]:

import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import pandas as pd
from ucimlrepo import fetch_ucirepo
from scipy.io import savemat
# Fetch a UCI dataset, integer-encode its features and labels, save them to a
# .mat file, and train a small MLP on the result as a quick sanity check.
#
# UCI repository ids this cell has been used with:
#   76  Nursery
#   30  Contraceptive Method Choice
#   146 Satellite
#   33  Dermatology
#   23  Chess
uci_dataset = fetch_ucirepo(id=23)
data = uci_dataset.data
print(data.headers.tolist())  # column names of the fetched table

# 1. Separate features and targets.
X = data.features.values
X_columns = data.features.columns.tolist()
y = data.targets.values  # 2-D (n, 1) array; flattened before encoding below

# 2. Integer-encode every feature column.
X_encoded = np.copy(X)
# Optional: drop rows (axis 0) before encoding.
# X_encoded = np.delete(X_encoded, [33,34,35,36,262,263,264,265], axis=0)
for i in range(X.shape[1]):
    # Each column gets its own throwaway encoder so that `label_encoder`
    # below keeps the mapping of the target labels only.
    X_encoded[:, i] = LabelEncoder().fit_transform(X_encoded[:, i])
# The encoded codes are stored as integers.
X_encoded = X_encoded.astype(int)

# 3. Integer-encode the labels. LabelEncoder expects a 1-D array; passing the
#    (n, 1) column directly triggers a DataConversionWarning, hence ravel().
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y.ravel())
# Optional: keep the labels aligned with the row deletion above.
# y_encoded = np.delete(y_encoded, [33,34,35,36,262,263,264,265], axis=0)
# Optional: remove every sample of one class (here class 2).
# indexs = np.where(y_encoded == 2)
# X_encoded = np.delete(X_encoded, indexs[0], axis=0)
# y_encoded = np.delete(y_encoded, indexs[0], axis=0)

# Re-index so the labels are contiguous (0..k-1) even if rows/classes were
# dropped above; this is a no-op when nothing was removed. A separate encoder
# is used so label_encoder.classes_ still maps codes to the original labels.
y_encoded = LabelEncoder().fit_transform(y_encoded).astype(int)

# Save as a .mat file: X is (num_instances, num_features), Y is a column vector.
data_dict = {'X': X_encoded, 'Y': y_encoded.reshape(-1, 1)}
savemat('Chess2.mat', data_dict)

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.3, random_state=42)

# Feature standardization (currently disabled).
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Build and train the MLP model.
mlp = MLPClassifier(hidden_layer_sizes=(15,), max_iter=100, random_state=42, learning_rate_init=0.001)
mlp.fit(X_train, y_train)
index_pred_proba = mlp.predict_proba(X_test)
# Predict hard labels for the evaluation below.
y_pred = mlp.predict(X_test)

# Report the results.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))
# Print the confusion matrix.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print(index_pred_proba.shape)
index_pred_proba

['white-king-file', 'white-king-rank', 'white-rook-file', 'white-rook-rank', 'black-king-file', 'black-king-rank', 'white-depth-of-win']


  y = column_or_1d(y, warn=True)


准确率: 0.4593085422359511

分类报告:
               precision    recall  f1-score   support

           0       0.90      0.97      0.94       820
           1       0.37      0.36      0.36       426
           2       0.37      0.24      0.29       884
           3       0.46      0.44      0.45       626
           4       0.27      0.21      0.24       147
           5       0.48      0.50      0.49        68
           6       0.48      0.64      0.55      1397
           7       0.35      0.39      0.37       522
           8       0.50      0.03      0.06        29
           9       0.21      0.04      0.07       192
          10       0.38      0.48      0.42       180
          11       0.57      0.14      0.23       118
          12       0.24      0.20      0.22       577
          13       0.41      0.47      0.44      1237
          14       0.50      0.23      0.32        26
          15       0.40      0.37      0.38      1082
          16       0.49      0.60      0.54      



array([[9.70941352e-14, 6.31317102e-02, 1.03932421e-01, ...,
        7.60600639e-02, 1.92285936e-01, 1.85923438e-03],
       [1.46514314e-12, 1.79564016e-05, 1.68629826e-03, ...,
        1.41101762e-02, 6.27054277e-06, 2.61333616e-08],
       [2.05180601e-09, 1.20052959e-03, 6.72158615e-02, ...,
        1.57311302e-01, 1.56115986e-08, 6.32055249e-10],
       ...,
       [3.05684107e-12, 3.97844365e-04, 6.05971942e-02, ...,
        1.50135720e-01, 2.63168687e-09, 3.13058540e-11],
       [8.04120795e-01, 4.86604060e-04, 3.79029110e-03, ...,
        1.19521272e-02, 8.04300139e-06, 7.67657100e-05],
       [2.98150340e-05, 4.16257026e-02, 1.87096464e-01, ...,
        5.32350902e-02, 8.73142255e-09, 3.07115178e-07]])

## numpy转mat的demo

In [None]:
import numpy as np
from scipy.io import savemat

# Toy data: a 3x3 float feature matrix and one row holding three labels.
X = np.array([
    [1.1, 2.2, 3.3],
    [4.4, 5.5, 6.6],
    [7.7, 8.8, 9.9],
])
y = np.array([[0, 1, 0]])
print(np.shape(X))
print(np.shape(y))

# Turn the label row into a column vector before saving.
y = np.reshape(y, (-1, 1))

# Write both arrays to a .mat file; the dict keys become the variable names.
data_dict = dict(X=X, y=y)
savemat('dataset.mat', data_dict)

print("数据已保存为 dataset.mat 文件。")

from scipy.io import loadmat

# Read the file back and pull out the two stored variables.
loaded_data = loadmat('dataset.mat')
XX, YY = (loaded_data[key] for key in ('X', 'y'))

# Show what was loaded, plus the shape of the labels once flattened to 1-D.
print("加载的 X：\n", XX)
print("加载的 y：\n", YY)
print(YY[:, 0].shape)

### 测试保存的mat

In [None]:
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import confusion_matrix
from utils.dataset_utils import get_classes_indexes_counts
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
# Load a dataset previously saved as .mat ('X' = features, 'Y' = label column)
# and evaluate a small MLP on a 70/30 hold-out split.
mat_data = sio.loadmat('Satellite.mat')
X, y = mat_data['X'], mat_data['Y'][:, 0]  # [:, 0] flattens the (n, 1) labels

# Class distribution of the full dataset.
# NOTE(review): get_classes_indexes_counts comes from utils.dataset_utils;
# presumably it returns the class ids and their sample counts - confirm.
classes, counts = get_classes_indexes_counts(y)
print(counts)
print(X.shape)
print(y.shape)

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Class distribution of the test split.
classes, counts = get_classes_indexes_counts(y_test)
print(counts)

# Feature standardization (currently disabled).
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Build and fit the classifier: one hidden layer with 15 units.
mlp = MLPClassifier(
    hidden_layer_sizes=(15,),
    max_iter=200,
    random_state=42,
    learning_rate_init=0.001,
)
mlp.fit(X_train, y_train)
print(mlp.classes_)

# Hard predictions and per-class probabilities for the test split.
y_pred = mlp.predict(X_test)
index_pred_proba = mlp.predict_proba(X_test)
y_pred_labels = y_pred  # labels are already integer class ids, no argmax needed

# Report accuracy, the per-class report and the confusion matrix.
print("准确率:", accuracy_score(y_test, y_pred_labels))
print("\n分类报告:\n", classification_report(y_test, y_pred_labels))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_labels))

# Shape of the probability matrix and the sum of each row.
print(index_pred_proba.shape)
res = index_pred_proba.sum(axis=1)
print(res)

### k-folds交叉验证


In [None]:
from scipy.stats import gmean
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from utils.dataset_utils import get_classes_indexes_counts
import scipy.io as sio  # 从.mat文件中读取数据集
# Evaluate an MLP on a saved .mat dataset in two ways:
#   (1) 5-fold cross-validation over the whole dataset,
#   (2) a single 70/30 hold-out split.
# Both report per-class recall, G-mean and macro one-vs-one ROC AUC (mAUC).

# The hold-out experiment below needs train_test_split, which this cell's own
# import list omits; import it here so the cell does not depend on an earlier
# cell having been run.
from sklearn.model_selection import train_test_split

mat_data = sio.loadmat('../data/dataset/Connect4.mat')
# Extract the stored variables. mat_data['Y'] has shape (n, 1); [:, 0] turns
# it into a 1-D label vector of shape (n,).
dataset_x = mat_data['X']
dataset_y = mat_data['Y'][:, 0]
X, y = dataset_x, dataset_y

# Class distribution of the full dataset.
# NOTE(review): get_classes_indexes_counts comes from utils.dataset_utils;
# presumably it returns the class ids and their sample counts - confirm.
classes, counts = get_classes_indexes_counts(y)
print("每种类别的分布：", counts)
print("#########################5-folds交叉验证#########################")
# MLP model with one hidden layer of 20 units.
mlp_model = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42, learning_rate_init=0.001)

# 5-fold cross-validation. The fixed random_state makes both cross_val_predict
# calls below use exactly the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-fold hard predictions.
y_pred = cross_val_predict(mlp_model, X, y, cv=kf)
# Out-of-fold class probabilities. cross_val_score (used here previously)
# returns one accuracy per fold, not probabilities, so it cannot feed
# roc_auc_score; method="predict_proba" yields an (n_samples, n_classes) array.
y_pred_proba = cross_val_predict(mlp_model, X, y, cv=kf, method="predict_proba")
# Text classification report (kept for inspection, not printed).
report = classification_report(y, y_pred)
#print(report)
cm = confusion_matrix(y, y_pred)
# Per-class recall: correctly predicted samples of a class / samples of that class.
recall_per_class = cm.diagonal() / cm.sum(axis=1)

# G-mean: geometric mean of the per-class recalls.
geometric_mean = gmean(recall_per_class)
# ROC AUC, one-vs-one with macro averaging.
auc_ovo_macro = roc_auc_score(y, y_pred_proba, multi_class="ovo", average="macro")
print(f"最终的分类结果：Recall_Per_Class{recall_per_class}，Gmean：{geometric_mean}，mAUC：{auc_ovo_macro}")


x_train, x_test, y_train, y_test = train_test_split(dataset_x, dataset_y, test_size=0.3, random_state=5)
# Class distribution of each split.
classes_train, counts_train = get_classes_indexes_counts(y_train)
print("训练集每种类别的分布：", counts_train)
classes_test, counts_test = get_classes_indexes_counts(y_test)
print("测试集每种类别的分布：", counts_test)
print("#########################NoS原始数据集训练预测结果#########################")
mlp = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42, learning_rate_init=0.001)
mlp.fit(x_train, y_train)

# Predict on the held-out test split.
ind_pred = mlp.predict(x_test)  # hard labels, used for the confusion matrix
index_pred_proba = mlp.predict_proba(x_test)  # probabilities, used for mAUC
###################### G-mean #########################
cm = confusion_matrix(y_test, ind_pred)

# Per-class recall: correctly predicted samples of a class / samples of that class.
recall_per_class = cm.diagonal() / cm.sum(axis=1)

geometric_mean = gmean(recall_per_class)
###################### mAUC #######################
# ROC AUC, one-vs-one with macro averaging.
auc_ovo_macro = roc_auc_score(y_test, index_pred_proba, multi_class="ovo", average="macro")
print(f"最终的分类结果：Recall_Per_Class{recall_per_class}，Gmean：{geometric_mean}，mAUC：{auc_ovo_macro}")

In [None]:
import random

# Default parameters used by generate_sequence().
lambda_ = 1.2  # rate parameter (lambda) of the exponential distribution
threshold = 1.0  # cutoff: a draw below it yields 1, any other draw yields 0


def generate_sequence(n, *, rate=None, cutoff=None):
    """Return a list of n values, each 0 or 1, from thresholded exponential draws.

    Each element is produced by drawing one value with random.expovariate()
    and emitting 1 when the draw is strictly below the cutoff, else 0.

    Args:
        n: Number of elements to generate; 0 gives an empty list.
        rate: Rate parameter passed to random.expovariate(). When None, the
            module-level ``lambda_`` is used (looked up at call time).
        cutoff: Comparison threshold. When None, the module-level
            ``threshold`` is used (looked up at call time).

    Returns:
        A list of length n containing only 0s and 1s.
    """
    # Resolve the defaults at call time so later changes to the module-level
    # settings are still picked up, exactly as before.
    if rate is None:
        rate = lambda_
    if cutoff is None:
        cutoff = threshold
    return [1 if random.expovariate(rate) < cutoff else 0 for _ in range(n)]


# Generate and show a sequence of 100 zeros and ones.
sequence = generate_sequence(100)
print(sequence)
# randint(1, 1) has a single possible outcome, 1; this is the cell's displayed value.
random.randint(1,1)