In [None]:
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier as XGBC  # sklearn中的xgboost模块
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb  # xgboost自己独立的库
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import mean_squared_error as MSE
import pandas as pd
import numpy as np
import data_read as dr
import os
import matplotlib.pyplot as plt

In [None]:
# Resolve the project root as the parent of the notebook's working directory;
# the other cells build their data and result paths from base_path.
current_path = os.getcwd()
base_path = os.path.split(current_path)[0]
base_path

In [None]:
# Repeated hold-out evaluation of XGBoost on every texture-feature CSV in
# data_path. For each CSV the classifier is trained and scored n_repeats times;
# the per-file mean/std of the accuracy and the raw accuracies are written to
# two sheets of the workbook at out_path.

# Data locations. os.path.join keeps the paths valid on non-Windows systems too.
data_path = os.path.join(base_path, "data", "01feature", "texture")
out_path = os.path.join(base_path, "data", "02texture_win_select", "wv2_texture_2m_win2.xlsx")

# os.listdir returns entries in arbitrary order; sort them so the row order of
# the results is reproducible.
# NOTE(review): the plotting cells appear to assume rows are ordered by window
# size - confirm that the sorted file names follow that order.
files = sorted(os.listdir(data_path))
print(files)

# Booster settings are the same for every run, so they are built once.
# NOTE(review): num_class=8 presumes the label column holds integer classes
# 0..7 - confirm against the data.
param = {'verbosity': 0,
         'objective': 'multi:softmax',
         'num_class': 8,
         'eta': 0.1}
num_boost_round = 500
n_repeats = 100  # number of random train/test splits per CSV

AC = []        # AC[k] holds the n_repeats accuracies obtained for csv_name[k]
csv_name = []  # CSV file names without extension, in processing order
for eachfile in files:
    if not eachfile.endswith('.csv'):
        continue

    # (1) Read the data
    csv_path = os.path.join(os.path.abspath(data_path), eachfile)  # absolute path of this CSV
    # splitext removes only the extension, so names containing extra dots stay intact
    csv_name.append(os.path.splitext(eachfile)[0])
    data = dr.read_data_from_csv(csv_path)

    # (2) Pre-processing: tree ensembles need no feature scaling, so only rows
    # containing inf/NaN are dropped. replace() returns a new object, leaving
    # `data` untouched.
    data_ = data.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    x = data_.iloc[:, 1:]  # every column after the first is used as a feature
    y = data_.iloc[:, 0]   # first column is used as the class label

    # (3) Train and evaluate on repeated stratified splits
    scores = []
    for i in range(n_repeats):
        # random_state=i gives a different split per repetition while keeping
        # the whole experiment reproducible across runs
        xtrain, xtest, ytrain, ytest = train_test_split(
            x, y, test_size=0.33, stratify=y, random_state=i)
        dtrain = xgb.DMatrix(xtrain, label=ytrain)
        dtest = xgb.DMatrix(xtest, label=ytest)

        clf = xgb.train(param, dtrain, num_boost_round)
        y_hat = clf.predict(dtest)

        scores.append(accuracy_score(ytest, y_hat))

    AC.append(scores)
    print('准确率AC=', AC)

if not AC:
    # Without this guard np.mean(..., axis=1) below fails with an obscure axis error
    raise FileNotFoundError("no .csv files found in " + data_path)

AC_mean = np.mean(AC, axis=1)
AC_std = np.std(AC, axis=1)

# One row per CSV: mean and standard deviation of the accuracy
result_accuracy = pd.DataFrame({'AC_mean': AC_mean, 'AC_std': AC_std}, index=csv_name)
# One row per CSV, one column per repetition
AC_out = pd.DataFrame(AC, index=csv_name)

# Make sure the target folder exists before the workbook is written
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with pd.ExcelWriter(out_path, mode='w', engine='openpyxl') as writer:
    result_accuracy.to_excel(writer, sheet_name='result_accuracy', index=True, startcol=0, startrow=0)
    AC_out.to_excel(writer, sheet_name='AC', index=True, startcol=0, startrow=0)

In [None]:
# Load the accuracy summary back from the workbook. The 'result_accuracy' sheet
# is written with index=True, so its first column holds the row labels;
# index_col=0 restores them as the index instead of an 'Unnamed: 0' data column.
data_ac = pd.read_excel(out_path, sheet_name='result_accuracy', index_col=0)
data_ac

In [None]:
# x axis: odd values 3, 5, ..., 29 (labelled as window size in the plot)
x = np.arange(3, 30, 2)

# Mean accuracy and its standard deviation, as NumPy arrays and as plain lists
y1_ = data_ac['AC_mean'].values
y2_ = data_ac['AC_std'].values
y1 = data_ac['AC_mean'].values.tolist()
y2 = data_ac['AC_std'].values.tolist()

# Upper envelope: mean accuracy plus one standard deviation
y5 = y2_ + y1_
y5

In [None]:
def fun(point1, point2):
    """Return slope and intercept of the straight line through two points.

    Parameters
    ----------
    point1, point2 : indexable
        Points given as (x, y); only elements 0 and 1 are read.

    Returns
    -------
    tuple
        (a, b) such that y = a * x + b passes through both points.
        When both points share the same x value the slope is undefined
        and (0, 0) is returned instead.
    """
    # Vertical line: no finite slope exists, fall back to zeros
    if point1[0] == point2[0]:
        return 0, 0

    slope = (point1[1] - point2[1]) / (point1[0] - point2[0])
    intercept = point1[1] - slope * point1[0]
    return slope, intercept

In [None]:
# Straight baseline joining the first and the last mean-accuracy point,
# evaluated at every x position
p1, p2 = (x[0], y1_[0]), (x[-1], y1_[-1])
k, b = fun(p1, p2)
y3 = k * x + b

In [None]:
import matplotlib.patches as mpc

# Figure: mean accuracy per window size, with the straight baseline (y3) and
# the mean-plus-standard-deviation envelope (y5).
plt.figure(num=1, figsize=(6.5, 3.5), dpi=300)
ax1 = plt.gca()

# Blue band between the mean curve and the baseline; yellow band between the
# mean curve and mean + std
ax1.fill_between(x, y1_, y3, facecolor='#6495ED', alpha=0.9)
ax1.fill_between(x, y1_, y5, facecolor='#FFFF00', alpha=0.9)

# Axes range, ticks and tick labels
ax1.set_xlim(x[0] - 2, x[-1] + 2)
ax1.set_xticks(x[::1])
ax1.set_xticklabels(x[::1], fontsize=10, horizontalalignment='center')
ax1.set(ylim=[0.63, 0.90])
ax1.set_yticks([0.60, 0.70, 0.80, 0.90])
ax1.set_yticklabels(['60%', '70%', '80%', '90%'], fontsize=8)

# Outlines of the three curves
ax1.plot(x, y1_, color='black', linewidth=0.8, alpha=0.8)
ax1.plot(x, y3, color='black', linewidth=0.8, alpha=0.8)
ax1.plot(x, y5, color='black', linewidth=0.8, alpha=0.8)

# Axis titles and frame styling
ax1.set_xlabel(r'Window Size(pixel)', fontsize=8)
ax1.set_ylabel(r'Mean Accuracy', fontsize=8)
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.tick_params(axis='x', direction='in', length=3, width=0.7, pad=5, labelsize=8, labelrotation=0)
ax1.tick_params(axis='y', direction='in', length=3, width=0.7, pad=5, labelsize=8)

# Rectangle used as the legend handle for the standard-deviation band
handles = [mpc.Rectangle((0, 0), 1, 2, fc="#FFFF00", ec="black",
                         lw=0.5, alpha=0.8)]
ax1.legend(handles,
           ['standard deviation'],
           fontsize=9,
           loc=2,
           framealpha=0,
           handlelength=1,
           handletextpad=0.5)

# Value label and dashed vertical guide line for every data point
for x_, y_ in zip(x, y1_):
    ax1.text(x_ - 0.5, y_ + 0.01, '{:.1%}'.format(y_), fontsize=7, rotation=10, horizontalalignment='center')
    ax1.vlines(x_, 0, y_, linestyles='dashed', colors='dimgray', linewidth=0.9)

# Build the output path portably and make sure the folder exists; savefig
# does not create missing directories.
fig_path = os.path.join(base_path, "result", "texture_select_win.png")
os.makedirs(os.path.dirname(fig_path), exist_ok=True)
plt.savefig(fig_path)
plt.show()