In [4]:
import numpy as np
import pandas as pd
import os
import glob

In [5]:
msg_paths = sorted(glob.glob("../data/results/*.csv"))

df = pd.read_csv(msg_paths[0])
df

Unnamed: 0,timestamp,TimeUS,Roll,Pitch,Yaw,Alt,Lat,Lng,Q1,Q2,Q3,Q4,label
0,2024-11-12 14:02:01,287589565,0.88,0.73,78.79,70.110001,35.928239,128.871155,0.772733,0.001907,0.009879,0.634652,0
1,2024-11-12 14:02:01,287688106,0.88,0.73,78.79,70.199997,35.928239,128.871155,0.772718,0.001856,0.009852,0.634671,0
2,2024-11-12 14:02:02,287788037,0.87,0.73,78.79,70.099998,35.928239,128.871155,0.772720,0.001804,0.009811,0.634669,0
3,2024-11-12 14:02:02,287888029,0.87,0.73,78.79,70.290001,35.928239,128.871155,0.772763,0.001860,0.009781,0.634617,0
4,2024-11-12 14:02:02,287988089,0.88,0.72,78.78,70.139999,35.928239,128.871155,0.772776,0.001957,0.009758,0.634601,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
125966,2024-11-15 14:50:43,1370091355,0.34,-0.06,253.20,72.439995,35.928328,128.871678,0.596299,0.001361,-0.002779,-0.802756,1
125967,2024-11-15 14:50:43,1370191390,0.35,-0.06,253.23,72.610001,35.928328,128.871678,0.596457,0.001387,-0.002793,-0.802639,1
125968,2024-11-15 14:50:43,1370291411,0.35,-0.05,253.24,72.639999,35.928328,128.871678,0.596554,0.001480,-0.002749,-0.802567,1
125969,2024-11-15 14:50:43,1370391419,0.35,-0.05,253.26,72.659996,35.928328,128.871678,0.596663,0.001463,-0.002817,-0.802486,1


In [6]:
# from pycaret.classification import *

# df = pd.read_csv(msg_paths[1])
# n_df = df.drop(["timestamp", "TimeUS"], axis=1)

# clf1 = setup(data=n_df, target='label', session_id=42, verbose=False)

# tree_models = ['rf', 'et', 'dt', 'ada', 'gbc']
# best_model = compare_models(include=tree_models)

# sample_df = df.sample(n=3000, random_state=42)  # 3000~5000개로 충분

# # 4. 모델 해석 (SHAP 기반)
# # interpret_model(best_model)

# # 5. 평가 지표 확인
# # evaluate_model(best_model)


In [None]:
from pycaret.classification import *
import shap
import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# 전체 결과를 저장할 리스트
all_shap_results = []

for i in range(len(msg_paths)):
    msg_name = os.path.basename(msg_paths[i]).split('_')[0]
    print(f"{msg_name} Tree모델 학습 및 Best Model SHAP 분석")

    try:
        # 데이터 로딩 및 정제
        df = pd.read_csv(msg_paths[i])
        n_df = df.drop(["timestamp", "TimeUS"], axis=1)

        # PyCaret 학습
        clf1 = setup(data=n_df, target='label', session_id=42, verbose=False)
        tree_models = ['rf', 'et', 'dt', 'ada', 'gbc']
        best_model = compare_models(include=tree_models, sort="F1")

        # 클래스 균형 샘플링
        normal_sample = n_df[n_df['label'] == 0].sample(n=1500, random_state=42)
        anomaly_sample = n_df[n_df['label'] == 1].sample(n=1500, random_state=42)
        sample_df = pd.concat([normal_sample, anomaly_sample])
        X = sample_df.drop(columns=['label'])

        # SHAP 계산
        explainer = shap.TreeExplainer(best_model)
        shap_values = explainer.shap_values(X)

        if isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
            shap_array = shap_values[:, :, 1]
        elif isinstance(shap_values, list):
            shap_array = np.array(shap_values[1])
        else:
            shap_array = np.array(shap_values)

        # 중요도 계산 및 필드 이름 추가
        shap_importance = pd.DataFrame({
            'feature': X.columns,
            'mean_abs_shap': np.abs(shap_array).mean(axis=0)
        })
        shap_importance['field'] = msg_name
        # shap_importance = shap_importance.sort_values(by='mean_abs_shap', ascending=False)
        shap_importance['shap_ratio'] = shap_importance['mean_abs_shap'] / shap_importance['mean_abs_shap'].sum()
        shap_importance = shap_importance[['field', 'feature', 'shap_ratio']]

        # 중요도 기준 정렬
        shap_importance = shap_importance.sort_values(by='shap_ratio', ascending=False)

        # 누적 저장
        all_shap_results.append(shap_importance)

    except Exception as e:
        print(f"XXXXX {msg_name} 분석 실패: {e} XXXXX")
        continue

# 전체 통합 테이블 생성 및 저장
all_shap_df = pd.concat(all_shap_results, ignore_index=True)
os.makedirs("shap_results", exist_ok=True)
all_shap_df.to_csv("shap_results/all_fields_shap_importance_ratio.csv", index=False)
print("\n 모든 필드 SHAP 중요도 통합 저장 완료: shap_results/all_fields_shap_importance_ratio.csv")



AHR2 Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.714
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.667
ada,Ada Boost Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.36
gbc,Gradient Boosting Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.731
dt,Decision Tree Classifier,0.9999,0.9999,0.9999,0.9999,0.9999,0.9998,0.9998,0.047


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

ATT Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
from pycaret.classification import *
import shap
import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# 전체 결과를 저장할 리스트
all_shap_results = []
all_model_results = []


for i in range(len(msg_paths)):
    msg_name = os.path.basename(msg_paths[i]).split('_')[0]
    print(f"{msg_name} Tree모델 학습 및 Best Model SHAP 분석")

    try:
        # 데이터 로딩 및 정제
        df = pd.read_csv(msg_paths[i])
        n_df = df.drop(["timestamp", "TimeUS"], axis=1)

        # PyCaret 학습
        clf1 = setup(data=n_df, target='label', session_id=42, verbose=False)
        tree_models = ['rf', 'et', 'dt', 'ada', 'gbc']
        best_model = compare_models(include=tree_models, sort="F1")

        # 모델 성능 테이블 추출 및 필드명 추가
        model_result = pull()
        model_result['field'] = msg_name
        model_result = model_result[['field', 'Model', 'Accuracy', 'AUC', 'Recall', 'Prec.', 'F1', 'Kappa', 'MCC', 'TT (Sec)']]
        all_model_results.append(model_result)


        # 클래스 균형 샘플링
        normal_sample = n_df[n_df['label'] == 0].sample(n=1500, random_state=42)
        anomaly_sample = n_df[n_df['label'] == 1].sample(n=1500, random_state=42)
        sample_df = pd.concat([normal_sample, anomaly_sample])
        X = sample_df.drop(columns=['label'])

        # SHAP 계산
        explainer = shap.TreeExplainer(best_model)
        shap_values = explainer.shap_values(X)

        if isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
            shap_array = shap_values[:, :, 1]
        elif isinstance(shap_values, list):
            shap_array = np.array(shap_values[1])
        else:
            shap_array = np.array(shap_values)

        # 중요도 계산 및 필드 이름 추가
        shap_importance = pd.DataFrame({
            'feature': X.columns,
            'mean_abs_shap': np.abs(shap_array).mean(axis=0)
        })
        shap_importance['field'] = msg_name
        # shap_importance = shap_importance.sort_values(by='mean_abs_shap', ascending=False)
        shap_importance['shap_ratio'] = shap_importance['mean_abs_shap'] / shap_importance['mean_abs_shap'].sum()
        shap_importance = shap_importance[['field', 'feature', 'shap_ratio']]

        # 중요도 기준 정렬
        shap_importance = shap_importance.sort_values(by='shap_ratio', ascending=False)

        # 누적 저장
        all_shap_results.append(shap_importance)

    except Exception as e:
        print(f"XXXXX {msg_name} 분석 실패: {e} XXXXX")
        continue

# 전체 통합 테이블 생성 및 저장
all_shap_df = pd.concat(all_shap_results, ignore_index=True)
os.makedirs("shap_results", exist_ok=True)
all_shap_df.to_csv("shap_results/all_fields_shap_importance_ratio.csv", index=False)
print("\n 모든 필드 SHAP 중요도 통합 저장 완료: shap_results/all_fields_shap_importance_ratio.csv")

all_model_df = pd.concat(all_model_results, ignore_index=True)
all_model_df.to_csv("shap_results/all_fields_model_comparison.csv", index=False)
print("\n 모든 필드 모델 성능 비교 결과 저장 완료: shap_results/all_fields_model_comparison.csv")




AHR2 Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.788
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.369
ada,Ada Boost Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.356
gbc,Gradient Boosting Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.046
dt,Decision Tree Classifier,0.9999,0.9999,0.9999,0.9999,0.9999,0.9998,0.9998,0.043


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

ATT Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9993,1.0,0.9989,0.9997,0.9993,0.9986,0.9986,0.27
rf,Random Forest Classifier,0.9987,1.0,0.9978,0.9996,0.9987,0.9973,0.9973,0.499
dt,Decision Tree Classifier,0.9977,0.9977,0.998,0.9974,0.9977,0.9954,0.9954,0.038
gbc,Gradient Boosting Classifier,0.9931,0.9996,0.9921,0.9942,0.9932,0.9862,0.9862,0.752
ada,Ada Boost Classifier,0.983,0.9988,0.9856,0.9808,0.9832,0.9659,0.966,0.191


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

BARO Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.28
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.431
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.023
ada,Ada Boost Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.273
gbc,Gradient Boosting Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.082


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

BAT Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.376
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.17
ada,Ada Boost Classifier,1.0,1.0,0.9999,1.0,1.0,0.9999,0.9999,0.292
dt,Decision Tree Classifier,0.9999,0.9999,0.9998,0.9999,0.9999,0.9997,0.9997,0.039
gbc,Gradient Boosting Classifier,0.9999,1.0,0.9999,1.0,0.9999,0.9999,0.9999,1.593


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

CANS Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7691,0.8415,0.7789,0.7674,0.773,0.5381,0.5383,0.062
rf,Random Forest Classifier,0.756,0.8262,0.7668,0.7542,0.7603,0.512,0.5122,0.151
dt,Decision Tree Classifier,0.7285,0.7284,0.7324,0.7307,0.7314,0.4569,0.4572,0.02
gbc,Gradient Boosting Classifier,0.5695,0.6151,0.867,0.5463,0.6698,0.1339,0.1705,0.093
ada,Ada Boost Classifier,0.539,0.5687,0.8982,0.5253,0.6629,0.0714,0.104,0.031


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

CTRL Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9962,0.9996,0.9967,0.9957,0.9962,0.9923,0.9923,0.237
rf,Random Forest Classifier,0.9956,0.9994,0.9957,0.9955,0.9956,0.9912,0.9912,1.088
dt,Decision Tree Classifier,0.9928,0.9928,0.9929,0.9928,0.9929,0.9856,0.9856,0.067
gbc,Gradient Boosting Classifier,0.9928,0.9993,0.9933,0.9924,0.9928,0.9855,0.9855,1.673
ada,Ada Boost Classifier,0.9875,0.9986,0.9891,0.9862,0.9876,0.975,0.975,0.284


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

CTUN Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.616
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.267
dt,Decision Tree Classifier,0.9994,0.9994,0.9995,0.9992,0.9994,0.9987,0.9987,0.059
gbc,Gradient Boosting Classifier,0.9994,1.0,0.9998,0.9991,0.9994,0.9988,0.9988,2.088
ada,Ada Boost Classifier,0.9979,0.9999,0.9984,0.9974,0.9979,0.9958,0.9958,0.375


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

DCM Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9737,0.9955,0.9809,0.9674,0.9741,0.9474,0.9475,0.543
et,Extra Trees Classifier,0.9731,0.9959,0.9834,0.9641,0.9737,0.9463,0.9465,0.226
dt,Decision Tree Classifier,0.9633,0.9633,0.9626,0.9646,0.9636,0.9266,0.9266,0.038
gbc,Gradient Boosting Classifier,0.9583,0.9914,0.9763,0.9432,0.9595,0.9167,0.9172,0.97
ada,Ada Boost Classifier,0.9412,0.9846,0.9581,0.9277,0.9427,0.8823,0.8828,0.23


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

DSF Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.6052,0.6581,0.5376,0.6275,0.5788,0.2115,0.2139,0.102
dt,Decision Tree Classifier,0.5707,0.5705,0.5813,0.5739,0.5774,0.1411,0.1412,0.007
et,Extra Trees Classifier,0.6002,0.6551,0.5365,0.6206,0.5754,0.2014,0.2034,0.063
ada,Ada Boost Classifier,0.6021,0.6566,0.4356,0.661,0.5247,0.2069,0.2204,0.021
gbc,Gradient Boosting Classifier,0.6121,0.6634,0.3922,0.7101,0.505,0.2277,0.2551,0.059


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

DU32 Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.5055,0.4995,0.9901,0.5052,0.669,0.0016,0.0088,0.038
gbc,Gradient Boosting Classifier,0.506,0.5004,0.9888,0.5055,0.6689,0.0025,0.0127,0.013
et,Extra Trees Classifier,0.5062,0.5004,0.9879,0.5056,0.6688,0.003,0.0149,0.029
dt,Decision Tree Classifier,0.5062,0.5004,0.9879,0.5056,0.6688,0.003,0.0149,0.004
ada,Ada Boost Classifier,0.5058,0.5007,0.9881,0.5054,0.6687,0.0023,0.011,0.015


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

FTN1 Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9944,0.9997,0.9931,0.9959,0.9945,0.9888,0.9888,0.855
et,Extra Trees Classifier,0.9942,0.9997,0.9923,0.9961,0.9942,0.9884,0.9884,0.384
gbc,Gradient Boosting Classifier,0.9925,0.9995,0.9922,0.9929,0.9925,0.985,0.985,2.328
dt,Decision Tree Classifier,0.9874,0.9874,0.9876,0.9874,0.9875,0.9748,0.9748,0.101
ada,Ada Boost Classifier,0.9863,0.9988,0.9855,0.9874,0.9864,0.9726,0.9726,0.432


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

FTN2 Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9943,0.9997,0.9946,0.994,0.9943,0.9885,0.9885,7.072
et,Extra Trees Classifier,0.9938,0.9997,0.9946,0.993,0.9938,0.9875,0.9875,3.348
gbc,Gradient Boosting Classifier,0.9906,0.9995,0.9921,0.9894,0.9907,0.9813,0.9813,13.405
dt,Decision Tree Classifier,0.9883,0.9883,0.9886,0.9883,0.9884,0.9766,0.9766,0.758
ada,Ada Boost Classifier,0.9815,0.9983,0.9839,0.9796,0.9817,0.9631,0.9631,2.721


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

GPA Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,0.9999,1.0,0.9999,0.9999,0.247
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.191
dt,Decision Tree Classifier,0.9999,0.9999,0.9998,0.9999,0.9999,0.9997,0.9997,0.139
gbc,Gradient Boosting Classifier,0.9983,1.0,0.9988,0.9978,0.9983,0.9966,0.9966,0.292
ada,Ada Boost Classifier,0.9814,0.9986,0.9833,0.9799,0.9816,0.9627,0.9627,0.093


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

GPS Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.119
et,Extra Trees Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.074
dt,Decision Tree Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.011
ada,Ada Boost Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.014
gbc,Gradient Boosting Classifier,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.225


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

HEAT Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.7927,0.8938,0.7899,0.7976,0.7936,0.5853,0.5856,0.35
rf,Random Forest Classifier,0.7723,0.8765,0.7697,0.7771,0.7733,0.5445,0.5447,0.091
ada,Ada Boost Classifier,0.7614,0.8608,0.763,0.764,0.7633,0.5227,0.523,0.026
et,Extra Trees Classifier,0.7614,0.8456,0.7569,0.7674,0.7619,0.5228,0.523,0.055
dt,Decision Tree Classifier,0.7525,0.7529,0.7506,0.7572,0.7538,0.505,0.5052,0.006


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

IMU Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9921,0.9996,0.9926,0.9918,0.9922,0.9842,0.9842,2.139
rf,Random Forest Classifier,0.9907,0.9995,0.9905,0.991,0.9907,0.9813,0.9813,2.902
dt,Decision Tree Classifier,0.9847,0.9847,0.9846,0.985,0.9848,0.9693,0.9693,0.46
gbc,Gradient Boosting Classifier,0.9829,0.9984,0.9864,0.9799,0.9831,0.9658,0.9658,6.887
ada,Ada Boost Classifier,0.9782,0.9976,0.9818,0.9751,0.9785,0.9564,0.9564,1.689


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

IOMC Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.6995,0.6915,0.9844,0.6294,0.7679,0.3957,0.483,0.023
gbc,Gradient Boosting Classifier,0.6966,0.679,0.9799,0.6278,0.7653,0.3897,0.4742,0.062
rf,Random Forest Classifier,0.5899,0.6417,0.6622,0.5827,0.6198,0.1785,0.1803,0.336
dt,Decision Tree Classifier,0.6027,0.6027,0.6084,0.6063,0.6072,0.2053,0.2054,0.032
et,Extra Trees Classifier,0.5858,0.6666,0.6223,0.5845,0.6027,0.171,0.1714,0.238


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

XXXXX IOMC 분석 실패: Model type not yet supported by TreeExplainer: <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'> XXXXX
MAG Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.966,0.9956,0.9632,0.9693,0.9663,0.9321,0.9321,0.286
rf,Random Forest Classifier,0.9632,0.9947,0.9611,0.9659,0.9635,0.9264,0.9265,0.433
dt,Decision Tree Classifier,0.9499,0.9499,0.9503,0.9506,0.9504,0.8999,0.8999,0.037
gbc,Gradient Boosting Classifier,0.9456,0.9901,0.9367,0.9546,0.9456,0.8911,0.8913,0.935
ada,Ada Boost Classifier,0.9224,0.9786,0.9129,0.932,0.9224,0.8448,0.845,0.208


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

MAVC Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.5036,0.5107,0.5548,0.5071,0.5223,0.0059,0.006,0.009
rf,Random Forest Classifier,0.5108,0.5107,0.5191,0.514,0.5157,0.0215,0.0217,0.031
et,Extra Trees Classifier,0.5108,0.5107,0.5191,0.514,0.5157,0.0215,0.0217,0.025
dt,Decision Tree Classifier,0.5108,0.5107,0.5191,0.514,0.5157,0.0215,0.0217,0.004
ada,Ada Boost Classifier,0.5108,0.5107,0.5191,0.514,0.5157,0.0215,0.0217,0.01


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

XXXXX MAVC 분석 실패: Cannot take a larger sample than population when 'replace=False' XXXXX
MAV Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9647,0.9871,0.964,0.966,0.965,0.9294,0.9294,0.079
rf,Random Forest Classifier,0.9396,0.982,0.9389,0.9414,0.9401,0.8791,0.8792,0.148
dt,Decision Tree Classifier,0.8986,0.8988,0.8983,0.9006,0.8994,0.7972,0.7972,0.013
gbc,Gradient Boosting Classifier,0.6399,0.7074,0.8264,0.6055,0.6985,0.2772,0.2991,0.127
ada,Ada Boost Classifier,0.5431,0.5616,0.6398,0.5402,0.5839,0.0844,0.0869,0.041


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

MCU Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9997,1.0,0.9997,0.9997,0.9997,0.9993,0.9993,0.247
et,Extra Trees Classifier,0.9997,1.0,0.9997,0.9997,0.9997,0.9994,0.9994,0.157
dt,Decision Tree Classifier,0.9996,0.9998,0.9996,0.9996,0.9996,0.9992,0.9992,0.014
ada,Ada Boost Classifier,0.9996,1.0,0.9994,0.9998,0.9996,0.9991,0.9991,0.107
gbc,Gradient Boosting Classifier,0.9994,1.0,0.9991,0.9996,0.9994,0.9987,0.9987,0.555


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

MOTB Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.6257,0.6828,0.6855,0.6162,0.649,0.2506,0.2522,0.733
ada,Ada Boost Classifier,0.6169,0.6677,0.6667,0.6104,0.6373,0.2331,0.2341,0.19
rf,Random Forest Classifier,0.593,0.6464,0.6095,0.5945,0.6019,0.1857,0.1858,1.22
et,Extra Trees Classifier,0.5907,0.6409,0.6085,0.592,0.6001,0.181,0.1811,0.606
dt,Decision Tree Classifier,0.5722,0.5722,0.5734,0.5767,0.5751,0.1443,0.1443,0.056


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

PM Tree모델 학습 및 Best Model SHAP 분석


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.9615,0.9875,0.9686,0.9567,0.9622,0.923,0.9241,0.014
gbc,Gradient Boosting Classifier,0.9547,0.9886,0.9685,0.9441,0.9557,0.9094,0.9107,0.015
et,Extra Trees Classifier,0.9524,0.9895,0.9462,0.9601,0.9524,0.9049,0.9062,0.027
rf,Random Forest Classifier,0.9513,0.9901,0.9552,0.95,0.9518,0.9026,0.9042,0.033
dt,Decision Tree Classifier,0.94,0.9401,0.9305,0.9509,0.9396,0.8801,0.8819,0.004


Processing:   0%|          | 0/25 [00:00<?, ?it/s]

XXXXX PM 분석 실패: Cannot take a larger sample than population when 'replace=False' XXXXX

 모든 필드 SHAP 중요도 통합 저장 완료: shap_results/all_fields_shap_importance_ratio.csv

 모든 필드 모델 성능 비교 결과 저장 완료: shap_results/all_fields_model_comparison.csv
