In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Load the trained classifier from disk.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load model files that come from a trusted source.
import joblib
model_path = '../models/random_forest_model.pkl'
best_clf = joblib.load(model_path)

# Load the evaluation data and separate the target column from the features.
dataset_path = '../data/processed/reduced_features_data.csv'
data = pd.read_csv(dataset_path)
y = data['Label']
X = data.drop(columns='Label')

# Run the loaded classifier over every feature row.
y_pred = best_clf.predict(X)

# Tabulate the true labels against the predictions and print the raw counts.
conf_matrix = confusion_matrix(y_true=y, y_pred=y_pred)
print(conf_matrix)

# Draw the confusion matrix as an annotated heatmap, save it, then display it.
# NOTE(review): the tick labels assume a binary problem whose class order in
# conf_matrix is BENIGN first, MALICIOUS second - confirm against 'Label'.
class_names = ['BENIGN', 'MALICIOUS']

# savefig does not create missing directories, so make sure the output
# folder exists before writing the image.
os.makedirs('../results', exist_ok=True)

plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.title('Confusion Matrix')
plt.savefig('../results/confusion_matrix.png')
plt.show()

# Build the classification report and print it.
report = classification_report(y, y_pred)
print(report)

# Save the report to disk. The output folder is created if it is missing,
# and the encoding is pinned so the file does not depend on the platform's
# default locale encoding.
os.makedirs('../results', exist_ok=True)
with open('../results/classification_report.txt', 'w', encoding='utf-8') as f:
    f.write(report)
