# NER模型评估脚本

使用方法:
    python src/evaluate.py --model_path models/transformer_crf_model.h5 --processor_path models/transformer_crf_processor.pkl [--test_path data/test.txt]

## Notebook运行提示
- 代码已拆分为多个小单元, 按顺序运行即可在每一步观察输出与中间变量。
- 涉及 `Path(__file__)` 或相对路径的脚本会自动注入 `__file__` 解析逻辑, Notebook 环境下也能引用原项目资源。
- 可在每个单元下追加说明或参数试验记录, 以跟踪核心算法和数据处理步骤。


In [None]:
# Notebook路径自适应处理
import pathlib as _nb_pathlib
def _nb_resolve_file_path():
    """Define a module-level ``__file__`` when the runtime did not set one.

    Does nothing if ``__file__`` is already present in the module globals.
    Otherwise the resolved current working directory and each of its parents
    are probed, nearest first, for the project-relative location of
    ``evaluate.py``; the first existing match is stored as ``__file__``.
    When no directory contains the script, the path is built from the current
    working directory anyway, so ``__file__`` is always defined afterwards.
    """
    namespace = globals()
    if '__file__' in namespace:
        return

    relative_script = '09-practical-projects/03_自然语言处理项目/03_Transformer命名实体识别_中级/src/evaluate.py'
    start_dir = _nb_pathlib.Path.cwd().resolve()

    for base_dir in (start_dir, *start_dir.parents):
        script_path = base_dir / relative_script
        if script_path.exists():
            break
    else:
        # No ancestor holds the script: fall back to a path under the cwd.
        script_path = (start_dir / relative_script).resolve()

    namespace['__file__'] = str(script_path)
# Run the resolver once so that `__file__` is defined before later cells use it.
_nb_resolve_file_path()
# Remove the temporary pathlib alias from the namespace; the resolver reads
# this global, so it cannot be called again after this point.
del _nb_pathlib


In [None]:

import sys
import argparse
from pathlib import Path
import numpy as np
from tensorflow import keras
import tensorflow_addons as tfa
from sklearn.metrics import classification_report

# Two levels above this file (the parent of its containing directory) is
# treated as the project root.
project_root = Path(__file__).parent.parent
# Put the project root first on sys.path so `src.*` imports resolve against it.
sys.path.insert(0, str(project_root))

from src.data import prepare_ner_data

In [None]:


def parse_args(argv=None) -> argparse.Namespace:
    """Parse the options of the NER evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour for command-line use. Passing an explicit list allows
            the function to be called from a notebook or a test, where
            ``sys.argv`` belongs to the host process.

    Returns:
        Namespace with ``model_path`` and ``processor_path`` (both required)
        and ``test_path`` (defaults to ``'data/test.txt'``).

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser(description='评估NER模型')
    parser.add_argument('--model_path', type=str, required=True)
    parser.add_argument('--processor_path', type=str, required=True)
    parser.add_argument('--test_path', type=str, default='data/test.txt')
    return parser.parse_args(argv)

In [None]:


def _flatten_masked_tags(y_true, y_pred, mask, idx2tag):
    """Return the true and predicted tag names at every position whose mask is 1.

    Args:
        y_true: Per-sentence sequences of gold tag indices.
        y_pred: Per-sentence sequences of predicted tag indices, indexed with
            the same ``[row][col]`` positions as ``y_true``.
        mask: Per-sentence sequences in which the value 1 marks a position to
            keep; every other position is skipped.
        idx2tag: Mapping from tag index to tag name.

    Returns:
        A ``(true_tags, pred_tags)`` pair of equally long flat lists.
    """
    true_tags = []
    pred_tags = []
    for row, true_row in enumerate(y_true):
        for col, true_idx in enumerate(true_row):
            if mask[row][col] == 1:
                true_tags.append(idx2tag[true_idx])
                pred_tags.append(idx2tag[y_pred[row][col]])
    return true_tags, pred_tags


def main():
    """Evaluate a saved NER model on a test file and print a classification report.

    Steps: parse the command-line options, restore the data processor, load
    and encode the test sentences, load the Keras model (with the
    TensorFlow Addons CRF layer registered as a custom object), predict, and
    print a per-tag report over the positions whose mask value is 1.

    Returns early, after printing an error line, when no test sentences
    could be loaded.
    """
    args = parse_args()
    print("="*60)
    print("Transformer NER - 模型评估")
    print("="*60)

    # Restore the processor state (its idx2tag mapping is used below).
    # NOTE(review): the processor file is expected to be a pickle, and
    # load_processor presumably unpickles it — only use trusted files.
    from src.data import NERDataProcessor
    processor = NERDataProcessor()
    processor.load_processor(args.processor_path)
    print("✓ 处理器已加载")

    # Load the test set and convert it to padded index arrays.
    test_sentences, test_tags = processor.load_conll_data(args.test_path)
    if not test_sentences:
        print("✗ 无法加载测试数据")
        return

    test_encoded_sentences, test_encoded_tags = processor.encode_sentences(
        test_sentences, test_tags
    )
    X_test, y_test, mask_test = processor.pad_sequences(
        test_encoded_sentences, test_encoded_tags
    )

    # Load the trained model; the CRF layer must be registered to deserialize it.
    model = keras.models.load_model(
        args.model_path,
        custom_objects={'CRF': tfa.layers.CRF}
    )
    print("✓ 模型已加载")

    # Predict; a 3-D output is reduced to tag indices by taking the argmax
    # over the last axis.
    predictions = model.predict([X_test, mask_test])
    if predictions.ndim == 3:
        predictions = np.argmax(predictions, axis=-1)

    # Score only the positions whose mask value is 1.
    y_true_flat, y_pred_flat = _flatten_masked_tags(
        y_test, predictions, mask_test, processor.idx2tag
    )

    print("\n分类报告:")
    print(classification_report(y_true_flat, y_pred_flat, digits=4))

    print("\n✓ 评估完成！")

In [None]:


# Run the evaluation only when this module's __name__ is '__main__',
# i.e. when it is executed directly rather than imported.
if __name__ == '__main__':
    main()