In [1]:
def format_performance_to_table(text: str) -> str:
    """Convert an evaluation report into a single tab-separated table row.

    Column groups are emitted in this order:
    Overall | Head item | Tail item | Head user | Tail user,
    and each group contributes two cells: HR@10 followed by NDCG@10.

    Lines are recognised by the tags ``NDCG@10:`` and ``HR@10:``. The group
    is chosen by the first of these words found in the line: ``Short``
    (tail user), ``Long`` (head user), ``Tail`` (tail item), ``Popular``
    (head item); a tagged line containing none of them is the overall score.

    Args:
        text: The raw report, one metric per line.

    Returns:
        The values joined by double tabs. A group is included only when both
        its HR@10 and NDCG@10 values were found; an empty string is returned
        when no complete group exists.
    """
    # (word looked for in the line, key prefix); checked in order, first wins.
    group_markers = (
        ('Short', 'tail_user'),
        ('Long', 'head_user'),
        ('Tail', 'tail_item'),
        ('Popular', 'head_item'),
    )
    # NDCG is tested before HR, mirroring the precedence of the report parser.
    metric_tags = (('NDCG@10:', 'ndcg'), ('HR@10:', 'hr'))

    metrics = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        for tag, suffix in metric_tags:
            if tag not in line:
                continue
            # Take everything after the tag instead of splitting on ': ', so
            # "NDCG@10:0.5", "NDCG@10:\t0.5" and extra spaces all parse.
            value = line.partition(tag)[2].strip()
            if value:  # a tag without a value carries no information
                group = next(
                    (prefix for word, prefix in group_markers if word in line),
                    'overall',
                )
                metrics[f"{group}_{suffix}"] = value
            break

    order = [
        ('overall_hr', 'overall_ndcg'),           # Overall
        ('head_item_hr', 'head_item_ndcg'),       # Head item
        ('tail_item_hr', 'tail_item_ndcg'),       # Tail item
        ('head_user_hr', 'head_user_ndcg'),       # Head user
        ('tail_user_hr', 'tail_user_ndcg'),       # Tail user
    ]

    # Only complete (HR, NDCG) pairs are written out.
    result_parts = [
        f"{metrics[hr_key]}\t\t{metrics[ndcg_key]}"
        for hr_key, ndcg_key in order
        if hr_key in metrics and ndcg_key in metrics
    ]

    return '\t\t'.join(result_parts)

In [8]:
# Test the function on a sample evaluation report.
text = """
Overall Performance:
	 NDCG@10: 0.18650
	 HR@10: 0.31108
User Group Performance:
	 Short NDCG@10: 0.19169
	 Short HR@10: 0.31078
	 Long NDCG@10: 0.17829
	 Long HR@10: 0.31156
Item Group Performance:
	 Tail NDCG@10: 0.05812
	 Tail HR@10: 0.11796
	 Popular NDCG@10: 0.31208
	 Popular HR@10: 0.50000
"""

# Prints one tab-separated row, ready to paste into a results table.
result = format_performance_to_table(text)
print(result)

0.31108		0.18650		0.50000		0.31208		0.11796		0.05812		0.31156		0.17829		0.31078		0.19169
