In [None]:
import json  

# Failed Index lists, keyed by memory size.
memory_failed_indexes = {
    50: [10, 20, 22, 34, 36, 38, 39],
    100: [11, 20, 22, 23, 34, 36, 39],
    200: [18, 20, 22, 36, 39],
    300: [16, 20, 22, 35, 36],
    400: [20, 22, 36, 39],
    500: [20, 22, 23, 36, 39],
}

# Index ranges to evaluate; adjust as needed.
index_ranges = [range(1, 41)]

# One entry per memory size whose result file could be read.
all_results = []


def collect_success_memory_ids(data, success_indexes):
    """Return the sorted MemoryIds retrieved by the successful items.

    Args:
        data: Iterable of dicts, each read via ``item.get('Index')`` and
            ``item.get('RetrievedMemory')``.
        success_indexes: Container of Index values considered successful.

    Returns:
        A list of distinct MemoryId strings, ordered by the integer found
        after the first underscore of each id.

    Raises:
        ValueError, IndexError: If an id does not have an integer as its
            second ``_``-separated field.
    """
    memory_ids = set()
    for item in data:
        if item.get('Index') not in success_indexes:
            continue
        # `or []` also covers a RetrievedMemory key that is present but null.
        for memory in item.get('RetrievedMemory') or []:
            memory_id = memory.get('MemoryId')
            # An entry without a MemoryId would put None in the set and
            # crash the sort key below, so it is skipped.
            if memory_id is not None:
                memory_ids.add(memory_id)
    return sorted(memory_ids, key=lambda x: int(x.split('_')[1]))


# Process every memory size together with its failed Index list.
for memory_size, failed_indexes in memory_failed_indexes.items():
    print(f"Processing memory size: {memory_size}...")

    # Result file produced for this memory size.
    file_path = f'new_result_general_edit_{memory_size}_merged.json'

    # Per-range results for this memory size.
    results = []

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"File not found: {file_path}, skipping...")
        continue

    for index_range in index_ranges:
        # Restrict the failed list to the current range, then take the
        # complement inside the range as the successful Indexes.
        current_range = set(index_range)
        current_failed_indexes = sorted(set(failed_indexes) & current_range)
        success_indexes = current_range - set(current_failed_indexes)

        sorted_memory_ids = collect_success_memory_ids(data, success_indexes)
        total_count = len(sorted_memory_ids)

        results.append({
            "IndexRange": f"{min(index_range)}-{max(index_range)}",
            "FailedIndexes": current_failed_indexes,
            "MemoryIds": sorted_memory_ids,
            "TotalCount": total_count,
        })

    all_results.append({
        "MemorySize": memory_size,
        "Results": results,
    })

# Print everything that was collected.
for memory_result in all_results:
    print(f"Memory Size: {memory_result['MemorySize']}")
    for result in memory_result['Results']:
        print(f"  Index Range: {result['IndexRange']}")
        print(f"  Failed Indexes: {result['FailedIndexes']}")
        print(f"  MemoryIds: {result['MemoryIds']}")
        print(f"  Total Count: {result['TotalCount']}")
        print("-" * 50)

In [None]:
import json  
import pandas as pd  

def get_memory_ids(memory_file):
    """Read a memory JSON file and return the id fragments it contains.

    The file must hold a list of objects with an ``'Id'`` string. For each
    one, the second ``_``-separated field of the id is kept (as a string).

    Args:
        memory_file: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A set of the extracted id fragments.
    """
    with open(memory_file, 'r', encoding='utf-8') as handle:
        records = json.load(handle)
    return {record['Id'].split('_')[1] for record in records}

def get_retrieved_ids(retrieval_log_file):
    """Read a retrieval log and return the id fragments that were retrieved.

    The log must hold a list of entries. Each entry has a
    ``'retrieved_info'`` list, whose elements each have a
    ``'retrieved_ids'`` list of objects with a ``'memory_id'`` string. The
    second ``_``-separated field of every ``memory_id`` is collected.

    Args:
        retrieval_log_file: Path of the UTF-8 encoded JSON log to read.

    Returns:
        A set of the extracted id fragments (strings).
    """
    with open(retrieval_log_file, 'r', encoding='utf-8') as log:
        entries = json.load(log)

    fragments = set()
    for entry in entries:
        for info in entry['retrieved_info']:
            fragments.update(
                hit['memory_id'].split('_')[1] for hit in info['retrieved_ids']
            )
    return fragments

def calculate_retrieval_rate():
    """Print retrieved-count, total-count and retrieval-rate tables.

    For every memory file ``memory_<size>.json`` and every query size, the
    log ``new_retrieve_two_memory<size>_query<q>.json`` is loaded. Three
    tables (rows = query sizes, columns = memory sizes) are filled with the
    number of distinct retrieved ids, the number of distinct memory ids, and
    their ratio as a percentage string.

    Cells whose input file is missing are set to ``'N/A'``. A memory file
    that yields zero ids gives an ``'N/A'`` rate instead of raising
    ``ZeroDivisionError``.
    """
    memory_files = [
        'memory_50.json',
        'memory_100.json',
        'memory_200.json',
        'memory_300.json',
        'memory_400.json',
        'memory_500.json',
    ]
    query_sizes = [10, 20, 30, 40, 50]

    # The size is the part of the file name between '_' and '.json'.
    memory_sizes = [m.split("_")[1].split(".")[0] for m in memory_files]

    # Row and column labels, computed once and reused for every cell access.
    index = [f'Query_{q}' for q in query_sizes]
    columns = [f'Memory_{size}' for size in memory_sizes]

    # One table per metric.
    retrieved_counts_df = pd.DataFrame(index=index, columns=columns)
    total_counts_df = pd.DataFrame(index=index, columns=columns)
    retrieval_rates_df = pd.DataFrame(index=index, columns=columns)

    for memory_file, memory_size, column in zip(memory_files, memory_sizes, columns):
        try:
            memory_ids = get_memory_ids(memory_file)
        except FileNotFoundError:
            # Without the memory file nothing is known for this column.
            for row in index:
                retrieved_counts_df.at[row, column] = 'N/A'
                total_counts_df.at[row, column] = 'N/A'
                retrieval_rates_df.at[row, column] = 'N/A'
            continue

        total_count = len(memory_ids)

        for query_size, row in zip(query_sizes, index):
            retrieval_log_file = f'new_retrieve_two_memory{memory_size}_query{query_size}.json'
            # The total is known even when the retrieval log is missing.
            total_counts_df.at[row, column] = total_count

            try:
                retrieved_ids = get_retrieved_ids(retrieval_log_file)
            except FileNotFoundError:
                retrieved_counts_df.at[row, column] = 'N/A'
                retrieval_rates_df.at[row, column] = 'N/A'
                continue

            retrieved_count = len(retrieved_ids)
            retrieved_counts_df.at[row, column] = retrieved_count

            if total_count:
                retrieval_rate = (retrieved_count / total_count) * 100
                retrieval_rates_df.at[row, column] = f'{retrieval_rate:.2f}%'
            else:
                # An empty memory file has no meaningful rate.
                retrieval_rates_df.at[row, column] = 'N/A'

    # Print the three tables.
    print("\n检索数量:")
    print(retrieved_counts_df)
    print("\n总数量:")
    print(total_counts_df)
    print("\n检索率:")
    print(retrieval_rates_df)

# Run the retrieval-rate report only when this module is executed directly.
if __name__ == "__main__":  
    calculate_retrieval_rate()