In [1]:
import os
import pandas as pd
import glob 
# Function to parse a file and extract metrics
def parse_file(file_path):
    """Parse a performance report file into a flat ``{"<metric>@<k>": value}`` dict.

    The file is read line by line:

    * a line starting with ``"Top"`` sets the current cut-off ``k`` to its
      second whitespace-separated token (empty string if there is none);
    * any other line of the form ``name:value`` is stored under the key
      ``"<name>@<k>"`` using the most recently seen cut-off;
    * blank lines and lines without a ``:`` are skipped instead of raising,
      so trailing newlines or free-text header lines do not abort parsing.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict: Maps ``"<metric name>@<k>"`` to the metric value. Values are
        kept as strings with surrounding whitespace removed; no numeric
        conversion is done here.
    """
    metrics = {}
    top_k = ''

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Blank separator / trailing line: nothing to record.
                continue

            if line.startswith("Top"):
                # split() without arguments tolerates repeated spaces or tabs.
                tokens = line.split()
                top_k = tokens[1] if len(tokens) > 1 else ''
                continue

            # partition() splits on the first ':' only, so a value that itself
            # contains ':' is kept intact rather than breaking the unpacking.
            metric_name, separator, metric_value = line.partition(":")
            if not separator:
                # Not a "name:value" line.
                continue

            metrics[f"{metric_name.strip()}@{top_k}"] = metric_value.strip()

    return metrics


In [2]:
# Collect the metrics of every matching experiment run into a list of rows,
# then build one DataFrame from it.
data = []
modes = ['cold_start', 'add_noise', 'missing']
# Cut-offs and metric names looked up in each parsed file. Their product
# defines the metric columns once, so rows and header cannot drift apart.
top_ks = [10, 20, 40]
metric_names = ['Hit Ratio', 'Precision', 'Recall', 'NDCG']
metric_columns = [f"{name}@{k}" for k in top_ks for name in metric_names]
# Directory path
root_dir = "./results"

# Only this model's results are collected.
model_name = 'HGNN'

model_dir = os.path.join(root_dir, model_name)
# Listings are sorted so the row order does not depend on the filesystem.
for dataset in sorted(os.listdir(model_dir)):
    dataset_dir = os.path.join(model_dir, dataset)
    # Stray files next to the dataset folders (e.g. .DS_Store) would make
    # os.listdir() raise NotADirectoryError, so skip anything not a directory.
    if not os.path.isdir(dataset_dir):
        continue

    for sub_dir in sorted(os.listdir(dataset_dir)):
        for mode in modes:
            if mode not in sub_dir:
                continue

            # The percentage is the last '_'-separated token that follows the
            # mode name inside the sub-directory name.
            percentage = sub_dir.split(mode)[1].split('_')[-1]
            file_pattern = os.path.join(dataset_dir, sub_dir, "*", "*-performance.txt")

            # glob returns an empty list when nothing matches, so no explicit
            # emptiness check is needed.
            for file in sorted(glob.glob(file_pattern)):
                if not os.path.isfile(file):
                    continue
                metrics = parse_file(file)
                # Convert to float so later sorting/comparison on metric
                # columns is numeric rather than lexicographic on strings.
                # A missing metric raises KeyError, an unparsable one ValueError.
                data.append(
                    [model_name, dataset, mode, percentage]
                    + [float(metrics[column]) for column in metric_columns]
                )

# Create a DataFrame
df = pd.DataFrame(data, columns=["Model", "Dataset", "Mode", "Percentage"] + metric_columns)


In [3]:
# Sort so that, within each (Model, Dataset, Mode, Percentage) group, the row
# that sorts last on 'Hit Ratio@10' ends up at the bottom, then keep only that
# row per group.
df_ = (
    df
    .sort_values(by=['Model', 'Dataset', 'Mode', 'Percentage', 'Hit Ratio@10'])
    .drop_duplicates(subset=['Model', 'Dataset', 'Mode', 'Percentage'], keep='last')
)
df_

Unnamed: 0,Model,Dataset,Mode,Percentage,Hit Ratio@10,Precision@10,Recall@10,NDCG@10,Hit Ratio@20,Precision@20,Recall@20,NDCG@20,Hit Ratio@40,Precision@40,Recall@40,NDCG@40
0,HGNN,lastfm,missing,10,0.14608,0.18013,0.14858,0.22244,0.22193,0.13682,0.22349,0.23873,0.31686,0.09768,0.31807,0.28628
1,HGNN,lastfm,missing,20,0.12894,0.15906,0.13057,0.19531,0.20254,0.12493,0.20314,0.21343,0.29748,0.09175,0.2966,0.261
2,HGNN,lastfm,missing,30,0.11243,0.1387,0.11311,0.16783,0.18203,0.11228,0.18277,0.18767,0.27546,0.08495,0.27562,0.23483


In [4]:
# Write the de-duplicated table to disk as CSV, leaving out the index column.
df_.to_csv(path_or_buf="output_final_4.csv", index=False)