In [1]:
import os
import pandas as pd
import glob 
# Function to parse a file and extract metrics
def parse_file(file_path):
    """Parse a performance report into a ``{"<metric>@<k>": value}`` mapping.

    The file is read line by line. A line that starts with ``Top``
    (e.g. ``Top 10``) opens a new section; its second whitespace-separated
    token becomes the cutoff ``k`` for the lines that follow. Every other
    line of the form ``name:value`` is stored under ``f"{name}@{k}"``.

    Blank lines and lines without a ``:`` separator are ignored, and only the
    first ``:`` on a line is treated as the separator, so a stray empty line
    or an extra colon in the value no longer aborts parsing.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict: Maps ``"<metric>@<k>"`` to the value text with surrounding
        whitespace removed. Values are returned as strings, not numbers.
        Metrics that appear before any ``Top`` line get an empty ``k``.
    """
    metrics = {}
    top_k = ''

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Trailing/separator blank lines carry no metric.
                continue

            if line.startswith("Top"):
                tokens = line.split()
                # A bare "Top" header has no cutoff token; fall back to ''.
                top_k = tokens[1] if len(tokens) > 1 else ''
                continue

            # partition() splits on the first ':' only and never raises.
            metric_name, separator, metric_value = line.partition(":")
            if not separator:
                continue
            metrics[f"{metric_name.strip()}@{top_k}"] = metric_value.strip()

    return metrics


In [2]:
# Collect one row per performance report found under ``root_dir`` and build
# the results DataFrame ``df`` from them.
data = []
modes = ['cold_start', 'add_noise', 'missing']
# Directory path
root_dir = "./results"

# Single definition of the metric columns, used both to read values out of a
# parsed report and to label the DataFrame, so the two cannot drift apart.
metric_columns = [
    f"{metric}@{k}"
    for k in (10, 20, 40)
    for metric in ("Hit Ratio", "Precision", "Recall", "NDCG")
]

# Layout walked here: <root_dir>/<model>/<dataset>/<sub_dir>/*/*-performance.txt
for model_name in os.listdir(root_dir):
    model_dir = os.path.join(root_dir, model_name)
    if not os.path.isdir(model_dir):
        # Stray files (e.g. .DS_Store) would make os.listdir() raise.
        continue
    for dataset in os.listdir(model_dir):
        dataset_dir = os.path.join(model_dir, dataset)
        if not os.path.isdir(dataset_dir):
            continue
        for sub_dir in os.listdir(dataset_dir):
            for mode in modes:
                if mode not in sub_dir:
                    continue
                # The percentage is the last '_'-separated token after the mode name.
                percentage = sub_dir.split(mode)[1].split('_')[-1]
                file_pattern = os.path.join(dataset_dir, sub_dir, "*", "*-performance.txt")
                # glob order is arbitrary, so record every matching report
                # (sorted for reproducibility) instead of an arbitrary first one.
                for report_path in sorted(glob.glob(file_pattern)):
                    if not os.path.isfile(report_path):
                        continue
                    try:
                        metrics = parse_file(report_path)
                        # Store numbers, not text, so later sorts compare numerically.
                        values = [float(metrics[column]) for column in metric_columns]
                    except (KeyError, ValueError) as exc:
                        # An incomplete or malformed report should not abort the scan.
                        print(f"Skipping {report_path}: {exc!r}")
                        continue
                    data.append([model_name, dataset, mode, percentage] + values)

# Create a DataFrame
df = pd.DataFrame(data, columns=["Model", "Dataset", "Mode", "Percentage"] + metric_columns)


In [3]:
# Keep a single row per (Model, Dataset, Mode, Percentage): the one with the
# highest Hit Ratio@10.
group_keys = ['Model', 'Dataset', 'Mode', 'Percentage']

df_ = df.sort_values(
    group_keys + ['Hit Ratio@10'],
    # Compare Hit Ratio@10 as numbers even if the column holds text; a plain
    # string sort would rank e.g. '1e-05' above '0.9'.
    key=lambda column: (
        pd.to_numeric(column, errors='coerce')
        if column.name == 'Hit Ratio@10'
        else column
    ),
    # Unparseable values sort first so keep='last' never selects them as "best".
    na_position='first',
)
df_ = df_.drop_duplicates(subset=group_keys, keep='last')
df_

Unnamed: 0,Model,Dataset,Mode,Percentage,Hit Ratio@10,Precision@10,Recall@10,NDCG@10,Hit Ratio@20,Precision@20,Recall@20,NDCG@20,Hit Ratio@40,Precision@40,Recall@40,NDCG@40
57,DHCF,lastfm,add_noise,10,0.07873,0.09713,0.07992,0.12786,0.13135,0.08102,0.13227,0.14295,0.20086,0.06195,0.2021,0.17811
59,DHCF,lastfm,add_noise,20,0.08032,0.09904,0.08173,0.13563,0.13001,0.08015,0.13171,0.14615,0.19878,0.06128,0.20087,0.18045
56,DHCF,lastfm,add_noise,30,0.08206,0.10128,0.08336,0.12426,0.13192,0.08141,0.13325,0.13801,0.19782,0.06104,0.19901,0.17122
64,DHCF,lastfm,add_noise,40,0.07378,0.09102,0.07477,0.11586,0.12308,0.07592,0.12433,0.1295,0.18104,0.05583,0.18161,0.15885
60,DHCF,lastfm,add_noise,50,0.06636,0.08183,0.06791,0.10701,0.10885,0.06711,0.11025,0.12029,0.16017,0.04938,0.16184,0.14678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,SHT,ml-1m,missing,10,0.05466,0.2267,0.07781,0.26603,0.09497,0.19692,0.12793,0.25381,0.15831,0.16414,0.20177,0.25633
135,SHT,ml-1m,missing,20,0.04814,0.19965,0.07102,0.22868,0.08484,0.17592,0.1177,0.22272,0.14461,0.14994,0.1894,0.23091
140,SHT,ml-1m,missing,30,0.04353,0.18055,0.06862,0.21061,0.07851,0.1628,0.11629,0.20891,0.13698,0.14203,0.18839,0.22162
139,SHT,ml-1m,missing,40,0.04071,0.16885,0.0642,0.19352,0.07277,0.1509,0.10917,0.19209,0.12734,0.13203,0.1789,0.20537


In [4]:
# Write the de-duplicated results table to disk as CSV, without the row index
output_csv = "output_final_4.csv"
df_.to_csv(output_csv, index=False)