In [1]:
import os
import pickle

import numpy as np
import pandas as pd

from experiment_details import ExperimentDetails
from parse_input import parse_xml, parse_itc2007_curriculum_based, parse_itc2007_post_enrolment
from penalty_calc import calculate_total_cost


In [2]:
# Output folder of the experiment run being analysed; later cells list its
# entries and read an 'experiment_details.pkl' from each one.
# Backslashes are escaped explicitly (as in the dataset paths below) so the
# literal contains no invalid escape sequences such as '\D' or '\I'.
experiment_folder = 'D:\\Desktop\\ITC2019\\output\\itc2019_depth_first_search_test_20240307-232305'

# Root folders that are walked to locate the instance files of each dataset.
itc2007_track2_path = 'D:\\Desktop\\Datasets\\post'
itc2007_track3_path = 'D:\\Desktop\\Datasets\\curriculum'
itc2019_path = 'D:\\Desktop\\Datasets\\2019'

In [3]:
def get_all_files(path):
    """Return the joined path of every file found by walking ``path``.

    Sub-folders are traversed with ``os.walk``; each entry is the visited
    directory joined with the file name. Directories themselves are not
    included in the result.
    """
    collected = []
    for folder, _subfolders, names in os.walk(path):
        for name in names:
            collected.append(os.path.join(folder, name))
    return collected

In [4]:
# Walk each dataset root once and keep the resulting file lists around so the
# instance lookup can search them without touching the disk again.
itc2007_track2_files, itc2007_track3_files, itc2019_files = (
    get_all_files(dataset_root)
    for dataset_root in (itc2007_track2_path, itc2007_track3_path, itc2019_path)
)

In [5]:
def get_instance_by_dataset_and_instance_file_name(dataset, instance_file_name):
    """Find the path of an instance file within the file list of its dataset.

    Args:
        dataset: One of 'itc2007_track2', 'itc2007_track3' or 'itc2019'.
        instance_file_name: Text that must occur somewhere in the file path
            (substring match, not an exact file-name comparison).

    Returns:
        The first path in the dataset's file list that contains
        ``instance_file_name``, or None when nothing matches or the dataset
        name is not recognised.
    """
    if dataset == 'itc2007_track2':
        candidates = itc2007_track2_files
    elif dataset == 'itc2007_track3':
        candidates = itc2007_track3_files
    elif dataset == 'itc2019':
        candidates = itc2019_files
    else:
        return None

    for candidate in candidates:
        if instance_file_name in candidate:
            return candidate
    return None
    
def parse_problem(dataset, instance_path):
    """Parse an instance file with the parser that belongs to its dataset.

    Args:
        dataset: One of 'itc2007_track2', 'itc2007_track3' or 'itc2019'.
        instance_path: Path of the instance file handed to the parser.

    Returns:
        Whatever the selected parser returns (the notebook unpacks it into a
        problem and a stats value), or None for an unrecognised dataset.
    """
    if dataset == 'itc2019':
        return parse_xml(instance_path)
    if dataset == 'itc2007_track3':
        return parse_itc2007_curriculum_based(instance_path)
    if dataset == 'itc2007_track2':
        return parse_itc2007_post_enrolment(instance_path)
    return None

In [6]:
# One row per run folder inside the experiment output folder.
# os.listdir also returns plain files and gives no ordering guarantee, so keep
# only directories (each one is later expected to hold an
# 'experiment_details.pkl') and sort them for a reproducible table.
analysis_df = pd.DataFrame(
    sorted(
        entry
        for entry in os.listdir(experiment_folder)
        if os.path.isdir(os.path.join(experiment_folder, entry))
    ),
    columns=['folder'],
)

In [7]:
# Full path of each run folder: the experiment folder joined with the entry name.
analysis_df['folder path'] = analysis_df['folder'].map(
    lambda folder_name: os.path.join(experiment_folder, folder_name)
)

analysis_df

Unnamed: 0,folder,folder path
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...


In [8]:
def load_experiment_details(folder_path):
    """Unpickle the 'experiment_details.pkl' stored in one run folder.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object is loaded instead of being left open for every row.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading;
    only point this at experiment output you produced yourself.

    Args:
        folder_path: Path of the run folder containing the pickle.

    Returns:
        The unpickled object.
    """
    with open(os.path.join(folder_path, 'experiment_details.pkl'), 'rb') as details_file:
        return pickle.load(details_file)


analysis_df['experiment_details'] = analysis_df['folder path'].map(load_experiment_details)

analysis_df

Unnamed: 0,folder,folder path,experiment_details
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...


In [9]:
# Pull the instance name out of each loaded experiment-details object.
analysis_df['instance_name'] = analysis_df['experiment_details'].map(
    lambda details: details.instance_name
)

analysis_df

Unnamed: 0,folder,folder path,experiment_details,instance_name
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17


In [10]:
def _locate_instance_file(row):
    """Look up the instance file path for the experiment stored in ``row``."""
    details = row.experiment_details
    return get_instance_by_dataset_and_instance_file_name(
        details.dataset_name,
        details.instance_file_name,
    )


# The lookup yields None when no file matches, which the parsing step cannot use.
analysis_df['instance_file'] = analysis_df.apply(_locate_instance_file, axis=1)

analysis_df

Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17,D:\Desktop\Datasets\2019\1.early\agh-fis-spr17...


In [11]:
# Parse every instance once; each result is unpacked as a
# (problem, problem_stats) pair and the pairs are transposed into two columns.
parsed_pairs = [
    parse_problem(details.dataset_name, instance_file)
    for details, instance_file in zip(
        analysis_df['experiment_details'], analysis_df['instance_file']
    )
]
analysis_df['problem'], analysis_df['problem_stats'] = zip(*parsed_pairs)
analysis_df

Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file,problem,problem_stats
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17,D:\Desktop\Datasets\2019\1.early\agh-fis-spr17...,<models.input.problem.Problem object at 0x0000...,"{'name': 'agh-fis-spr17', 'student_count': 164..."


In [12]:
# Copy selected entries of each run's results_data dict into their own columns,
# in this order. The key is bound as a default argument so every lambda reads
# its own key rather than the loop variable's final value.
for result_key in ('success', 'operation_count', 'backtrack_count', 'time', 'operation_history'):
    analysis_df[result_key] = analysis_df.apply(
        lambda row, key=result_key: row.experiment_details.results_data[key],
        axis=1,
    )

analysis_df

Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file,problem,problem_stats,success,operation_count,backtrack_count,time,operation_history
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17,D:\Desktop\Datasets\2019\1.early\agh-fis-spr17...,<models.input.problem.Problem object at 0x0000...,"{'name': 'agh-fis-spr17', 'student_count': 164...",False,10,0,11.811445,"[{'current_row': 1, 'time': 1.118422508239746}..."


In [13]:
def _solution_gene_path(row):
    """Path of the saved solution gene for a successful run, otherwise None."""
    if not row['success']:
        return None
    return os.path.join(row['folder path'], 'solution gene.npy')


def _load_solution_gene(row):
    """Load the saved solution gene array for a successful run, otherwise None."""
    if not row['success']:
        return None
    return np.load(row['solution_gene_file'])


def _solution_cost(row):
    """Total cost of the solution gene for a successful run, otherwise None."""
    if not row['success']:
        return None
    return calculate_total_cost(row['problem'], row['solution_gene'])


# Each step reads the column written by the previous one, so order matters.
analysis_df['solution_gene_file'] = analysis_df.apply(_solution_gene_path, axis=1)
analysis_df['solution_gene'] = analysis_df.apply(_load_solution_gene, axis=1)
analysis_df['cost'] = analysis_df.apply(_solution_cost, axis=1)

analysis_df


Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file,problem,problem_stats,success,operation_count,backtrack_count,time,operation_history,solution_gene_file,solution_gene,cost
0,agh-fis-spr17,D:\Desktop\ITC2019\output\itc2019_depth_first_...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17,D:\Desktop\Datasets\2019\1.early\agh-fis-spr17...,<models.input.problem.Problem object at 0x0000...,"{'name': 'agh-fis-spr17', 'student_count': 164...",False,10,0,11.811445,"[{'current_row': 1, 'time': 1.118422508239746}...",,,
