In [None]:
import os
import pickle
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.ticker import PercentFormatter

from parse_input import parse_xml, parse_itc2007_curriculum_based, parse_itc2007_post_enrolment
from penalty_calc import calculate_total_cost
from solution_search import SolutionSearch


In [None]:
# Every run writes into its own timestamped folder. os.path.join picks the separator of the
# host OS; on Windows it produces the same 'output\.analysis\<timestamp>' value as a
# hand-written backslash path, while on other systems it still creates a real folder hierarchy.
output_folder_path = os.path.join('output', '.analysis', time.strftime("%Y%m%d-%H%M%S"))
stats_file_path = os.path.join(output_folder_path, 'stats.txt')
os.makedirs(output_folder_path)
# Mode 'x' fails instead of overwriting an existing stats file.
# The handle is kept open at module level because later cells write to it.
stats_file = open(stats_file_path, 'x')
# Folder that receives the figures saved by the plotting helpers.
assets_folder_path = os.path.join(output_folder_path, 'assets')
os.makedirs(assets_folder_path)

# Dataset root folders (absolute, machine-specific paths).
itc2007_track2_path = 'D:\\Desktop\\Datasets\\post'
itc2007_track3_path = 'D:\\Desktop\\Datasets\\curriculum'
itc2019_path = 'D:\\Desktop\\Datasets\\2019'

In [None]:
def get_all_files(path):
    """Return the path of every file found in ``path`` and all of its sub-folders.

    The tree is traversed with ``os.walk``; each returned entry is the containing
    directory joined with the file name. A path that cannot be walked yields an empty list.
    """
    collected = []
    for current_dir, _subdirs, names in os.walk(path):
        for name in names:
            collected.append(os.path.join(current_dir, name))
    return collected

In [None]:
# List the files of each dataset once; the lookup helper scans these lists by name.
itc2007_track2_files, itc2007_track3_files, itc2019_files = [
    get_all_files(dataset_root)
    for dataset_root in (itc2007_track2_path, itc2007_track3_path, itc2019_path)
]

In [None]:
def get_instance_by_dataset_and_instance_file_name(dataset, instance_file_name):
    """Return the first listed instance file of ``dataset`` that matches ``instance_file_name``.

    Args:
        dataset: One of 'itc2007_track2', 'itc2007_track3' or 'itc2019'.
        instance_file_name: Name to look for. For the two ITC 2007 tracks it is
            matched as a substring of the whole path; for 'itc2019' the string
            ``instance_file_name + ".xml"`` must occur in the file name itself.

    Returns:
        The matching path from the corresponding module-level file list, or
        ``None`` when nothing matches or ``dataset`` is unknown.
    """
    if dataset == 'itc2007_track2':
        return next((file for file in itc2007_track2_files if instance_file_name in file), None)
    elif dataset == 'itc2007_track3':
        return next((file for file in itc2007_track3_files if instance_file_name in file), None)
    elif dataset == 'itc2019':
        # Compare against the file name only. Splitting on "\\\\" (two backslashes)
        # never isolated the name, so a parent folder containing the text could match.
        xml_name = instance_file_name + ".xml"
        return next((file for file in itc2019_files if xml_name in os.path.basename(file)), None)
    else:
        return None


def parse_problem(dataset, instance_path):
    """Parse ``instance_path`` with the parser that belongs to ``dataset``.

    Returns whatever the selected parser returns, or ``None`` when ``dataset``
    is not one of 'itc2007_track2', 'itc2007_track3' or 'itc2019'.
    """
    if dataset == 'itc2019':
        return parse_xml(instance_path)
    if dataset == 'itc2007_track3':
        return parse_itc2007_curriculum_based(instance_path)
    if dataset == 'itc2007_track2':
        return parse_itc2007_post_enrolment(instance_path)
    # No parser is registered for any other dataset name.
    return None

In [None]:
def plot_percentage_placed_over_operation(file, operation_history, class_count, title):
    """Plot ``current_row / class_count`` for every entry of ``operation_history``.

    Args:
        file: Output path without extension; '.png' is appended. A falsy value
            skips saving.
        operation_history: Iterable of mappings; the 'current_row' entry of each
            one is read (presumably the number of classes placed so far, per the
            axis label; confirm against the search code).
        class_count: Denominator used to turn 'current_row' into a fraction.
        title: Title of the figure.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)

    y_percent = [step['current_row'] / class_count for step in operation_history]

    ax.set_xlabel('Operation')
    ax.set_ylabel('Classes placed (%)')

    # The plotted values are fractions, so a value of 1 must be rendered as 100%.
    ax.yaxis.set_major_formatter(PercentFormatter(1))

    ax.plot(y_percent)

    # Write the image before showing, so the saved file does not depend on how the
    # active backend treats a figure once it has been displayed.
    if file:
        fig.savefig(file + '.png')

    plt.show()

    # This function is called once per run; closing the figure keeps open
    # figures from piling up in memory.
    plt.close(fig)

In [None]:
def read_experiment_stats(experiment_folder):
    """Collect the saved results of every run under ``experiment_folder`` into one DataFrame.

    Every entry of ``experiment_folder`` is treated as a run folder that contains an
    'experiment_details.pkl' file. One row is built per run, holding the unpickled
    details, the matching instance file, the parsed problem, the depth-first search
    results (and the AC3 results when the first run has AC3 parameters), the loaded
    solution and its cost.

    Side effect: for every run a "classes placed" graph is shown and saved inside
    that run's folder.

    Args:
        experiment_folder: Path of the folder whose entries are the run folders.

    Returns:
        The assembled pandas DataFrame.

    Note:
        ``pickle.load`` can execute arbitrary code; only read experiment output
        that you trust.
    """

    def load_experiment_details(run_folder):
        # The context manager closes the pickle file as soon as it is read,
        # instead of leaving one open handle per run.
        with open(os.path.join(run_folder, 'experiment_details.pkl'), 'rb') as details_file:
            return pickle.load(details_file)

    def count_options_opened_before_ac3(problem):
        # Builds a fresh search for the problem and counts the zero entries
        # of its decision table.
        search = SolutionSearch(problem)
        return np.count_nonzero(search.decision_table == 0)

    analysis_df = pd.DataFrame(list(os.listdir(experiment_folder)), columns=['folder'])

    analysis_df['folder path'] = analysis_df.apply(lambda x: os.path.join(experiment_folder, x.folder), axis=1)

    analysis_df['experiment_details'] = analysis_df.apply(
        lambda x: load_experiment_details(x['folder path']), axis=1)

    analysis_df['instance_name'] = analysis_df.apply(lambda x: x.experiment_details.instance_name, axis=1)

    analysis_df['instance_file'] = analysis_df.apply(
        lambda x: get_instance_by_dataset_and_instance_file_name(x.experiment_details.dataset_name,
                                                                 x.experiment_details.instance_file_name), axis=1)

    # The parser result is unpacked as a (problem, problem_stats) pair per row.
    analysis_df['problem'], analysis_df['problem_stats'] = zip(
        *analysis_df.apply(lambda x: parse_problem(x.experiment_details.dataset_name, x['instance_file']), axis=1))

    # The first run decides which result groups are read for the whole folder.
    first_experiment_data = analysis_df['experiment_details'][0].experiment_data

    if first_experiment_data['depth_first_params'] is not None:
        analysis_df['dfs_results'] = analysis_df.apply(
            lambda x: x.experiment_details.results_data['depth_first_search_results'], axis=1)

        analysis_df['dfs_success'] = analysis_df.apply(lambda x: x.dfs_results['success'], axis=1)

        analysis_df['dfs_operation_count'] = analysis_df.apply(lambda x: x.dfs_results['operation_count'], axis=1)

        analysis_df['dfs_backtrack_count'] = analysis_df.apply(lambda x: x.dfs_results['backtrack_count'], axis=1)

        analysis_df['dfs_time'] = analysis_df.apply(lambda x: x.dfs_results['time'], axis=1)

        analysis_df['dfs_operation_history'] = analysis_df.apply(
            lambda x: x.dfs_results['operation_history'], axis=1)

    if 'ac3_params' in first_experiment_data and first_experiment_data['ac3_params'] is not None:
        analysis_df['ac3_results'] = analysis_df.apply(
            lambda x: x.experiment_details.results_data['ac3_results'], axis=1)

        analysis_df['ac3_success'] = analysis_df.apply(lambda x: x.ac3_results['ac3_return_value'], axis=1)

        analysis_df['ac3_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_total_time'], axis=1)

        analysis_df['ac3_init_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_init_time'], axis=1)

        analysis_df['ac3_apply_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_apply_time'], axis=1)

        # must recalculate options opened before ac3 because data was incorrectly saved when running the experiments
        analysis_df['ac3_fixed_options_open_before_ac3'] = analysis_df.apply(
            lambda x: count_options_opened_before_ac3(x['problem']), axis=1)

        analysis_df['ac3_fixed_options_closed_by_ac3'] = (
            analysis_df['ac3_fixed_options_open_before_ac3']
            - analysis_df.ac3_results.apply(lambda x: x['options_open_after_ac3'])
        )

    # NOTE(review): everything below reads the dfs_* columns unconditionally, so a
    # folder whose first run has no depth-first parameters fails here with a KeyError.
    analysis_df['solution_gene_file'] = (
        analysis_df.apply(lambda x:
                          os.path.join(x['folder path'], 'solution gene after depth first search.npy') if x['dfs_success']
                          else None, axis=1)
    )

    analysis_df['solution_gene'] = analysis_df.apply(lambda x:
                                                     np.load(x['solution_gene_file']) if x['dfs_success']
                                                     else None, axis=1)

    analysis_df['cost'] = analysis_df.apply(lambda x:
                                            calculate_total_cost(x['problem'], x['solution_gene']) if x['dfs_success']
                                            else None, axis=1)

    analysis_df['class_count'] = analysis_df.apply(lambda x: len(x['problem'].classes), axis=1)

    # Called only for its side effect: one graph per run, saved next to the run's data.
    _ = analysis_df.apply(
        lambda x: plot_percentage_placed_over_operation(
            os.path.join(x['folder path'], "class_placed_percent_over_operation_graph"),
            x['dfs_operation_history'],
            x['class_count'],
            x['experiment_details'].name + " " + x['experiment_details'].dataset_name + ' - ' + x['instance_name'])
        , axis=1
    )

    return analysis_df

In [None]:
# Raw strings keep the Windows backslashes literal. In a normal string literal, sequences
# such as "\D", "\d" and "\A" are invalid escapes that newer Python versions warn about.
itc07t2_dfs_res = read_experiment_stats(r"D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2007_track2_20240320-001401")

itc07t2_ac3_res = read_experiment_stats(r"D:\Desktop\done tests\AC3 and depth_first_search with 10k max operations_itc2007_track2_20240321-171030")

In [None]:
# Raw strings keep the Windows backslashes literal. In a normal string literal, sequences
# such as "\D", "\d" and "\A" are invalid escapes that newer Python versions warn about.
itc07_t3_dfs_res = read_experiment_stats(r"D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2007_track3_20240320-003204")

itc07t3_ac3_res = read_experiment_stats(r"D:\Desktop\done tests\AC3 and depth_first_search with 10k max operations_itc2007_track3_20240321-201025")

In [None]:
# Raw string keeps the Windows backslashes literal. In a normal string literal, "\D" and
# "\d" are invalid escape sequences that newer Python versions warn about.
itc19_dfs_res = read_experiment_stats(r"D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2019_20240320-003405")

In [None]:
def count_successes(df):
    """Return the sum of the 'dfs_success' column of ``df``.

    For a boolean column this is the number of rows whose search succeeded.
    """
    success_flags = df['dfs_success']
    return success_flags.sum()

In [None]:
# One (label, results) pair per experiment series, in the order the lines are reported.
success_summaries = [
    ("itc2007 track 2 dfs", itc07t2_dfs_res),
    ("itc2007 track 2 ac3", itc07t2_ac3_res),
    ("itc2007 track 3 dfs", itc07_t3_dfs_res),
    ("itc2007 track 3 ac3", itc07t3_ac3_res),
    ("itc2019 dfs", itc19_dfs_res),
]

for series_label, results_df in success_summaries:
    success_count = count_successes(results_df)
    run_count = len(results_df)
    # Build each line once so the stats file and the printed output cannot drift apart.
    summary_line = (
        f"successes for {series_label}: {success_count} / {run_count}"
        f" = {(success_count / run_count * 100):.2f}%"
    )
    stats_file.write(summary_line)
    stats_file.write('\n')
    print(summary_line)

# Two blank lines separate this section from whatever is written to the stats file next.
stats_file.write('\n')
stats_file.write('\n')
stats_file.flush()

In [None]:
# Keep only the runs whose 'dfs_success' flag is set; the timing plots use these subsets.
it07t2_dfs_succeeded = itc07t2_dfs_res.loc[itc07t2_dfs_res['dfs_success']]
it07t2_ac3_succeeded = itc07t2_ac3_res.loc[itc07t2_ac3_res['dfs_success']]
it07t3_dfs_succeeded = itc07_t3_dfs_res.loc[itc07_t3_dfs_res['dfs_success']]
it07t3_ac3_succeeded = itc07t3_ac3_res.loc[itc07t3_ac3_res['dfs_success']]
it19_dfs_succeeded = itc19_dfs_res.loc[itc19_dfs_res['dfs_success']]

In [None]:
def plot_time_taken_successful_dfs_instances(df, title, file):
    """Draw, save and show a bar chart of 'dfs_time' per 'instance_name'.

    Args:
        df: DataFrame with 'instance_name' and 'dfs_time' columns.
        title: Title of the figure.
        file: File name without extension; the image is written as
            ``<assets_folder_path>/<file>.png``.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)

    # Draw on the explicit axes rather than on pyplot's implicit "current" axes.
    ax.bar(df['instance_name'], df['dfs_time'])

    # set x labels to vertical
    ax.tick_params(axis='x', labelrotation=90)

    fig.tight_layout()

    # Save before showing, then close so repeated calls do not keep figures alive.
    fig.savefig(os.path.join(assets_folder_path, file + '.png'))
    plt.show()
    plt.close(fig)

In [None]:
def plot_time_taken_successful_ac3_instances(df, title, file):
    """Draw, save and show a stacked bar chart of AC3 time plus DFS time per instance.

    The 'ac3_time' bars start at zero and the 'dfs_time' bars are stacked on top of them.

    Args:
        df: DataFrame with 'instance_name', 'dfs_time' and 'ac3_time' columns.
        title: Title of the figure.
        file: File name without extension; the image is written as
            ``<assets_folder_path>/<file>.png``.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)

    # Same drawing order as before, so each series keeps its colour.
    dfs_bars = ax.bar(df['instance_name'], df['dfs_time'], bottom=df['ac3_time'])
    ac3_bars = ax.bar(df['instance_name'], df['ac3_time'])

    # Pair every label with its own bars. A bare list of labels is assigned in
    # drawing order, which labelled the dfs bars 'ac3 time' and vice versa.
    ax.legend([ac3_bars, dfs_bars], ['ac3 time', 'dfs time'])

    # set x labels to vertical
    ax.tick_params(axis='x', labelrotation=90)

    fig.tight_layout()

    # Save before showing, then close so repeated calls do not keep figures alive.
    fig.savefig(os.path.join(assets_folder_path, file + '.png'))
    plt.show()
    plt.close(fig)

In [None]:
# One DFS timing chart per dataset: (successful runs, figure title, output file name).
for succeeded_runs, chart_title, chart_file in (
    (it07t2_dfs_succeeded, 'Time taken for successful instances\nin itc2007 track 2 dfs', 'itc2007_track2_dfs_time'),
    (it07t3_dfs_succeeded, 'Time taken for successful instances\nin itc2007 track 3 dfs', 'itc2007_track3_dfs_time'),
    (it19_dfs_succeeded, 'Time taken for successful instances\nin itc2019 dfs', 'itc2019_dfs_time'),
):
    plot_time_taken_successful_dfs_instances(succeeded_runs, chart_title, chart_file)

In [None]:
# One stacked AC3 + DFS timing chart per ITC 2007 track: (successful runs, title, file name).
for succeeded_runs, chart_title, chart_file in (
    (it07t2_ac3_succeeded, 'Time taken for successful instances\nin itc2007 track 2 dfs vs ac3', 'itc2007_track2_dfs_vs_ac3_time'),
    (it07t3_ac3_succeeded, 'Time taken for successful instances\nin itc2007 track 3 dfs vs ac3', 'itc2007_track3_dfs_vs_ac3_time'),
):
    plot_time_taken_successful_ac3_instances(succeeded_runs, chart_title, chart_file)

In [None]:
def plot_options_closed_by_ac3(df, title, file):
    """Draw, save and show the percentage of options closed by AC3 for each instance.

    The bar height is 'ac3_fixed_options_closed_by_ac3' divided by
    'ac3_fixed_options_open_before_ac3', times 100. Bars are green where
    'dfs_success' is truthy and red otherwise.

    Args:
        df: DataFrame with 'instance_name', 'dfs_success' and the two
            'ac3_fixed_options_*' columns named above.
        title: Title of the figure.
        file: File name without extension; the image is written as
            ``<assets_folder_path>/<file>.png``.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)

    color_list = ['green' if success else 'red' for success in df['dfs_success']]
    closed_percent = df['ac3_fixed_options_closed_by_ac3'] / df['ac3_fixed_options_open_before_ac3'] * 100

    # Draw on the explicit axes rather than on pyplot's implicit "current" axes.
    ax.bar(df['instance_name'], closed_percent, color=color_list)

    # set x labels to vertical
    ax.tick_params(axis='x', labelrotation=90)

    fig.tight_layout()

    # Save before showing, then close so repeated calls do not keep figures alive.
    fig.savefig(os.path.join(assets_folder_path, file + '.png'))
    plt.show()
    plt.close(fig)

# One "options closed" chart per ITC 2007 track: (all AC3 runs, figure title, output file name).
for ac3_runs, chart_title, chart_file in (
    (itc07t2_ac3_res, '% of options closed by ac3\nin itc2007 track 2 ac3', 'itc2007_track2_ac3_options_closed'),
    (itc07t3_ac3_res, '% of options closed by ac3\nin itc2007 track 3 ac3', 'itc2007_track3_ac3_options_closed'),
):
    plot_options_closed_by_ac3(ac3_runs, chart_title, chart_file)

In [None]:
# Display the ITC 2019 depth-first search results table for inspection.
itc19_dfs_res

In [None]:
# Display the stats file handle.
# NOTE(review): no visible cell closes this handle; confirm it is closed once all stats are written.
stats_file

In [None]:
# Raw string keeps the Windows backslashes literal. In a normal string literal, "\D", "\I"
# and "\o" are invalid escape sequences that newer Python versions warn about.
read_experiment_stats(r"D:\Desktop\ITC2019\output\Depth_first_search with 10k max operations_itc2007_track3_20240331-185204")