In [1]:
import json
import os
import pickle
import re
import time

import numpy as np
import pandas as pd
import yaml
from matplotlib import pyplot as plt
from matplotlib.ticker import PercentFormatter

from parse_input import parse_xml, parse_itc2007_curriculum_based, parse_itc2007_post_enrolment
from penalty_calc import calculate_total_cost_including_student_conflicts
from random_student_sectioning import RandomStudentSectioning
from solution_search import SolutionSearch
from solution_to_xml import generate_xml, output_itc2007_cb


In [2]:

show_graphs = False

output_folder_path = f'output\\.analysis\\{time.strftime("%Y%m%d-%H%M%S")}'
stats_file_path = os.path.join(output_folder_path, 'stats.txt')
os.makedirs(output_folder_path)
stats_file = open(stats_file_path, 'x')
assets_folder_path = os.path.join(output_folder_path, 'assets')
os.makedirs(assets_folder_path)

itc2007_track2_path = 'D:\\Desktop\\Datasets\\post'
itc2007_track3_path = 'D:\\Desktop\\Datasets\\curriculum'
itc2019_path = 'D:\\Desktop\\Datasets\\2019'

In [3]:
def get_all_files(path):
    return [os.path.join(dirpath, file) for dirpath, _, filenames in os.walk(path) for file in filenames]

In [4]:
itc2007_track2_files = get_all_files(itc2007_track2_path)
itc2007_track3_files = get_all_files(itc2007_track3_path)
itc2019_files = get_all_files(itc2019_path)

In [5]:
def get_instance_by_dataset_and_instance_file_name(dataset, instance_file_name):
    if dataset == 'itc2007_track2':
        return next((file for file in itc2007_track2_files if instance_file_name in file), None)
    elif dataset == 'itc2007_track3':
        return next((file for file in itc2007_track3_files if instance_file_name in file), None)
    elif dataset == 'itc2019':
        return next((file for file in itc2019_files if instance_file_name + ".xml" in
                     file.split("\\\\")[-1]), None)
    else:
        return None


def parse_problem(dataset, instance_path):
    if dataset == 'itc2007_track2':
        return parse_itc2007_post_enrolment(instance_path)
    elif dataset == 'itc2007_track3':
        return parse_itc2007_curriculum_based(instance_path)
    elif dataset == 'itc2019':
        return parse_xml(instance_path)


def save_solution(dataset, problem, solution_gene, output_folder, raw_room_ids=None, raw_course_ids_for_classes=None,
                  student_classes=None):
    if dataset == 'itc2007_track2':
        pass
        return os.path.join(output_folder, problem.name + '_solution.xml')
    elif dataset == 'itc2007_track3':
        output_itc2007_cb(problem, solution_gene, raw_room_ids, raw_course_ids_for_classes,
                          os.path.join(output_folder, problem.name + '_solution.ctt'))
        return os.path.join(output_folder, problem.name + '_solution.ctt')
    elif dataset == 'itc2019':
        generate_xml(problem, solution_gene, student_classes,
                     os.path.join(output_folder, problem.name + '_solution.xml'))
        return os.path.join(output_folder, problem.name + '_solution.xml')

In [6]:
def plot_percentage_placed_over_operation(file, operation_history, class_count, title):
    fig, ax = plt.subplots()
    ax.set_title(title)

    y = [i['current_row'] for i in operation_history]
    y_percent = [i / class_count for i in y]

    ax.set_xlabel('Operation')
    ax.set_ylabel('Classes placed (%)')

    ax.yaxis.set_major_formatter(PercentFormatter(1))

    ax.plot(y_percent)
    if show_graphs:
        plt.show()

    if file:
        fig.savefig(file + '.png')

    plt.close(fig)

In [7]:
def read_experiment_stats(experiment_folder):
    analysis_df = pd.DataFrame(list(os.listdir(experiment_folder)), columns=['folder'])

    analysis_df['folder path'] = analysis_df.apply(lambda x: os.path.join(experiment_folder, x.folder), axis=1)

    analysis_df['experiment_details'] = analysis_df.apply(
        lambda x: pickle.load(open(os.path.join(x['folder path'], 'experiment_details.pkl'), 'rb')), axis=1)

    analysis_df['instance_name'] = analysis_df.apply(lambda x: x.experiment_details.instance_name, axis=1)

    analysis_df['instance_file'] = analysis_df.apply(
        lambda x: get_instance_by_dataset_and_instance_file_name(x.experiment_details.dataset_name,
                                                                 x.experiment_details.instance_file_name), axis=1)
    if analysis_df['experiment_details'][0].dataset_name == 'itc2007_track3':
        analysis_df['problem'], analysis_df['problem_stats'], analysis_df['raw_room_ids'], analysis_df[
            'raw_course_ids_for_classes'] = zip(
            *analysis_df.apply(lambda x: parse_problem(x.experiment_details.dataset_name, x['instance_file']), axis=1))
    else:
        analysis_df['problem'], analysis_df['problem_stats'] = zip(
            *analysis_df.apply(lambda x: parse_problem(x.experiment_details.dataset_name, x['instance_file']), axis=1))

    if 'depth_first_params' in analysis_df['experiment_details'][0].experiment_data or \
            'mac_depth_first_params' in analysis_df['experiment_details'][0].experiment_data:
        analysis_df['dfs_results'] = analysis_df.apply(
            lambda x: x.experiment_details.results_data['depth_first_search_results'], axis=1)

        analysis_df['dfs_success'] = analysis_df.apply(lambda x: x.dfs_results['success'], axis=1)

        analysis_df['dfs_operation_count'] = analysis_df.apply(lambda x: x.dfs_results['operation_count'],
                                                               axis=1)

        analysis_df['dfs_backtrack_count'] = analysis_df.apply(lambda x: x.dfs_results['backtrack_count'],
                                                               axis=1)

        analysis_df['dfs_time'] = analysis_df.apply(lambda x: x.dfs_results['time'],
                                                    axis=1)

        analysis_df['dfs_operation_history'] = analysis_df.apply(
            lambda x: x.dfs_results['operation_history'], axis=1)

    if 'ac3_params' in analysis_df['experiment_details'][0].experiment_data and \
            analysis_df['experiment_details'][0].experiment_data['ac3_params'] is not None:
        analysis_df['ac3_results'] = analysis_df.apply(
            lambda x: x.experiment_details.results_data['ac3_results'], axis=1)

        analysis_df['ac3_success'] = analysis_df.apply(lambda x: x.ac3_results['ac3_return_value'], axis=1)

        analysis_df['ac3_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_total_time'],
                                                    axis=1)

        analysis_df['ac3_init_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_init_time'],
                                                         axis=1)

        analysis_df['ac3_apply_time'] = analysis_df.apply(lambda x: x.ac3_results['ac3_apply_time'],
                                                          axis=1)

        # must recalculate options opened before ac3 because data was incorrectly saved when running the experiments

        def count_options_opened_before_ac3(problem):
            search = SolutionSearch(problem)
            return np.count_nonzero(search.decision_table == 0)

        analysis_df['ac3_fixed_options_open_before_ac3'] = analysis_df.apply(
            lambda x: count_options_opened_before_ac3(x['problem']),
            axis=1)

        analysis_df['ac3_fixed_options_closed_by_ac3'] = analysis_df[
                                                             'ac3_fixed_options_open_before_ac3'] - analysis_df.ac3_results.apply(
            lambda x: x['options_open_after_ac3'])

    analysis_df['solution_gene_file'] = (
        analysis_df.apply(lambda x:
                          os.path.join(x['folder path'], 'solution gene after depth first search.npy') if x[
                              'dfs_success']
                          else None, axis=1)
    )

    analysis_df['solution_gene'] = analysis_df.apply(lambda x:
                                                     np.load(x['solution_gene_file']) if x['dfs_success']
                                                     else None, axis=1)

    if analysis_df['experiment_details'][0].dataset_name == 'itc2019':
        analysis_df['student_classes'] = analysis_df.apply(lambda x: RandomStudentSectioning(x['problem']).apply(),
                                                           axis=1)

    analysis_df['cost'] = analysis_df.apply(lambda x:
                                            calculate_total_cost_including_student_conflicts(x['problem'],
                                                                                             x['solution_gene'],
                                                                                             x.get('student_classes',
                                                                                                   {})) if x[
                                                'dfs_success']
                                            else None, axis=1)

    analysis_df['class_count'] = analysis_df.apply(lambda x: len(x['problem'].classes), axis=1)

    _ = analysis_df.apply(
        lambda x: plot_percentage_placed_over_operation(
            os.path.join(x['folder path'], "class_placed_percent_over_operation_graph"),
            x['dfs_operation_history'],
            x['class_count'],
            x['experiment_details'].name + " " + x['experiment_details'].dataset_name + ' - ' + x['instance_name'])
        , axis=1
    )

    analysis_df['solution_path'] = analysis_df.apply(
        lambda x: save_solution(x['experiment_details'].dataset_name, x['problem'], x['solution_gene'],
                                x['folder path'], x.get('raw_room_ids', None),
                                x.get('raw_course_ids_for_classes', None), x.get('student_classes', None))
        if x['dfs_success'] else None, axis=1)

    #remove problem column
    analysis_df.drop(columns=['problem'], inplace=True)

    return analysis_df

In [8]:
itc07t2_dfs_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2007_track2_20240320-001401")

itc07t2_ac3_res = read_experiment_stats(
    "D:\Desktop\done tests\AC3 and depth_first_search with 10k max operations_itc2007_track2_20240321-171030")

itc07t2_dfs_random_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search (random option) with 10k max operations_itc2007_track2_20240401-225642")

itc07t2_mac_res = read_experiment_stats(
    "D:\Desktop\done tests\MAC Depth_first_search with 5k max operations_itc2007_track2_20240413-043034")

itc07t2_mac_random_Res = read_experiment_stats(
    "D:\Desktop\done tests\MAC Depth_first_search (random options) with 5k max backtracks_itc2007_track2_20240413-135711")

In [9]:
itc07_t3_dfs_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2007_track3_20240320-003204")

itc07t3_ac3_res = read_experiment_stats(
    "D:\Desktop\done tests\AC3 and depth_first_search with 10k max operations_itc2007_track3_20240321-201025")

itc07t3_dfs_random_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search (random option) with 10k max operations_itc2007_track3_20240402-003252")

itc07t3_mac_res = read_experiment_stats(
    "D:\Desktop\done tests\MAC Depth_first_search with 5k max operations_itc2007_track3_20240413-093047")

itc07t3_mac_random_res = read_experiment_stats(
    "D:\Desktop\done tests\MAC Depth_first_search (random options) with 5k max backtracks_itc2007_track3_20240414-052518")


In [10]:
itc19_dfs_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search with 10k max operations_itc2019_20240320-003405")

itc19_dfs_random_res = read_experiment_stats(
    "D:\Desktop\done tests\Depth_first_search (random option) with 10k max operations_itc2019_20240402-004018")

In [11]:
def count_successes(df):
    return df['dfs_success'].sum()

In [12]:
stats_file.write(
    f"successes for itc2007 track 2 dfs: {count_successes(itc07t2_dfs_res)} / {len(itc07t2_dfs_res)} = {(count_successes(itc07t2_dfs_res) / len(itc07t2_dfs_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 2 ac3: {count_successes(itc07t2_ac3_res)} / {len(itc07t2_ac3_res)} = {(count_successes(itc07t2_ac3_res) / len(itc07t2_ac3_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 2 dfs random: {count_successes(itc07t2_dfs_random_res)} / {len(itc07t2_dfs_random_res)} = {(count_successes(itc07t2_dfs_random_res) / len(itc07t2_dfs_random_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 2 mac: {count_successes(itc07t2_mac_res)} / {len(itc07t2_mac_res)} = {(count_successes(itc07t2_mac_res) / len(itc07t2_mac_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 2 mac random: {count_successes(itc07t2_mac_random_Res)} / {len(itc07t2_mac_random_Res)} = {(count_successes(itc07t2_mac_random_Res) / len(itc07t2_mac_random_Res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 3 dfs: {count_successes(itc07_t3_dfs_res)} / {len(itc07_t3_dfs_res)} = {(count_successes(itc07_t3_dfs_res) / len(itc07_t3_dfs_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 3 ac3: {count_successes(itc07t3_ac3_res)} / {len(itc07t3_ac3_res)} = {(count_successes(itc07t3_ac3_res) / len(itc07t3_ac3_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 3 dfs random: {count_successes(itc07t3_dfs_random_res)} / {len(itc07t3_dfs_random_res)} = {(count_successes(itc07t3_dfs_random_res) / len(itc07t3_dfs_random_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 3 mac: {count_successes(itc07t3_mac_res)} / {len(itc07t3_mac_res)} = {(count_successes(itc07t3_mac_res) / len(itc07t3_mac_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2007 track 3 mac random: {count_successes(itc07t3_mac_random_res)} / {len(itc07t3_mac_random_res)} = {(count_successes(itc07t3_mac_random_res) / len(itc07t3_mac_random_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2019 dfs: {count_successes(itc19_dfs_res)} / {len(itc19_dfs_res)} = {(count_successes(itc19_dfs_res) / len(itc19_dfs_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write(
    f"successes for itc2019 dfs random: {count_successes(itc19_dfs_random_res)} / {len(itc19_dfs_random_res)} = {(count_successes(itc19_dfs_random_res) / len(itc19_dfs_random_res) * 100):.2f}%")
stats_file.write('\n')

stats_file.write('\n')
stats_file.write('\n')
stats_file.flush()
stats_file.close()

with open(stats_file_path, 'r') as f:
    print(f.read())

successes for itc2007 track 2 dfs: 1 / 24 = 4.17%
successes for itc2007 track 2 ac3: 1 / 24 = 4.17%
successes for itc2007 track 2 dfs random: 4 / 120 = 3.33%
successes for itc2007 track 2 mac: 1 / 24 = 4.17%
successes for itc2007 track 2 mac random: 3 / 120 = 2.50%
successes for itc2007 track 3 dfs: 18 / 21 = 85.71%
successes for itc2007 track 3 ac3: 18 / 21 = 85.71%
successes for itc2007 track 3 dfs random: 95 / 105 = 90.48%
successes for itc2007 track 3 mac: 18 / 21 = 85.71%
successes for itc2007 track 3 mac random: 99 / 105 = 94.29%
successes for itc2019 dfs: 12 / 30 = 40.00%
successes for itc2019 dfs random: 58 / 150 = 38.67%



In [13]:
it07t2_dfs_succeeded = itc07t2_dfs_res.loc[itc07t2_dfs_res['dfs_success']]
it07t2_ac3_succeeded = itc07t2_ac3_res.loc[itc07t2_ac3_res['dfs_success']]
it07t2_mac_succeeded = itc07t2_mac_res.loc[itc07t2_mac_res['dfs_success']]
it07t3_dfs_succeeded = itc07_t3_dfs_res.loc[itc07_t3_dfs_res['dfs_success']]
it07t3_ac3_succeeded = itc07t3_ac3_res.loc[itc07t3_ac3_res['dfs_success']]
it07t3_mac_succeeded = itc07t3_mac_res.loc[itc07t3_mac_res['dfs_success']]
it19_dfs_succeeded = itc19_dfs_res.loc[itc19_dfs_res['dfs_success']]

In [14]:
def plot_time_taken_successful_dfs_instances(df, title, file):
    fig, ax = plt.subplots()
    ax.set_title(title)

    bars = plt.bar(df['instance_name'], df['dfs_time'])

    #set x labels to vertical
    plt.xticks(rotation=90)

    plt.tight_layout()
    if show_graphs:
        plt.show()

    fig.savefig(os.path.join(assets_folder_path, file + '.png'))

    plt.close(fig)

In [15]:
def plot_time_taken_successful_mac_instances(df, title, file):
    fig, ax = plt.subplots()
    ax.set_title(title)

    bars = plt.bar(df['instance_name'], df['dfs_time'])

    #set x labels to vertical
    plt.xticks(rotation=90)

    plt.tight_layout()
    if show_graphs:
        plt.show()

    fig.savefig(os.path.join(assets_folder_path, file + '.png'))

    plt.close(fig)

In [16]:
def plot_time_taken_successful_ac3_instances(df, title, file):
    fig, ax = plt.subplots()
    ax.set_title(title)

    bars = plt.bar(df['instance_name'], df['dfs_time'], bottom=df['ac3_time'])
    bars_ac3 = plt.bar(df['instance_name'], df['ac3_time'])

    plt.legend(['ac3 time', 'dfs time'])

    #set x labels to vertical
    plt.xticks(rotation=90)

    plt.tight_layout()
    if show_graphs:
        plt.show()

    fig.savefig(os.path.join(assets_folder_path, file + '.png'))

    plt.close(fig)

In [17]:
plot_time_taken_successful_dfs_instances(it07t2_dfs_succeeded,
                                         'Time taken for successful instances\nin itc2007 track 2 dfs',
                                         'itc2007_track2_dfs_time')

plot_time_taken_successful_dfs_instances(it07t3_dfs_succeeded,
                                         'Time taken for successful instances\nin itc2007 track 3 dfs',
                                         'itc2007_track3_dfs_time')

plot_time_taken_successful_dfs_instances(it19_dfs_succeeded, 'Time taken for successful instances\nin itc2019 dfs',
                                         'itc2019_dfs_time')

In [18]:
plot_time_taken_successful_ac3_instances(it07t2_ac3_succeeded,
                                         'Time taken for successful instances\nin itc2007 track 2 dfs vs ac3',
                                         'itc2007_track2_dfs_vs_ac3_time')

plot_time_taken_successful_ac3_instances(it07t3_ac3_succeeded,
                                         'Time taken for successful instances\nin itc2007 track 3 dfs vs ac3',
                                         'itc2007_track3_dfs_vs_ac3_time')

In [19]:
plot_time_taken_successful_mac_instances(it07t2_mac_succeeded,
                                         'Time taken for successful instances\nin itc2007 track 2 mac',
                                         'itc2007_track2_mac_time')

plot_time_taken_successful_mac_instances(it07t3_mac_succeeded,
                                            'Time taken for successful instances\nin itc2007 track 3 mac',
                                            'itc2007_track3_mac_time')

In [20]:
def plot_options_closed_by_ac3(df, title, file):
    fig, ax = plt.subplots()
    ax.set_title(title)

    color_list = ['green' if success else 'red' for success in df['dfs_success']]

    bars = plt.bar(df['instance_name'],
                   df['ac3_fixed_options_closed_by_ac3'] / df['ac3_fixed_options_open_before_ac3'] * 100,
                   color=color_list)

    #set x labels to vertical
    plt.xticks(rotation=90)

    plt.tight_layout()
    if show_graphs:
        plt.show()

    fig.savefig(os.path.join(assets_folder_path, file + '.png'))

    plt.close(fig)


plot_options_closed_by_ac3(itc07t2_ac3_res, '% of options closed by ac3\nin itc2007 track 2 ac3',
                           'itc2007_track2_ac3_options_closed')

plot_options_closed_by_ac3(itc07t3_ac3_res, '% of options closed by ac3\nin itc2007 track 3 ac3',
                           'itc2007_track3_ac3_options_closed')


In [21]:
itc19_dfs_res

Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file,problem_stats,dfs_results,dfs_success,dfs_operation_count,dfs_backtrack_count,dfs_time,dfs_operation_history,solution_gene_file,solution_gene,student_classes,cost,class_count,solution_path
0,agh-fal17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,agh-fal17,D:\Desktop\Datasets\2019\3.late\agh-fal17.xml,"{'name': 'agh-fal17', 'student_count': 6925, '...","{'success': False, 'operation_count': 10000, '...",False,10000,4711,3150.146189,"[{'current_row': 1, 'time': 0.5409755706787109...",,,"{1: [5081], 2: [5081], 3: [5081], 4: [5081], 5...",,5081,
1,agh-fis-spr17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,agh-fis-spr17,D:\Desktop\Datasets\2019\1.early\agh-fis-spr17...,"{'name': 'agh-fis-spr17', 'student_count': 164...","{'success': True, 'operation_count': 1403, 'ba...",True,1403,82,568.491562,"[{'current_row': 1, 'time': 0.506655216217041}...",D:\Desktop\done tests\Depth_first_search with ...,"[[1, 165], [-1, 0], [-1, 0], [-1, 0], [-1, 0],...","{1: [837, 838, 840, 839, 841, 843, 845, 844, 8...","(0, 50280)",1239,D:\Desktop\done tests\Depth_first_search with ...
2,agh-ggis-spr17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,agh-ggis-spr17,D:\Desktop\Datasets\2019\1.early\agh-ggis-spr1...,"{'name': 'agh-ggis-spr17', 'student_count': 21...","{'success': False, 'operation_count': 10000, '...",False,10000,4788,237.673114,"[{'current_row': 1, 'time': 0.0379271507263183...",,,"{1: [201, 202, 205, 190], 2: [201, 202, 205, 1...",,1852,
3,agh-ggos-spr17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,agh-ggos-spr17,D:\Desktop\Datasets\2019\2.middle\agh-ggos-spr...,"{'name': 'agh-ggos-spr17', 'student_count': 22...","{'success': False, 'operation_count': 10000, '...",False,10000,4761,1011.31349,"[{'current_row': 1, 'time': 0.1105084419250488...",,,"{1: [602, 580], 2: [602, 580], 3: [602, 580], ...",,1144,
4,agh-h-spr17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,agh-h-spr17,D:\Desktop\Datasets\2019\2.middle\agh-h-spr17.xml,"{'name': 'agh-h-spr17', 'student_count': 1988,...","{'success': False, 'operation_count': 10000, '...",False,10000,4945,1813.728514,"[{'current_row': 1, 'time': 0.175018310546875}...",,,"{1: [3], 2: [3], 3: [3], 4: [3], 5: [3], 6: [3...",,460,
5,bet-fal17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,bet-fal17,D:\Desktop\Datasets\2019\1.early\bet-fal17.xml,"{'name': 'bet-fal17', 'student_count': 3018, '...","{'success': True, 'operation_count': 7569, 'ba...",True,7569,3293,333.179137,"[{'current_row': 1, 'time': 0.0280003547668457...",D:\Desktop\done tests\Depth_first_search with ...,"[[-1, 0], [30, 2], [31, 5], [28, 10], [29, 5],...","{1: [982, 983, 981], 2: [895, 285, 286, 410, 4...","(0, 409830)",983,D:\Desktop\done tests\Depth_first_search with ...
6,bet-spr18,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,bet-spr18,D:\Desktop\Datasets\2019\3.late\bet-spr18.xml,"{'name': 'bet-spr18', 'student_count': 2921, '...","{'success': False, 'operation_count': 10000, '...",False,10000,4910,207.215608,"[{'current_row': 1, 'time': 0.0299990177154541...",,,"{1: [393, 396, 437, 439], 2: [395, 398, 437, 4...",,1083,
7,iku-fal17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,iku-fal17,D:\Desktop\Datasets\2019\1.early\iku-fal17.xml,"{'name': 'iku-fal17', 'student_count': 0, 'avg...","{'success': False, 'operation_count': 10000, '...",False,10000,4388,1189.769057,"[{'current_row': 1, 'time': 0.1750149726867675...",,,{},,2641,
8,iku-spr18,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,iku-spr18,D:\Desktop\Datasets\2019\3.late\iku-spr18.xml,"{'name': 'iku-spr18', 'student_count': 0, 'avg...","{'success': False, 'operation_count': 10000, '...",False,10000,4464,1532.25177,"[{'current_row': 1, 'time': 0.2000176906585693...",,,{},,2782,
9,lums-fal17,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,lums-fal17,D:\Desktop\Datasets\2019\3.late\lums-fal17.xml,"{'name': 'lums-fal17', 'student_count': 0, 'av...","{'success': True, 'operation_count': 502, 'bac...",True,502,0,16.667898,"[{'current_row': 1, 'time': 0.0975069999694824...",D:\Desktop\done tests\Depth_first_search with ...,"[[5, 23], [5, 17], [5, 33], [13, 7], [6, 17], ...",{},"(0, 2886)",502,D:\Desktop\done tests\Depth_first_search with ...


In [22]:
itc07_t3_dfs_res

Unnamed: 0,folder,folder path,experiment_details,instance_name,instance_file,problem_stats,raw_room_ids,raw_course_ids_for_classes,dfs_results,dfs_success,dfs_operation_count,dfs_backtrack_count,dfs_time,dfs_operation_history,solution_gene_file,solution_gene,cost,class_count,solution_path
0,comp01,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp01,D:\Desktop\Datasets\curriculum\comp01.ctt,"{'name': 'Fis0506-1', 'course_count': 30, 'roo...","[B, C, E, F, G, S]","[c0001, c0001, c0001, c0001, c0001, c0001, c00...","{'success': True, 'operation_count': 160, 'bac...",True,160,0,0.354529,"[{'current_row': 1, 'time': 0.0050010681152343...",D:\Desktop\done tests\Depth_first_search with ...,"[[0, 18], [0, 19], [0, 20], [0, 21], [0, 22], ...","(0, 1095)",160,D:\Desktop\done tests\Depth_first_search with ...
1,comp02,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp02,D:\Desktop\Datasets\curriculum\comp02.ctt,"{'name': 'Ing0203-2', 'course_count': 82, 'roo...","[36, 37, 38, 31, 27, B, D, E, F, G, A, L, 50, ...","[c0131, c0131, c0131, c0211, c0211, c0211, c00...","{'success': False, 'operation_count': 10000, '...",False,10000,4953,30.177686,"[{'current_row': 1, 'time': 0.0080001354217529...",,,,283,
2,comp03,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp03,D:\Desktop\Datasets\curriculum\comp03.ctt,"{'name': 'Ing0304-1', 'course_count': 72, 'roo...","[36, 37, 38, 31, 27, B, D, E, F, G, A, L, 50, ...","[ChiGenAn, ChiGenAn, ChiGenAn, GeoAn, GeoAn, G...","{'success': True, 'operation_count': 251, 'bac...",True,251,0,1.021645,"[{'current_row': 1, 'time': 0.0079996585845947...",D:\Desktop\done tests\Depth_first_search with ...,"[[7, 6], [7, 7], [7, 5], [5, 2], [5, 17], [6, ...","(0, 7260)",251,D:\Desktop\done tests\Depth_first_search with ...
3,comp04,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp04,D:\Desktop\Datasets\curriculum\comp04.ctt,"{'name': 'Ing0405-3', 'course_count': 79, 'roo...","[25, 36, 37, 38, 31, 27, 51, B, D, F, G, A, L,...","[c0012, c0012, c0012, c0013, c0013, c0013, c00...","{'success': True, 'operation_count': 286, 'bac...",True,286,0,1.401608,"[{'current_row': 1, 'time': 0.0089998245239257...",D:\Desktop\done tests\Depth_first_search with ...,"[[0, 15], [0, 17], [1, 2], [0, 12], [0, 13], [...","(0, 4835)",286,D:\Desktop\done tests\Depth_first_search with ...
4,comp05,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp05,D:\Desktop\Datasets\curriculum\comp05.ctt,"{'name': 'Let0405-1', 'course_count': 54, 'roo...","[10, 14, 15, C1, L, M, O, B, A]","[LinTed1, AlfInf, LetIta1, LetIta1, LetIta1, F...","{'success': False, 'operation_count': 10000, '...",False,10000,4993,34.762075,"[{'current_row': 1, 'time': 0.0070049762725830...",,,,152,
5,comp06,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp06,D:\Desktop\Datasets\curriculum\comp06.ctt,"{'name': 'Ing0506-1', 'course_count': 108, 'ro...","[25, 36, 37, 38, 31, 27, 51, B, D, F, G, A, L,...","[c0006, c0006, c0006, c0011, c0011, c0011, c00...","{'success': True, 'operation_count': 361, 'bac...",True,361,0,2.211099,"[{'current_row': 1, 'time': 0.0099995136260986...",D:\Desktop\done tests\Depth_first_search with ...,"[[3, 13], [3, 14], [4, 0], [7, 16], [7, 17], [...","(0, 7853)",361,D:\Desktop\done tests\Depth_first_search with ...
6,comp07,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp07,D:\Desktop\Datasets\curriculum\comp07.ctt,"{'name': 'Ing0607-2', 'course_count': 131, 'ro...","[25, 36, 37, 38, 34, 27, 51, B, D, F, G, A, L,...","[c0007, c0007, c0007, c0009, c0009, c0009, c00...","{'success': True, 'operation_count': 434, 'bac...",True,434,0,3.331126,"[{'current_row': 1, 'time': 0.0159997940063476...",D:\Desktop\done tests\Depth_first_search with ...,"[[12, 16], [13, 3], [13, 7], [13, 20], [13, 23...","(0, 7640)",434,D:\Desktop\done tests\Depth_first_search with ...
7,comp08,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp08,D:\Desktop\Datasets\curriculum\comp08.ctt,"{'name': 'Ing0607-3', 'course_count': 86, 'roo...","[25, 36, 37, 38, 34, 27, 51, B, D, F, G, A, 50...","[c0012, c0012, c0012, c0013, c0013, c0013, c00...","{'success': True, 'operation_count': 324, 'bac...",True,324,0,1.765476,"[{'current_row': 1, 'time': 0.0109999179840087...",D:\Desktop\done tests\Depth_first_search with ...,"[[5, 4], [5, 5], [5, 6], [4, 15], [4, 16], [5,...","(0, 6892)",324,D:\Desktop\done tests\Depth_first_search with ...
8,comp09,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp09,D:\Desktop\Datasets\curriculum\comp09.ctt,"{'name': 'Ing0304-3', 'course_count': 76, 'roo...","[25, 36, 37, 38, 31, 27, 51, B, D, E, F, H, A,...","[c0012, c0012, c0012, c0013, c0013, c0013, c00...","{'success': True, 'operation_count': 279, 'bac...",True,279,0,1.359354,"[{'current_row': 1, 'time': 0.0089998245239257...",D:\Desktop\done tests\Depth_first_search with ...,"[[5, 22], [5, 20], [6, 10], [6, 2], [6, 3], [5...","(0, 6052)",279,D:\Desktop\done tests\Depth_first_search with ...
9,comp10,D:\Desktop\done tests\Depth_first_search with ...,<experiment_details.ExperimentDetails object a...,comp10,D:\Desktop\Datasets\curriculum\comp10.ctt,"{'name': 'Ing0405-2', 'course_count': 115, 'ro...","[25, 36, 37, 38, 31, 27, 51, B, D, F, G, A, L,...","[c1001, c1001, c1001, c1022, c1022, c1022, c01...","{'success': True, 'operation_count': 370, 'bac...",True,370,0,2.345539,"[{'current_row': 1, 'time': 0.0110001564025878...",D:\Desktop\done tests\Depth_first_search with ...,"[[5, 2], [5, 0], [5, 1], [10, 12], [10, 23], [...","(0, 6023)",370,D:\Desktop\done tests\Depth_first_search with ...


In [23]:
# for each instance, pass through the validator and check if the solution is valid by compiling and running the required cpp file


def validate_itc2007_track3(problem_path, solution_path, results_path):
    import subprocess

    validator_exe = 'D:\\Desktop\\itc validators\\2007cb\\x64\\Release\\2007cb.exe'
    command = [validator_exe, problem_path, solution_path]

    process = subprocess.run(command, capture_output=True)

    output_data = process.stdout

    with open(results_path, 'w') as f:
        f.write(output_data.decode())

    # Check the return code
    if process.returncode != 0:
        print(f"Command failed with exit code {process.returncode}")

    output_string = output_data.decode()

    hard_lines = re.findall(r'\(hard\) : (\d+)', output_string)
    hard_constraint_violations = np.sum([int(line) for line in hard_lines])

    soft_constraint_violations = int(re.search(r'Total Cost = (\d+)', output_string).group(1))

    return hard_constraint_violations, soft_constraint_violations


it07t3_dfs_succeeded.loc[:, 'validator_cost'] = it07t3_dfs_succeeded.apply(
    lambda x: validate_itc2007_track3(x['instance_file'], x['solution_path'],
                                      os.path.join(x['folder path'], 'validation_results.txt')), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  it07t3_dfs_succeeded.loc[:, 'validator_cost'] = it07t3_dfs_succeeded.apply(


In [24]:
#read yaml file


def validate_itc2019(problem_path, solution_path, results_path):
    # send an api request similar to the following
    #curl \
    #-u email:password \
    #-H "Content-Type:text/xml;charset=UTF-8" \
    #-d @solution.xml \
    #https://www.itc2019.org/itc2019-validator

    with open('itc2019creds.yaml', 'r') as file:
        credentials = yaml.safe_load(file)

    import requests

    with open(solution_path, 'rb') as f:
        solution_data = f.read()

    url = 'https://www.itc2019.org/itc2019-validator'

    headers = {
        'Content-Type': 'text/xml;charset=UTF-8'
    }

    response = requests.post(url, headers=headers, data=solution_data,
                             auth=(credentials['email'], credentials['password']))

    with open(results_path, 'w') as f:
        f.write(response.text)

    data = json.loads(response.text)

    result = data.get('result')
    soft = data.get('totalCost', {}).get('value', -1)

    hard = 0 if result == 'OK' else 999999999

    return hard, soft


it19_dfs_succeeded['validator_cost'] = it19_dfs_succeeded.apply(
    lambda x: validate_itc2019(x['instance_file'], x['solution_path'],
                               os.path.join(x['folder path'], 'validation_results.txt')), axis=1)

it19_dfs_succeeded[['instance_name', 'cost', 'validator_cost']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  it19_dfs_succeeded['validator_cost'] = it19_dfs_succeeded.apply(


Unnamed: 0,instance_name,cost,validator_cost
1,agh-fis-spr17,"(0, 50280)","(0, 50280)"
5,bet-fal17,"(0, 409830)","(0, 409830)"
9,lums-fal17,"(0, 2886)","(0, 2886)"
10,lums-spr18,"(0, 1231)","(0, 1231)"
11,mary-fal18,"(0, 53127)","(0, 53127)"
12,mary-spr17,"(0, 66642)","(0, 66642)"
13,muni-fi-fal17,"(0, 29054)","(0, 29054)"
14,muni-fi-spr16,"(0, 25394)","(0, 25394)"
15,muni-fi-spr17,"(0, 24979)","(0, 24979)"
16,muni-fsps-spr17,"(0, 235104)","(0, 235104)"


In [29]:
itc07_t3_dfs_res[['instance_name','dfs_success','class_count','cost']]

Unnamed: 0,instance_name,dfs_success,class_count,cost
0,comp01,True,160,"(0, 1095)"
1,comp02,False,283,
2,comp03,True,251,"(0, 7260)"
3,comp04,True,286,"(0, 4835)"
4,comp05,False,152,
5,comp06,True,361,"(0, 7853)"
6,comp07,True,434,"(0, 7640)"
7,comp08,True,324,"(0, 6892)"
8,comp09,True,279,"(0, 6052)"
9,comp10,True,370,"(0, 6023)"
