In [30]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.style.use('ggplot')

%matplotlib inline
%config InlineBackend.figure_format='retina'

# Data preprocessing

In [2]:
# Load the heuristic benchmark log into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook will
# not run on another machine without editing it — consider a configurable data directory.
results = pd.read_csv("/home/batman/code/fifteen-puzzle/data/experiments/ann-heuristic-benchmark.csv")
# Display (rows, columns) as a quick sanity check of what was loaded.
results.shape

(221, 9)

In [3]:
# Preview the first ten rows to check the column names and value formats.
results.head(10)

Unnamed: 0,PROCESS_ID,BOARD_ID,ALGORITHM_NAME,HEURISTIC_NAME,BOARDS_GENERATOR_NAME,RUN_TIME,SOLUTION_COST,EXPANDED_NODES,INITIAL_HEURISTIC_PREDICTION
0,2019-03-18 10:16:19.765190 PID23250,1,A*,PDB[Pat:5],RND,0.921609,49,6696,43.0
1,2019-03-18 10:16:19.772191 PID23252,1,A*,PDB[Pat:5],RND,1.239359,46,9477,40.0
2,2019-03-18 10:16:19.783919 PID23253,1,A*,PDB[Pat:5],RND,1.365097,48,7872,42.0
3,2019-03-18 10:16:19.811900 PID23262,1,A*,PDB[Pat:5],RND,2.115139,38,14756,30.0
4,2019-03-18 10:16:19.759208 PID23243,1,A*,PDB[Pat:5],RND,4.059638,48,18718,38.0
5,2019-03-18 10:16:19.773334 PID23248,1,A*,PDB[Pat:5],RND,4.679295,46,24641,38.0
6,2019-03-18 10:16:19.786292 PID23258,1,A*,PDB[Pat:5],RND,8.056643,49,40082,41.0
7,2019-03-18 10:16:19.780042 PID23254,1,A*,PDB[Pat:5],RND,14.105577,54,60798,44.0
8,2019-03-18 10:16:19.772191 PID23252,1,A*,ANN[Const:-2],RND,3.800127,46,270,48.533722
9,2019-03-18 10:16:19.783919 PID23253,1,A*,ANN[Const:-2],RND,11.210799,48,916,47.230282


In [10]:
# List the distinct heuristic labels present in the log; the per-heuristic
# filters in the following cells match on these exact strings.
results["HEURISTIC_NAME"].unique()

array(['PDB[Pat:5]', 'ANN[Const:-2]', 'ANN[Const:0]'], dtype=object)

In [16]:
# Split the raw log into one frame per heuristic label. The labels are listed
# in the same order as the names on the left-hand side.
pdb_results, ann_results, ann_add_const_results = (
    results[results["HEURISTIC_NAME"] == heuristic_label]
    for heuristic_label in ("PDB[Pat:5]", "ANN[Const:0]", "ANN[Const:-2]")
)

In [18]:
def optimal_cost(row, reference_results=None):
    """Return the reference solution cost for the board solved in ``row``.

    A run is paired with its reference run by the ``(PROCESS_ID, BOARD_ID)``
    key. The ``SOLUTION_COST`` of the first matching reference row is what
    this notebook treats as the optimal cost for that board.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series`` passed by ``DataFrame.apply``)
        Must provide the ``"PROCESS_ID"`` and ``"BOARD_ID"`` keys.
    reference_results : pandas.DataFrame, optional
        Frame with ``PROCESS_ID``, ``BOARD_ID`` and ``SOLUTION_COST`` columns
        to look the cost up in. Defaults to the notebook-level ``pdb_results``
        frame, which keeps existing one-argument calls working.

    Returns
    -------
    The ``SOLUTION_COST`` of the first matching reference row, or ``NaN`` when
    no reference run exists for that key.
    """
    if reference_results is None:
        # Fall back to the notebook global so `apply(optimal_cost, axis=1)` still works.
        reference_results = pdb_results

    is_same_run = (
        (reference_results["PROCESS_ID"] == row["PROCESS_ID"])
        & (reference_results["BOARD_ID"] == row["BOARD_ID"])
    )
    matching_costs = reference_results.loc[is_same_run, "SOLUTION_COST"]

    if matching_costs.empty:
        # A run without a baseline used to crash with an opaque IndexError from
        # `iloc[0]`; NaN lets the derived error columns stay computable.
        return float("nan")
    return matching_costs.iloc[0]

In [19]:
# Build an enriched copy of the log (the raw `results` frame is left untouched):
#   OPTIMAL_SOLUTION_COST              - reference cost looked up per row by `optimal_cost`
#   SOLUTION_COST_ERROR                - found solution cost minus the reference cost
#   INITIAL_HEURISTIC_PREDICTION_ERROR - initial heuristic estimate minus the reference cost
# Later keyword arguments of `assign` may use columns created by earlier ones.
results_complete = results.copy().assign(
    OPTIMAL_SOLUTION_COST=lambda runs: runs.apply(optimal_cost, axis=1),
    SOLUTION_COST_ERROR=lambda runs: runs["SOLUTION_COST"] - runs["OPTIMAL_SOLUTION_COST"],
    INITIAL_HEURISTIC_PREDICTION_ERROR=lambda runs: (
        runs["INITIAL_HEURISTIC_PREDICTION"] - runs["OPTIMAL_SOLUTION_COST"]
    ),
)

In [20]:
# Preview the enriched frame rather than dumping every row; this mirrors the
# ten-row preview of the raw `results` frame earlier in the notebook.
results_complete.head(10)

Unnamed: 0,PROCESS_ID,BOARD_ID,ALGORITHM_NAME,HEURISTIC_NAME,BOARDS_GENERATOR_NAME,RUN_TIME,SOLUTION_COST,EXPANDED_NODES,INITIAL_HEURISTIC_PREDICTION,OPTIMAL_SOLUTION_COST,SOLUTION_COST_ERROR,INITIAL_HEURISTIC_PREDICTION_ERROR
0,2019-03-18 10:16:19.765190 PID23250,1,A*,PDB[Pat:5],RND,0.921609,49,6696,43.000000,49,0,-6.000000
1,2019-03-18 10:16:19.772191 PID23252,1,A*,PDB[Pat:5],RND,1.239359,46,9477,40.000000,46,0,-6.000000
2,2019-03-18 10:16:19.783919 PID23253,1,A*,PDB[Pat:5],RND,1.365097,48,7872,42.000000,48,0,-6.000000
3,2019-03-18 10:16:19.811900 PID23262,1,A*,PDB[Pat:5],RND,2.115139,38,14756,30.000000,38,0,-8.000000
4,2019-03-18 10:16:19.759208 PID23243,1,A*,PDB[Pat:5],RND,4.059638,48,18718,38.000000,48,0,-10.000000
5,2019-03-18 10:16:19.773334 PID23248,1,A*,PDB[Pat:5],RND,4.679295,46,24641,38.000000,46,0,-8.000000
6,2019-03-18 10:16:19.786292 PID23258,1,A*,PDB[Pat:5],RND,8.056643,49,40082,41.000000,49,0,-8.000000
7,2019-03-18 10:16:19.780042 PID23254,1,A*,PDB[Pat:5],RND,14.105577,54,60798,44.000000,54,0,-10.000000
8,2019-03-18 10:16:19.772191 PID23252,1,A*,ANN[Const:-2],RND,3.800127,46,270,48.533722,46,0,2.533722
9,2019-03-18 10:16:19.783919 PID23253,1,A*,ANN[Const:-2],RND,11.210799,48,916,47.230282,48,0,-0.769718


In [21]:
# Split the enriched log into one frame per heuristic label. The labels are
# listed in the same order as the names on the left-hand side.
pdb_results_complete, ann_results_complete, ann_add_const_results_complete = (
    results_complete[results_complete["HEURISTIC_NAME"] == heuristic_label]
    for heuristic_label in ("PDB[Pat:5]", "ANN[Const:0]", "ANN[Const:-2]")
)

# Analysis of results

In [49]:
# Summary statistics for the PDB[Pat:5] runs. SOLUTION_COST_ERROR is zero here
# by construction, because the reference cost is taken from these same rows.
pdb_results_complete.describe()

Unnamed: 0,BOARD_ID,RUN_TIME,SOLUTION_COST,EXPANDED_NODES,INITIAL_HEURISTIC_PREDICTION,OPTIMAL_SOLUTION_COST,SOLUTION_COST_ERROR,INITIAL_HEURISTIC_PREDICTION_ERROR
count,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0
mean,2.613333,750.622772,51.066667,900775.7,40.373333,51.066667,0.0,-10.693333
std,1.683732,1025.158292,5.03859,3469431.0,4.450245,5.03859,0.0,2.760598
min,1.0,0.921609,38.0,4045.0,30.0,38.0,0.0,-18.0
25%,1.0,70.478209,47.0,44662.0,38.0,47.0,0.0,-12.0
50%,2.0,296.982539,51.0,178066.0,40.0,51.0,0.0,-10.0
75%,3.5,968.634909,54.0,597725.5,43.5,54.0,0.0,-8.0
max,8.0,4642.603182,66.0,26343500.0,50.0,66.0,0.0,-6.0


In [47]:
# Summary statistics for the ANN[Const:0] runs, for comparison with the PDB
# summary (run time, expanded nodes, and the two error columns).
ann_results_complete.describe()

Unnamed: 0,BOARD_ID,RUN_TIME,SOLUTION_COST,EXPANDED_NODES,INITIAL_HEURISTIC_PREDICTION,OPTIMAL_SOLUTION_COST,SOLUTION_COST_ERROR,INITIAL_HEURISTIC_PREDICTION_ERROR
count,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0
mean,2.589041,347.019031,51.575342,10045.013699,51.183076,50.945205,0.630137,0.23787
std,1.698329,428.108545,5.090617,13556.325285,5.186149,5.0467,1.047575,1.697125
min,1.0,6.725801,38.0,225.0,37.134007,38.0,0.0,-2.941002
25%,1.0,84.897011,48.0,2171.0,47.810585,47.0,0.0,-1.004677
50%,2.0,184.204371,51.0,4781.0,51.274147,51.0,0.0,0.121681
75%,3.0,403.898095,55.0,11956.0,55.203773,54.0,2.0,1.426552
max,8.0,2054.381989,66.0,67573.0,66.748535,66.0,4.0,4.693413


In [48]:
# Summary statistics for the ANN[Const:-2] runs, for comparison with the
# ANN[Const:0] and PDB summaries.
ann_add_const_results_complete.describe()

Unnamed: 0,BOARD_ID,RUN_TIME,SOLUTION_COST,EXPANDED_NODES,INITIAL_HEURISTIC_PREDICTION,OPTIMAL_SOLUTION_COST,SOLUTION_COST_ERROR,INITIAL_HEURISTIC_PREDICTION_ERROR
count,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0
mean,2.589041,336.743119,51.575342,10045.013699,49.183076,50.945205,0.630137,-1.76213
std,1.698329,451.827812,5.090617,13556.325285,5.186149,5.0467,1.047575,1.697125
min,1.0,3.800127,38.0,225.0,35.134007,38.0,0.0,-4.941002
25%,1.0,70.69177,48.0,2171.0,45.810585,47.0,0.0,-3.004677
50%,2.0,158.016346,51.0,4781.0,49.274147,51.0,0.0,-1.878319
75%,3.0,376.345007,55.0,11956.0,53.203773,54.0,2.0,-0.573448
max,8.0,2220.624066,66.0,67573.0,64.748535,66.0,4.0,2.693413


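# Unused code (legacy exploration; the saved outputs below predate the current benchmark CSV and use older algorithm/heuristic labels)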

In [22]:
# results_wide = (
#     results[results.ALGORITHM_NAME == 'A*']
#     .drop('ALGORITHM_NAME', 1)
#     .pivot(index='BOARD_ID', columns='HEURISTIC_NAME')
# )

In [46]:
# groups = dict(list(results.groupby(['ALGORITHM_NAME', 'HEURISTIC_NAME'])))
# groups.keys()

dict_keys([('AStarSearch', 'ANNHeuristic[Additive constant: -2]'), ('AStarSearch', 'ANNHeuristic[Additive constant: 0]'), ('AStarSearch', 'ANNHeuristic[Additive constant: 2]'), ('AStarSearch', 'PatternDatabaseHeuristic'), ('IDAStarSearch', 'ANNHeuristic[Additive constant: -2]'), ('IDAStarSearch', 'ANNHeuristic[Additive constant: 0]'), ('IDAStarSearch', 'ANNHeuristic[Additive constant: 2]'), ('IDAStarSearch', 'PatternDatabaseHeuristic')])

In [27]:
# results.groupby(['ALGORITHM_NAME', 'HEURISTIC_NAME']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,BOARD_ID,RUN_TIME,SOLUTION_COST,EXPANDED_NODES
ALGORITHM_NAME,HEURISTIC_NAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AStarSearch,ANNHeuristic[Additive constant: -2],15.5,0.411816,19.0,120.933333
AStarSearch,ANNHeuristic[Additive constant: 0],15.5,0.406954,19.0,120.933333
AStarSearch,ANNHeuristic[Additive constant: 2],15.5,0.401545,19.0,120.933333
AStarSearch,PatternDatabaseHeuristic,15.5,0.018106,19.0,65.3
IDAStarSearch,ANNHeuristic[Additive constant: -2],15.5,1.231351,23.0,385.6
IDAStarSearch,ANNHeuristic[Additive constant: 0],15.5,1.264256,25.0,385.6
IDAStarSearch,ANNHeuristic[Additive constant: 2],15.0,1.251357,26.965517,379.965517
IDAStarSearch,PatternDatabaseHeuristic,15.5,0.015557,19.0,73.566667
