# Calculate statistics for IPC domains

This notebook calculates the performance statistics (precision, recall, accuracy and time) for our approach as well as for the `R&G` and `Landmark` approaches. Note that the source CSV files produced by our approach differ slightly from those of the other approaches, so the statistics have to be computed slightly differently for each:

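1. Our approach: the number of candidate goals is derived from the `Cost` column.
2. Others: the number of candidate goals is read from the `Candidates` column.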

In [1]:
import pandas as pd 
import numpy as np
from tabulate import tabulate


def func_precision(stringList, answer):
    """Return the precision of one recognition result.

    Parameters
    ----------
    stringList : list of str
        Tokens obtained by splitting a ``Results`` field on "/".
        Assumes the field ends with "/", so the last token is an empty
        left-over and is not counted as a returned goal -- TODO confirm
        against the csv files.
    answer : object
        The real goal; it is compared through ``str(answer)``.

    Returns
    -------
    float
        ``1 / (number of returned goals)`` when the real goal is among the
        tokens, otherwise 0. When no goal was returned at all (empty list or
        a single token) the result is 0.0 instead of a ZeroDivisionError.
    """
    target = str(answer)
    tokens = list(stringList)
    found = 1 if any(token == target for token in tokens) else 0

    # One token is discounted for the trailing separator left-over.
    returned_goals = len(tokens) - 1
    if returned_goals <= 0:
        # Nothing was returned, so there is nothing to be precise about.
        return 0.0
    return found / returned_goals

def func_recall(stringList, answer):
    """Return 1 if the real goal appears among the result tokens, else 0.

    ``answer`` is compared through ``str(answer)`` against every token of
    ``stringList``.
    """
    target = str(answer)
    return 1 if any(token == target for token in stringList) else 0

def func_accuracy(total, stringList, answer):
    """Return the accuracy of one recognition result.

    Parameters
    ----------
    total : int
        Number of all candidate goals.
    stringList : list of str
        Result tokens; the last token is ignored.
    answer : object
        The real goal, compared through ``str(answer)``.

    Returns
    -------
    float
        ``(tp + tn) / (tp + tn + fp + fn)``.
    """
    target = str(answer)
    returned = stringList[:-1]

    # Returned goals that match the real goal / that do not.
    true_pos = sum(1 for token in returned if token == target)
    false_pos = len(returned) - true_pos

    # There is a single real goal, so it is missed whenever it was not returned.
    false_neg = 1 - true_pos

    # Everything that is left over out of all the goals.
    true_neg = total - true_pos - false_pos - false_neg
    return (true_pos + true_neg) / (true_neg + true_pos + false_pos + false_neg)



# Calculate P, R, A and T for one domain with a specific percentage of observation (all rows)
def calculate_statistics(rows, approach = "others"):
    """Average precision, recall, accuracy and time over ``rows``.

    Parameters
    ----------
    rows : pandas.DataFrame
        Rows of one domain at one observation percentage. The columns
        ``Real_Goal``, ``Results`` and ``Time`` are read, plus ``Candidates``
        (when ``approach == "others"``) or ``Cost`` (any other value).
    approach : str
        ``"others"`` takes the number of candidate goals from the
        ``Candidates`` column; any other value derives it from the number of
        "/"-separated entries of the ``Cost`` column.

    Returns
    -------
    tuple
        ``(precision, recall, accuracy, time)``, each averaged over the rows.

    Raises
    ------
    ZeroDivisionError
        If ``rows`` is empty.
    """
    length = rows.shape[0]

    precision = 0
    recall = 0
    accuracy = 0
    time = 0

    for _, row in rows.iterrows():
        answer = row["Real_Goal"]
        raw_results = row["Results"]

        # pd.isna recognises every missing-value representation, whereas an
        # identity check against np.nan only matches that one singleton object.
        if pd.isna(raw_results):
            # No result was produced for this row: it scores zero everywhere.
            p = 0
            r = 0
            a = 0
        else:
            results = raw_results.split("/")

            if approach == "others":   # other approaches
                total = row["Candidates"]
            else:    # for our approach
                all_candidates = row["Cost"].split("/")
                total = len(all_candidates)-1   # the last one is /

            p = func_precision(results, answer)
            r = func_recall(results, answer)
            a = func_accuracy(total, results, answer)

        precision += p
        recall += r
        accuracy += a
        time += row["Time"]

    return precision/length, recall/length, accuracy/length, time/length

### Code for checking the source_csv data structure

Load a source_csv file from the correct path and preview the table. The code below is just an example for one source_csv file; you can check the others by changing the path.

In [6]:
# Candidate input files. Each assignment overwrites the previous one, so only
# the last path (RG) is actually loaded; reorder or comment out lines to
# inspect a different file.
source_csv = "./comparison/our_default/dwr.csv"
source_csv = "./comparison/landmarks/filter0/dwr.csv"
source_csv = "./comparison/RG/dwr.csv"
data = pd.read_csv(source_csv)
# Not the last expression of the cell, so this preview is evaluated but not shown.
data.head(5)
# Number of rows in the loaded file (this is the value the cell displays).
data.shape[0]

312

### Code blocks for running statistical calculation iteratively

In [112]:
# Benchmark domains; each name is the stem of one per-domain csv file.
domains = [
    "blocks-world",
    "campus",
    "depots",
    "driverlog",
    "dwr",
    "easy-ipc-grid",
    "ferry",
    "intrusion-detection",
    "kitchen",
    "logistics",
    "miconic",
    "rovers",
    "satellite",
    "sokoban",
    "zeno-travel",
]

In [274]:
def display_stats(source_data, apporoach):
    """Collect the statistics of one csv file for every observation percentage.

    For each percentage in 10, 30, 50, 70 and 100 the rows whose ``Percent``
    column equals that value are passed to ``calculate_statistics`` together
    with ``apporoach``.

    Returns a list with one ``[p, r, a, t]`` entry per percentage.
    """
    def stats_for(per):
        # Statistics of the rows observed at exactly this percentage.
        selected = source_data.loc[source_data['Percent'] == per]
        p, r, a, t = calculate_statistics(selected, apporoach)
        return [p, r, a, t]

    return [stats_for(per) for per in [10, 30, 50, 70, 100]]
    
def encode_tex(source_list, domain_list, n_our_sources=2):
    """Build the statistics matrix for several approaches and domains.

    For every directory prefix in ``source_list`` and every name in
    ``domain_list`` the file ``source + domain + ".csv"`` is read and turned
    into a block of statistics by ``display_stats``. The blocks of one source
    are stacked vertically (one group of rows per domain); the stacks of the
    different sources are then placed side by side.

    Parameters
    ----------
    source_list : list of str
        Directory prefixes, one per approach. The "our" sources must come
        first.
    domain_list : list of str
        Domain names (csv file stems).
    n_our_sources : int, optional
        How many leading entries of ``source_list`` are evaluated with
        approach ``"our"``; all later entries use ``"others"``. Defaults to
        2, the value that was previously hard-coded.

    Returns
    -------
    numpy.matrix
        One group of rows per domain and one group of columns per source.

    Raises
    ------
    ValueError
        If ``source_list`` or ``domain_list`` is empty.
    """
    if len(source_list) == 0 or len(domain_list) == 0:
        raise ValueError(
            "encode_tex needs at least one source directory and one domain")

    per_source = []
    for position, source in enumerate(source_list):
        # The leading sources are "our" approach, everything after is "others".
        approach = "our" if position < n_our_sources else "others"

        per_domain = [
            np.matrix(display_stats(pd.read_csv(source + domain + ".csv"), approach))
            for domain in domain_list
        ]
        # Stack the domains of this source on top of each other.
        per_source.append(np.concatenate(per_domain))

    # Put the sources next to each other, column-wise.
    return np.concatenate(per_source, axis=1)

In [275]:
# Domains to evaluate; encode_tex reads one "<source><domain>.csv" file per
# source directory and domain.
domains = ["blocks-world", "campus", "depots", "driverlog", "dwr", "easy-ipc-grid", "ferry", "intrusion-detection", 
           "kitchen", "logistics", "miconic", "rovers", "satellite", "sokoban", "zeno-travel"]

# Result directories, one per approach. The order matters: encode_tex treats
# the first two entries as "our" approach and the remaining ones as "others".
source_list = ["./comparison/our_mid_points_topk/", "./comparison/our_default/", "./comparison/landmarks/filter0/", "./comparison/RG/"]

# Statistics for every domain (row groups) and every source (column groups).
total_matrix = encode_tex(source_list, domains)

In [276]:
## export the table to csv
# A = np.array(matrix)
# np.savetxt("blocksworld.csv", A, delimiter=",")

In [277]:
# Print total_matrix as the body rows of a LaTeX table.
rows = total_matrix.shape[0]

# Consumed with pop(0) below, hence a separate copy of the domain names.
copy_domains = ["blocks-world", "campus", "depots", "driverlog", "dwr", "easy-ipc-grid", "ferry", "intrusion-detection", 
           "kitchen", "logistics", "miconic", "rovers", "satellite", "sokoban", "zeno-travel"]

percentage = ["10", "30", "50", "70", "100"]


def _format_cell(value):
    """Return the text printed for one table cell.

    Values with more than four decimals are rounded to four. Values whose
    text is in scientific notation (e.g. ``1e-05``) are printed in
    fixed-point form as well. Everything else, including text without a
    decimal point such as ``nan``, is printed unchanged.
    """
    text = str(value)
    decimals = text.partition(".")[2]
    if len(decimals) > 4 or "e" in text:
        return "{:.4f}".format(value)
    return text


for row in range(rows):
    # Each domain spans five rows (one per percentage); its name is printed
    # on the middle one.
    if row % 5 == 2:
        dn = copy_domains.pop(0)
        print(dn, end="")
        
    print("&" + percentage[row % 5], end="")
    A = np.asarray(total_matrix[row]).reshape(-1)
    for num in A:
        # convert time to seconds
        # NOTE(review): values above 10000 are presumably times in
        # nanoseconds -- confirm against the csv files.
        if num > 10000:
            num = num / 1000000000
        
        print("&" + _format_cell(num), end='')
            
    print("\\\\")
    
    # Close the group of five rows that belongs to one domain.
    if row % 5 == 4:
        print("\\hline")

&10&0.0872&0.5202&0.5168&0.0592&0.0491&1.0&0.0491&0.0743&0.4078&0.4697&0.9353&0.4226&0.9899&1.0&0.9990&102.8212\\
&30&0.2566&0.7222&0.7548&0.0682&0.0505&1.0&0.0587&0.0804&0.5042&0.5404&0.9475&0.4376&0.9899&1.0&0.9990&21.8386\\
blocks-world&50&0.3442&0.7374&0.8406&0.0778&0.0731&1.0&0.2286&0.0784&0.6019&0.6616&0.9583&0.4532&0.9899&1.0&0.9990&11.9247\\
&70&0.6113&0.8788&0.9328&0.0944&0.1831&0.9899&0.5684&0.0938&0.7820&0.8384&0.9761&0.5093&0.9848&1.0&0.9985&5.5898\\
&100&0.8941&1.0&0.9857&0.1143&0.4569&1.0&0.8366&0.1128&0.8235&1.0&0.9764&0.5076&0.9803&1.0&0.9980&5.6161\\
\hline
&10&0.5&1.0&0.5&0.0263&0.5&1.0&0.5&0.0279&0.8&0.8667&0.8&0.3590&1.0&1.0&1.0&0.1325\\
&30&0.6333&0.8667&0.6333&0.0188&0.5&1.0&0.5&0.0186&0.8&0.8667&0.8&0.3778&1.0&1.0&1.0&0.1328\\
campus&50&0.6&0.7333&0.6&0.0242&0.5&1.0&0.5&0.0293&0.8667&0.9333&0.8667&0.3556&1.0&1.0&1.0&0.1319\\
&70&0.8&0.9333&0.8&0.0215&0.5667&1.0&0.5667&0.0265&1.0&1.0&1.0&0.3606&1.0&1.0&1.0&0.1366\\
&100&0.9333&0.9333&0.9333&0.0257&0.8333&1.0&0.833

### Legacy code

In [75]:
# Legacy single-file aggregation: walks the rows of the global `data` frame
# and averages p, r, accuracy and time per run of rows sharing one `Percent`.
length = data.shape[0]
# Percentage of the group currently being accumulated.
percent = 10

# One [p, r, accuracy, time] entry per finished group.
matrix = []
temp_storage = [0,0,0,0] # p, r, accuracy, time
# Number of rows counted for the current group.
num = 0

# Running sum of the `Candidates` column; not used after the loop in this cell.
count_goals = 0

print(length)

for index in range(length):
    new_percent = data.loc[index, "Percent"]
    answer = data.loc[index, "Real_Goal"]
    results = data.loc[index, "Results"].split("/")
    # The string literal below is disabled code: the variant that derives
    # `total` from the "Cost" column instead of "Candidates".
    """
    all_candidates = data.loc[index, "Cost"].split("/")
    total = len(all_candidates)-1   # the last one is /
    """
    total = data.loc[index, "Candidates"]
    p = func_precision(results, answer)
    r = func_recall(results, answer)
    a = func_accuracy(total, results, answer)
    
    # Debug output: percentage of every row whose real goal was not returned.
    if r == 0:
        print(new_percent)
    
    count_goals += total
    
    time = data.loc[index, "Time"]
    num += 1
    
    # A change of percentage closes the current group: average it, store it
    # and start a fresh accumulator. The row that triggered the change is
    # added to the new accumulator below.
    # NOTE(review): `num` was already incremented for this boundary row, so
    # the first group appears to be divided by one more than its size (the
    # recorded output prints "48 49") -- confirm.
    if (percent != new_percent):
        print(temp_storage[1], num)
        temp_storage[0] = temp_storage[0]/num
        temp_storage[1] = temp_storage[1]/num
        temp_storage[2] = temp_storage[2]/num
        temp_storage[3] = temp_storage[3]/num
        
        matrix.append(temp_storage)
        temp_storage = [0,0,0,0]
        num = 0
        percent = new_percent
        
        
    temp_storage[0] += p
    temp_storage[1] += r
    temp_storage[2] += a
    temp_storage[3] += time
    
# Flush the last group; the extra increment accounts for its first row, which
# was added to the accumulator right after `num` had been reset to 0.
num += 1
temp_storage[0] = temp_storage[0]/num
temp_storage[1] = temp_storage[1]/num
temp_storage[2] = temp_storage[2]/num
temp_storage[3] = temp_storage[3]/num   
matrix.append(temp_storage)
    
    


208
48 49
48 48
48 48
48 48


In [115]:
# Scratch check: `+` on two Python lists concatenates them.
rrrr = [1,2,3]
ttt = [4,5,6]

rrrr+ttt

[1, 2, 3, 4, 5, 6]

In [121]:
# Two separate np.matrix objects built from the same `matrix` list, used for
# the concatenation experiments in the following cells.
m = np.matrix(matrix)
n = np.matrix(matrix)

In [120]:
# Display the current value of m.
m

matrix([[0.9375    , 1.        , 0.97916667, 0.45499851],
        [0.9375    , 1.        , 0.97916667, 0.73706695],
        [0.9375    , 1.        , 0.97916667, 1.38035454],
        [0.9375    , 1.        , 0.97916667, 2.92791591],
        [0.9375    , 1.        , 0.97916667, 7.89003892]])

In [126]:
# axis=1 joins the matrices side by side: the row count stays the same and
# the columns are appended.
np.concatenate((m,n, n), axis=1)

matrix([[0.9375    , 1.        , 0.97916667, 0.45499851, 0.9375    ,
         1.        , 0.97916667, 0.45499851, 0.9375    , 1.        ,
         0.97916667, 0.45499851],
        [0.9375    , 1.        , 0.97916667, 0.73706695, 0.9375    ,
         1.        , 0.97916667, 0.73706695, 0.9375    , 1.        ,
         0.97916667, 0.73706695],
        [0.9375    , 1.        , 0.97916667, 1.38035454, 0.9375    ,
         1.        , 0.97916667, 1.38035454, 0.9375    , 1.        ,
         0.97916667, 1.38035454],
        [0.9375    , 1.        , 0.97916667, 2.92791591, 0.9375    ,
         1.        , 0.97916667, 2.92791591, 0.9375    , 1.        ,
         0.97916667, 2.92791591],
        [0.9375    , 1.        , 0.97916667, 7.89003892, 0.9375    ,
         1.        , 0.97916667, 7.89003892, 0.9375    , 1.        ,
         0.97916667, 7.89003892]])

In [135]:
# NOTE: the tuple is taken as data, not as a shape, so this builds a 1x2
# matrix holding the values 75 and 1.
total_m = np.matrix((15*5, 1))

In [136]:
# Display the current value of total_m.
total_m

matrix([[75,  1]])

In [137]:
# NOTE: the arrays are not wrapped in a tuple here, so `m` is passed in the
# position of the `axis` argument; this is the call that produced the
# TypeError recorded as this cell's output.
np.concatenate(total_m, m)

TypeError: only integer scalar arrays can be converted to a scalar index

In [114]:
# Legacy loop that printed one statistics table per domain for the landmark
# approach.
domains = ["blocks-world", "campus", "depots", "driverlog", "dwr", "easy-ipc-grid", "ferry", "intrusion-detection", 
           "kitchen", "logistics", "miconic", "rovers", "satellite", "sokoban", "zeno-travel"]

for domain in domains:
    source_csv = "./comparison/landmarks/filter0/" + domain + ".csv"
    data = pd.read_csv(source_csv)
    # NOTE(review): display_stats is called with a single argument here, while
    # the definition in this notebook takes (source_data, apporoach). This cell
    # was presumably run against an earlier version that read the global
    # `data` and printed the table -- confirm before re-running.
    display_stats(domain)

blocks-world
       p         r         a         t
--------  --------  --------  --------
0.407828  0.469697  0.935257  0.422569
0.504209  0.540404  0.947499  0.437558
0.601852  0.661616  0.958345  0.453212
0.781987  0.838384  0.976094  0.509276
0.823465  1         0.976441  0.507588
campus
       p         r         a         t
--------  --------  --------  --------
0.8       0.866667  0.8       0.358956
0.8       0.866667  0.8       0.377834
0.866667  0.933333  0.866667  0.355641
1         1         1         0.360626
1         1         1         0.361002
depots
       p         r         a         t
--------  --------  --------  --------
0.34      0.36      0.788     0.840311
0.565972  0.666667  0.890625  0.822957
0.71875   0.833333  0.927083  0.82046
0.8125    0.833333  0.954167  0.84114
0.9375    1         0.9875    0.874982
driverlog
       p         r         a         t
--------  --------  --------  --------
0.458333  0.541667  0.833333  0.406346
0.5625    0.666667  0.861111 