In [1]:
import sys
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import itertools
from util.save_load import load_kernel_model
from kernels.wrapper import MODELS, KernelModelWrapper
from dataset.ipc2023_learning_domain_info import IPC2023_LEARNING_DOMAINS, get_number_of_ipc2023_training_data
from itertools import product
from IPython.display import display, HTML

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
_LOG_DIR = "icaps24_train_logs"

ITERATIONS = [1, 2, 3, 4, 5, 6, 7, 8]
PRUNES = [0]

configs = product(
    ["1wl", "2gwl", "2lwl"],  # wl algorithms
    ITERATIONS,  # iterations
    PRUNES,  # prune
    ["ilg"],  # representation
    ["blocksworld", "childsnack", "ferry", "floortile", "miconic", "rovers", "satellite", "sokoban", "spanner", "transport"],  # domains
    ["linear-svr"],  # models
)
CONFIGS = list(configs)

DOMAINS = IPC2023_LEARNING_DOMAINS

PLT_DIR = "plots"
os.makedirs(PLT_DIR, exist_ok=True)

### Train metrics

In [3]:
def get_data_from_log_file(log_file):
    assert os.path.exists(log_file), log_file
    stats = {}
    lines = list(open(log_file, 'r').readlines())
    for line in lines:
      toks = line.split()
      if "train_mse" in line:
        stats["train_mse"] = float(toks[-1])
      elif "train_f1_macro" in line:
        stats["train_f1"] = float(toks[-1])
      elif "val_mse" in line:
        stats["val_mse"] = float(toks[-1])
      elif "val_f1_macro" in line:
        stats["val_f1"] = float(toks[-1])
      elif "zero_weights" in line:
        weights = int(toks[1].split('/')[1])
        zeros = int(toks[1].split('/')[0])
        stats["nonzero_weights"] = weights - zeros
      elif "Model training completed in " in line:
        stats["time"] = float(toks[-1].replace("s", ""))
    
    if "nonzero_weights" not in stats:
      stats["nonzero_weights"] = "na"
    
    return stats

In [4]:
""" display df over all domains """
def get_data(domain, target):
  d = {
    "config": [],
    "train_mse": [],
    "val_mse": [],
    "train_f1": [],
    "val_f1": [],
    "nonzero_weights": [],
    "time": [],
  }

  assert target in {"H", "D"}

  for wl, iterations, prune, rep, domain, model in CONFIGS:
    desc = "_".join([domain, rep, wl, str(iterations), str(prune), model, target])
    log_file = _LOG_DIR + "/" + desc + ".log"

    if not os.path.exists(log_file):
      continue

    stats = get_data_from_log_file(log_file)
    stats["config"] = desc

    if len(stats) != len(d):
      continue

    for key in stats:
      d[key].append(stats[key])

  return d

def get_df(domain, target):
  d = get_data(domain, target)
  return pd.DataFrame(d)

# max_times = []
# for domain in IPC2023_LEARNING_DOMAINS:
#   data = get_df(domain, "H")
#   max_times.append(max(data.to_numpy()[:,-1]))
#   display(data)
# print("max time:", max(max_times))
df = get_df("blocksworld", "H")
df

Unnamed: 0,config,train_mse,val_mse,train_f1,val_f1,nonzero_weights,time
0,blocksworld_ilg_1wl_1_0_linear-svr_H,1.41,3.06,0.53,0.20,593,0.72
1,childsnack_ilg_1wl_1_0_linear-svr_H,0.42,0.39,0.62,0.60,79,0.50
2,ferry_ilg_1wl_1_0_linear-svr_H,0.12,0.23,0.84,0.68,260,2.37
3,floortile_ilg_1wl_1_0_linear-svr_H,0.71,1.04,0.43,0.28,480,9.12
4,miconic_ilg_1wl_1_0_linear-svr_H,0.75,0.86,0.42,0.41,98,1.62
...,...,...,...,...,...,...,...
147,transport_ilg_2lwl_7_0_linear-svr_H,0.01,0.91,1.00,0.38,1574432,109.48
148,blocksworld_ilg_2lwl_8_0_linear-svr_H,0.01,2.72,1.00,0.24,4655119,20.14
149,childsnack_ilg_2lwl_8_0_linear-svr_H,0.29,0.53,0.71,0.55,95310,1747.45
150,ferry_ilg_2lwl_8_0_linear-svr_H,0.01,0.42,1.00,0.50,2659903,12.02


In [9]:
rep = "ilg"
target = "H"
prune = 0
model = "linear-svr"
for metric, domain in product(["train_mse", "train_f1", "val_mse", "val_f1"], DOMAINS):
    for wl in ["1wl", "2gwl", "2lwl"]:
        xs = []
        ys = []
        for iterations in ITERATIONS:
            desc = "_".join([domain, rep, wl, str(iterations), str(prune), model, target])
            log_file = _LOG_DIR + "/" + desc + ".log"
            if not os.path.exists(log_file):
                continue
            stats = get_data_from_log_file(log_file)
            if metric not in stats:
                continue
            xs.append(iterations)
            ys.append(stats[metric])
        plt.plot(xs, ys, label=wl)
    if "f1" in metric:
        plt.ylim((0, 1))
    elif "mse" in metric:
        plt.ylim((1e-1, 1e2))
        plt.yscale("log")
    # plt.xscale("log")
    # power_of_2_ticks = [2**i for i in range(int(np.log2(min(ITERATIONS))), int(np.log2(max(ITERATIONS))) + 1)]
    # plt.xticks(power_of_2_ticks, [str(tick) for tick in power_of_2_ticks])
    plt.title(f"{metric} {domain}")
    plt.legend(bbox_to_anchor=(1, 1), loc='upper left')
    try:
        plt.savefig(f"{PLT_DIR}/{metric}_{domain}.png", dpi=480, bbox_inches="tight")
    except:
        pass
    plt.clf()

<Figure size 640x480 with 0 Axes>