In [1]:
# Runtime switches.
#   COLAB_MODE    - use the Google Colab paths below instead of the local ones.
#   SETUP_RUNTIME - additionally run the one-time provisioning steps
#                   (Drive mount, torch install, data copy, repo clone).
COLAB_MODE = False
SETUP_RUNTIME = False
# google colab shenanigans
if COLAB_MODE:
    if SETUP_RUNTIME:
        # Mount Google Drive; the dataset archive and model_root both live under it.
        from google.colab import drive
        drive.mount('/content/drive')
        !pip install torch==1.11.0
        # Copy the dataset archive from Drive into /content/data and unpack it there.
        !mkdir /content/data
        !rsync --progress /content/drive/MyDrive/colab_data/deep-ls/old-tsp-data.tar.gz /content/data
        # NOTE(review): the archive path is relative (`data/...`), so this assumes
        # the working directory is still /content at this point - confirm.
        !tar -xzvf data/old-tsp-data.tar.gz -C /content/data/

        # Fetch the project sources and switch to the branch used by this notebook.
        !git clone https://github.com/whong92/deep-ls.git
        %cd deep-ls
        !git checkout greedy_postproc
    else:
        # Runtime already provisioned: only enter the existing checkout.
        %cd deep-ls

    data_root = '/content/data/tsp-data/'
    model_root = '/content/drive/MyDrive/colab_data/deep-ls/'
else:
    # Local run: dataset directory relative to the notebook, checkpoints in the cwd.
    data_root = '../graph-convnet-tsp/data/'
    model_root = '.'

In [2]:
import cv2
import numpy as np 
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from deepls.TSP2OptEnv import TSP2OptMultiEnv, TSP2OptEnv, TSP2OptEnvBase, TSP2OptState

# OpenCV font constant; not referenced by any other cell visible in this notebook.
font = cv2.FONT_HERSHEY_COMPLEX_SMALL

In [3]:
from deepls.agent import AverageStateRewardBaselineAgent, GreedyAgent
from torch import nn
import torch
from tqdm import tqdm
import copy
from PIL import Image

In [4]:
# --- Model / checkpoint -----------------------------------------------------
# PROBLEM_SZ is used both as the node count and as the per-episode step budget
# when the environment is built.
PROBLEM_SZ = 20
MODEL_CKPT = f'{model_root}/model-15-layer-RGCN-20-nodes-51999-val-0.016.ckpt'
# Passed to greedy_postproc as num_postproc_steps.
NUM_GREEDY_POSTROC_STEPS = 5

# --- Rendering --------------------------------------------------------------
RENDER = True
NUM_RENDER = 5                   # max number of instances whose states are kept
RENDER_EDGE_MARGINALS = False
RENDER_OPTIMAL_TOUR = True

# --- Sampling ---------------------------------------------------------------
# When rendering, only as many instances as will be rendered are evaluated;
# otherwise 3 instances are evaluated.
NUM_INSTANCE_EVAL = NUM_RENDER if RENDER else 3
NUM_SAMPLES_EVAL = 1             # samples drawn per instance
MINIBATCH_SZ = 1                 # samples evaluated together per env batch

In [5]:
# Batched 2-opt environment over the test-set file for the configured problem size.
env = TSP2OptMultiEnv(
    # data source
    data_f=f'{data_root}/tsp{PROBLEM_SZ}_test_concorde.txt',
    shuffle_data=True,
    # problem size / episode length
    num_nodes=PROBLEM_SZ,
    max_num_steps=PROBLEM_SZ,
    # batching: every env in a batch works on the same instance
    num_samples_per_batch=MINIBATCH_SZ,
    same_instance_per_batch=True,
    # returned quantities
    ret_log_tour_len=False,
    ret_opt_tour=True,
)
env.reset()

In [6]:
# Configuration dictionary handed to `agent.agent_init`.
# The model section must describe the same architecture as the checkpoint that
# is loaded afterwards (the checkpoint file name mentions 15 layers).
agent_config = {
    'replay_buffer_size': 3,
    'minibatch_sz': 1,
    'batch_sz': 1,
    'policy_optimize_every': 2,
    'critic_optimize_every': 1,
    'model': {
        "voc_edges_in": 3,
        "hidden_dim": 128,
        "num_layers": 15,
        "mlp_layers": 3,
        "aggregation": "mean",
        "node_dim": 2,
        'dont_optimize_policy_steps': 1000,
        'value_net_type': 'normal'
    },
    'optim': {
        'step_size': 1e-7,
        'step_size_critic': 2e-5,
        'beta_m': 0.9,
        'beta_v': 0.999,
        'epsilon': 1e-8
    },
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # notebook still runs on machines without CUDA.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu'
}

In [7]:
from deepls.solver import greedy_postproc

In [8]:
# Build the agent under evaluation and initialise it from agent_config.
# GreedyAgent (imported alongside it) is the alternative left in the trailing comment.
agent = AverageStateRewardBaselineAgent() # GreedyAgent()
agent.agent_init(agent_config)

In [9]:
# Load the checkpoint selected by MODEL_CKPT into the agent.
# NOTE(review): init_config=False presumably keeps the configuration applied by
# agent_init instead of re-initialising from the checkpoint - confirm.
agent.load(MODEL_CKPT, init_config=False)

In [10]:
# Per-instance result accumulators, filled by the evaluation loop:
# optimality gaps after / before post-processing, tour lengths, optimal tour
# lengths, and (when rendering) the recorded states.
all_opt_gaps, all_opt_gaps_pre, all_tour_lens, all_opts, all_states = [], [], [], [], []

In [None]:
# Evaluation loop: one outer iteration per instance, NUM_SAMPLES_EVAL // MINIBATCH_SZ
# episodes per instance. Each episode's final states are passed through
# greedy_postproc, and optimality gaps are recorded both before and after it.
for _ in range(NUM_INSTANCE_EVAL):
    
    # Per-instance accumulators; each episode extends them with one value per env.
    tour_lens = []
    opts = []
    opt_gaps_pre = []
    opt_gaps = []
    # One bucket per step index, plus one extra bucket for the post-processed states.
    state_list = [[] for _ in range(env.envs[0].max_num_steps + 2)] # includes post-proc states

    agent.set_eval()
    # NOTE(review): presumably makes the agent sample actions instead of acting greedily - confirm.
    agent.set_greedy(False)
    pbar = tqdm(range(NUM_SAMPLES_EVAL // MINIBATCH_SZ))
    # NOTE(review): fetch_next=True presumably advances to the next instance, while the
    # fetch_next=False resets below restart the same instance for every episode - confirm.
    env.reset(fetch_next=True)
    for episode in pbar:
        env.reset(fetch_next=False)
        states = env.get_state()
        # States are only recorded while fewer than NUM_RENDER instances have been stored.
        if RENDER and len(all_states) < NUM_RENDER:
            state_list[env.envs[0].cur_step].extend(copy.deepcopy(states))
        actions = agent.agent_start(states)
        while True:
            # Apply the agent's actions; each entry of rets is indexed as (state, reward, done).
            rets = env.step(actions)
            states = [ret[0] for ret in rets]
            rewards = [ret[1] for ret in rets]
            dones = [ret[2] for ret in rets]
            if RENDER and len(all_states) < NUM_RENDER:
                state_list[env.envs[0].cur_step].extend(copy.deepcopy(states))

            # Only the first env's done flag is checked.
            # NOTE(review): assumes every env in the batch finishes on the same step - confirm.
            if dones[0] == True:
                agent.agent_end(rewards)
                # Gap before post-processing: state[1].tour_len relative to the optimal
                # tour length stored on state[0].
                opt_gaps_pre.extend(
                    [(state[1].tour_len / state[0].opt_tour_len) - 1. for state in states]
                )
                post_proc_states = greedy_postproc(states, num_postproc_steps=NUM_GREEDY_POSTROC_STEPS)
                # Post-processed states go into the bucket after the final step.
                if RENDER and len(all_states) < NUM_RENDER:
                    state_list[env.envs[0].cur_step + 1].extend(copy.deepcopy(post_proc_states))
                tour_lens.extend([state[1].tour_len for state in post_proc_states])
                opts.extend([state[0].opt_tour_len for state in post_proc_states])
                # Gap after post-processing, same formula as above.
                opt_gaps.extend(
                    [(state[1].tour_len / state[0].opt_tour_len) - 1. for state in post_proc_states]
                )
                break
            else:
                actions = agent.agent_step(rewards, states)
    # Store this instance's results as one row per accumulator.
    all_opt_gaps.append(opt_gaps)
    all_opt_gaps_pre.append(opt_gaps_pre)
    all_tour_lens.append(tour_lens)
    all_opts.append(opts)
    if RENDER and len(all_states) < NUM_RENDER:
        all_states.append(state_list)

In [12]:
# For every instance (row), the index of the sample with the smallest
# post-processed optimality gap.
best_samples = np.argmin(all_opt_gaps, axis=1)

In [13]:
from deepls.gcn_model import model_input_from_states
from deepls.graph_utils import tour_nodes_to_W
from deepls.plot_utils import plot_tsp_heatmap

def plot_action_heatmap(W_pred, x_coord, W_val, W):
    """Draw a TSP edge heatmap and return it as an RGB image array.

    A fresh 8x8-inch figure is created, filled by `plot_tsp_heatmap`,
    rasterised, and closed again (also when plotting raises).

    Args:
        W_pred: values forwarded to `plot_tsp_heatmap` as the heatmap to draw.
        x_coord: node coordinates forwarded to `plot_tsp_heatmap`.
        W_val: edge values forwarded to `plot_tsp_heatmap`.
        W: forwarded to `plot_tsp_heatmap` as its `W` keyword argument.

    Returns:
        A uint8 numpy array of shape (height, width, 3) in RGB channel order.
    """
    f = plt.figure(figsize=(8, 8))
    try:
        a = f.add_subplot(111)
        plot_tsp_heatmap(a, x_coord, W_val, W_pred, W=W, thres=0.)
        canvas = f.canvas
        canvas.draw()
        width, height = canvas.get_width_height()
        if hasattr(canvas, 'buffer_rgba'):
            # `tostring_rgb` is deprecated/removed in recent Matplotlib releases;
            # read the RGBA buffer instead and drop the alpha channel.
            rgba = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
            img = rgba.reshape(height, width, 4)[:, :, :3].copy()
        else:
            # Fallback for canvases that only offer the legacy accessor.
            img = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
            img = img.reshape(height, width, 3)
    finally:
        # Always release the figure so repeated calls do not accumulate figures.
        plt.close(f)
    return img

def plot_edge_marginals(net, input_state, device, temperature=2.):
    """Render the policy's per-edge action marginals for one state as an image.

    The network's tour logits are turned into a probability per candidate
    action (softmax over logits divided by `temperature`). Each action's
    probability is then added to the two edges it references, and the
    resulting edge matrix is drawn with `plot_action_heatmap`.

    Args:
        net: model exposing `get_tour_logits(...)`.
        input_state: pair indexed as (current state, best state).
        device: device the model inputs are moved to before the forward pass.
        temperature: divisor applied to the logits before the softmax
            (default 2., the value previously hard-coded).

    Returns:
        The RGB image array produced by `plot_action_heatmap`.
    """
    best_states = [input_state[1]]
    states = [input_state[0]]
    # Model inputs for the current state ...
    x_edges, x_edges_values, x_nodes_coord, x_tour = list(model_input_from_states(states))
    # ... and only the tour of the best state.
    _, _, _, x_best_tour = model_input_from_states(best_states)
    # Directed adjacency of the current tour, stacked into a batch of one.
    x_tour_directed = torch.stack([
        torch.as_tensor(tour_nodes_to_W(state.tour_nodes, directed=True))
        for state in states
    ], dim=0)

    model_input = [x_edges, x_edges_values, x_nodes_coord, x_tour, x_best_tour, x_tour_directed]
    model_input = [t.to(device) for t in model_input]
    # Pure inference for plotting: no autograd graph is needed.
    with torch.no_grad():
        tour_logits, tour_indices_cat = net.get_tour_logits(*model_input)

    tour_logits = tour_logits.detach().cpu()
    tour_indices_cat = tour_indices_cat.detach().cpu()

    # Columns (1, 2) and (4, 5) are used as the endpoints of the two edges of
    # each candidate action.
    # NOTE(review): presumably the two edges touched by a 2-opt move - confirm.
    u = tour_indices_cat[0, :, 1]
    v = tour_indices_cat[0, :, 2]
    x = tour_indices_cat[0, :, 4]
    y = tour_indices_cat[0, :, 5]

    tour_probs = torch.softmax(tour_logits[:, :, 0] / temperature, dim=1).detach().float()
    # Allocate directly as float32 (same result as the former float64 -> float32 cast).
    edge_marginals = torch.zeros_like(x_edges, dtype=torch.float32)
    for p, _u, _v, _x, _y in zip(tour_probs[0], u, v, x, y):
        edge_marginals[0, _u, _v] += p
        edge_marginals[0, _x, _y] += p

    img = plot_action_heatmap(
        edge_marginals[0].numpy(),
        x_nodes_coord[0].numpy(),
        x_edges_values[0].numpy(),
        W=x_edges[0].numpy(),
    )

    return img

In [14]:
# Render, for every stored instance, the trajectory of its best sample:
# one JPEG per recorded step showing the current tour, the best tour so far,
# and optionally the edge-marginal heatmap and the optimal tour side by side.
if RENDER:
    import os

    # Make sure the output directory exists before the first save.
    render_dir = f'{model_root}/renders/renders-tsp-{PROBLEM_SZ}-opt-viz'
    os.makedirs(render_dir, exist_ok=True)

    for instance in range(len(all_states)):
        b = best_samples[instance]
        instance_episode_states = all_states[instance]
        # `episode` indexes the recorded step buckets (the last one holds the
        # post-processed states); the name is kept because it is part of the
        # output file name.
        for episode in range(len(instance_episode_states)):
            step_states = instance_episode_states[episode]
            if b >= len(step_states):
                # Nothing was recorded for this step (e.g. the episode ended early).
                continue
            best_sample_state = step_states[b]
            # The channel order of the rendered frames is reversed before saving with PIL.
            img = best_sample_state[0].render(mode='rgb_array')[:, :, [2,1,0]]
            img_best_state = best_sample_state[1].render(mode='rgb_array')[:, :, [2,1,0]]
            gallery = [img, img_best_state]
            if RENDER_EDGE_MARGINALS:
                img_edge_marginals = plot_edge_marginals(agent.net, best_sample_state, agent.device)
                # The heatmap comes from a matplotlib figure and may not match the
                # height of the env renders; rescale it so the panels can be
                # concatenated horizontally.
                target_h = img.shape[0]
                src_h, src_w = img_edge_marginals.shape[:2]
                if src_h != target_h:
                    target_w = max(1, round(src_w * target_h / src_h))
                    img_edge_marginals = np.asarray(
                        Image.fromarray(img_edge_marginals).resize((target_w, target_h))
                    )
                gallery.append(img_edge_marginals)
            if RENDER_OPTIMAL_TOUR:
                # Build a state that holds the optimal tour so it can be rendered
                # with the same renderer as the other panels.
                opt_state = TSP2OptState(
                    best_sample_state[0].nodes_coord,
                    best_sample_state[0].edge_weights,
                    best_sample_state[0].opt_tour,
                    best_sample_state[0].opt_tour_len
                )
                img_opt_tour = opt_state.render(mode='rgb_array')[:, :, [2,1,0]]
                gallery.append(img_opt_tour)
            img = np.concatenate(gallery, axis=1)
            img = Image.fromarray(img)
            img.save(f'{render_dir}/render_instance_{instance:03d}_ep_{episode:03d}.jpg')

In [16]:
# Smallest optimality gap per instance across its samples,
# after (best_opt_gaps) and before (best_opt_gaps_pre) greedy post-processing.
best_opt_gaps = np.asarray(all_opt_gaps).min(axis=1)
best_opt_gaps_pre = np.asarray(all_opt_gaps_pre).min(axis=1)

In [17]:
# Convert the nested result lists to 2-D arrays (instances x samples) so they
# can be sliced by sample count below.
all_opt_gaps, all_opt_gaps_pre = np.array(all_opt_gaps), np.array(all_opt_gaps_pre)

In [None]:
# Sample efficiency: for each sample budget n, take the best gap among the
# first n samples of every instance and average that over instances.
# Slicing clips at the number of samples actually available.
num_samples_to_consider = [1, 10, 20, 50, 100]
mean_best_opt_gaps, mean_best_opt_gaps_pre = [], []
for n in num_samples_to_consider:
    mean_best_opt_gaps.append(all_opt_gaps[:, :n].min(axis=1).mean())
    mean_best_opt_gaps_pre.append(all_opt_gaps_pre[:, :n].min(axis=1).mean())

In [None]:
# Plot mean best optimality gap against the sample budget, with and without
# greedy post-processing, and save the figure next to the model files.
fig, ax = plt.subplots()
ax.plot(num_samples_to_consider, mean_best_opt_gaps, '-o',
        label='after greedy post-processing')
ax.plot(num_samples_to_consider, mean_best_opt_gaps_pre, '-o',
        label='before greedy post-processing')
ax.set_xlabel('number of samples per instance')
ax.set_ylabel('mean best optimality gap')
ax.grid()
ax.set_title(f'optimality gap vs samples for tsp n={PROBLEM_SZ}')
# Without a legend the two curves cannot be told apart in the saved image.
ax.legend()
fig.savefig(f'{model_root}/sample-efficiency-tsp-{PROBLEM_SZ}.jpg')

In [18]:
# Mean over instances of the best-sample optimality gap:
# (after greedy post-processing, before greedy post-processing).
np.mean(best_opt_gaps), np.mean(best_opt_gaps_pre)

(0.001754596578996385, 0.022474331142738297)