In [1]:
COLAB_MODE = False
SETUP_RUNTIME = False
# google colab shenanigans
if COLAB_MODE:
    if SETUP_RUNTIME:
        from google.colab import drive
        drive.mount('/content/drive')
        !pip install torch==1.11.0
        !mkdir /content/data
        !gdown 1-5W-S5e7CKsJ9uY9uVXIyxgbcZZNYBrp
        !mv old-tsp-data.tar.gz /content/data/
        !tar -xzvf data/old-tsp-data.tar.gz -C /content/data/

        !git clone https://github.com/whong92/deep-ls.git
        %cd deep-ls
        !git checkout multi-sample-episodes
    else:
        %cd deep-ls

    data_root = '/content/data/tsp-data/'
    model_root = '/content/drive/MyDrive/colab_data/deep-ls/'
else:
    data_root = '../graph-convnet-tsp/data/'
    model_root = '.'

In [2]:
import cv2
import numpy as np 
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported; figures in
# this notebook are written to files (see the savefig call in the plotting cell).
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from deepls.TSP2OptEnv import TSP2OptMultiEnv, TSP2OptEnv

# OpenCV font constant. It is not referenced by any cell visible in this notebook.
font = cv2.FONT_HERSHEY_COMPLEX_SMALL

In [3]:
import copy
import os

import torch
from PIL import Image
from torch import nn
from tqdm import tqdm

from deepls.agent import AverageStateRewardBaselineAgent

In [4]:
# Evaluation settings.
NUM_INSTANCE_EVAL = 100  # number of outer evaluation iterations (one per fetched instance)
NUM_SAMPLES_EVAL = 100   # rollouts collected per instance
MINIBATCH_SZ = 10        # rollouts executed in parallel by the multi-env
RENDER = True            # whether rollout states are recorded and rendered to images
NUM_RENDER = 10          # maximum number of instances whose states are recorded

# The evaluation loop runs NUM_SAMPLES_EVAL // MINIBATCH_SZ batches per instance,
# so a non-multiple would silently yield fewer samples than requested.
assert MINIBATCH_SZ > 0, "MINIBATCH_SZ must be positive"
assert NUM_SAMPLES_EVAL % MINIBATCH_SZ == 0, (
    "NUM_SAMPLES_EVAL must be a multiple of MINIBATCH_SZ"
)

In [8]:
# Build the batched 2-opt environment on the 20-node test file.
# MINIBATCH_SZ environments are stepped together; the parameter names suggest
# every environment in a batch works on the same instance - TODO confirm
# against TSP2OptMultiEnv.
env_kwargs = dict(
    data_f=f'{data_root}/tsp20_test_concorde.txt',
    num_nodes=20,
    max_num_steps=20,
    num_samples_per_batch=MINIBATCH_SZ,
    same_instance_per_batch=True,
    shuffle_data=True,
    ret_log_tour_len=False,
)
env = TSP2OptMultiEnv(**env_kwargs)
env.reset()

In [9]:
# Configuration handed to agent.agent_init(): top-level training bookkeeping,
# the network architecture ('model'), optimiser hyper-parameters ('optim')
# and the torch device.
agent_config = dict(
    replay_buffer_size=3,
    minibatch_sz=1,
    batch_sz=1,
    policy_optimize_every=2,
    critic_optimize_every=1,
    model=dict(
        voc_edges_in=3,
        hidden_dim=128,
        num_layers=15,
        mlp_layers=3,
        aggregation="mean",
        node_dim=2,
        dont_optimize_policy_steps=1000,
        value_net_type='normal',
    ),
    optim=dict(
        step_size=1e-7,
        step_size_critic=2e-5,
        beta_m=0.9,
        beta_v=0.999,
        epsilon=1e-8,
    ),
    device='cpu',
)

In [11]:
# Create the agent and initialise it from agent_config (defined in the cell above).
agent = AverageStateRewardBaselineAgent()
agent.agent_init(agent_config)

In [12]:
# Load the trained checkpoint from model_root into the agent.
# NOTE(review): init_config=False presumably keeps the configuration set by
# agent_init() instead of re-initialising from the checkpoint - confirm in deepls.agent.
agent.load(f'{model_root}/model-15-layer-RGCN-20-nodes-51999-val-0.016.ckpt', init_config=False)

In [13]:
# Result accumulators filled by the evaluation loop, one entry per instance:
# optimality gaps, tour lengths and optimal tour lengths of every sample, plus
# the recorded states for the instances that will be rendered.
# Re-run this cell before re-running the evaluation loop to avoid duplicates.
all_opt_gaps, all_tour_lens = [], []
all_opts, all_states = [], []

In [None]:
# Sampling-based evaluation.
# For each of NUM_INSTANCE_EVAL iterations, NUM_SAMPLES_EVAL rollouts are
# collected in batches of MINIBATCH_SZ, and the final tour length, the optimal
# tour length and the resulting optimality gap of every rollout are stored.
# Results are appended to the all_* lists, so this cell is not idempotent:
# re-run the accumulator cell first when evaluating again.
for _ in range(NUM_INSTANCE_EVAL):

    tour_lens = []
    opts = []
    opt_gaps = []
    # One bucket per environment step; index 0 holds the states right after reset.
    state_list = [[] for _ in range(env.envs[0].max_num_steps + 1)]
    # all_states only grows at the end of an iteration, so whether this
    # instance is recorded for rendering is fixed for the whole iteration.
    record_states = RENDER and len(all_states) < NUM_RENDER

    agent.set_eval()
    # NOTE(review): presumably non-greedy means actions are sampled from the policy - confirm.
    agent.set_greedy(False)
    pbar = tqdm(range(NUM_SAMPLES_EVAL // MINIBATCH_SZ))
    # NOTE(review): fetch_next=True presumably advances to a new instance, and
    # the fetch_next=False resets below restart on that same instance - confirm
    # against TSP2OptMultiEnv.reset.
    env.reset(fetch_next=True)
    for episode in pbar:
        env.reset(fetch_next=False)
        states = env.get_state()
        if record_states:
            state_list[env.envs[0].cur_step].extend(copy.deepcopy(states))
        actions = agent.agent_start(states)
        while True:
            # Apply the actions chosen by the agent to every environment in the batch.
            rets = env.step(actions)
            states = [ret[0] for ret in rets]
            rewards = [ret[1] for ret in rets]
            dones = [ret[2] for ret in rets]
            if record_states:
                state_list[env.envs[0].cur_step].extend(copy.deepcopy(states))

            # Only the first environment's flag is checked; this assumes all
            # environments in the batch finish on the same step.
            if dones[0]:
                agent.agent_end(rewards)
                # Each state is indexed as a pair: element 1 supplies the tour
                # length that is scored, element 0 the optimal tour length.
                tour_lens.extend([state[1].tour_len for state in states])
                opts.extend([state[0].opt_tour_len for state in states])
                opt_gaps.extend(
                    [(state[1].tour_len / state[0].opt_tour_len) - 1. for state in states]
                )
                break
            else:
                actions = agent.agent_step(rewards, states)

    all_opt_gaps.append(opt_gaps)
    all_tour_lens.append(tour_lens)
    all_opts.append(opts)
    if record_states:
        all_states.append(state_list)

In [19]:
# For every evaluated instance (row), the index of the sample with the smallest
# optimality gap. The render cell uses it to pick which sample to draw.
best_samples = np.argmin(all_opt_gaps, axis=1)

In [12]:
# Render, for every recorded instance, the trajectory of its best sample:
# one image per environment step, written to {model_root}/renders/.
if RENDER:
    render_dir = f'{model_root}/renders'
    # Image.save does not create missing directories, so make sure it exists.
    os.makedirs(render_dir, exist_ok=True)
    for instance, instance_step_states in enumerate(all_states):
        b = best_samples[instance]
        # instance_step_states[step] holds the states of all samples at that step.
        for step, step_states in enumerate(instance_step_states):
            best_sample_state = step_states[b]
            # Each state is a pair; both elements are rendered and placed side by side.
            # NOTE(review): element 1 looks like the best-so-far state - confirm.
            img = best_sample_state[0].render(mode='rgb_array')
            img_best_state = best_sample_state[1].render(mode='rgb_array')
            img = np.concatenate([img, img_best_state], axis=1)
            # Reverse the channel order before handing the array to PIL.
            img = img[:, :, [2, 1, 0]]
            img = Image.fromarray(img)
            img.save(f'{render_dir}/render_instance_{instance:03d}_ep_{step:03d}.jpg')

In [None]:
# Smallest optimality gap among all samples of each instance (row-wise minimum).
best_opt_gaps = np.min(all_opt_gaps, axis=1)

In [None]:
# Convert the list of per-instance gap lists into a 2-D array so that it can be
# sliced as all_opt_gaps[:, :n] below. Note that this rebinds the name from a
# list to an ndarray, so the evaluation loop's .append no longer works on it
# until the accumulator cell is re-run.
all_opt_gaps = np.array(all_opt_gaps)

In [None]:
# Best-of-n analysis: for each sample budget n, take the best (minimum) gap
# among the first n samples of every instance and average over instances.
# np.asarray makes the cell work whether or not all_opt_gaps has already been
# converted to an array.
gaps = np.asarray(all_opt_gaps)
# Budgets larger than the number of collected samples are dropped; otherwise
# the slice would silently clamp and the plot would show a point at the wrong x.
num_samples_to_consider = [n for n in (1, 10, 20, 50, 100) if n <= gaps.shape[1]]
mean_best_opt_gaps = [
    np.mean(np.min(gaps[:, :n], axis=1)) for n in num_samples_to_consider
]

In [None]:
# Plot the mean best-of-n optimality gap against the sample budget and save it.
fig, ax = plt.subplots()
ax.plot(num_samples_to_consider, mean_best_opt_gaps, '-o')
ax.grid(True)
ax.set_xlabel('number of samples per instance')
ax.set_ylabel('mean best optimality gap')
# The environment, data file and checkpoint are all for 20-node instances.
ax.set_title('optimality gap vs samples for tsp n=20')
fig.savefig(f'{model_root}/plot.jpg')
# Release the figure; with the Agg backend it is only ever written to disk.
plt.close(fig)

In [None]:
# Headline number: the per-instance best optimality gap averaged over all
# evaluated instances (displayed as the cell output).
np.mean(best_opt_gaps)