In [1]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1
import torch
from torch import nn
from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary
from lightning.pytorch.loggers import WandbLogger
torch.cuda.device_count()

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1


1

In [2]:
from rl4co.envs import SSPEnv
from rl4co.models.zoo.am import AttentionModelPolicy, AttentionModel
from rl4co.utils.trainer import RL4COTrainer
from rl4co.utils.decoding import random_policy, rollout
from rl4co.utils.ops import gather_by_index

In [3]:
class SSPInitEmbedding(nn.Module):
    """Initial embedding layer for SSP instances.

    Applies a single learned linear map to the last dimension of
    ``td["codes"]``, taking it from ``fixed_len`` features to
    ``embedding_dim`` features.

    Args:
        embedding_dim: size of the produced embedding.
        fixed_len: size of the last dimension of ``td["codes"]``.
        linear_bias: whether the linear layer carries a bias term.
    """

    def __init__(self, embedding_dim, fixed_len, linear_bias=True):
        super().__init__()
        # The attribute name is part of the state-dict keys, so it must stay
        # `init_embed` for existing checkpoints to keep loading.
        self.init_embed = nn.Linear(
            in_features=fixed_len,
            out_features=embedding_dim,
            bias=linear_bias,
        )

    def forward(self, td):
        """Return the linear projection of ``td["codes"]``."""
        return self.init_embed(td["codes"])

class SSPContext(nn.Module):
    """Context embedding for the SSP decoder.

    Projects a single context vector per decoding query into the embedding
    space:
        - while ``td["i"] < 1`` (presumably the first decoding step, before
          any node has been chosen), a learned placeholder vector is used;
        - otherwise the embedding of ``td["current_node"]`` is used.

    Args:
        embedding_dim: size of the node embeddings and of the returned context.
        linear_bias: whether the projection layer carries a bias term.
    """

    def __init__(self, embedding_dim, linear_bias=True):
        super().__init__()
        # Learned stand-in for the current-node embedding, initialised
        # uniformly in [-1, 1].
        self.W_placeholder = nn.Parameter(
            torch.empty(embedding_dim).uniform_(-1, 1)
        )
        self.project_context = nn.Linear(
            embedding_dim, embedding_dim, bias=linear_bias
        )

    def forward(self, embeddings, td):
        """Build the projected context.

        Args:
            embeddings: node embeddings; dimension 0 is the batch and the last
                dimension is the embedding size.
            td: mapping providing ``"i"`` and, once ``i >= 1``,
                ``"current_node"``.

        Returns:
            The output of ``project_context`` applied to the placeholder
            (expanded over the batch) or to the gathered node embeddings.
        """
        batch_size = embeddings.size(0)

        # Only the very first element of td["i"] is inspected (cheap check);
        # this assumes the whole batch is at the same step.
        if td["i"][(0,) * td["i"].dim()].item() < 1:
            context_embedding = self.W_placeholder[None, :].expand(
                batch_size, self.W_placeholder.size(-1)
            )
        else:
            current_node = td["current_node"]
            # One index per batch element -> (batch, emb); several indices per
            # batch element -> (batch, n, emb).
            node_dim = (
                (-1,) if current_node.dim() == 1 else (current_node.size(-1), -1)
            )
            context_embedding = gather_by_index(
                embeddings,
                current_node.reshape(batch_size, -1),
            ).view(batch_size, *node_dim)
        return self.project_context(context_embedding)
        
class StaticEmbedding(nn.Module):
    """Parameter-free module whose forward pass yields three zeros.

    Any constructor arguments are accepted and discarded.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

    def forward(self, td):
        """Ignore ``td`` and return the constant triple ``(0, 0, 0)``."""
        return (0, 0, 0)

In [4]:
# --- Problem / evaluation configuration ---
num_loc = 100
fixed_len = 15
emb_dim = 128
test_set_size = 10000  # used for both test_data_size and test_batch_size below

# Environment whose test set is read from `test_file`.
env = SSPEnv(
    generator_params={"num_loc": num_loc, "fixed_len": fixed_len},
    test_file="ssp_100_15.npz",
)

# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
checkpoint_path = "/home/yining/ssp/rl4co/checkpoints_ssp/epoch_epoch=531.ckpt"

# Restore the trained AttentionModel from the checkpoint, then attach the
# environment built above and evaluate the whole test set in a single batch.
model = AttentionModel.load_from_checkpoint(checkpoint_path)
model.env = env
model.data_cfg["test_batch_size"] = test_set_size
model.data_cfg["test_data_size"] = test_set_size

# RL4COTrainer (imported with the other rl4co names above) is a wrapper around
# Lightning's `Trainer`.
trainer = RL4COTrainer(
    max_epochs=1000,
    accelerator="gpu",
    devices=1,
)

# `out` holds the test results; the comparison cell reads out[0]["test/reward"].
out = trainer.test(model)

/home/yining/miniconda3/envs/ai4co/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
/home/yining/miniconda3/envs/ai4co/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.
/home/yining/miniconda3/envs/ai4co/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['baseline.baseline.policy.encoder.init_embedding.init_embed.weight', 'baseline.baseline.policy.encoder.init_embedding.init_embed.bias', 'baseline.baseline.policy.encoder.net.layers.0.0.module.Wqkv.weight', 'baseline.baseline.policy.encoder.net.layers.0.0.mod

Testing: |                                                                                        | 0/? [00:00…

In [5]:
import numpy as np

# Greedy k-mers baselines, largest k first, each paired by position with its
# hard-coded run-time string.
# NOTE(review): `time` would shadow the stdlib module of the same name if it
# were imported in this notebook; the name is kept for compatibility.
ks = [15, 14, 10, 5, 3, 2, 1]
time = ['38s', '1m36s', '6m10s', '20m59s', '43m02s', '1h8m', '1h50m']

# Test reward scaled by the number of locations and sign-flipped to a length.
RL = out[0]["test/reward"] * env.generator.num_loc * -1

header = f"RL constructive length:\t {RL:.2f},\tgap: {0:.2f}%,\t time: 1s \t(GPU in parallel)"
print(header)
print('-' * 90)

for k, t in zip(ks, time):
    baseline_lengths = np.load(f'greedy_{k}-mers_output.npz')["arr_0"]
    greedy_baseline = baseline_lengths.mean()
    # Relative difference of the baseline w.r.t. the RL result, in percent.
    gap = (greedy_baseline - RL) / RL * 100
    row = f"{k}-mers-greedy length:\t {greedy_baseline:.2f},\tgap: {gap:.2f}%,\t time: {t} \t(CPU in series)"
    print(row)

RL constructive length:	 1014.75,	gap: 0.00%,	 time: 1s 	(GPU in parallel)
------------------------------------------------------------------------------------------
15-mers-greedy length:	 1483.70,	gap: 46.21%,	 time: 38s 	(CPU in series)
14-mers-greedy length:	 1444.29,	gap: 42.33%,	 time: 1m36s 	(CPU in series)
10-mers-greedy length:	 1264.77,	gap: 24.64%,	 time: 6m10s 	(CPU in series)
5-mers-greedy length:	 977.88,	gap: -3.63%,	 time: 20m59s 	(CPU in series)
3-mers-greedy length:	 940.53,	gap: -7.31%,	 time: 43m02s 	(CPU in series)
2-mers-greedy length:	 920.37,	gap: -9.30%,	 time: 1h8m 	(CPU in series)
1-mers-greedy length:	 905.13,	gap: -10.80%,	 time: 1h50m 	(CPU in series)


In [6]:
from tensordict.tensordict import TensorDict
from tqdm import tqdm
# Load the test instances. The context manager closes the .npz archive as soon
# as the array has been copied into a tensor.
with np.load("data/ssp_100_15.npz") as npz_data:
    codes = torch.tensor(npz_data["codes"])

# The batch size follows the leading dimension of the loaded array instead of
# a hard-coded 10000, so the cell keeps working if the file size changes.
td = TensorDict({"codes": codes}, batch_size=codes.shape[0])

In [7]:
# Best-of-N sampling with the trained model.
# The per-iteration model output is stored in `sample_out` rather than `out`,
# so the `trainer.test` results in `out` (read when computing `RL`) are not
# overwritten and the comparison cell can still be re-run afterwards.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
td_init = model.env.reset(td).to(device)
model = model.to(device)

num_sampling_rounds = 400  # number of sampling passes over the test set
report_every = 100         # print the running best after this many passes

rewards_best = None
with torch.no_grad():
    for i in tqdm(range(num_sampling_rounds)):
        sample_out = model(
            td_init.clone(),
            phase="test",
            decode_type="sampling",
            return_actions=False,
        )
        # No graph is built under no_grad, so moving to CPU is all that is needed.
        rewards_now = sample_out["reward"].cpu()

        # Element-wise running maximum: the best reward seen so far per instance.
        if rewards_best is None:
            rewards_best = rewards_now
        else:
            rewards_best = torch.maximum(rewards_now, rewards_best)

        if (i + 1) % report_every == 0:
            # Same scaling and sign flip as used for `RL`.
            obj = -rewards_best.mean().item() * env.generator.num_loc
            print('Sampling:', i+1, 'length:', obj,  'gap:', (obj - RL) / RL * 100, '%')

 25%|████████████████████▎                                                            | 100/400 [05:55<18:27,  3.69s/it]

Sampling: 100 length: 988.6712074279785 gap: -2.569926751011708 %


 50%|████████████████████████████████████████▌                                        | 200/400 [12:02<12:11,  3.66s/it]

Sampling: 200 length: 985.7806205749512 gap: -2.854783927702102 %


 75%|████████████████████████████████████████████████████████████▊                    | 300/400 [18:07<06:05,  3.65s/it]

Sampling: 300 length: 984.1910362243652 gap: -3.0114318795686636 %


100%|█████████████████████████████████████████████████████████████████████████████████| 400/400 [24:12<00:00,  3.63s/it]

Sampling: 400 length: 983.0849647521973 gap: -3.1204313363570235 %



