# Pretrain Policy with  `MPC dataset`

In [1]:
import numpy as np
import pickle
from typing import Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
import torch.optim as optim

from tqdm import tqdm

from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from policy import GaussianPolicy

  from .autonotebook import tqdm as notebook_tqdm


### Load  `MPC dataset`

In [41]:
with open(file='../data/dataset.pkl', mode='rb') as f:
    dataset = pickle.load(f)

action_batch = torch.Tensor(dataset['action'])
qpos_batch = torch.Tensor(dataset['qpos'])
qvel_batch = torch.Tensor(dataset['qvel'])

# print("qpos : ", qpos_batch.shape)
# print("qvel : ", qvel_batch.shape)

del dataset

qpos_batch = qpos_batch[:, 2:]
qpos_batch = torch.cat((qpos_batch[:-2, :],qpos_batch[1:-1, :],qpos_batch[2:, :]),dim=1)

qvel_batch = torch.cat((qvel_batch[:-2, :],qvel_batch[1:-1, :],qvel_batch[2:, :]),dim=1)

obs_batch = torch.cat((qpos_batch, qvel_batch), dim=1)

action_batch = action_batch[2:]
obs_dim = obs_batch.shape[1]
action_dim = action_batch.shape[1]
hidden_dim = 512

print("obs : ", obs_batch.shape)
print("action : ", action_batch.shape)


qpos :  torch.Size([1136000, 50])
qvel :  torch.Size([1136000, 49])


In [44]:
policy = GaussianPolicy(
    input_dim=obs_dim,
    output_dim=action_dim,
    hidden_dim=hidden_dim,
    is_deterministic=False,
)

RunningMeanStd:  291
RunningMeanStd:  26


In [45]:
class MPCDataset(Dataset):
    def __init__(self, obs, act):
        self.obs = obs
        self.act = act
        assert self.obs.shape[0] == self.act.shape[0]

    def __len__(self):
        return self.obs.shape[0]

    def __getitem__(self,idx):
        return self.obs[idx], self.act[idx]

In [46]:
train_dataset = MPCDataset(obs_batch, action_batch)
train_dataloader = DataLoader(train_dataset, batch_size=2048, shuffle=True)
# test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [47]:
num_epoch = 100
optimizer = optim.Adam(policy.parameters(), lr=5e-4, eps=1e-7, betas=(0.9, 0.95))
loss = torch.nn.MSELoss()
def criterion(output: torch.tensor, y: torch.tensor):
    diff = output - y
    return torch.mean(torch.sqrt(torch.mean(torch.square(diff),dim=1)))
    # return loss(output, y)

for epoch in range(num_epoch):

    with tqdm(train_dataloader, unit="batch") as tepoch:
        
        for x, y in tepoch:
            
            tepoch.set_description(f"Epoch {epoch+1}")

            optimizer.zero_grad()
                
            output, _ = policy(x)
            l = criterion(output, y)
            l.backward()
            optimizer.step()
            
            tepoch.set_postfix(loss=l.item())


Epoch 1: 100%|██████████| 555/555 [00:16<00:00, 34.41batch/s, loss=0.145]
Epoch 2: 100%|██████████| 555/555 [00:16<00:00, 33.48batch/s, loss=0.135]
Epoch 3: 100%|██████████| 555/555 [00:16<00:00, 32.99batch/s, loss=0.13] 
Epoch 4:  38%|███▊      | 211/555 [00:06<00:10, 33.91batch/s, loss=0.128]

In [None]:
torch.save(policy.state_dict(), "pretrained2.pth")

In [None]:
torch.load("pretrained.pth")

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.