In [1]:
# modified from https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/RND%20Montezuma's%20revenge%20PyTorch/model.py

import math
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import init

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device.type)

# ---

# Seed both RNGs: torch drives the weight initialisation, numpy drives the
# batch sampling, so fixing both makes a run repeatable.
torch.manual_seed(42)
np.random.seed(42)

cuda


In [2]:
class RND(nn.Module):
    """Random Network Distillation pair: a trainable predictor and a frozen target.

    Both sub-networks share the same architecture, an MLP mapping
    ``input_size -> 128 -> 128 -> 128 -> 1`` with ELU activations, and are
    initialised independently. Only ``predictor`` has trainable parameters.

    Args:
        input_size: Number of features in each observation.
        output_size: Stored on the instance but not used to size any layer;
            both networks always emit a single value per observation.
    """

    def __init__(self, input_size, output_size):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size

        def build_mlp():
            # Same layer layout for both networks; each call creates fresh layers.
            return nn.Sequential(
                nn.Linear(input_size, 128),
                nn.ELU(),
                nn.Linear(128, 128),
                nn.ELU(),
                nn.Linear(128, 128),
                nn.ELU(),
                nn.Linear(128, 1),
            )

        # Predictor first, then target: the construction order fixes how the
        # random number generator is consumed, and therefore the seeded weights.
        self.predictor = build_mlp()
        self.target = build_mlp()

        # Orthogonal weights with gain sqrt(2) and zero biases for every
        # convolutional or linear layer in either network.
        gain = np.sqrt(2)
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                init.orthogonal_(layer.weight, gain)
                layer.bias.data.zero_()

        # The target network is a fixed random function: exclude it from training.
        for frozen in self.target.parameters():
            frozen.requires_grad = False

    def forward(self, next_obs):
        """Evaluate both networks on ``next_obs``.

        Returns:
            ``(predict_feature, target_feature)``: the predictor output
            followed by the target output for the same input.
        """
        target_feature = self.target(next_obs)
        return self.predictor(next_obs), target_feature

In [3]:
# RND pair for 4-feature observations; output_size is not used here.
rnd = RND(4, -1).to(device)

# Only the predictor's parameters are optimised; the target network is frozen.
optimizer = optim.Adam(rnd.predictor.parameters(), lr=1e-4)
mse = nn.MSELoss()

In [18]:
# load data
def load_data(data_dir='/home/yigit/projects/cnmp/data/scand'):
    """Load the state dataset and flatten it to one state per row.

    Reads ``dx.npy`` and ``dg.npy`` from ``data_dir``, concatenates them along
    axis 2, then collapses every leading axis so that each row is a single
    state (the original note describes the result as an ``[N*400, 4]`` array).
    The resulting shape is printed.

    Args:
        data_dir: Directory containing ``dx.npy`` and ``dg.npy``. The default
            is the location this notebook was originally written against, so
            calling ``load_data()`` with no argument behaves as before.

    Returns:
        A float32 tensor (on the CPU) with one state per row.
    """
    dx = np.load(os.path.join(data_dir, 'dx.npy'))
    dg = np.load(os.path.join(data_dir, 'dg.npy'))

    # Join the two arrays feature-wise, then merge all leading axes into rows.
    states = np.concatenate((dx, dg), axis=2)
    states = states.reshape(-1, states.shape[-1])
    print(states.shape)

    return torch.from_numpy(states).float()

# Keep the whole dataset on the training device; batches are sliced from it.
x = load_data().to(device)
observation_size = x.size(0)  # number of state rows
batch_size = 128

# get a batch
def get_batch(size=128):
    """Return up to ``size`` distinct, randomly chosen rows of the global ``x``.

    A fresh permutation of all ``observation_size`` row indices is drawn from
    numpy's global RNG on every call, and its first ``size`` entries select
    the rows.
    """
    shuffled = np.random.permutation(observation_size)
    chosen = shuffled[:size]
    return x[chosen, :]

epochs = 20
losses = []  # one entry per epoch: the sum of that epoch's batch losses

for epoch in range(epochs):
    running_loss = 0
    # observation size = #traj * 400 obs/traj
    for step in range(observation_size // batch_size):
        print(f'Epoch: {epoch}  - {step*batch_size}/{observation_size}', end='\r')

        # Regress the predictor's output onto the frozen target's output.
        prediction, target_value = rnd(get_batch(batch_size))
        loss = mse(prediction, target_value.detach())
        running_loss += loss.data

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    losses.append(running_loss)

(494000, 4)
Epoch: 0  - 0/494000

TypeError: 'NoneType' object is not subscriptable

In [25]:
# torch.save does not create missing parent directories, so make sure
# 'trained/' exists before writing the checkpoint.
os.makedirs('trained', exist_ok=True)
torch.save(rnd.state_dict(), 'trained/rnd_1000.pth')

In [29]:
# Sanity check on one hand-written state: prints (predictor output, target output).
# The probe is created on `device` instead of via .cuda(), so this cell also
# runs on CPU-only machines, matching the device fallback used for the model.
print(rnd(torch.tensor([0.2, 0.1, 0.1, 0.05], device=device)))

# import matplotlib.pyplot as plt

# lcpu = []

# for l in losses:
#     lcpu.append(l.cpu())

# plt.plot(range(len(lcpu[3:])), lcpu[3:])
# plt.show()

(tensor([0.0978], device='cuda:0', grad_fn=<AddBackward0>), tensor([0.0977], device='cuda:0'))
