In [1]:
import torch
import numpy as np
import gym

env = gym.make("CartPole-v1")

In [19]:
def _create_grid(lower, upper, bins, offsets):
    return [np.linspace(lower[dim], upper[dim], bins[dim] + 1)[1:-1] + offsets[dim] for dim in range(len(bins))]

def _create_tilings(lower, upper, specs):
    return [_create_grid(lower, upper, bins, offsets) for bins, offsets in specs]

def _discretize(sample, grid):
    return tuple(int(np.digitize(s, g)) for s, g in zip(sample, grid))

def _tile_encoding(sample, tilings):
    return [_discretize(sample, grid) for grid in tilings]

def _get_indices(tile_encoding):
    n_bins = 5
    n_tilings = len(tile_encoding)

    indices = [i*n_bins + j + n*(n_bins**2) for n, (i, j) in enumerate(tile_encoding)]
    print(indices)
    features = np.zeros(n_bins**2*n_tilings)
    features[indices]=1

    return features

def _tiling_features(x, lower, upper, specs):

    tilings = _create_tilings(lower, upper, specs)
    print(tilings)
    
    if len(x.size()) == 1:
        tile_encoding = _tile_encoding(x[[0,2]], tilings)
        features = _get_indices(tile_encoding)
        
        return torch.cat([x[[1,3]],torch.Tensor(features)], -1)
    
    elif len(x.size()) == 2:       
        features = []
        for xi in x:
            tile_encoding = _tile_encoding(xi[[0,2]], tilings)
            print(tile_encoding)
            features.append(_get_indices(tile_encoding))

        return torch.cat([x[:,[1,3]],torch.Tensor(features)], -1)



In [4]:
x = torch.Tensor([[1,0,3,1],[1,2,3,0],[2,3,1,1]])
x1 = torch.Tensor([1,2,3,4])
len(x.size())

2

In [21]:
print(_tiling_features(x, lower=[-3,-0.3], upper=[3,0.3], specs=[([5,5],[0,0]),([5,5],[0.2,0.02]),([5,5],[-0.2,-0.02])]))

[[array([-1.8, -0.6,  0.6,  1.8]), array([-0.18, -0.06,  0.06,  0.18])], [array([-1.6, -0.4,  0.8,  2. ]), array([-0.16, -0.04,  0.08,  0.2 ])], [array([-2. , -0.8,  0.4,  1.6]), array([-0.2 , -0.08,  0.04,  0.16])]]
[(3, 4), (3, 4), (3, 4)]
[19, 44, 69]
[(3, 4), (3, 4), (3, 4)]
[19, 44, 69]
[(4, 4), (4, 4), (4, 4)]
[24, 49, 74]
tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [76]:
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)