In [None]:
import gymnasium as gym
import numpy as np

I'll try to make tile coding for mountain car problem
4 tiles each with 16 tilings resulting in 64 parameters for my x(s) vector

In [None]:
class Bin2D():
    def __init__(self, posx, posy, offset):
        self.offset = offset
        self.posx = posx
        self.posy = posy

    def check(self, x, y):
        if (self.posx > x or self.posx + self.offset < x):
            return 0
        if (self.posy > y or self.posy + self.offset < y):
            return 0
        return 1

class Tiling2D():
    def __init__(self, posx, posy, offset, bins):
        self.x = posx
        self.y = posy
        self.offset = offset
        self.n_bins = bins

    def setup(self):
        self.bins = list()
        for i in range(self.n_bins):
            for j in range(self.n_bins):
                self.bins.append(Bin2D((i*self.offset)+self.x,
                                        (j*self.offset)+self.y,
                                        self.offset/self.n_bins))

    def check(self, x, y):
        return [bin2d.check(x, y) for bin2d in self.bins]

class Tile2D():
    def __init__(self, x, y, offset:float, tilings:int, bins:int):
        self.n_tilings = tilings
        self.bins = bins
        self.offset = offset
        self.x = x
        self.y = y
        self.offset = offset

    def setup(self):
        self.tilings = list()
        for i in range(self.n_tilings):
            for j in range(self.n_tilings):
                self.tilings.append(Tiling2D((i*self.offset)+self.x,
                                             (j*self.offset)+self.y,
                                             self.offset/self.n_tilings,
                                             self.bins))
                
        for tiling in self.tilings:
            tiling.setup()

    def check(self, x, y):
        return [tiling2d.check(x, y) for tiling2d in self.tilings]

In [63]:
tile = Tile2D(-1.2, -0.5, 0.5, 4, 4)

In [64]:
tile.setup()

In [65]:
np.matrix(tile.check(0.3, 0.3))

matrix([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [76]:
import numpy as np

class Bin2D:
    def __init__(self, posx, posy, size):
        self.posx = posx
        self.posy = posy
        self.size = size  # side length of the bin

    def check(self, x, y):
        if self.posx <= x < self.posx + self.size and self.posy <= y < self.posy + self.size:
            return 1
        return 0


class Tiling2D:
    def __init__(self, offset_x, offset_y, width, height, n_bins):
        self.offset_x = offset_x
        self.offset_y = offset_y
        self.width = width
        self.height = height
        self.n_bins = n_bins

    def setup(self):
        """Create grid of bins for this tiling."""
        self.bins = []
        cell_w = self.width / self.n_bins
        cell_h = self.height / self.n_bins

        for i in range(self.n_bins):
            for j in range(self.n_bins):
                x = self.offset_x + i * cell_w
                y = self.offset_y + j * cell_h
                self.bins.append(Bin2D(x, y, cell_w))

    def check(self, x, y):
        """Return binary vector for this tiling."""
        return [b.check(x, y) for b in self.bins]


class Tile2D:
    def __init__(self, x_range, y_range, n_tilings, n_bins):
        self.x_range = x_range
        self.y_range = y_range
        self.n_tilings = n_tilings
        self.n_bins = n_bins

    def setup(self):
        """Create multiple slightly offset tilings."""
        self.tilings = []
        x_min, x_max = self.x_range
        y_min, y_max = self.y_range
        width = x_max - x_min
        height = y_max - y_min

        for i in range(self.n_tilings):
            # small offset for each tiling (staggered grids)
            offset_x = x_min + (i / self.n_tilings) * (width / self.n_bins)
            offset_y = y_min + (i / self.n_tilings) * (height / self.n_bins)
            tiling = Tiling2D(offset_x, offset_y, width, height, self.n_bins)
            tiling.setup()
            self.tilings.append(tiling)

    def check(self, x, y):
        """Return flattened binary vector of all tilings."""
        features = []
        for tiling in self.tilings:
            features.extend(tiling.check(x, y))
        return np.array(features, dtype=np.float32)


def x_of_s_a(s, a, tile2d, n_actions=3):
    """Return full state-action feature vector x(s,a)."""
    phi = tile2d.check(*s)
    n = len(phi)
    x = np.zeros(n_actions * n)
    x[a * n:(a + 1) * n] = phi
    return x


# ---------------- Example ----------------

if __name__ == "__main__":
    # Mountain Car ranges
    pos_range = (-1.2, 0.6)
    vel_range = (-0.07, 0.07)

    tile = Tile2D(pos_range, vel_range, n_tilings=4, n_bins=4)
    tile.setup()

    # Example state and action
    s = (-0.5, 0.02)
    a = 2  # "push right"

    x = x_of_s_a(s, a, tile)

    print("Feature vector length:", len(x))
    print("Number of active features:", np.sum(x))


Feature vector length: 192
Number of active features: 11.0


In [None]:
env = gym.make("MountainCar-v0") #EPISODEC SEMI GRADIENT SARSA - MOUNTAIN CAR!
obs, _ = env.reset()

EPS = 0.05
N_ACTIONS = 3
LR = 10e-4
DF = 1

episodes = 1000
terminated, truncated = False, False
weights = np.random.random(192)
np.append(weights, 1) # bias

for _ in range(episodes):
    while not terminated and not truncated:
        if EPS > np.random.rand():
            action = np.random.randint(0,3)
        else:
            qvalues = list()
            for i in range(3):
                qvalues.append(np.dot(x_of_s_a(obs, i, tile), weights[:192]) + weights[-1])
            action = np.argmax(qvalues)
            
        obs, r, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            weights += LR*(r + DF) ##ALGORITHM NOT FINISHED!
            break
    env.reset()

[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(5.45040116138144), np.float64(4.8594103748152975), np.float64(5.779977162149351)]
[np.float64(6.359766737487862), np.float64(5.404579463780228), np.float64(5.829852233710417)]
[np.float64(6.359766737487862), np.float64(5.404579463780228), np.float64(5.829852233710417)]
[np.float64(5.569365669542701), np.float64(5.089314915919158), np.float64(5.856601074882448)]
[np.float64(5.569365669542701), np.float64(5.089314915919158

In [102]:
obs

(array([-0.5424907,  0.       ], dtype=float32), {})