In [3]:
import gymnasium as gym
import numpy as np

I'll try to make tile coding for mountain car problem
4 tiles each with 16 tilings resulting in 64 parameters for my x(s) vector

In [8]:
import numpy as np

class Bin2D:
    def __init__(self, posx, posy, size):
        self.posx = posx
        self.posy = posy
        self.size = size  # side length of the bin

    def check(self, x, y):
        if self.posx <= x < self.posx + self.size and self.posy <= y < self.posy + self.size:
            return 1
        return 0


class Tiling2D:
    def __init__(self, offset_x, offset_y, width, height, n_bins):
        self.offset_x = offset_x
        self.offset_y = offset_y
        self.width = width
        self.height = height
        self.n_bins = n_bins

    def setup(self):
        """Create grid of bins for this tiling."""
        self.bins = []
        cell_w = self.width / self.n_bins
        cell_h = self.height / self.n_bins

        for i in range(self.n_bins):
            for j in range(self.n_bins):
                x = self.offset_x + i * cell_w
                y = self.offset_y + j * cell_h
                self.bins.append(Bin2D(x, y, cell_w))

    def check(self, x, y):
        """Return binary vector for this tiling."""
        return [b.check(x, y) for b in self.bins]


class Tile2D:
    def __init__(self, x_range, y_range, n_tilings, n_bins):
        self.x_range = x_range
        self.y_range = y_range
        self.n_tilings = n_tilings
        self.n_bins = n_bins

    def setup(self):
        """Create multiple slightly offset tilings."""
        self.tilings = []
        x_min, x_max = self.x_range
        y_min, y_max = self.y_range
        width = x_max - x_min
        height = y_max - y_min

        for i in range(self.n_tilings):
            # small offset for each tiling (staggered grids)
            offset_x = x_min + (i / self.n_tilings) * (width / self.n_bins)
            offset_y = y_min + (i / self.n_tilings) * (height / self.n_bins)
            tiling = Tiling2D(offset_x, offset_y, width, height, self.n_bins)
            tiling.setup()
            self.tilings.append(tiling)

    def check(self, x, y):
        """Return flattened binary vector of all tilings."""
        features = []
        for tiling in self.tilings:
            features.extend(tiling.check(x, y))
        return np.array(features, dtype=np.float32)


def x_of_s_a(s, a, tile2d, n_actions=3):
    """Return full state-action feature vector x(s,a)."""
    phi = tile2d.check(*s)
    n = len(phi)
    x = np.zeros(n_actions * n)
    x[a * n:(a + 1) * n] = phi
    return x

if __name__ == "__main__":
    # Mountain Car ranges
    pos_range = (-1.2, 0.6)
    vel_range = (-0.07, 0.07)

    tile = Tile2D(pos_range, vel_range, n_tilings=4, n_bins=4)
    tile.setup()

    # Example state and action
    s = (-0.5, 0.02)
    a = 2  # "push right"

    x = x_of_s_a(s, a, tile)

    print("Feature vector length:", len(x))
    print("Number of active features:", np.sum(x))


def getQValue(action, obs, weights):
    qval = np.dot(x_of_s_a(obs, action, tile), weights[:-1]) + weights[-1]
    return qval

def getGreddyAction(obs, weights):
    qvalues = np.array([getQValue(i, obs, weights) for i in range(3)])
    return np.argmax(qvalues)

weights = np.append(np.random.random(192), 1)

def mountain_sarsa(render, n):
    global weights
    if render:
        env = gym.make("MountainCar-v0", render_mode='human')
    else:    
        env = gym.make("MountainCar-v0")
    obs, _ = env.reset()

    EPS = 0.05
    N_ACTIONS = 3
    LR = 10e-4
    DF = 1

    episodes = n
    terminated, truncated = False, False

    for _ in range(episodes):
        while not terminated and not truncated:
            if EPS > np.random.rand():
                action = np.random.randint(0,3)
            else:
                action = getGreddyAction(obs, weights)

            old_obs = obs
            old_obs_features = x_of_s_a(old_obs, action, tile)
            old_obs_features = np.append(old_obs_features, 0)
            obs, r, terminated, truncated, _ = env.step(action)
            if terminated or truncated:
                weights += LR*(r - DF*getQValue(action, old_obs, weights))*old_obs_features ##ALGORITHM NOT FINISHED!
                break

            if EPS > np.random.rand():
                next_action = np.random.randint(0,3)
            else:
                next_action = getGreddyAction(obs, weights)

            update_step = LR*(r + DF*getQValue(next_action, obs, weights) - getQValue(action, old_obs, weights))
            weights += update_step*old_obs_features
        env.reset()
        terminated, truncated = False, False
    env.close()

Feature vector length: 192
Number of active features: 11.0


In [9]:
mountain_sarsa(False, 1000)

In [10]:
mountain_sarsa(True, 1)