In [None]:
# Parameters
GROUP_ID = "Group36"
ALGORITHM = "SARSA"  # ValItr | QLrng | SARSA
TRACK_NAME = "Tracks/Provided/U-track.txt"
CRASH_POS = "NRST"  # NRST | STRT

In [22]:
def main():
    """
    Main function to run the racing algorithms
    """
    
    print(f"\nRacing Algorithm Execution")
    print(f"Group ID: {GROUP_ID}")
    print(f"Algorithm: {ALGORITHM}")
    print(f"Track: {TRACK_NAME}")
    print(f"Crash Scenario: {CRASH_POS}")
    print(f"{'-'*50}")
    
    # Check which algorithm to run
    if ALGORITHM == "ValItr":
        # Run Value Iteration
        racer = run_value_iteration(GROUP_ID, TRACK_NAME, CRASH_POS)
        
    elif ALGORITHM == "QLrng":
        # Run Q-Learning
        racer = run_q_learning(GROUP_ID, TRACK_NAME, CRASH_POS)
        
    elif ALGORITHM == "SARSA":
        # Run SARSA
        racer = run_sarsa(GROUP_ID, TRACK_NAME, CRASH_POS)
        
    else:
        print(f"Error: Unknown algorithm '{ALGORITHM}'")
        print("Valid algorithms are: ValItr, QLrng, SARSA")
        racer = None
    
    return racer


if __name__ == "__main__":
    main()


Racing Algorithm Execution
Group ID: Group36
Algorithm: SARSA
Track: Tracks/Provided/2-track.txt
Crash Scenario: NRST
--------------------------------------------------

Running SARSA Algorithm
Group: Group36
Track: Tracks/Provided/2-track.txt
Crash Scenario: NRST
Track loaded: 28 x 30
Start positions: 5
Finish positions: 5
Starting SARSA training
Episode 100: 0.00
Episode 100: 0.00
Episode 200: -1.00
Episode 200: -1.00
Episode 300: 0.00
Episode 300: 0.00
Episode 400: 0.00
Episode 400: 0.00
Episode 500: 0.00
Episode 500: 0.00
Episode 600: 0.00
Episode 600: 0.00
Episode 700: 0.00
Episode 700: 0.00
Episode 800: 0.00
Episode 800: 0.00
Episode 900: 0.00
Episode 900: 0.00
Episode 1000: 0.00
Episode 1000: 0.00
Episode 1100: 0.00
Episode 1100: 0.00
Episode 1200: 0.00
Episode 1200: 0.00
Episode 1300: 0.00
Episode 1300: 0.00
Episode 1400: 0.00
Episode 1400: 0.00
Episode 1500: 0.00
Episode 1500: 0.00
Episode 1600: 0.00
Episode 1600: 0.00
Episode 1700: 0.00
Episode 1700: 0.00
Episode 1800: 0.00


In [23]:
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import itertools
import random
import math
import sys

class Racetrack:
    def __init__(self, track_file, crash_scenario="NRST"):

        with open(track_file, 'r') as f:
            # Read dimensions from first line
            dims = f.readline().strip()
            self.rows, self.cols = map(int, dims.split(','))
            
            # Read track
            self.track = []
            for _ in range(self.rows):
                line = f.readline().rstrip('\n')
                # Pad line to ensure it has exactly cols characters
                if len(line) < self.cols:
                    line = line + ' ' * (self.cols - len(line))
                self.track.append(list(line[:self.cols]))
        
        self.crash_scenario = crash_scenario
        
        # Find all start and finish positions
        self.start_positions = []
        self.finish_positions = []
        for r in range(self.rows):
            for c in range(self.cols):
                if self.track[r][c] == 'S':
                    self.start_positions.append((r, c))
                elif self.track[r][c] == 'F':
                    self.finish_positions.append((r, c))
        
        # Check if position is on track
        self.track_positions = []
        for r in range(self.rows):
            for c in range(self.cols):
                if self.track[r][c] in ['.', 'S', 'F']:
                    self.track_positions.append((r, c))
    
    def is_on_track(self, position):
        """Check if a position is on the track"""
        r, c = position
        if 0 <= r < self.rows and 0 <= c < self.cols:
            return self.track[r][c] in ['.', 'S', 'F']
        return False
    
    def is_finish(self, position):
        """Check if position is a finish line cell"""
        r, c = position
        if 0 <= r < self.rows and 0 <= c < self.cols:
            return self.track[r][c] == 'F'
        return False
    
    def get_nearest_track_position(self, position):
        """Find nearest track position using Manhattan distance"""
        r, c = position
        min_dist = float('inf')
        nearest = None
        
        for tr, tc in self.track_positions:
            dist = abs(tr - r) + abs(tc - c)
            if dist < min_dist:
                min_dist = dist
                nearest = (tr, tc)
        
        return nearest
    
    def bresenham_line(self, start, end):
        """
        Return all points on the line from start to end using Bresenham's algorithm
        """
        r1, c1 = start
        r2, c2 = end
        
        points = []
        
        # Check for special cases
        if r1 == r2:
            # Horizontal line
            step = 1 if c2 > c1 else -1
            for c in range(c1, c2 + step, step):
                points.append((r1, c))
            return points
        
        if c1 == c2:
            # Vertical line
            step = 1 if r2 > r1 else -1
            for r in range(r1, r2 + step, step):
                points.append((r, c1))
            return points
        
        # General case
        dr = abs(r2 - r1)
        dc = abs(c2 - c1)
        
        if dr > dc:
            # More vertical than horizontal
            if r1 > r2:
                r1, r2 = r2, r1
                c1, c2 = c2, c1
            dx = c2 - c1
            dy = r2 - r1
            xi = 1
            if dx < 0:
                xi = -1
                dx = -dx
            D = 2*dx - dy
            c = c1
            
            for r in range(r1, r2 + 1):
                points.append((r, c))
                if D > 0:
                    c += xi
                    D -= 2*dy
                D += 2*dx
        else:
            # More horizontal than vertical
            if c1 > c2:
                r1, r2 = r2, r1
                c1, c2 = c2, c1
            dx = c2 - c1
            dy = r2 - r1
            yi = 1
            if dy < 0:
                yi = -1
                dy = -dy
            D = 2*dy - dx
            r = r1
            
            for c in range(c1, c2 + 1):
                points.append((r, c))
                if D > 0:
                    r += yi
                    D -= 2*dx
                D += 2*dy
        
        return points
    
    def check_crash(self, old_pos, new_pos):
        """
        Check if car crashes between old_pos and new_pos
        Returns: (crashed, crash_position)
        """
        line_points = self.bresenham_line(old_pos, new_pos)
        
        for point in line_points:
            r, c = point
            if not (0 <= r < self.rows and 0 <= c < self.cols):
                return True, point
            if self.track[r][c] == '#':
                return True, point
        
        return False, None


class ValueIterationRacer:
    def __init__(self, racetrack, gamma=0.9, theta=1e-4):
        """
        Initialize Value Iteration algorithm for racetrack
        """
        self.track = racetrack
        self.gamma = gamma
        self.theta = theta
        
        # Define state space
        # State: (row, col, vel_r, vel_c) where velocities are in [-5, 5]
        self.velocities = list(range(-5, 6))
        self.actions = [(a_r, a_c) for a_r in [-1, 0, 1] for a_c in [-1, 0, 1]]
        
        # Initialize value function and policy
        self.V = defaultdict(float)  # Value function
        self.policy = defaultdict(lambda: (0, 0))  # Policy
        
        # Initialize Q-values
        self.Q = defaultdict(lambda: defaultdict(float))
    
    def get_next_state(self, state, action):
        """
        Get possible next states and probabilities for a given state and action
        Returns: list of (next_state, probability, cost) tuples
        """
        r, c, v_r, v_c = state
        a_r, a_c = action
        
        # Try to apply acceleration (80% success rate)
        # With 20% probability, acceleration fails
        outcomes = []
        
        # Outcome 1: Acceleration succeeds (80% probability)
        new_v_r = max(-5, min(5, v_r + a_r))
        new_v_c = max(-5, min(5, v_c + a_c))
        new_r = r + new_v_r
        new_c = c + new_v_c
        
        # Check for crash
        crashed, crash_pos = self.track.check_crash((r, c), (new_r, new_c))
        
        if crashed:
            if self.track.crash_scenario == "NRST":
                # Move to nearest track position, velocity set to 0
                nearest = self.track.get_nearest_track_position(crash_pos)
                next_state = (nearest[0], nearest[1], 0, 0)
            else:  # "STRT"
                # Move to random start position, velocity set to 0
                start_pos = random.choice(self.track.start_positions)
                next_state = (start_pos[0], start_pos[1], 0, 0)
        else:
            next_state = (new_r, new_c, new_v_r, new_v_c)
        
        # Check if next state is finish
        if self.track.is_finish((next_state[0], next_state[1])):
            cost = 0  # Finish state has 0 cost
        else:
            cost = 1  # Regular move cost
        
        outcomes.append((next_state, 0.8, cost))
        
        # Outcome 2: Acceleration fails (20% probability)
        new_v_r = v_r
        new_v_c = v_c
        new_r = r + new_v_r
        new_c = c + new_v_c
        
        # Check for crash
        crashed, crash_pos = self.track.check_crash((r, c), (new_r, new_c))
        
        if crashed:
            if self.track.crash_scenario == "NRST":
                nearest = self.track.get_nearest_track_position(crash_pos)
                next_state = (nearest[0], nearest[1], 0, 0)
            else:  # "STRT"
                start_pos = random.choice(self.track.start_positions)
                next_state = (start_pos[0], start_pos[1], 0, 0)
        else:
            next_state = (new_r, new_c, new_v_r, new_v_c)
        
        # Check if next state is finish
        if self.track.is_finish((next_state[0], next_state[1])):
            cost = 0
        else:
            cost = 1
        
        outcomes.append((next_state, 0.2, cost))
        
        return outcomes
    
    def train(self):
        """
        Perform Value Iteration algorithm
        """
        print("Starting Value Iteration...")
        
        iteration = 0
        max_delta = float('inf')
        
        # Initialize all states
        states = []
        for r in range(self.track.rows):
            for c in range(self.track.cols):
                if self.track.is_on_track((r, c)):
                    for v_r in self.velocities:
                        for v_c in self.velocities:
                            states.append((r, c, v_r, v_c))
        
        while max_delta > self.theta:
            max_delta = 0
            delta = 0
            
            for state in states:
                r, c, v_r, v_c = state
                
                # Skip if this is a finish state
                if self.track.is_finish((r, c)):
                    continue
                
                # Store old value
                old_value = self.V[state]
                
                # Find best action value
                best_value = float('inf')
                best_action = None
                
                for action in self.actions:
                    action_value = 0
                    outcomes = self.get_next_state(state, action)
                    
                    for next_state, prob, cost in outcomes:
                        # Bellman equation
                        action_value += prob * (cost + self.gamma * self.V[next_state])
                    
                    # Update Q-value
                    self.Q[state][action] = action_value
                    
                    if action_value < best_value:
                        best_value = action_value
                        best_action = action
                
                # Update value function
                self.V[state] = best_value
                self.policy[state] = best_action
                
                # Update delta
                delta = abs(old_value - self.V[state])
                if delta > max_delta:
                    max_delta = delta
            
            iteration += 1
            if iteration % 10 == 0:
                print(f"Iteration {iteration}, Max Delta: {max_delta:.6f}")
        
        print(f"Value Iteration converged after {iteration} iterations")
        
        return self.policy
    
    def extract_policy(self):
        """Extract optimal policy from value function"""
        for state in self.V.keys():
            best_action = None
            best_value = float('inf')
            
            for action in self.actions:
                if self.Q[state][action] < best_value:
                    best_value = self.Q[state][action]
                    best_action = action
            
            self.policy[state] = best_action
        
        return self.policy
    
    def run_simulation(self, policy=None, max_steps=1000):
        """
        Run a simulation using the learned policy
        Returns: (path, steps, success)
        """
        if policy is None:
            policy = self.policy
        
        # Start from a random start position with 0 velocity
        start_pos = random.choice(self.track.start_positions)
        state = (start_pos[0], start_pos[1], 0, 0)
        
        path = [state[:2]]  # Store only positions
        steps = 0
        success = False
        
        while steps < max_steps:
            r, c, v_r, v_c = state
            
            # Check if we reached finish
            if self.track.is_finish((r, c)):
                success = True
                break
            
            # Get action from policy
            if state in policy:
                action = policy[state]
            else:
                # If state not in policy, choose random action
                action = random.choice(self.actions)
            
            # Simulate with stochastic outcomes
            if random.random() < 0.8:
                # Acceleration succeeds
                a_r, a_c = action
                new_v_r = max(-5, min(5, v_r + a_r))
                new_v_c = max(-5, min(5, v_c + a_c))
            else:
                # Acceleration fails
                new_v_r = v_r
                new_v_c = v_c
            
            new_r = r + new_v_r
            new_c = c + new_v_c
            
            # Check for crash
            crashed, crash_pos = self.track.check_crash((r, c), (new_r, new_c))
            
            if crashed:
                if self.track.crash_scenario == "NRST":
                    nearest = self.track.get_nearest_track_position(crash_pos)
                    state = (nearest[0], nearest[1], 0, 0)
                else:  # "STRT"
                    start_pos = random.choice(self.track.start_positions)
                    state = (start_pos[0], start_pos[1], 0, 0)
            else:
                state = (new_r, new_c, new_v_r, new_v_c)
            
            path.append(state[:2])
            steps += 1
        
        return path, steps, success
    
    def plot_path(self, path, group_id, algorithm, track_name, crash_pos):
        """
        Plot the path taken by the agent
        """
        fig, ax = plt.subplots(figsize=(12, 10))
        
        # Plot track
        track_grid = np.zeros((self.track.rows, self.track.cols))
        for r in range(self.track.rows):
            for c in range(self.track.cols):
                if self.track.track[r][c] == '#':
                    track_grid[r, c] = 1  # Wall
                elif self.track.track[r][c] == 'S':
                    track_grid[r, c] = 2  # Start
                elif self.track.track[r][c] == 'F':
                    track_grid[r, c] = 3  # Finish
                else:
                    track_grid[r, c] = 0  # Track
        
        # Create custom colormap
        from matplotlib.colors import ListedColormap
        cmap = ListedColormap(['white', 'black', 'green', 'red'])
        
        ax.imshow(track_grid, cmap=cmap, origin='upper')
        
        # Plot path
        if path:
            path_y, path_x = zip(*path)
            ax.plot(path_x, path_y, 'b-', linewidth=2, alpha=0.7)
            ax.plot(path_x, path_y, 'bo', markersize=4, alpha=0.5)
            
            # Mark start and end
            ax.plot(path_x[0], path_y[0], 'go', markersize=10, label='Start')
            if len(path) > 1:
                ax.plot(path_x[-1], path_y[-1], 'ro', markersize=10, label='End')
        
        ax.set_title(f'{algorithm} on {track_name} ({crash_pos} crash scenario)')
        ax.set_xlabel('Column')
        ax.set_ylabel('Row')
        ax.legend()
        ax.grid(True, alpha=0.3)
        
        # Save figure
        filename = f"{group_id}_{algorithm}_{track_name.replace('.txt', '')}_{crash_pos}.png"
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        
        print(f"Saved plot to {filename}")


# Base class for all algorithms
class BaseRacingAlgorithm:
    def __init__(self, racetrack, **kwargs):
        self.track = racetrack
        self.params = kwargs
        
    def train(self):
        """Train the algorithm - to be implemented by subclasses"""
        raise NotImplementedError
        
    def run_simulation(self, max_steps=1000):
        """Run a simulation - to be implemented by subclasses"""
        raise NotImplementedError
        
    def plot_path(self, path, group_id, algorithm, track_name, crash_pos):
        """Plot the path - common for all algorithms"""
        fig, ax = plt.subplots(figsize=(12, 10))
        
        # Plot track
        track_grid = np.zeros((self.track.rows, self.track.cols))
        for r in range(self.track.rows):
            for c in range(self.track.cols):
                if self.track.track[r][c] == '#':
                    track_grid[r, c] = 1  # Wall
                elif self.track.track[r][c] == 'S':
                    track_grid[r, c] = 2  # Start
                elif self.track.track[r][c] == 'F':
                    track_grid[r, c] = 3  # Finish
                else:
                    track_grid[r, c] = 0  # Track
        
        # Create custom colormap
        from matplotlib.colors import ListedColormap
        cmap = ListedColormap(['white', 'black', 'green', 'red'])
        
        ax.imshow(track_grid, cmap=cmap, origin='upper')
        
        # Plot path
        if path:
            path_y, path_x = zip(*path)
            ax.plot(path_x, path_y, 'b-', linewidth=2, alpha=0.7)
            ax.plot(path_x, path_y, 'bo', markersize=4, alpha=0.5)
            
            # Mark start and end
            ax.plot(path_x[0], path_y[0], 'go', markersize=10, label='Start')
            if len(path) > 1:
                ax.plot(path_x[-1], path_y[-1], 'ro', markersize=10, label='End')
        
        ax.set_title(f'{algorithm} on {track_name} ({crash_pos} crash scenario)')
        ax.set_xlabel('Column')
        ax.set_ylabel('Row')
        ax.legend()
        ax.grid(True, alpha=0.3)
        
        # Save figure
        filename = f"{group_id}_{algorithm}_{track_name.replace('.txt', '')}_{crash_pos}.png"
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        
        print(f"Saved plot to {filename}")


def run_value_iteration(group_id, track_name, crash_pos):
    """Run Value Iteration algorithm"""
    print(f"\n{'='*60}")
    print(f"Running Value Iteration Algorithm")
    print(f"Group: {group_id}")
    print(f"Track: {track_name}")
    print(f"Crash Scenario: {crash_pos}")
    print(f"{'='*60}")
    
    # Load racetrack
    try:
        racetrack = Racetrack(track_name, crash_scenario=crash_pos)
        print(f"Track loaded: {racetrack.rows} x {racetrack.cols}")
        print(f"Start positions: {len(racetrack.start_positions)}")
        print(f"Finish positions: {len(racetrack.finish_positions)}")
    except FileNotFoundError:
        print(f"Error: Track file '{track_name}' not found.")
        return None
    
    # Adjust parameters based on crash scenario
    if crash_pos == "STRT":
        gamma = 0.8
        theta = 1e-3
    else:
        gamma = 0.9
        theta = 1e-3
    
    # Create and train Value Iteration racer
    vi_racer = ValueIterationRacer(racetrack, gamma=gamma, theta=theta)
    policy = vi_racer.train()
    
    # Run multiple simulations
    num_simulations = 10
    successful_runs = 0
    total_steps = 0
    best_path = None
    best_steps = float('inf')
    
    print(f"\nRunning {num_simulations} simulations...")
    for i in range(num_simulations):
        path, steps, success = vi_racer.run_simulation(policy, max_steps=1000)
        
        if success:
            successful_runs += 1
            total_steps += steps
            
            if steps < best_steps:
                best_steps = steps
                best_path = path
            
            print(f"  Simulation {i+1}: Success in {steps} steps")
        else:
            print(f"  Simulation {i+1}: Failed to reach finish")
    
    # Print statistics
    print(f"\nSimulation Results:")
    print(f"  Successful runs: {successful_runs}/{num_simulations}")
    if successful_runs > 0:
        avg_steps = total_steps / successful_runs
        print(f"  Average steps: {avg_steps:.1f}")
        print(f"  Best run: {best_steps} steps")
        
        # Plot best path
        track_base = track_name.split('/')[-1].replace('.txt', '')
        vi_racer.plot_path(best_path, group_id, "ValItr", track_base, crash_pos)
    else:
        print("  No successful runs to plot.")
    
    return vi_racer




In [24]:
import os

class QLearningRacer(BaseRacingAlgorithm):
    def __init__(
        self,
        racetrack,
        learning_rate=0.1,
        discount_factor=0.95,
        exploration_rate=0.1,
        number_episodes=4000,       #may run slowly but is more reliable on hard tracks
        max_steps=1000,
    ):
        super().__init__(racetrack)     # store racetrack object
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.number_episodes = number_episodes
        self.max_steps = max_steps

        self.actions = []       # all possible accelerations
        for acc_row in [-1, 0, 1]:
            for acc_col in [-1, 0, 1]:
                self.actions.append((acc_row, acc_col))
        self.q_table = {}

    def get_q_val(self, state, action):     # return Q(s,a) default to 0.0
        return self.q_table.get((state, action), 0.0)

    def set_q_val(self, state, action, val):
        self.q_table[(state, action)] = val

    def choose_greedy_action(self, state):      # choose action with highest Q-value
        best_val = float("-inf")
        best_actions = []

        for action in self.actions:
            q_val = self.get_q_val(state, action)

            if q_val > best_val:
                best_val = q_val
                best_actions = [action]      # create best action list
            elif q_val == best_val:
                best_actions.append(action)   # tie: add to list

        if not best_actions: # fallback to random action
            return random.choice(self.actions)

        return random.choice(best_actions)      # randomly choose between equally good actions


    def choose_action(self, state, use_exploration=True):
        if use_exploration and random.random() < self.exploration_rate:
            return random.choice(self.actions)      # explore random action
        else:
            return self.choose_greedy_action(state) # choose best action

    def step_env(self, state, action):      # apply one step at a time
        pos_row, pos_col, vel_row, vel_col = state
        acc_row, acc_col = action

        if random.random() < 0.8:   #80% chance of applying acceleration
            new_vel_row = min(5, max(-5, vel_row + acc_row))
            new_vel_col = min(5, max(-5, vel_col + acc_col))
        else:
            new_vel_row = vel_row
            new_vel_col = vel_col

        new_pos_row = pos_row + new_vel_row
        new_pos_col = pos_col + new_vel_col

        #   crash detection
        crashed, crash_pos = self.track.check_crash((pos_row, pos_col), (new_pos_row, new_pos_col))

        if crashed:
            if self.track.crash_scenario == "NRST":
                near_row, near_col = self.track.get_nearest_track_position(crash_pos)
                next_state = (near_row, near_col, 0, 0)

            else:       #   "STRT
                start_row, start_col = random.choice(self.track.start_positions)
                next_state = (start_row, start_col, 0, 0)
        else:
            next_state = (new_pos_row, new_pos_col, new_vel_row, new_vel_col)

        # check if state is on the finish line
        finished = self.track.is_finish((next_state[0], next_state[1]))
        reward = 0.0 if finished else -1.0

        if crashed and not finished:    # extra penalty for crashing
            reward -= 10.0

        done = finished

        return next_state, reward, done

    def train(self):            #Q-learning update
        print("Starting Q-learning training")
        for episode in range(self.number_episodes):
            start_row, start_col = random.choice(self.track.start_positions)
            state = (start_row, start_col, 0, 0)

            for step in range(self.max_steps):      # iterate through environment
                action = self.choose_action(state)
                next_state, reward, done = self.step_env(state, action)

                q_val = self.get_q_val(state, action)

                if done:
                    target = reward
                else:
                    next_values = [self.get_q_val(next_state, a) for a in self.actions]
                    best_next = max(next_values)
                    target = reward + self.discount_factor * best_next

                # update Q value
                new_q_val = q_val + self.learning_rate * (target - q_val)
                self.set_q_val(state, action, new_q_val)

                state = next_state

                if done:
                    break

            #progess checking of rewards
            if (episode+1) % 100 == 0:
                print(f"Episode {episode+1}: {reward:.2f}")

        print("Q-learning training complete")

    def run_simulation(self, max_steps=1000):
        start_row, start_col = random.choice(self.track.start_positions)
        state = (start_row, start_col, 0, 0)
        path = [state[:2]]
        step_count = 0
        success = False

        while step_count < max_steps:
            pos_row, pos_col, vel_row, vel_col = state

            if self.track.is_finish((pos_row, pos_col)):
                success = True
                break

            action = self.choose_greedy_action(state)
            next_state, reward, done = self.step_env(state, action)
            path.append(next_state[:2])
            state = next_state
            step_count += 1

            if done:
                success = True
                break
        return path, step_count, success

def run_q_learning(group_id, track_name, crash_pos):
    """Run Q-Learning algorithm"""
    print(f"\n{'='*60}")
    print("Running Q-Learning Algorithm")
    print(f"Group: {group_id}")
    print(f"Track: {track_name}")
    print(f"Crash Scenario: {crash_pos}")
    print(f"{'='*60}")

    # Load racetrack
    try:
        racetrack = Racetrack(track_name, crash_scenario=crash_pos)
        print(f"Track loaded: {racetrack.rows} x {racetrack.cols}")
        print(f"Start positions: {len(racetrack.start_positions)}")
        print(f"Finish positions: {len(racetrack.finish_positions)}")
    except FileNotFoundError:
        print(f"Error: Track file '{track_name}' not found.")
        return None

    # Create and train Q-Learning racer
    q_racer = QLearningRacer(racetrack)
    q_racer.train()

    # Evaluate learned policy
    num_simulations = 10
    successful_runs = 0
    total_steps = 0
    best_path = None
    best_steps = float('inf')

    print(f"\nRunning {num_simulations} simulations with learned policy...")
    for i in range(num_simulations):
        path, steps, success = q_racer.run_simulation(max_steps=1500)

        if success:
            successful_runs += 1
            total_steps += steps
            if steps < best_steps:
                best_steps = steps
                best_path = path
            print(f"  Simulation {i+1}: Success in {steps} steps")
        else:
            print(f"  Simulation {i+1}: Failed to reach finish")

    print("\nSimulation Results (Q-Learning):")
    print(f"  Successful runs: {successful_runs}/{num_simulations}")
    if successful_runs > 0:
        avg_steps = total_steps / successful_runs
        print(f"  Average steps: {avg_steps:.1f}")
        print(f"  Best run: {best_steps} steps")

        # Plot best path
        track_base = os.path.splitext(os.path.basename(track_name))[0]
        q_racer.plot_path(best_path, group_id, "QLrng", track_base, crash_pos)
    else:
        print("  No successful runs to plot.")

    return q_racer


In [None]:
#SARSA

class SARSARacer(BaseRacingAlgorithm):
    def __init__(
        self,
        racetrack,
        learning_rate=0.1,
        discount_factor=0.95,
        exploration_rate=0.1,
        number_episodes=4000,
        max_steps=1000,
    ):
        super().__init__(racetrack)
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.number_episodes = number_episodes
        self.max_steps = max_steps

        self.actions = []
        for acc_row in [-1, 0, 1]:
            for acc_col in [-1, 0, 1]:
                self.actions.append((acc_row, acc_col))
        self.q_table = {}

    def get_q_val(self, state, action):
        return self.q_table.get((state, action), 0.0)

    def set_q_val(self, state, action, val):
        self.q_table[(state, action)] = val

    def choose_greedy_action(self, state):
        best_val = float("-inf")
        best_actions = []

        for action in self.actions:
            q_val = self.get_q_val(state, action)

            if q_val > best_val:
                best_val = q_val
                best_actions = [action]
            elif q_val == best_val:
                best_actions.append(action)

        if not best_actions:
            return random.choice(self.actions)

        return random.choice(best_actions)

    def choose_action(self, state, use_exploration=True):
        if use_exploration and random.random() < self.exploration_rate:
            return random.choice(self.actions)
        else:
            return self.choose_greedy_action(state)

    def step_env(self, state, action):
        pos_row, pos_col, vel_row, vel_col = state
        acc_row, acc_col = action

        if random.random() < 0.8:
            new_vel_row = min(5, max(-5, vel_row + acc_row))
            new_vel_col = min(5, max(-5, vel_col + acc_col))
        else:
            new_vel_row = vel_row
            new_vel_col = vel_col

        new_pos_row = pos_row + new_vel_row
        new_pos_col = pos_col + new_vel_col

        crashed, crash_pos = self.track.check_crash((pos_row, pos_col), (new_pos_row, new_pos_col))

        if crashed:
            if self.track.crash_scenario == "NRST":
                near_row, near_col = self.track.get_nearest_track_position(crash_pos)
                next_state = (near_row, near_col, 0, 0)
            else:
                start_row, start_col = random.choice(self.track.start_positions)
                next_state = (start_row, start_col, 0, 0)
        else:
            next_state = (new_pos_row, new_pos_col, new_vel_row, new_vel_col)

        finished = self.track.is_finish((next_state[0], next_state[1]))
        reward = 0.0 if finished else -1.0

        if crashed and not finished:
            reward -= 10.0

        done = finished

        return next_state, reward, done

    def train(self):
        print("Starting SARSA training")
        for episode in range(self.number_episodes):
            start_row, start_col = random.choice(self.track.start_positions)
            state = (start_row, start_col, 0, 0)
            
            action = self.choose_action(state)

            for step in range(self.max_steps):
                next_state, reward, done = self.step_env(state, action)
                
                next_action = self.choose_action(next_state)

                q_val = self.get_q_val(state, action)

                if done:
                    target = reward
                else:
                    next_q = self.get_q_val(next_state, next_action)
                    target = reward + self.discount_factor * next_q

                new_q_val = q_val + self.learning_rate * (target - q_val)
                self.set_q_val(state, action, new_q_val)

                state = next_state
                action = next_action

                if done:
                    break

            if (episode + 1) % 100 == 0:
                print(f"Episode {episode + 1}: {reward:.2f}")

        print("SARSA training complete")

    def run_simulation(self, max_steps=1000):
        start_row, start_col = random.choice(self.track.start_positions)
        state = (start_row, start_col, 0, 0)
        path = [state[:2]]
        step_count = 0
        success = False

        while step_count < max_steps:
            pos_row, pos_col, vel_row, vel_col = state

            if self.track.is_finish((pos_row, pos_col)):
                success = True
                break

            action = self.choose_greedy_action(state)
            next_state, reward, done = self.step_env(state, action)
            path.append(next_state[:2])
            state = next_state
            step_count += 1

            if done:
                success = True
                break
        
        return path, step_count, success


def run_sarsa(group_id, track_name, crash_pos):
    """Run SARSA algorithm"""
    print(f"\n{'='*60}")
    print("Running SARSA Algorithm")
    print(f"Group: {group_id}")
    print(f"Track: {track_name}")
    print(f"Crash Scenario: {crash_pos}")
    print(f"{'='*60}")

    # Load racetrack
    try:
        racetrack = Racetrack(track_name, crash_scenario=crash_pos)
        print(f"Track loaded: {racetrack.rows} x {racetrack.cols}")
        print(f"Start positions: {len(racetrack.start_positions)}")
        print(f"Finish positions: {len(racetrack.finish_positions)}")
    except FileNotFoundError:
        print(f"Error: Track file '{track_name}' not found.")
        return None

    sarsa_racer = SARSARacer(racetrack)
    sarsa_racer.train()

    num_simulations = 10
    successful_runs = 0
    total_steps = 0
    best_path = None
    best_steps = float('inf')

    print(f"\nRunning {num_simulations} simulations with learned policy...")
    for i in range(num_simulations):
        path, steps, success = sarsa_racer.run_simulation(max_steps=1500)

        if success:
            successful_runs += 1
            total_steps += steps
            if steps < best_steps:
                best_steps = steps
                best_path = path
            print(f"  Simulation {i+1}: Success in {steps} steps")
        else:
            print(f"  Simulation {i+1}: Failed to reach finish")

    print("\nSimulation Results (SARSA):")
    print(f"  Successful runs: {successful_runs}/{num_simulations}")
    if successful_runs > 0:
        avg_steps = total_steps / successful_runs
        print(f"  Average steps: {avg_steps:.1f}")
        print(f"  Best run: {best_steps} steps")

        track_base = os.path.splitext(os.path.basename(track_name))[0]
        sarsa_racer.plot_path(best_path, group_id, "SARSA", track_base, crash_pos)
    else:
        print("  No successful runs to plot.")

    return sarsa_racer