In [None]:
# Try 1 on Simple Parking Problem with 3 Parking lots and just choosing which one to go to
import numpy as np
import random


# Define parameters
parking_lots = ["Roth Way Garage", "Museum Way Parking", "Hoover Parking Garage", "Stock Farm Garage", "Roble Field"]
probabilities = [0.05, 0.1, 0.55, 0.25, 0.05]  # Probabilities
costs = [12, 13, 25, 17, 7]
rewards = 75  # Reward for successfully parking
episodes = 10000
alpha = 0.1  # Learning rate
gamma = 0.15  # Discount factor
epsilon = 0.1  # Exploration rate

# Initialize Q-table
states = [tuple()]  # Start with empty state (no lots visited)
actions = parking_lots
Q = {state: {action: 0 for action in actions if action not in state} for state in states}

# Helper functions
def is_parking_available(lot, probabilities):
    """Simulate parking availability based on probability."""
    return np.random.rand() < probabilities[parking_lots.index(lot)]

def get_next_state(state, action):
    """Generate the next state by adding the current action to the visited list."""
    return tuple(sorted(list(state) + [action]))

def expand_state_space(state, action):
    """Dynamically expand the state space by adding new states."""
    next_state = get_next_state(state, action)
    if next_state not in Q:
        Q[next_state] = {a: 0 for a in actions if a not in next_state}
    return next_state

# Q-learning with fix for terminal states
for episode in range(episodes):
    state = tuple()  # Start from no lots visited
    done = False
    while not done:
        # Choose action (epsilon-greedy)
        valid_actions = [a for a in actions if a not in state]
        if not valid_actions:
            break  # Terminal state, no actions left
        if random.uniform(0, 1) < epsilon:
            action = random.choice(valid_actions)
        else:
            action = max(Q[state], key=Q[state].get)

        # Check if parking is available
        if is_parking_available(action, probabilities):
            reward = rewards - costs[parking_lots.index(action)]
            next_state = get_next_state(state, action)
            Q[state][action] += alpha * (reward - Q[state][action])
            done = True
        else:
            reward = -costs[parking_lots.index(action)]
            next_state = expand_state_space(state, action)
            Q[state][action] += alpha * (
                reward + gamma * max(Q[next_state].values(), default=0) - Q[state][action]
            )
            state = next_state

# Derive policy from Q-table
policy = {}
for state in Q:
    if Q[state]:
        policy[state] = max(Q[state], key=Q[state].get)

# Display the learned policy
print("Learned Policy:")
for state, action in policy.items():
    print(f"State {state} -> Action {action}")


Learned Policy:
State () -> Action Hoover Parking Garage
State ('Roth Way Garage',) -> Action Stock Farm Garage
State ('Museum Way Parking', 'Roth Way Garage') -> Action Hoover Parking Garage
State ('Museum Way Parking',) -> Action Hoover Parking Garage
State ('Hoover Parking Garage', 'Museum Way Parking', 'Roth Way Garage') -> Action Stock Farm Garage
State ('Hoover Parking Garage', 'Museum Way Parking', 'Roth Way Garage', 'Stock Farm Garage') -> Action Roble Field
State ('Hoover Parking Garage',) -> Action Roble Field
State ('Hoover Parking Garage', 'Roth Way Garage') -> Action Roble Field
State ('Hoover Parking Garage', 'Museum Way Parking', 'Roble Field', 'Roth Way Garage') -> Action Stock Farm Garage
State ('Stock Farm Garage',) -> Action Hoover Parking Garage
State ('Museum Way Parking', 'Stock Farm Garage') -> Action Roble Field
State ('Museum Way Parking', 'Roth Way Garage', 'Stock Farm Garage') -> Action Roble Field
State ('Roble Field',) -> Action Museum Way Parking
State ('R

In [None]:
!pip install osrm geopy polyline==1.4.0 matplotlib

Collecting osrm
  Downloading osrm-0.11.3-py3-none-any.whl.metadata (414 bytes)
Collecting polyline==1.4.0
  Downloading polyline-1.4.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading polyline-1.4.0-py2.py3-none-any.whl (4.4 kB)
Downloading osrm-0.11.3-py3-none-any.whl (10 kB)
Installing collected packages: polyline, osrm
Successfully installed osrm-0.11.3 polyline-1.4.0


In [1]:
import numpy as np
import random
import requests
import geopy
import pprint as pp
from typing import List, Tuple
from scipy.stats import beta

In [2]:
API_URL = "http://router.project-osrm.org/route/v1/{profile}/{lon1},{lat1};{lon2},{lat2}"
GEOLOCATOR = geopy.geocoders.Nominatim(user_agent="My Geocoder")

class Location:
    def __init__(self, name: str, lat: float = None, lon: float = None):
        self.name = name
        self.lat = lat
        self.lon = lon

    def __repr__(self):
        return str(self.__dict__)

    @classmethod
    def from_name(cls, name: str):
        coords = GEOLOCATOR.geocode(name, exactly_one=True)
        if coords:
            return Location(name=name, lat=coords.latitude, lon=coords.longitude)
        else:
            return None

    @staticmethod
    def get_distance(profile: str, lat1: float, lon1: float, lat2: float, lon2: float):
        """
        Get the distance between two points using the OSRM API.
        - profile: Either "walking" or "driving"
        - lat1, lon1: Coordinates of the first location
        - lat2, lon2: Coordinates of the second location
        Returns the distance in meters.
        """
        url = API_URL.format(profile=profile, lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2)
        params = {"overview": "false"}
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            distance = data["routes"][0]["distance"]  # distance in meters
            return distance
        else:
            raise Exception(f"Error: {response.status_code} - {response.text}")

    @staticmethod
    def get_duration_by_distance(distance: float, speed: float):
        """
        Calculate duration by dividing distance by speed.
        - distance: in meters
        - speed: in meters per second
        Returns the duration in minutes.
        """
        duration_seconds = distance / speed
        return duration_seconds / 60.0  # convert to minutes

    def get_walking_duration_to(self, other, speed_mph: float = 3):
        # Get the distance in meters for walking
        distance = self.get_distance("walking", self.lat, self.lon, other.lat, other.lon)
        # Convert MPH to M/S
        speed_mps = speed_mph * 0.44704
        return self.get_duration_by_distance(distance, speed_mps)

    def get_driving_duration_to(self, other, speed_mph: float = 15):
        # Get the distance in meters for driving
        distance = self.get_distance("driving", self.lat, self.lon, other.lat, other.lon)
        # Convert MPH to M/S
        speed_mps = speed_mph * 0.44704
        return self.get_duration_by_distance(distance, speed_mps)


In [3]:
class ParkingStructure:
    def __init__(self, index: int, location: Location, is_full: bool = False):
        self.index = index
        self.location = location  # Location object representing the parking structure

    def __repr__(self):
        return str(self.__dict__)

In [11]:
class QLearningParking:
    def __init__(self,
                 parking_structures: List[ParkingStructure],
                 start: Location,
                 end: Location,
                 prior: List[List[int]] = None,
                 alpha: float = 0.1,
                 gamma: float = 0.9,
                 epsilon: float= 0.1,
                 precomputed_duration: List[List[float]] = None):
        self.parking_structures = parking_structures
        self.start = start
        self.end = end
        self.alpha = alpha # Learning rate
        self.gamma = gamma # Discount factor
        self.epsilon = epsilon # Exploration rate

        # Initialize prior belief about the probability of finding parking at each structure using the Beta distribution.
        # prior_alpha represents success counts, prior_beta represents fail counts
        if prior:
            self.prior = prior
        else:
            self.prior = [[1.0, 1.0]] * len(parking_structures) # Uniform prior

        if precomputed_duration:
            self.duration_matrix = precomputed_duration
        else:
            self.duration_matrix = self.precompute_duration()

        self.start_duration = [self.start.get_driving_duration_to(parking.location) for parking in self.parking_structures]
        self.end_duration = [parking.location.get_walking_duration_to(self.end) for parking in self.parking_structures]

        self.table = dict()


    @classmethod
    def success_prob_to_prior(cls, prob: float):
        return [int(round(prob * 100.0)), int(round((1 - prob) * 100.0))]

    def precompute_duration(self):
        """
        Precompute the duration of time (driving) between all pairs of parking structures
        and store them in a matrix.
        """
        num_structures = len(self.parking_structures)
        duration_matrix = np.zeros((num_structures, num_structures))
        print("Precomputing duration of time between all pairs of parking structures.")
        for i in range(num_structures):
            for j in range(i + 1, num_structures):
                # Calculate driving durations between parking structures i and j

                duration = self.parking_structures[i].location.get_driving_duration_to(self.parking_structures[j].location)
                print(self.parking_structures[i].location.name, "->", self.parking_structures[j].location.name, ":", duration, "mins")
                duration_matrix[i][j] = duration
                duration_matrix[j][i] = duration # Mirror the values for j->i
        return duration_matrix

    def choose_action(self, state: Tuple[int], valid_actions: List[int]):
        """
        Using Epsilon-greedy, choose an action (parking structure).
        - valid_actions: List of indices of parking structures that are not full.
        - state: Current state as a list.
        """

        # Ensure the current state is in the Q-table
        if state not in self.table:
            self.table[state] = np.zeros(len(self.parking_structures))

        if random.uniform(0, 1) < self.epsilon:
            # Choose a random valid action
            action = random.choice(valid_actions)
        else:
            # Choose the action with the highest Q-value among valid actions
            mask = np.ones_like(self.table[state], dtype=bool)
            for visited in state:
                mask[visited] = False

            # mask off indices of parking structures we already visited
            masked_q_values = np.where(mask, self.table[state], -np.inf)
            action = np.argmax(masked_q_values)

        return action

    def get_reward(self, current: ParkingStructure, next: ParkingStructure, found_parking: bool):
        """
        Determine reward
        """
        reward = 0.0
        if current == self.start:
            # driving duration from start to first parking tried
            reward -= self.start_duration[next.index]
        else:
          if found_parking:
              # success, found parking
              self.prior[current.index][0] += 1
              # walking duration to final destination from successful parking
              reward -= self.end_duration[current.index]
          else:
              # failure, did not find parking
              self.prior[current.index][1] += 1

              # have to spend time driving to the next parking structure
              reward -= self.duration_matrix[current.index][next.index]

        return reward

    def update(self, state: List[int], action: int, reward: float, next_state: List[int]):
        """
        Update Q-value table
        - state: Current state as a list (binary vector indicating visited parking structures)
        - action: The index of the action (parking structure) taken
        - reward: The reward received
        - next_state: Next state as a list
        """

        # Ensure the current and next states are in the Q-table
        if state not in self.table:
            self.table[state] = np.zeros(len(self.parking_structures))

        if next_state not in self.table:
            self.table[next_state] = np.zeros(len(self.parking_structures))

        # Get the current Q-value
        current_q = self.table[state][action]

        # Calculate the maximum Q-value for the next state
        max_future_q = max(self.table[next_state])

        # Update Q-value using the Bellman equation
        self.table[state][action] = current_q + self.alpha * (reward + self.gamma * max_future_q - current_q)

    def train(self, episodes: int = 1000):
        """
        Train the model
        """
        for episode in range(episodes):
            state = tuple()
            current = self.start
            while True:
                # Determine valid actions based on state
                valid_actions = list(set(range(len(self.parking_structures))) - set(state))
                if not valid_actions:
                    # No valid actions left, didn't find parking, apply large negative reward.
                    reward = -9999
                    self.update(state, action, reward, state)
                    break

                action = self.choose_action(state, valid_actions)

                next_state = state + tuple((action,))
                next = self.parking_structures[action]

                success_prob = beta.mean(*self.prior[action])
                found_parking = random.random() < success_prob

                reward = self.get_reward(current, next, found_parking)
                self.update(state, action, reward, next_state)

                state = next_state
                current = next

In [21]:
# Prior for likelihood of finding parking at each parking structure
parking_probability_map = {
    "Roble Field Garage" : 0.30,
    "Via Ortega Garage": 0.05,
#    "Stock Farm Garage": 0.10,
#    "Tressider Student Union": 0.10,
    "Wilbur Field Garage": 0.01,
}


parking_structures = []
prior = []
for i, (name, prob) in enumerate(parking_probability_map.items()):
    prior.append(QLearningParking.success_prob_to_prior(prob))
    location = Location.from_name(name)
    parking_structures.append(ParkingStructure(i, location))

start_point_name = "Galvez Street & El Camino Real"
start_point = Location.from_name(start_point_name)
end_point_name = "NVIDIA Auditorium"
end_point = Location.from_name(end_point_name)
print(start_point)
print(end_point)
print(prior)

{'name': 'Galvez Street & El Camino Real', 'lat': 37.4369847, 'lon': -122.1611878}
{'name': 'NVIDIA Auditorium', 'lat': 37.4281301, 'lon': -122.1742004}
[[30, 70], [5, 95], [1, 99]]


In [29]:
q_learn = QLearningParking(parking_structures=parking_structures,
                           start=start_point,
                           end=end_point,
                           prior=prior,
                           alpha=0.1,
                           gamma=0.15,
                           epsilon=0.1)

Precomputing duration of time between all pairs of parking structures.
Roble Field Garage -> Via Ortega Garage : 0.5589855245367057 mins
Roble Field Garage -> Wilbur Field Garage : 4.15374214586813 mins
Via Ortega Garage -> Wilbur Field Garage : 4.712976218881732 mins


In [30]:
pp.pprint(q_learn.duration_matrix)

array([[0.        , 0.55898552, 4.15374215],
       [0.55898552, 0.        , 4.71297622],
       [4.15374215, 4.71297622, 0.        ]])


In [31]:
q_learn.train(episodes=1000)

In [32]:
print(q_learn.start_duration)
print(q_learn.end_duration)

[7.0490833532172115, 6.51395848246242, 6.077755905511812]
[4.717450091465839, 3.3665891195418753, 25.43396564065855]


In [33]:
print(q_learn.table)

{(): array([-7.00871736, -6.48948488, -6.07775591]), (0,): array([ 0.        , -0.76164036, -1.17132447]), (0, 1): array([ 0.       ,  0.       , -4.5203862]), (0, 1, 2): array([    0.        ,     0.        , -9920.45618305]), (1,): array([-1.08056248,  0.        , -1.27721656]), (1, 0): array([ 0.       ,  0.       , -4.3393061]), (1, 0, 2): array([    0.       ,     0.       , -9947.4674017]), (2,): array([-5.47437967, -9.76006747,  0.        ]), (2, 0): array([ 0.        , -0.81419971,  0.        ]), (2, 0, 1): array([    0., -9999.,     0.]), (2, 1): array([-0.95315415,  0.        ,  0.        ]), (2, 1, 0): array([-9999.,     0.,     0.]), (1, 2): array([-3.25368647,  0.        ,  0.        ]), (1, 2, 0): array([-2709.729,     0.   ,     0.   ]), (0, 2): array([ 0.       , -3.1421056,  0.       ]), (0, 2, 1): array([    0.   , -2709.729,     0.   ])}


In [34]:

policy = {}
for state, q_values in q_learn.table.items():
    mask = np.ones_like(q_values, dtype=bool)
    for visited in state:
        mask[visited] = False

    # mask off indices of parking structures we already visited
    masked_q_values = np.where(mask, q_values, -np.inf)
    best_action = np.argmax(masked_q_values)
    if best_action in state:
        policy[state] = None
    else:
        policy[state] = np.argmax(masked_q_values)


In [35]:
print("Learned Policy:")
names = list(parking_probability_map.keys())
results = []
for state, action in policy.items():
    # Pair each number in the order with the corresponding name
    name_state = ' -> '.join([names[visited] for visited in state])
    optimal_action_name = names[action] if action else None
    print("Visited:", name_state)
    print("Optimal Action:", optimal_action_name)
    print()
    results.append((name_state, optimal_action_name))




Learned Policy:
Visited: 
Optimal Action: Wilbur Field Garage

Visited: Roble Field Garage
Optimal Action: Via Ortega Garage

Visited: Roble Field Garage -> Via Ortega Garage
Optimal Action: Wilbur Field Garage

Visited: Roble Field Garage -> Via Ortega Garage -> Wilbur Field Garage
Optimal Action: None

Visited: Via Ortega Garage
Optimal Action: None

Visited: Via Ortega Garage -> Roble Field Garage
Optimal Action: Wilbur Field Garage

Visited: Via Ortega Garage -> Roble Field Garage -> Wilbur Field Garage
Optimal Action: None

Visited: Wilbur Field Garage
Optimal Action: None

Visited: Wilbur Field Garage -> Roble Field Garage
Optimal Action: Via Ortega Garage

Visited: Wilbur Field Garage -> Roble Field Garage -> Via Ortega Garage
Optimal Action: None

Visited: Wilbur Field Garage -> Via Ortega Garage
Optimal Action: None

Visited: Wilbur Field Garage -> Via Ortega Garage -> Roble Field Garage
Optimal Action: None

Visited: Via Ortega Garage -> Wilbur Field Garage
Optimal Action: No