In [None]:
import gym
import numpy as np
import tools
import time

# FrozenLake 8x8 environment; the planning routines below read its transition
# model through env.env.P[state][action].
env = gym.make('FrozenLake8x8-v1')

gamma = 0.99       # discount factor handed to value_iteration / policy_iter
theta = 0.000001   # stop sweeping once the largest value change falls below this
def argmax(env, V, pi, s, gamma):
    """Store the greedy action for state ``s`` in ``pi`` and return ``pi``.

    Params:
        env   - environment whose ``env.env.P[s][a]`` is an iterable of
                ``(probability, next_state, reward, done)`` transitions
        V     - state values, indexable by state
        pi    - policy array, updated in place at index ``s``
        s     - state to improve
        gamma (float) - discount factor

    Returns:
        The same ``pi`` object, with ``pi[s]`` set to the index of the action
        with the largest one-step lookahead value (lowest index on ties).
    """
    transitions = env.env.P[s]
    # Size the lookahead from the model itself instead of assuming 4 actions.
    action_values = np.zeros(len(transitions))
    for a in range(len(transitions)):
        # q(s, a) = sum over s' of P(s'|s,a) * (r + gamma * V[s'])
        action_values[a] = sum(
            p * (r + gamma * V[s_]) for p, s_, r, _ in transitions[a]
        )

    pi[s] = np.argmax(action_values)
    return pi


def bellman_optimality_update(env, V, s, gamma):
    """Apply one Bellman optimality backup to ``V[s]`` in place.

    Params:
        env   - environment whose ``env.env.P[s][a]`` is an iterable of
                ``(probability, next_state, reward, done)`` transitions
        V     - state values; ``V[s]`` is overwritten
        s     - state to back up
        gamma (float) - discount factor

    Returns:
        The new ``V[s]``, i.e. the largest one-step lookahead value over all
        actions available in ``s``.
    """
    transitions = env.env.P[s]
    # V[s] <- max_a sum_{s'} P(s'|s,a) * (r + gamma * V[s']).
    # Taking the max directly replaces the earlier "find argmax, then recompute
    # the value of that action" double pass and its throw-away policy array.
    V[s] = max(
        sum(p * (r + gamma * V[s_]) for p, s_, r, _ in transitions[a])
        for a in range(len(transitions))
    )
    return V[s]



def value_iteration(env, gamma, theta):
    """Run value iteration and extract the greedy policy.

    Params:
        env   - environment exposing its transition model as ``env.env.P``
                (one entry per state)
        gamma (float) - discount factor
        theta (float) - stop once the largest per-sweep change of any state
                        value is below this threshold

    Returns:
        (V, pi): the converged state values and the greedy action per state.
        Also prints the number of sweeps and the wall-clock time taken.
    """
    start = time.time()
    # Take the state count from the model rather than assuming an 8x8 grid.
    n_states = len(env.env.P)
    sweeps = 0
    V = np.zeros(n_states)   # arbitrary initial values; zeros here

    while True:
        sweeps += 1
        delta = 0
        for s in range(n_states):
            previous = V[s]
            bellman_optimality_update(env, V, s, gamma)   # updates V[s] in place
            delta = max(delta, abs(previous - V[s]))
        if delta < theta:
            # Values barely moved during a full sweep: treat as converged.
            break

    # Integer dtype so the entries can be used directly as actions, matching
    # the policy array built in policy_iter.
    pi = np.zeros(n_states, dtype=int)
    for s in range(n_states):
        pi = argmax(env, V, pi, s, gamma)   # greedy action w.r.t. converged V

    end = time.time()
    print("iteration counts:",sweeps)
    print("time:",end-start,"s")
    return V, pi
# Naive implementation (for loops are slow), but matches the box
def policy_iter(env, gamma, theta):
    """Policy Iteration Algorithm

    Alternates (2) iterative evaluation of the current policy with
    (3) greedy improvement until no action changes.

    Params:
        env - environment exposing its transition model as ``env.env.P``,
              where ``env.env.P[s][a]`` is an iterable of
              ``(probability, next_state, reward, done)`` transitions
        gamma (float) - discount factor
        theta (float) - termination condition for the evaluation sweeps

    Returns:
        (V, pi): state values of the final policy and the action per state.
        Also prints the sweep count of every evaluation phase, the number of
        improvement rounds and the wall-clock time taken.
    """
    model = env.env.P
    n_states = len(model)

    # 1. Initialization
    i = 0
    V = np.zeros(n_states)
    pi = np.zeros(n_states, dtype=int)  # start with action 0 everywhere
    start = time.time()
    while True:
        i = i + 1
        j = 0

        # 2. Policy Evaluation
        # Back up each state with the action the *current policy* prescribes.
        # (A max-over-actions backup here would turn the whole routine into
        # value iteration and make the improvement step redundant.)
        while True:
            j = j + 1
            delta = 0
            for s in range(n_states):
                v = V[s]
                V[s] = sum_sr(env, V=V, s=s, a=pi[s], gamma=gamma)
                delta = max(delta, abs(v - V[s]))
            if delta < theta:
                print("value iter count:",j)
                break

        # 3. Policy Improvement
        policy_stable = True
        for s in range(n_states):
            old_action = pi[s]
            pi[s] = np.argmax([sum_sr(env, V=V, s=s, a=a, gamma=gamma)
                               for a in range(len(model[s]))])
            if old_action != pi[s]:
                policy_stable = False
        if policy_stable:
            break

    end = time.time()
    print("iteration counts:",i)
    print("time:",end-start,"s")
    return V, pi
def sum_sr(env, V, s, a, gamma):
    """Return the one-step lookahead value of taking action ``a`` in state ``s``.

    Each entry of ``env.env.P[s][a]`` is unpacked as
    ``(probability, next_state, reward, _)`` and contributes
    ``probability * (reward + gamma * V[next_state])`` to the result.
    """
    outcomes = env.env.P[s][a]
    return sum(
        prob * (reward + gamma * V[nxt])
        for prob, nxt, reward, _ in outcomes
    )
# Experiment driver: compare a random policy, the value-iteration policy and
# the policy-iteration policy by counting how many of 100 episodes end with
# reward 1.
# NOTE(review): render() is called before any reset(); confirm the installed
# gym version allows that.
env.render()

# Baseline: sample actions uniformly from the action space.
print("random policy:")
e=0;
for i_episode in range(100):
    observation = env.reset()
    for t in range(10000):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            # Only episodes whose final reward is 1 count as successes.
            if reward==1:
                e+=1
            break
print(e);




print("value iteration:")


V, pi= value_iteration(env, gamma, theta)

# Show values and the action chosen per state, laid out as the 8x8 grid.
print(np.reshape(V,(8,8)))
print(np.reshape(pi,(8,8)))

# Roll out the value-iteration policy; c is the current state index.
e=0
for i_episode in range(100):
    c = env.reset()
    for t in range(10000):
        c, reward, done, info = env.step(pi[c])
        if done:
            if reward == 1:
                e +=1
            break
print(e);




print("Policy iteration:")



V, pi = policy_iter(env, gamma, theta)


print(np.reshape(V,(8,8)))
print(np.reshape(pi,(8,8)))
# Roll out the policy returned by policy_iter with the same success count.
e=0
for i_episode in range(100):
    c = env.reset()
    for t in range(10000):
        c, reward, done, info = env.step(pi[c])
        if done:
            if reward == 1:
                e +=1
            break
print(e)
env.close()


In [None]:
import numpy as np
import gym

# CartPole environment; one reset() gives a sample observation to size things.
env = gym.make('CartPole-v0')
observation = env.reset()


# Number of scalar entries in an observation and number of discrete actions.
num_observation_dimensions = np.size(observation)
num_actions = env.action_space.n

# Per-dimension bounds reported by the environment.
observation_space_high = env.observation_space.high
observation_space_low = env.observation_space.low
# Bare expression: has no effect when run as a script (it only displays the
# value when it is the last expression of a notebook cell).
observation_space_high
num_bins_per_observation_dimension = 7 # Could try different number of bins for the different dimensions
# Size of the flattened discrete state space: bins ** dimensions.
num_states = pow(num_bins_per_observation_dimension,num_observation_dimensions)

def make_observation_bins(min, max, num_bins):
    """Build the sorted bin edges used to discretize one observation dimension.

    Args:
        min: lower bound of the dimension; values at or below -3.4e38 are
            treated as unbounded and replaced by -5.
        max: upper bound of the dimension; values at or above 3.4e38 are
            treated as unbounded and replaced by 5.
        num_bins: number of bins the edges are meant to delimit.

    Returns:
        A sorted array holding ``num_bins - 2`` evenly spaced edges starting
        at ``min`` (``max`` itself excluded) plus an extra edge at 0.

    Prints the effective ``min`` and ``max`` as a side effect.
    """
    if min <= -3.4e+38:
        min = -5  # Should really learn this const instead
    if max >= 3.4e+38:
        max = 5
    print("min:",min)
    print("max:",max)
    # linspace fixes the number of edges explicitly. With a fractional step,
    # arange can produce one edge more than intended because of rounding, and
    # the step computation divides by zero when num_bins == 2.
    bins = np.linspace(float(min), float(max), num_bins - 2, endpoint=False)
    bins = np.sort(np.append(bins, [0]))  # Ensure we split at 0

    return bins

# One sorted edge array per observation dimension, built from the bounds the
# environment reports for that dimension.
observation_dimension_bins = [
    make_observation_bins(
        observation_space_low[dim],
        observation_space_high[dim],
        num_bins_per_observation_dimension,
    )
    for dim in range(num_observation_dimensions)
]

print (observation_dimension_bins)
def observation_to_state(observation):
    """Map an observation to a single integer state index.

    Each dimension is binned with ``np.digitize`` against the module-level
    ``observation_dimension_bins`` and the bin numbers are combined as digits
    of a base-``num_bins_per_observation_dimension`` number (dimension 0 is
    the least significant digit).
    """
    base = num_bins_per_observation_dimension
    return sum(
        np.digitize(observation[dim], observation_dimension_bins[dim]) * pow(base, dim)
        for dim in range(num_observation_dimensions)
    )
  
# Sanity check: the state indices for extreme observations should stay
# below num_states.
print ("Sense Check: Min State: {} Max State: {} Num States: {}".format(observation_to_state([-5,-5,-5,-5.5]), observation_to_state([5,5,5,5.5]),
                                                          num_states))
                                                          
import time
# Learned-model tables, all indexed by discrete state:
#   state_values                    small random initial value estimates
#   state_rewards                   reward attached to each state (starts at 0)
#   state_transition_probabilities  [from_state, to_state, action], starts uniform
#   state_transition_counters       observed transition counts, same layout
state_values = np.random.rand(num_states) * 0.1
state_rewards = np.zeros((num_states))
state_transition_probabilities = np.ones((num_states, num_states, num_actions)) / num_states
state_transition_counters = np.zeros((num_states, num_states, num_actions))

def pick_best_action(current_state, state_values, state_transition_probabilities):
    """Return the action with the highest expected next-state value.

    Args:
        current_state: index of the state the agent is in.
        state_values: value estimate per state.
        state_transition_probabilities: array indexed as
            ``[from_state, to_state, action]``.

    Returns:
        The index of the best action. When an action exactly ties the best
        value seen so far, it replaces the current choice with probability 1/2.
    """
    best_action = -1
    best_action_value = -np.inf  # np.Inf was removed in NumPy 2.0
    # The action count comes from the model's last axis, not from a global.
    for a_i in range(state_transition_probabilities.shape[2]):
        # Expected value of the successor state under action a_i.
        action_value = state_transition_probabilities[current_state,:,a_i].dot(state_values)
        if (action_value > best_action_value):
            best_action_value = action_value
            best_action = a_i
        elif (action_value == best_action_value):
            # Random tie-break so equally valued actions all get tried.
            if np.random.randint(0,2) == 0:
                best_action = a_i

    return best_action


def update_state_transition_probabilities_from_counters(probabilities, counters):
    """Refresh transition probabilities from observed transition counts.

    Args:
        probabilities: float array indexed ``[from_state, to_state, action]``;
            updated in place.
        counters: array of the same shape holding observed transition counts.

    Returns:
        The same ``probabilities`` array. For every (from_state, action) pair
        with at least one recorded transition the row becomes
        ``counts / total``; pairs never observed keep their previous values.
    """
    # Total transitions out of each (from_state, action) pair, kept
    # broadcastable against the full table.
    totals = counters.sum(axis=1, keepdims=True)
    # Masked in-place division: entries whose total is 0 are left untouched,
    # which also avoids dividing by zero.
    np.divide(counters, totals, out=probabilities, where=totals > 0)
    return probabilities


def run_value_iteration(state_values, state_transition_probabilities, state_rewards):
    """Run value iteration on the learned model until the values settle.

    Args:
        state_values: initial value estimate per state (not modified).
        state_transition_probabilities: array indexed
            ``[from_state, to_state, action]``.
        state_rewards: reward attached to each state.

    Returns:
        A new array of state values, obtained by repeating
        ``V <- rewards + 0.995 * max_a (P[:, :, a] @ V)`` until the largest
        change in one sweep is at most 0.01.
    """
    gamma = 0.995
    convergence_tolerance = 0.01
    # Sizes come from the model itself rather than from module globals.
    n_states, _, n_actions = state_transition_probabilities.shape
    max_dif = np.inf  # np.Inf was removed in NumPy 2.0

    while max_dif > convergence_tolerance:
        # state_values is rebound (never mutated) below, so keeping a plain
        # reference to the previous array is enough.
        old_state_values = state_values

        best_action_values = np.full(n_states, -np.inf)
        for a_i in range(n_actions):
            best_action_values = np.maximum(
                best_action_values,
                state_transition_probabilities[:, :, a_i].dot(state_values),
            )

        state_values = state_rewards + gamma * best_action_values
        max_dif = np.max(np.abs(state_values - old_state_values))

    return state_values
# Model-based training loop: act greedily w.r.t. the current value estimates,
# record observed transitions, and re-plan with value iteration after every
# episode.
episode_rewards = []
#env.monitor.start('training_dir3', force=True)
for i_episode in range(150):
    current_observation = env.reset()
    current_state = observation_to_state(current_observation)
    
    episode_reward = 0
    
    for t in range(1000):
        action = pick_best_action(current_state, state_values, state_transition_probabilities)
        #print("action:",action)
        old_state = current_state
        observation, reward, done, info = env.step(action)
        current_state = observation_to_state(observation)
         
        # Count the observed (state, next_state, action) transition.
        state_transition_counters[old_state, current_state, action] = \
            state_transition_counters[old_state, current_state, action] + 1
        
        episode_reward = episode_reward + reward        
        
        if done:
            episode_rewards.append(episode_reward)
            print ("Reward: {}, Average reward over {} trials: {}".format(episode_reward, i_episode, np.mean(episode_rewards[-100:]))      )      
            
            # Episodes that end before step index 195 mark their final state
            # with reward -1; longer ones mark it with 0.
            if(t < 195):
                reward = -1
            else:
                reward = 0
            state_rewards[current_state] = reward

            # Refresh the model from the counts, then re-plan on it.
            state_transition_probabilities = update_state_transition_probabilities_from_counters(state_transition_probabilities, state_transition_counters)
            state_values = run_value_iteration(state_values, state_transition_probabilities, state_rewards)
            break
# Running average of the episode rewards: entry i is the mean of episodes
# 0..i inclusive. Computed from however many episodes were actually recorded,
# so the first entry is a real value rather than the mean of an empty slice.
mean_rewards = np.cumsum(episode_rewards) / np.arange(1, len(episode_rewards) + 1)

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

plt.figure()
plt.plot(mean_rewards)
plt.plot(episode_rewards)

plt.xlabel("episode count")
plt.ylabel("Reward")
# Labels follow the plotting order above: running average first, then the
# per-episode reward.
plt.legend(["average", "reward"], loc ="lower right")

# Opens an additional, empty 24x24 figure.
plt.figure(figsize=(24,24))

In [None]:
def run_policy_iteration(state_values, state_transition_probabilities, state_rewards,state_policy):
    """Run policy iteration on the learned model.

    Args:
        state_values: initial value estimate per state (not modified).
        state_transition_probabilities: array indexed
            ``[from_state, to_state, action]``.
        state_rewards: reward attached to each state.
        state_policy: integer array with the current action per state
            (not modified).

    Returns:
        (state_policy, state_values): the first policy that the improvement
        step leaves unchanged, and the values computed for it.

    Prints every improved policy, the number of improvement rounds and the
    elapsed time.
    """
    start = time.time()
    gamma = 0.995
    convergence_tolerance = 0.01
    iteration = 0
    state_index = np.arange(state_transition_probabilities.shape[0])

    while True:
        iteration = iteration + 1

        # Policy evaluation: back up every state using the action the current
        # policy prescribes. Row s of policy_transitions is P[s, :, policy[s]].
        policy_transitions = state_transition_probabilities[state_index, :, state_policy]
        # Reset for every round; otherwise evaluation would only ever run
        # during the first round.
        max_dif = np.inf
        while max_dif > convergence_tolerance:
            old_state_values = state_values
            state_values = state_rewards + gamma * policy_transitions.dot(old_state_values)
            max_dif = np.max(np.abs(state_values - old_state_values))

        # Policy improvement: greedy policy w.r.t. the evaluated values.
        new_policies = improve_policies(state_rewards, state_values, gamma)

        print(new_policies)
        if np.array_equal(new_policies, state_policy):
            break
        state_policy = new_policies

    end = time.time()
    print("policy iteration:")
    print("iter count:",iteration)
    print("time:",end-start,"s")
    return state_policy,state_values

def improve_policies(state_rewards,state_values,gamma):
    """Return the greedy two-action policy for the given state values.

    For every state the value of action 0 and of action 1 is accumulated as
    ``reward + sum_j gamma * P[s, j, a] * V[j]``, using the module-level
    ``state_transition_probabilities`` and ``num_states``. Action 0 wins ties.
    """
    greedy_policy = np.zeros((num_states), dtype=int)
    for state in range(num_states):
        value_action0 = state_rewards[state]
        value_action1 = state_rewards[state]
        for successor in range(num_states):
            successor_value = state_values[successor]
            value_action0 += gamma * state_transition_probabilities[state, successor, 0] * successor_value
            value_action1 += gamma * state_transition_probabilities[state, successor, 1] * successor_value
        greedy_policy[state] = 0 if value_action0 >= value_action1 else 1
    return greedy_policy
import time
# Fresh model tables for the policy-based run (same layout as before:
# transitions are indexed [from_state, to_state, action]).
state_values = np.random.rand(num_states) * 0.1
state_rewards = np.zeros((num_states))
state_transition_probabilities = np.ones((num_states, num_states, num_actions)) / num_states
state_transition_counters = np.zeros((num_states, num_states, num_actions))

# Initial policy: action 0 in every state.
state_policy=np.zeros((num_states),dtype=int)



# Training loop: follow the current policy, record transitions, and re-plan
# with run_policy_iteration after every episode.
episode_rewards = []
#env.monitor.start('training_dir3', force=True)
for i_episode in range(50):
    current_observation = env.reset()
    current_state = observation_to_state(current_observation)
    
    episode_reward = 0
    
    for t in range(1000):
        action = state_policy[current_state]
        
        old_state = current_state
        observation, reward, done, info = env.step(action)
        current_state = observation_to_state(observation)
         
        # Count the observed (state, next_state, action) transition.
        state_transition_counters[old_state, current_state, action] = \
            state_transition_counters[old_state, current_state, action] + 1
        
        episode_reward = episode_reward + reward        
        
        if done:
            episode_rewards.append(episode_reward)
            print ("Reward: {}, Average reward over {} trials: {}".format(episode_reward, i_episode, np.mean(episode_rewards[-100:]))      )      
            
            # Episodes that end before step index 195 mark their final state
            # with reward -1; longer ones mark it with 0.
            if(t < 195):
                reward = -1
            else:
                reward = 0
            state_rewards[current_state] = reward

            # Refresh the model from the counts, then re-plan on it.
            state_transition_probabilities = update_state_transition_probabilities_from_counters(state_transition_probabilities, state_transition_counters)
            state_policy,state_values = run_policy_iteration(state_values, state_transition_probabilities, state_rewards,state_policy)
            
            break

In [None]:
import gym
import numpy as np
import time, pickle, os

env = gym.make('FrozenLake-v1')

epsilon = 0.9           # exploration probability for epsilon-greedy selection
total_episodes = 10000  # number of training episodes
max_steps = 10000       # step cap per episode

lr_rate = 0.81          # step size of the Q update in learn()
gamma = 0.99            # discount factor

# Q-table: one row per state, one column per action, all zeros initially.
Q = np.zeros((env.observation_space.n, env.action_space.n))
    
def choose_action(state):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    With probability ``epsilon`` (module-level) a random action is sampled
    from the environment's action space; otherwise the action with the
    largest entry in ``Q[state]`` is returned.

    Exploration is required here: ``Q`` starts as all zeros and ``np.argmax``
    returns index 0 on ties, so a purely greedy rule keeps choosing action 0
    for every state whose row is still all zeros.
    """
    if np.random.uniform(0, 1) < epsilon:
        return env.action_space.sample()
    return np.argmax(Q[state, :])

def learn(state, state2, reward, action):
    """Apply one Q-learning update to the module-level table ``Q``.

    Moves ``Q[state, action]`` towards
    ``reward + gamma * max(Q[state2])`` by the fraction ``lr_rate``.
    """
    td_target = reward + gamma * np.max(Q[state2, :])
    td_error = td_target - Q[state, action]
    Q[state, action] = Q[state, action] + lr_rate * td_error

# Training: run total_episodes episodes of at most max_steps steps each,
# updating Q after every transition.
for episode in range(total_episodes):
    print("episode:",episode)
    state = env.reset()

    for t in range(max_steps):
        action = choose_action(state)
        state2, reward, done, info = env.step(action)
        learn(state, state2, reward, action)
        state = state2

        if done:
            break
        # No per-step sleep: rendering is disabled during training, so pausing
        # only slowed the run down.

print(Q)

# Persist the learned table for the evaluation script.
with open("frozenLake_qTable.pkl", 'wb') as f:
    pickle.dump(Q, f)
import gym
import numpy as np
import time
import pickle, os

env = gym.make('FrozenLake-v1')

# Load the Q-table written by the training script.
with open("frozenLake_qTable.pkl", 'rb') as f:
    Q = pickle.load(f)

print(Q)
def choose_action(state):
    """Return the greedy action for ``state`` under the loaded Q-table."""
    action = np.argmax(Q[state, :])
    return action

# Rendered demo: play a few greedy episodes and count those that end with
# reward 1.
num_eval_episodes = 5
e=0
for episode in range(num_eval_episodes):
    state = env.reset()
    print("*** Episode: ", episode)
    t = 0
    while t < 100:
        env.render()
        action = choose_action(state)  
        state2, reward, done, info = env.step(action)
        state = state2
        t=t+1
        if done:
            if(reward==1):
                e+=1
            print(t)
            break
        time.sleep(0.5)
        os.system('clear')
# Report against the number of episodes actually played (the message used to
# claim 100 regardless of the loop length).
print("Q learning achives "+str(e)+" out of "+str(num_eval_episodes)+" episodes")

In [None]:
import gym
import torch
import time
import matplotlib.pyplot as plt
from gym.envs.registration import register
# Register a 4x4 slippery FrozenLake under the id 'FrozenLake-v1'.
# NOTE(review): gym may already ship an environment with this id; confirm the
# installed version accepts re-registering it.
register(
    id='FrozenLake-v1',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name' : '4x4', 'is_slippery': True},
)

env = gym.make('FrozenLake-v1')

# Instantiate the Environment.
# env = gym.make('FrozenLake-v0')

# To check all environments present in OpenAI
# print(envs.registry.all())
# Total number of States and Actions
number_of_states = env.observation_space.n
number_of_actions = env.action_space.n
print( "States = ", number_of_states)
print( "Actions = ", number_of_actions)

# Training length and per-episode logs (steps taken, final reward, and the
# exploration rate at episode end).
num_episodes = 10000
steps_total = []
rewards_total = []
egreedy_total = []
# PARAMS 

# Discount on reward
gamma = 0.99

# Factor to balance the ratio of action taken based on past experience to current situation
learning_rate = 0.9
# Exploration schedule: probability of a random action, multiplied by
# egreedy_decay while it is still above egreedy_final.
egreedy = 0.7
egreedy_final = 0.1
egreedy_decay = 0.999
# Q-table: one row per state, one column per action, all zeros initially.
Q = torch.zeros([number_of_states, number_of_actions])
import time
start=time.time()
for i_episode in range(num_episodes):

    # resets the environment
    state = env.reset()
    step = 0

    while True:

        step += 1

        random_for_egreedy = torch.rand(1)[0]

        if random_for_egreedy > egreedy:
            # Greedy action; the tiny random offset breaks ties between
            # equally valued actions.
            random_values = Q[state] + torch.rand(1,number_of_actions) / 1000
            action = torch.max(random_values,1)[1][0]
            action = action.item()
        else:
            action = env.action_space.sample()

        # Decay the exploration rate once per step until it reaches the floor.
        if egreedy > egreedy_final:
            egreedy *= egreedy_decay

        new_state, reward, done, info = env.step(action)

        # Filling the Q Table
        # Blend the old estimate with the new sample using learning_rate.
        # Overwriting Q with the latest sample makes every estimate follow a
        # single noisy outcome, which matters because the environment is
        # registered with is_slippery=True.
        td_target = reward + gamma * torch.max(Q[new_state])
        Q[state, action] = (1 - learning_rate) * Q[state, action] + learning_rate * td_target

        # Setting new state for next action
        state = new_state

        # env.render()
        # time.sleep(0.4)

        if done:
            steps_total.append(step)
            rewards_total.append(reward)
            egreedy_total.append(egreedy)
            if i_episode % 10 == 0:
                print('Episode: {} Reward: {} Steps Taken: {}'.format(i_episode,reward, step))
            break
end=time.time()
print(end-start)
print(Q)
def choose_action(state):
    """Return the index of the largest entry in row ``state`` of ``Q``.

    NOTE(review): this cell's own imports do not include numpy, so ``np`` is
    presumably provided by an earlier cell - confirm.
    """
    q_row = Q[state, :]
    return np.argmax(q_row)

# Evaluation: play 100 greedy episodes (at most 100 steps each) and count
# those that end with reward 1.
e=0
for episode in range(100):
    state = env.reset()
    print("*** Episode: ", episode)
    t = 0
    while t < 100:
        
        # Greedy action with a tiny random offset to break ties.
        random_values = Q[state] + torch.rand(1,number_of_actions) / 1000      
        action = torch.max(random_values,1)[1][0]  
        action = action.item()
        state2, reward, done, info = env.step(action)
        state = state2
        t=t+1
        if done:
            if(reward==1):
                e+=1
            print(t)
            break
        
print("Q learning achives "+str(e)+" out of 100 episodes")

In [None]:
import gym
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
"""
Base code taken from: 
https://github.com/IsaacPatole/CartPole-v0-using-Q-learning-SARSA-and-DNN/blob/master/Qlearning_for_cartpole.py
"""

class CartPoleQAgent():
    """Tabular Q-learning agent for gym's CartPole-v0.

    Continuous observations are mapped to a small grid of buckets so that the
    action-value function fits in a dense table.
    """

    def __init__(self, buckets=(7, 7, 7, 7), 
                 num_episodes=10000, min_lr=0.1, 
                 min_epsilon=0.01, discount=0.995, decay=25):
        """
        Input:
        buckets (tuple): Number of buckets per observation dimension.
        num_episodes (int): Number of training episodes.
        min_lr (float): Lower bound of the learning-rate schedule.
        min_epsilon (float): Lower bound of the exploration schedule.
        discount (float): Discount factor used in the Q update.
        decay (float): Controls how fast both schedules decline.
        """
        self.buckets = buckets
        self.num_episodes = num_episodes
        self.min_lr = min_lr
        self.min_epsilon = min_epsilon
        self.discount = discount
        self.decay = decay

        self.env = gym.make('CartPole-v0')

        # This is the action-value function being initialized to 0's
        self.Q_table = np.zeros(self.buckets + (self.env.action_space.n,))

        # [position, velocity, angle, angular velocity]
        # Dimensions 0 and 2 use the environment's bounds; dimensions 1 and 3
        # use hand-picked finite ranges.
        self.upper_bounds = [self.env.observation_space.high[0], 0.5, self.env.observation_space.high[2], math.radians(50) / 1.]
        self.lower_bounds = [self.env.observation_space.low[0], -0.5, self.env.observation_space.low[2], -math.radians(50) / 1.]

        # Number of steps taken in each training episode
        self.steps = np.zeros(self.num_episodes)

        # Start from the episode-0 schedule values so choose_action() and
        # update_q() are usable before train() has been called.
        self.epsilon = self.get_epsilon(0)
        self.learning_rate = self.get_learning_rate(0)

    def discretize_state(self, obs):
        """
        Takes an observation of the environment and aliases it.
        By doing this, very similar observations can be treated
        as the same and it reduces the state space so that the 
        Q-table can be smaller and more easily filled.
        
        Input:
        obs (tuple): Tuple containing 4 floats describing the current
                     state of the environment.
        
        Output:
        discretized (tuple): Tuple containing 4 non-negative integers smaller 
                             than n where n is the number in the same position
                             in the buckets list.
        """
        discretized = list()
        for i in range(len(obs)):
            # Position of obs[i] inside [lower, upper] as a fraction.
            # Subtracting the lower bound (rather than adding its absolute
            # value) stays correct when a lower bound is positive.
            scaling = ((obs[i] - self.lower_bounds[i]) 
                       / (self.upper_bounds[i] - self.lower_bounds[i]))
            new_obs = int(round((self.buckets[i] - 1) * scaling))
            # Clamp observations outside the bounds to the edge buckets.
            new_obs = min(self.buckets[i] - 1, max(0, new_obs))
            discretized.append(new_obs)
        return tuple(discretized)

    def choose_action(self, state):
        """
        Implementation of e-greedy algorithm. Returns an action (0 or 1).
        
        Input:
        state (tuple): Tuple containing 4 non-negative integers within
                       the range of the buckets.
        
        Output:
        (int) Returns either 0 or 1
        """
        if (np.random.random() < self.epsilon):
            return self.env.action_space.sample() 
        else:
            return np.argmax(self.Q_table[state])

    def get_action(self, state, e):
        """
        Another policy based on the Q-table. Slight variation from 
        e-greedy. It assumes the state fed hasn't been discretized and 
        returns a vector with probabilities for each action.
        
        Input: 
        state (tuple): Contains the 4 floats used to describe
                       the current state of the environment.
        e (int): Denotes the episode at which the agent is supposed
                 to be, helping balance exploration and exploitation.
                 
        Output:
        action_vector (numpy array): Vector containing the probability
                                     of each action being chosen at the
                                     current state.
        """
        obs = self.discretize_state(state)
        action_vector = self.Q_table[obs]
        epsilon = self.get_epsilon(e)
        action_vector = self.normalize(action_vector, epsilon)
        return action_vector

    def normalize(self, action_vector, epsilon):
        """
        Returns a vector with components adding to 1: a share of 
        (1 - epsilon) is split in proportion to the action values and 
        the remaining epsilon is split evenly between the actions.
        
        Input:
        action_vector (numpy array): Contains expected values for each
                                     action at current state from Q-table.
        epsilon (float): Chances that the e-greedy algorithm would 
                         choose an action at random.
        
        Output:
        new_vector (numpy array): Vector containing the probability
                                  of each action being chosen at the
                                  current state. Uniform when the action
                                  values sum to zero.
        """
        n_actions = len(action_vector)
        total = sum(action_vector)
        if total == 0:
            # Nothing to weight by (e.g. an untouched Q-table row): avoid the
            # division by zero and fall back to a uniform distribution.
            return np.full(n_actions, 1.0 / n_actions)
        new_vector = (1-epsilon)*action_vector/(total)
        new_vector += epsilon / n_actions
        return new_vector

    def update_q(self, state, action, reward, new_state):
        """
        Updates Q-table using the rule as described by Sutton and Barto in
        Reinforcement Learning.
        """
        self.Q_table[state][action] += (self.learning_rate * 
                                        (reward 
                                         + self.discount * np.max(self.Q_table[new_state]) 
                                         - self.Q_table[state][action]))

    def get_epsilon(self, t):
        """Gets value for epsilon. It declines as we advance in episodes."""
        # Never drops below min_epsilon, so some exploration always remains
        return max(self.min_epsilon, min(1., 1. - math.log10((t + 1) / self.decay)))

    def get_learning_rate(self, t):
        """Gets value for learning rate. It declines as we advance in episodes."""
        # Learning rate also declines as we add more episodes
        return max(self.min_lr, min(1., 1. - math.log10((t + 1) / self.decay)))

    def train(self):
        """
        Trains agent making it go through the environment and choose actions
        through an e-greedy policy and updating values for its Q-table. The 
        agent is trained for self.num_episodes episodes with learning rate 
        and epsilon values that decline with the episode number down to 
        min_lr and min_epsilon.
        """
        # Looping for each episode
        for e in range(self.num_episodes):
            # Initializes the state
            current_state = self.discretize_state(self.env.reset())

            self.learning_rate = self.get_learning_rate(e)
            self.epsilon = self.get_epsilon(e)
            done = False

            # Looping for each step; ends once the environment reports done
            while not done:
                self.steps[e] += 1
                # Choose A from S
                action = self.choose_action(current_state)
                # Take action
                obs, reward, done, _ = self.env.step(action)
                new_state = self.discretize_state(obs)
                # Update Q(S,A)
                self.update_q(current_state, action, reward, new_state)
                current_state = new_state
        print('Finished training!')

    def plot_learning(self):
        """
        Plots the number of steps at each episode together with its running
        average and prints the amount of times that an episode lasted 
        exactly 200 steps, followed by the final running average.
        """
        n = len(self.steps)
        # Entry i is the mean over episodes 0..i inclusive, so the first entry
        # is a real value rather than the mean of an empty slice.
        mean_rewards = np.cumsum(self.steps) / np.arange(1, n + 1)
        plt.figure()
        plt.plot(mean_rewards)
        plt.plot(self.steps)
        plt.xlabel("episode count")
        plt.ylabel("Reward")
        # Labels follow the plotting order: running average, then raw steps.
        plt.legend(["average", "reward"], loc ="lower right")
        # Opens an additional, empty 24x24 figure.
        plt.figure(figsize=(24,24))
        # NOTE(review): 200 is presumably the step cap of CartPole-v0 - confirm.
        t = int(np.sum(self.steps == 200))
        print(t, "episodes were successfully completed.",mean_rewards[-1])

    def run(self):
        """Runs an episode while displaying the cartpole environment.

        The environment is wrapped in gym.wrappers.Monitor with the directory
        name 'cartpole'. Returns the number of steps taken.
        """
        self.env = gym.wrappers.Monitor(self.env,'cartpole')
        t = 0
        done = False
        current_state = self.discretize_state(self.env.reset())
        while not done:
            self.env.render()
            t = t+1
            action = self.choose_action(current_state)
            obs, reward, done, _ = self.env.step(action)
            new_state = self.discretize_state(obs)
            current_state = new_state

        return t   
def load_q_learning():
    """Build a CartPoleQAgent with default settings, train it, plot its
    learning curve and return the trained agent."""
    trained_agent = CartPoleQAgent()
    trained_agent.train()
    trained_agent.plot_learning()
    return trained_agent
# Time one full train-and-plot run and print the elapsed seconds.
# NOTE(review): this cell's own imports do not include `time`; presumably it
# is still in scope from an earlier cell - confirm.
start=time.time()
agent = load_q_learning()
end=time.time()
print(end-start)