In [34]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [35]:
class WindyGridworld:
    def __init__(self, size_x, size_y):
        self.size_x = size_x
        self.size_y = size_y
        self.start = (3,0)
        self.goal = (3,7)
        self.actions = [(0,1), (1,0), (0,-1), (-1,0)]
        self.wind = [0,0,0,1,1,1,2,2,1,0]
        
    def step(self, state, action):
        next_state = np.array(state) + np.array(action)
        next_state[0] -= self.wind[state[1]]
        next_state = tuple(next_state)
        reward = -1
        if(next_state[0] < 0 or next_state[1] < 0 or next_state[0] >= self.size_x or next_state[1] >= self.size_y):
            next_state = state
        return next_state, reward
    
    def episode(self, action_values):
        state = self.start
        time = 0
        
        action = None
        if(random.random() < 0.1):
            ind = np.random.choice(range(len(self.actions)))
            action = self.actions[ind]
        else:
            values = action_values[state[0], state[1], :]
            ind = np.argmax(values)
            action = self.actions[ind]
        
        while(True):
            next_state, reward = self.step(state, action)
            
            next_action = None
            if(random.random() < 0.1):
                ind = np.random.choice(range(len(self.actions)))
                next_action = self.actions[ind]
            else:
                values = action_values[state[0], state[1], :]
                ind = np.argmax(values)
                next_action = self.actions[ind]
            
            action_values[state[0], state[1], self.actions.index(action)] += 0.5*(reward + action_values[next_state[0], next_state[1], self.actions.index(next_action)] - action_values[state[0], state[1], self.actions.index(action)])
            state = next_state
            action = next_action
            time+=1
            print(state)
#             print(action_values)
            if(state == self.goal):
                break
        return time
    
    def figure(self):
        action_values = np.zeros((self.size_x, self.size_y, len(self.actions)))
        episode_limit = 500
        
        step= []
        ep = 0
        while(ep < episode_limit):
            t = self.episode(action_values)
            print(t)
#             print(action_values)
            step.append(t)
            ep += 1
            
        step = np.add.accumulate(step)
        
        plt.figure(figsize=(20,10))
        plt.plot(step, np.arange(1, len(step) + 1)) 
#         print(action_values)

In [36]:
grid = WindyGridworld(7,10)
grid.figure()

[3, 1]
[2, 1]
[2, 2]
[2, 3]
[1, 4]
[0, 5]
[0, 6]
[0, 6]
[0, 7]
[0, 8]
[0, 9]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[2, 9]
[3, 9]
[4, 9]
[4, 9]
[4, 9]
[5, 9]
[6, 9]
[6, 9]
[6, 9]
[6, 9]
[6, 9]
[6, 8]
[5, 7]
[2, 7]
[0, 8]
[0, 9]
[1, 9]
[1, 8]
[0, 9]
[0, 9]
[0, 8]
[0, 7]
[0, 8]
[0, 9]
[0, 8]
[0, 8]
[0, 8]
[0, 8]
[0, 8]
[0, 8]
[0, 8]
[0, 7]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 4]
[0, 5]
[0, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 7]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[0, 3]
[0, 4]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 2]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[2, 1]
[2, 2]
[3, 2]
[4, 2]
[4, 3]
[3, 4]
[2, 5]
[2, 5]
[1, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 9]
[0, 9]
[0, 9]
[0, 9]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[2, 8]
[1, 7]
[0, 8]
[0, 9]
[0, 8]
[0, 7]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 5]
[0, 5]
[0, 6]
[0, 6]
[0, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 8]
[0, 8]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[1, 9]
[0, 9]
[0, 9]

[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[4, 8]
[2, 8]
[0, 8]
[0, 7]
[0, 7]
[0, 8]
[0, 9]
[0, 9]
[0, 8]
[0, 9]
[0, 9]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[3, 9]
[4, 9]
[4, 8]
[3, 9]
[4, 9]
[3, 9]
[3, 9]
[3, 8]
[2, 7]
[0, 7]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[1, 2]
[1, 1]
[1, 2]
[1, 3]
[1, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 4]
[0, 5]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[1, 2]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[1, 1]
[2, 1]
[2, 2]
[3, 2]
[3, 3]
[2, 2]
[2, 1]
[3, 1]
[4, 1]
[4, 0]

[5, 9]
[6, 9]
[6, 8]
[5, 9]
[6, 9]
[6, 8]
[5, 9]
[5, 9]
[5, 8]
[4, 7]
[1, 7]
[0, 8]
[0, 8]
[0, 7]
[0, 6]
[0, 6]
[0, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[3, 9]
[4, 9]
[4, 9]
[5, 9]
[6, 9]
[6, 8]
[4, 8]
[4, 8]
[3, 7]
[1, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[3, 9]
[4, 9]
[5, 9]
[4, 9]
[4, 8]
[4, 8]
[3, 7]
[1, 6]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 5]
[0, 5]
[0, 6]
[0, 7]
[0, 8]
[0, 8]
[0, 9]
[0, 9]
[1, 9]
[2, 9]
[2, 9]
[2, 8]
[2, 8]
[1, 7]
[0, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[0, 2]
[1, 2]
[2, 2]
[2, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[1, 2]
[0, 2]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[1, 2]
[0, 2]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[1, 2]
[0, 2]
[0, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[1, 2]
[0, 2]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[1, 1]
[1, 2]
[0, 2]
[0, 3]
[0, 3]
[0, 3]
[0, 3]

[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 3]
[4, 4]
[4, 4]
[3, 5]
[2, 6]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[2, 0]
[2, 1]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[3, 2]
[2, 2]
[2, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[2, 0]
[2, 0]
[3, 0]
[4, 0]
[5, 0]
[5, 0]
[6, 0]
[6, 0]
[6, 1]
[6, 2]
[6, 2]
[6, 3]
[5, 4]
[5, 4]
[3, 4]
[1, 4]
[0, 4]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[2, 0]
[3, 0]
[4, 0]
[5, 0]
[5, 0]
[5, 0]
[6, 0]
[6, 0]
[5, 0]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 2]
[5, 1]
[5, 0]
[5, 0]
[5, 0]
[6, 0]
[6, 0]
[6, 1]
[6, 2]
[6, 2]
[6, 3]
[5, 4]
[5, 4]
[3, 4]
[1, 4]
[1, 4]
[0, 5]
[0, 6]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 2]

[4, 4]
[4, 4]
[3, 5]
[2, 6]
[1, 6]
[0, 6]
[0, 5]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[3, 0]
[3, 1]
[3, 2]
[3, 3]
[2, 4]
[1, 5]
[1, 5]
[1, 5]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[2, 0]
[1, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[3, 1]
[4, 1]
[4, 0]
[5, 0]
[5, 1]
[5, 2]
[5, 3]
[4, 4]
[4, 4]
[4, 4]
[4, 4]
[2, 4]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[3, 1]
[3, 2]
[3, 1]
[3, 2]
[3, 1]
[3, 2]
[2, 2]
[2, 3]
[1, 2]
[1, 3]
[1, 3]
[1, 3]
[1, 3]
[1, 3]
[1, 3]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[4, 0]
[5, 0]
[5, 1]
[5, 2]
[5, 3]
[4, 4]
[4, 4]
[4, 4]
[3, 5]
[2, 6]
[1, 6]
[0, 6]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[1, 0]
[2, 0]
[1, 0]
[1, 0]
[1, 0]
[0, 0]
[0, 0]
[0, 1]
[0, 2]
[0, 2]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[1, 0]
[1, 0]
[0, 0]
[0, 0]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]

[3, 9]
[4, 9]
[4, 9]
[4, 8]
[3, 7]
[1, 6]
[0, 6]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 4]
[0, 3]
[0, 2]
[0, 2]
[1, 2]
[2, 2]
[2, 3]
[1, 2]
[1, 1]
[1, 2]
[2, 2]
[2, 3]
[1, 2]
[1, 3]
[0, 4]
[0, 3]
[0, 4]
[0, 3]
[0, 4]
[0, 3]
[0, 4]
[0, 3]
[0, 4]
[0, 3]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[0, 0]
[0, 1]
[0, 2]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[4, 0]
[5, 0]
[5, 0]
[5, 1]
[5, 2]
[6, 2]
[6, 2]
[6, 3]
[5, 4]
[5, 4]
[5, 4]
[5, 4]
[3, 4]
[1, 4]
[1, 4]
[0, 5]
[0, 4]
[0, 4]
[0, 5]
[0, 6]
[0, 6]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 5]
[0, 4]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]
[4, 0]
[5, 0]
[5, 0]
[5, 1]
[5, 2]
[6, 2]
[6, 3]
[5, 4]
[5, 4]
[5, 4]
[5, 4]
[5, 4]
[5, 4]
[4, 5]
[3, 6]
[2, 6]
[0, 5]
[0, 5]
[0, 4]
[0, 3]
[0, 3]
[0, 2]
[0, 1]
[0, 1]
[0, 0]
[0, 0]
[1, 0]
[2, 0]
[3, 0]
[3, 0]

KeyboardInterrupt: 