In [4]:
import sys
sys.path.append('scripts/')
from puddle_world import *
import itertools
import collections
from copy import copy
import cv2
import seaborn as sns

In [7]:
class DynamicProgramming:
    """Discretised (x, y, theta) state space built from a grayscale map image.

    On construction the class builds, for every grid cell, a value-function
    entry, a final-state flag and an obstacle flag, and pre-computes the
    state-transition probabilities of each action by sampling points inside a
    cell and moving them with the motion model in `transition_state`.
    """

    def __init__(self, map_image, widths, goal, time_interval, sampling_num, value=None):
        """Set up the grid, the flags and the transition table.

        Args:
            map_image: 2-D array of pixel intensities; pixels below 255 are
                flagged as obstacles.
            widths: array-like of three cell sizes (x, y, theta).
            goal: object exposing `inside(pose)`; used to flag final states.
            time_interval: duration passed to the motion model for one step.
            sampling_num: number of sample points per axis inside a cell
                (sampling_num**3 samples per cell in total).
            value: accepted for interface compatibility; not used here.
        """
        # Transpose and flip the second axis of the map image (inverts the y axis),
        # so the stored map is indexed as [x_index, y_index].
        self.map_image = map_image.T[:, ::-1]
        # Number of cells along x and y. Read from the *transposed* image so the
        # counts match the array that is actually indexed later; using the
        # original image's shape swaps them for non-square maps.
        x_pixel, y_pixel = self.map_image.shape
        nt = int(math.pi*2/widths[2])
        self.index_nums = np.array([x_pixel, y_pixel, nt])
        self.indexes = list(itertools.product(range(x_pixel), range(y_pixel), range(nt)))

        self.pose_min = np.array([0, 0, 0])
        self.pose_max = np.array([x_pixel*widths[0], y_pixel*widths[1], math.pi*2])

        self.widths = widths
        self.goal = goal

        self.value_function, self.final_state_flags, self.obstacle_state_flags =\
                self.init_value_function()
        self.policy = self.init_policy()
        # Each action is a (nu, omega) pair handed to transition_state.
        self.actions = [(0.1, 0.0), (0.0, 0.5), (0.0, -0.5)]

        self.state_transition_probs = self.init_state_transition_probs(time_interval, sampling_num)

        self.time_interval = time_interval

    def calc_stp(self, stp_file_name):
        """Print the transition table and write it to 'stp/<stp_file_name>.stp'.

        One line per (action, theta index, index delta) entry, formatted as
        "<action name> <theta index> <prob> <dx> <dy> <dtheta>".
        """
        import os

        stp = self.state_transition_probs
        # Names are matched to self.actions by position.
        actions_name = ['fw', 'ccw', 'cw']
        # open() does not create missing directories, so make sure 'stp/' exists.
        os.makedirs('stp', exist_ok=True)
        with open('stp/' + stp_file_name + '.stp', "w") as f:
            for a_i, action in enumerate(self.actions):
                for i in range(self.index_nums[2]):
                    for delta, prob in stp[(action, i)]:
                        line = '{} {} {} {} {} {}'.format(
                            actions_name[a_i], i, prob, delta[0], delta[1], delta[2])
                        print(line)
                        f.write(line + '\n')

    def edge_correction(self, index):
        """Wrap the theta index and clamp x/y indices into the grid.

        `index` is modified in place and also returned. The second return
        value is 0.0 when x and y were already inside the grid and 1e10 when
        either of them had to be clamped.
        """
        edge_reward = 0.0
        # Wrap the heading index into [0, number of theta cells).
        index[2] = (index[2] + self.index_nums[2])%self.index_nums[2]

        for i in range(2):
            if index[i] < 0:
                index[i] = 0
                edge_reward =1e10
            elif index[i] >= self.index_nums[i]:
                index[i] = self.index_nums[i]-1
                edge_reward =1e10

        return index, edge_reward

    def init_policy(self):
        """Return a zero array of shape (x cells, y cells, theta cells, 2)."""
        tmp = np.zeros(np.r_[self.index_nums, 2])
        return tmp

    def init_state_transition_probs(self, time_interval, sampling_num):
        """Estimate index-delta probabilities per (action, theta index).

        Returns:
            dict mapping (action, theta_index) to a list of
            (index_delta_array, probability) pairs.
        """
        # Sample sampling_num points per axis, evenly spaced inside one cell
        # (sampling_num**3 points in total).
        dx = np.linspace(0.00001, self.widths[0]*0.99999, sampling_num)
        dy = np.linspace(0.00001, self.widths[1]*0.99999, sampling_num)
        dt = np.linspace(0.00001, self.widths[2]*0.99999, sampling_num)
        samples = list(itertools.product(dx, dy, dt))

        # For every action and every heading cell, move each sample and record
        # how far its cell index changed.
        tmp = {}
        for a in self.actions:
            for i_t in range(self.index_nums[2]):
                transitions = []
                for s in samples:
                    before = np.array([s[0], s[1], s[2] + i_t*self.widths[2]]).T + self.pose_min
                    # Cell index before the move (x and y cells are taken as 0).
                    before_index = np.array([0, 0, i_t]).T

                    after = self.transition_state(a[0], a[1], time_interval, before)
                    after_index = np.floor((after - self.pose_min)/self.widths).astype(int)

                    transitions.append(after_index - before_index)

                # Relative frequency of each distinct index delta.
                unique, count = np.unique(transitions, axis=0, return_counts=True)
                probs = [c/sampling_num**3 for c in  count]
                tmp[a, i_t] = list(zip(unique, probs))

        return tmp

    def init_value_function(self):
        """Build the initial value, final-state and obstacle arrays.

        Returns:
            (v, f, o): arrays shaped like the grid. `f` flags cells reported
            final by `final_state`, `o` flags cells whose map pixel is below
            255, and `v` is 0.0 on final cells and -1000.0 elsewhere.
        """
        v = np.empty(self.index_nums)
        f = np.zeros(self.index_nums)
        o = np.empty(self.index_nums)

        for index in self.indexes:
            f[index] = self.final_state(np.array(index).T)
            o[index] = True if self.map_image[index[0], index[1]] < 255 else False
            v[index] = 0.0 if f[index] else - 1000.0

        return v, f, o

    def final_state(self, index):
        """Return True when all four x/y corners of the cell are inside the goal."""
        x_min, y_min, _ = self.pose_min + self.widths*index
        x_max, y_max, t_upper = self.pose_min + self.widths*(index + 1)

        # Four corners of the cell; the third component is the cell's upper theta bound.
        corners = [[x_min, y_min, t_upper], [x_min, y_max, t_upper],
                   [x_max, y_min, t_upper], [x_max, y_max, t_upper]]
        return all([self.goal.inside(np.array(c).T) for c in corners ])

    def transition_state(self, nu, omega, time, pose):
        """Return the pose reached from `pose` after applying (nu, omega) for `time`.

        A straight-line update is used when |omega| < 1e-10 (avoids dividing
        by omega); otherwise the arc update is used.
        """
        t0 = pose[2]
        if math.fabs(omega) < 1e-10:
            return pose + np.array( [nu*math.cos(t0),
                                     nu*math.sin(t0),
                                     omega ] ) * time
        else:
            return pose + np.array( [nu/omega*(math.sin(t0 + omega*time) - math.sin(t0)),
                                     nu/omega*(-math.cos(t0 + omega*time) + math.cos(t0)),
                                     omega*time ] )

In [8]:
# Select the map to process; the alternatives are kept for quick switching.
# map_name = 'NoWall_200x200'
map_name = 'CorridorGimp_200x200'
# map_name = 'CorridorGimp_100x100'
# map_name = 'CorridorGimp_20x20'
# map_name = 'CorridorGimp_50x50'
map_path = 'map/' + map_name + '.png'
map_image = cv2.imread(map_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise for a missing or unreadable file; it returns None,
# which would otherwise surface later as an opaque AttributeError.
if map_image is None:
    raise FileNotFoundError('could not read map image: ' + map_path)

# Constructor arguments: map image, cell widths (x, y, theta), goal,
# time interval, samples per axis. Use the line that matches map_name.
# dp = DynamicProgramming(map_image, np.array([0.05, 0.05, math.pi/18]).T, Goal(5.0, 7.0, radius=0.1), 0.1, 10)  # no wall
dp = DynamicProgramming(map_image, np.array([0.05, 0.05, math.pi/18]).T, Goal(6.75, 8.0, radius=0.1), 0.1, 10)  # 200x200
# dp = DynamicProgramming(map_image, np.array([0.05, 0.05, math.pi/9]).T, Goal(3.5, 4.0, radius=0.1), 0.1, 10)  # 100x100
# dp = DynamicProgramming(map_image, np.array([0.05, 0.05, math.pi/9]).T, Goal(0.65, 0.85, radius=0.1), 0.1, 10)
# dp = DynamicProgramming(map_image, np.array([0.05, 0.05, math.pi/12]).T, Goal(1.65, 2.0, radius=0.1), 0.1, 10)

# Print the transition table and write it to stp/<map_name>.stp.
dp.calc_stp(map_name)

fw 0 0.728 0 0 0
fw 0 0.072 0 1 0
fw 0 0.182 1 0 0
fw 0 0.018 1 1 0
fw 1 0.696 0 0 0
fw 1 0.104 0 1 0
fw 1 0.174 1 0 0
fw 1 0.026 1 1 0
fw 2 0.656 0 0 0
fw 2 0.164 0 1 0
fw 2 0.144 1 0 0
fw 2 0.036 1 1 0
fw 3 0.72 0 0 0
fw 3 0.18 0 1 0
fw 3 0.08 1 0 0
fw 3 0.02 1 1 0
fw 4 0.04 -1 0 0
fw 4 0.01 -1 1 0
fw 4 0.72 0 0 0
fw 4 0.18 0 1 0
fw 4 0.04 1 0 0
fw 4 0.01 1 1 0
fw 5 0.08 -1 0 0
fw 5 0.02 -1 1 0
fw 5 0.72 0 0 0
fw 5 0.18 0 1 0
fw 6 0.144 -1 0 0
fw 6 0.036 -1 1 0
fw 6 0.656 0 0 0
fw 6 0.164 0 1 0
fw 7 0.174 -1 0 0
fw 7 0.026 -1 1 0
fw 7 0.696 0 0 0
fw 7 0.104 0 1 0
fw 8 0.182 -1 0 0
fw 8 0.018 -1 1 0
fw 8 0.728 0 0 0
fw 8 0.072 0 1 0
fw 9 0.018 -1 -1 0
fw 9 0.182 -1 0 0
fw 9 0.072 0 -1 0
fw 9 0.728 0 0 0
fw 10 0.026 -1 -1 0
fw 10 0.174 -1 0 0
fw 10 0.104 0 -1 0
fw 10 0.696 0 0 0
fw 11 0.036 -1 -1 0
fw 11 0.144 -1 0 0
fw 11 0.164 0 -1 0
fw 11 0.656 0 0 0
fw 12 0.02 -1 -1 0
fw 12 0.08 -1 0 0
fw 12 0.18 0 -1 0
fw 12 0.72 0 0 0
fw 13 0.01 -1 -1 0
fw 13 0.04 -1 0 0
fw 13 0.18 0 -1 0
fw 13 0