In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from gridworld import GridworldMdp
from agents import OptimalAgent, MyopicAgent
from fast_agents import FastMyopicAgent, FastOptimalAgent
from mdp_interface import Mdp
from agent_runner import run_agent, get_reward_from_trajectory
import numpy as np
from multiprocessing import Pool
import tqdm
from maia_chess_backend.maia.tfprocess import get_tfp
import tensorflow as tf

In [3]:
np.set_printoptions(precision=5, linewidth=200)

In [4]:
def gen_gridworld_arr(gridworld):
    arr = np.zeros((3,width,height), dtype=np.int8)
    arr[0] = np.array(gridworld.walls)
    
    for (x,y) in gridworld.rewards:
        arr[1,x,y] = gridworld.rewards[(x,y)]
        
    (x,y) = gridworld.get_start_state()
    arr[2,x,y] = 1
    
    return arr

In [5]:
def gen_random_connected(height, width, num_rewards):
    for _ in range(5):
        try:
            return GridworldMdp.generate_random_connected(height=height,width=width,num_rewards=num_rewards,noise=0)
        except:
            pass
    raise ValueError('Could not generate Gridworld')

In [6]:
height=6
width=6
num_rewards=4

gamma = 0.9
myopic_horizon = 2

episode_length = 5
cost = 1

def gen_data(num_grids):

    data = np.zeros((num_grids,4, height,width))
    trial = 0

    for i in range(num_grids):
#         print(f'Iter {i}', end='\r')
        gridworld = gen_random_connected(height, width, num_rewards)
        mdp = Mdp(gridworld)
        gridworld_arr = gen_gridworld_arr(gridworld)
        start_state = gridworld.get_start_state()
        mdp.gridworld.start_state = start_state
        
        
        dummy_agent = FastMyopicAgent(horizon=episode_length)
        dummy_agent.set_mdp(gridworld)
        
        def recurse(agent_list, moves_left):
            if moves_left == 0: return [agent_list]

            myopic_agent = FastMyopicAgent(horizon=min(moves_left, myopic_horizon))
            optimal_agent = FastMyopicAgent(horizon=moves_left)

            l1 = recurse(agent_list+[(0,myopic_agent)], moves_left-1)
            l2 = recurse(agent_list+[(1,optimal_agent)], moves_left-1)

            return l1+l2
        
        
        agent_lists = recurse([], episode_length)
        #print([[i[0] for i in agent_list] for agent_list in agent_lists])
        rewards = []
        
        for agent_list in agent_lists:
            num_ints = sum([j[0] for j in agent_list])
            agent_list = [j[1] for j in agent_list]
            trajectory = run_agent(dummy_agent, mdp, episode_length=episode_length, agent_list=agent_list)
            rewards.append(get_reward_from_trajectory(trajectory) - num_ints*cost)
#         print(rewards)
        idx = np.array(rewards).argmax()
#         print(i)
#         print(agent_lists[i][0][0])

        data[i,:3] = gridworld_arr
        data[i,3] = agent_lists[idx][0][0]
    return data

In [7]:
%%time
with Pool(32) as p:
#     experiment2 = p.map(gen_data, [1]*1000)
    n = 4000
    experiment2_big = list(tqdm.tqdm(p.imap(gen_data, [1]*n), total=n))

100%|██████████| 4000/4000 [00:29<00:00, 136.60it/s]


CPU times: user 1.74 s, sys: 1.16 s, total: 2.89 s
Wall time: 29.6 s


In [None]:
experiment2 = np.concatenate(experiment2)
experiment2.shape

In [13]:
tfp = get_tfp(filters=64, blocks=6, regularizer=False, input_size=3, board_size=6, output_size=1)

optimizer = tfp.optimizer
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
metrics = ['accuracy',tf.keras.metrics.Precision(),tf.keras.metrics.Recall(),tf.keras.metrics.AUC()]
# metrics = ['accuracy']

tfp.model.compile(optimizer, loss, metrics)

In [14]:
## Second experiment
tfp.model.fit(xtrain,ytrain)
tfp.model.evaluate(xeval,yeval)



[0.05002376809716225,
 0.9795440435409546,
 0.9772322177886963,
 0.9819661974906921,
 0.9984188675880432]

In [14]:
def istarmap(self, func, iterable, chunksize=1):
    """starmap-version of imap
    """
    self._check_running()
    if chunksize < 1:
        raise ValueError(
            "Chunksize must be 1+, not {0:n}".format(
                chunksize))

    task_batches = mpp.Pool._get_tasks(func, iterable, chunksize)
    result = mpp.IMapIterator(self)
    self._taskqueue.put(
        (
            self._guarded_task_generation(result._job,
                                          mpp.starmapstar,
                                          task_batches),
            result._set_length
        ))
    return (item for chunk in result for item in chunk)

In [17]:
from gridworld import GridworldMdp
from agents import OptimalAgent, MyopicAgent
from fast_agents import FastMyopicAgent, FastOptimalAgent
from mdp_interface import Mdp
from agent_runner import get_reward_from_trajectory, run_agent
import numpy as np
from maia_chess_backend.maia.tfprocess import get_tfp
import tensorflow as tf
from multiprocessing import Pool
import tqdm
import sys

def gen_gridworld_arr(gridworld, width):
    arr = np.zeros((3,width,width), dtype=np.int8)
    arr[0] = np.array(gridworld.walls)
    
    for (x,y) in gridworld.rewards:
        arr[1,x,y] = gridworld.rewards[(x,y)]
        
    (x,y) = gridworld.get_start_state()
    arr[2,x,y] = 1
    
    return arr

def gen_random_connected(width, height, num_rewards):
    for _ in range(1000):
        try:
            return GridworldMdp.generate_random_connected(width=width,height=height,num_rewards=num_rewards,noise=0)
        except:
            pass
    raise ValueError('Could not generate Gridworld')
    

def gen_data(num_grids, width, num_rewards, episode_length, myopic_horizon, cost):
    data = np.zeros((num_grids,4,width,width))
    for i in range(num_grids):
        gridworld = gen_random_connected(width, width, num_rewards)
        mdp = Mdp(gridworld)
        start_state = gridworld.get_start_state()
        mdp.gridworld.start_state = start_state
        dummy_agent = FastMyopicAgent(horizon=episode_length)
        dummy_agent.set_mdp(gridworld)
        def recurse(agent_list, moves_left):
            if moves_left == 0: return [agent_list]
            myopic_agent = FastMyopicAgent(horizon=min(moves_left, myopic_horizon))
            optimal_agent = FastMyopicAgent(horizon=moves_left)
            l1 = recurse(agent_list+[(0,myopic_agent)], moves_left-1)
            l2 = recurse(agent_list+[(1,optimal_agent)], moves_left-1)
            return l1+l2
        agent_lists = recurse([], episode_length)
        rewards = []
        for agent_list in agent_lists:
            num_ints = sum([j[0] for j in agent_list])
            agent_list = [j[1] for j in agent_list]
            trajectory = run_agent(dummy_agent, mdp, episode_length=episode_length, agent_list=agent_list)
            rewards.append(get_reward_from_trajectory(trajectory) - num_ints*cost)
        idx = np.array(rewards).argmax()
        data[i,:3] = gen_gridworld_arr(gridworld, width)
        data[i,3] = agent_lists[idx][0][0]
    return data


def get_all_data(width, num_rewards, episode_length, myopic_horizon):
    for cost in [0]:
        pos,neg = 0,0
        data = np.zeros((0,4,width,width))
        while pos < 120000 or neg < 120000:
            
#             with Pool(4) as pool:
#                 iterable = [(i, 'x') for i in range(10)]
#                 for _ in tqdm.tqdm(pool.istarmap(foo, iterable),total=len(iterable)):
#                     pass

            with Pool(8) as p:
                n = 100
                inputs = [[100,width,num_rewards,episode_length,myopic_horizon, cost]]*n
                run_data = p.starmap(gen_data, tqdm.tqdm(inputs, total=n))
#                 print(run_data)
                print(run_data[0])
                print(run_data[0][0])
                print(run_data[0][1])
#                 run_data = np.concatenate(run_data)
#                 data = np.concatenate((data, run_data))
#                 y = data[:,3,0,0]
#                 pos = (y==1).sum()
#                 neg = (y==0).sum()
#                 print(pos, neg)
#         x = data[:,:3]
#         y = data[:,3,0,0]

In [18]:
get_all_data(6,5,6,2)

100%|██████████| 100/100 [00:00<00:00, 85878.46it/s]


KeyboardInterrupt: 