In [48]:
import pandas as pd
import numpy as np
import pykep as pk
import tqdm

from simulator import Simulator, read_space_objects
from api import Environment, MAX_FUEL_CONSUMPTION
from agent import TableAgent as Agent

from copy import copy

np.random.seed(0)


In [49]:
start_time = 6599.95
step = 0.0001
end_time = 6600.05

n_actions = 3
time_space = np.arange(start_time, end_time, step)

osc = read_space_objects("data/collision.osc", "osc")
protected = osc[0]
debris = [osc[1]]
start_time_mjd2000 = pk.epoch(start_time, "mjd2000")
max_fuel_cons = MAX_FUEL_CONSUMPTION
fuel_level = protected.get_fuel()

In [50]:
action_table = np.genfromtxt("data/action_table.csv", delimiter=',', skip_header=True)
agent = Agent(action_table)

def generate_session(protected, debris, start_time, agent):
    env = Environment(copy(protected), copy(debris), start_time)
    simulator = Simulator(agent, env, update_r_p_step=None, print_out=False)
    # reward
    r = simulator.run(end_time, step, visualize=False)
    return r

In [51]:
def get_random_on_interval(space, num, replace=False):
    result = np.sort(np.random.choice(space, num, replace))
    result = result - space[0]
    result[1:] = result[1:] - result[:-1]
    return result

def get_random_time_to_req(time_space, n_actions, last_nan=True):
    if last_nan:
        n_actions -= 1
    time_to_req = get_random_on_interval(time_space, n_actions)
    if last_nan:
        time_to_req = np.append(time_to_req, np.nan)
    return time_to_req

In [52]:
np.array([1,2,3]).reshape((-1,1))

array([[1],
       [2],
       [3]])

In [53]:
def get_random_action_table(n_actions, time_space, fuel_level, max_fuel_cons, n_tables):
    # TODO - first dV=0? like just observation?
    action_tables = np.empty((n_tables, n_actions, 4))
    for j in range(n_tables):
        action_table = np.zeros((n_actions, 3))
        total_fuel_cons = np.random.uniform(fuel_level)
        for i in range(n_actions):
            fuel_cons = min(np.random.uniform(high = total_fuel_cons), max_fuel_cons)
            total_fuel_cons -= fuel_cons
            dV = get_random_on_interval(np.linspace(0, fuel_cons), 3, True) - fuel_cons/2
            action_table[i] = dV
        idx = np.arange(n_actions)
        np.random.shuffle(idx)
        time_to_req = get_random_time_to_req(time_space, n_actions).reshape((-1,1))
        action_table = np.hstack((action_table[idx], time_to_req))
        action_tables[j] = action_table
    return action_tables

# act = get_random_action_table(n_actions, time_space, fuel_level, max_fuel_cons)
# np.sum(np.abs(act[:, :-1]))
# other statistics tests

In [54]:
action_table = get_random_action_table(n_actions, time_space, fuel_level, max_fuel_cons, 1)[0]
agent = Agent(action_table)
generate_session(protected, debris, start_time_mjd2000, agent)

6.6777152038264393

In [55]:
action_table

array([[-1.58807926, -1.80967171,  0.84943774,  0.0027    ],
       [-0.04361632, -0.28039064, -0.14331077,  0.0778    ],
       [-0.35528243, -0.20409842, -0.18898001,         nan]])

In [45]:
# training loop
from tqdm import tqdm

n_sessions = 2
n_elite_sessions = 1
log = []

action_tables = get_random_action_table(n_actions, time_space, fuel_level, max_fuel_cons, n_sessions)

for i in tqdm(range(2)):
    rewards = []
    for at in action_tables:
        agent = Agent(at)
        rewards.append(generate_session(protected, debris, start_time_mjd2000, agent))
    # select_elites
    elite_indices = np.argsort(rewards)[-n_elite_sessions:]
    elite_action_tables = action_tables[elite_indices]
    best_action_table = action_tables[-1]
#     show_progress()
    action_tables = []

 50%|█████     | 1/2 [00:16<00:16, 16.38s/it]

[7.7406609151384025, 6.6650090839584042]
[0]
[]
[]


TypeError: only integer scalar arrays can be converted to a scalar index