**If the sample of environments and the action tables already exist, the "Sample" and "Maneuvers" steps can be skipped.**

In [1]:
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.ES import EvolutionStrategies
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.baseline import Baseline

In [5]:
import os

# Number of environments ("situations") generated, trained on and evaluated.
n_situations = 10
# Step value handed to every model constructor and to Simulator below.
# NOTE(review): units are not visible in this notebook — confirm.
step = 0.000001

# Mapping from a short model name to the class that is trained for it.
# The names are also used as column labels and in the action-table file names.
# TODO - add ES
models = {
    "CE": CrossEntropy,
    "MCTS": DecisionTree,
    "baseline": Baseline,
}

# Where generated environments and trained action tables are stored
# (relative to the notebook's working directory).
save_dir_env = "../../data/environments/generated_sample"
save_dir_maneuvers = "../../training/agents_tables/generated_sample"


# Create the output directories. `exist_ok=True` avoids the check-then-create
# race of `if not os.path.exists(...)` and keeps the cell safe to re-run.
for _output_dir in (save_dir_env, save_dir_maneuvers):
    os.makedirs(_output_dir, exist_ok=True)

# Sample

In [3]:
# Generator parameters: the time window passed to Generator(start_time, end_time)
# and the number of debris objects added to every generated environment.
# NOTE(review): the time units are not visible in this notebook — confirm.
n_debris = 2
start_time, end_time = 6600, 6600.1

In [4]:
# Build the sample of environments: each one receives a protected object and
# `n_debris` debris objects, then is saved as "collision_<i>.env" in save_dir_env.
env_file_template = "/collision_{}.env"
for i in range(n_situations):
    generator = Generator(start_time, end_time)
    generator.add_protected()
    debris_added = 0
    while debris_added < n_debris:
        generator.add_debris()
        debris_added += 1
    generator.save_env(save_dir_env + env_file_template.format(i))

# Maneuvers

In [5]:
# Models and their training parameters.
# `n_actions` is only passed to the "CE" and "ES" constructors.
n_actions = 3

# Keyword arguments forwarded to `model.train(...)`, keyed by model name.
train_params = {
    "CE": {"n_iterations": 50, "n_sessions": 30},
    "MCTS": {"n_iterations": 100},
    "baseline": {"n_samples": 100},
}

In [6]:
# Training: for every saved environment, train each configured model and
# write its action table to "<save_dir_maneuvers>/collision_<i>_<name>.csv".
from IPython.display import clear_output

# Names of the models whose constructor also receives `n_actions`.
models_with_n_actions = ("CE", "ES")

for i in range(n_situations):
    env = read_environment(save_dir_env + "/collision_{}.env".format(i))
    for name, model_class in models.items():
        # Replace the previous progress message instead of stacking output.
        clear_output(True)
        print("env: {} of {}\nmodel: {}".format(i+1, n_situations, name))

        extra_args = (n_actions,) if name in models_with_n_actions else ()
        model = model_class(env, step, *extra_args)
        model.train(**train_params[name])

        table_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
        model.save_action_table(table_path)


 10%|█         | 10/100 [00:00<00:00, 93.90it/s]

env: 10 of 10
model: baseline


100%|██████████| 100/100 [00:01<00:00, 80.74it/s]


# Analysis of results

Вопросы:

* как лучше представить результаты?
* добавить пороговые значения

In [6]:
from copy import copy

# One column for the no-maneuver run plus one per trained model;
# one row per situation, initialised to zero.
columns = ["without maneuvers"] + list(models)
data = np.zeros((n_situations, len(columns)))
template = pd.DataFrame(data=data, columns=columns)

# A separate copy of the zero-filled template for each recorded metric.
(
    results_reward,
    results_probability,
    results_fuel_cons,
    results_deviation,
) = (copy(template) for _ in range(4))

In [7]:
from tqdm import trange

# Simulations: run every agent (an empty TableAgent for "without maneuvers",
# otherwise the saved action table) on every environment and record the metrics.
collisions = []
result_tables = (
    results_reward,
    results_probability,
    results_fuel_cons,
    results_deviation,
)

for i in trange(n_situations):
    collision_name = "collision_{}".format(i)
    collisions.append(collision_name)
    env_path = save_dir_env + "/" + collision_name + ".env"
    env = read_environment(env_path)

    for name in columns:
        if name != "without maneuvers":
            actions_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
            agent = get_agent("table", actions_path)
        else:
            agent = TableAgent()

        simulator = Simulator(agent, env, step)
        r = simulator.run()

        # Metrics are read from the environment after the run, in the same
        # order as `result_tables`.
        metrics = (
            r,
            env.get_total_collision_probability(),
            env.get_fuel_consumption(),
            env.get_trajectory_deviation(),
        )
        for table, value in zip(result_tables, metrics):
            table.loc[i, name] = value

        # Reset the environment before the next agent is simulated on it.
        env.reset()

# Label the rows with the collision names instead of integer positions.
for table in result_tables:
    table.index = collisions

100%|██████████| 10/10 [00:00<00:00, 12.41it/s]


In [8]:
# Preview the first rows of the reward returned by `simulator.run()`
# (rows: collisions, columns: "without maneuvers" and each model).
results_reward.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_0,-93.260168,-4.706667,-1.335294,-0.138622
collision_1,-0.159315,-2.290528,-0.159315,-0.116179
collision_2,-310.345074,-1.999601,-0.944494,-0.515042
collision_3,-22.141288,-4.180831,-1.592509,-0.354784
collision_4,-170.846069,-5.003131,-1.423691,-0.304328


In [9]:
# Preview the first rows of `env.get_total_collision_probability()` per run
# (rows: collisions, columns: "without maneuvers" and each model).
results_probability.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_0,0.010226,0.0,1.5e-05,0.000121
collision_1,0.000816,9.852574e-06,0.000816,0.000265
collision_2,0.031935,1.855183e-13,7e-06,0.000342
collision_3,0.003114,0.0,7.9e-05,8.3e-05
collision_4,0.017985,0.0,3e-06,9.1e-05


In [10]:
# Preview the first rows of `env.get_fuel_consumption()` per run
# (rows: collisions, columns: "without maneuvers" and each model).
results_fuel_cons.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_0,0.0,2.444817,1.243966,0.10101
collision_1,0.0,1.425698,0.0,0.10101
collision_2,0.0,1.664654,0.782375,0.505051
collision_3,0.0,1.29745,1.388969,0.30303
collision_4,0.0,1.605035,1.40003,0.30303


In [11]:
# Preview the first rows of `env.get_trajectory_deviation()` per run
# (rows: collisions, columns: "without maneuvers" and each model).
results_deviation.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_0,0.0,2.26185,0.091275,0.03746
collision_1,0.0,0.86478,0.0,0.014527
collision_2,0.0,0.334901,0.162071,0.008602
collision_3,0.0,2.883381,0.20344,0.051649
collision_4,0.0,3.398096,0.023615,0.001185
