**If the sample environments and the action tables have already been generated, the "Sample" and "Maneuvers" steps can be skipped.**

In [1]:
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.ES import EvolutionStrategies
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.baseline import Baseline

In [2]:
import os

# Number of collision situations that are generated, trained on and simulated.
n_situations = 10
# Value passed as the `step` argument to every model and to the Simulator.
step = 1e-6

# Model name -> model class. The names are also used as result-table columns
# and as suffixes of the saved action-table files.
models = dict(
    CE=CrossEntropy,
    ES=EvolutionStrategies,
    MCTS=DecisionTree,
    baseline=Baseline,
)

# Output directories: generated environment files and per-model action tables.
save_dir_env = "../../data/environments/generated_sample"
save_dir_maneuvers = "../../training/agents_tables/generated_sample"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.exists() test.
for directory in (save_dir_env, save_dir_maneuvers):
    os.makedirs(directory, exist_ok=True)

# Sample

In [3]:
# Generator parameters: the two time bounds handed to Generator(start_time, end_time)
# and the number of add_debris() calls made per environment.
start_time, end_time = 6600, 6600.1
n_debris = 2

In [4]:
# Sample of environments: for every situation a fresh Generator gets one
# add_protected() call and n_debris add_debris() calls, then is saved to its own file.
for situation_id in range(n_situations):
    env_generator = Generator(start_time, end_time)
    env_generator.add_protected()
    for _debris in range(n_debris):
        env_generator.add_debris()
    env_file_name = "/collision_{}.env".format(situation_id)
    env_generator.save_env(save_dir_env + env_file_name)

# Maneuvers

In [5]:
# models and parameters
n_actions = 3  # for CE and ES
n_iterations = 100

# Keyword arguments forwarded to model.train(), keyed by model name.
# Every model receives the same iteration budget under its own keyword.
train_params = {
    "CE": {"n_iterations": n_iterations, "n_sessions": 30},
    "ES": {"iterations": n_iterations},
    "MCTS": {"n_iterations": n_iterations},
    "baseline": {"n_samples": n_iterations},
}

In [6]:
# training
from IPython.display import clear_output

# Train every model on every saved environment and store its action table
# as collision_<situation>_<model>.csv.
for i in range(n_situations):
    env = read_environment(save_dir_env + "/collision_{}.env".format(i))
    for name, model_class in models.items():
        # Replace the previous progress message instead of stacking lines.
        clear_output(True)
        print("env: {} of {}\nmodel: {}".format(i+1, n_situations, name))
        # CE and ES take the number of actions as an extra constructor argument.
        extra_args = (n_actions,) if name in ("CE", "ES") else ()
        model = model_class(env, step, *extra_args)
        model.train(**train_params[name])
        table_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
        model.save_action_table(table_path)


# Analysis of results

Вопросы:

* как лучше представить результаты?
* добавить пороговые значения

In [14]:
from copy import copy

# The "without maneuvers" column plus one column per model name.
columns = ["without maneuvers"]
columns.extend(models)

# Zero-filled frame with one row per situation; each metric table below
# starts as its own independent copy of it.
data = np.zeros((n_situations, len(columns)))
template = pd.DataFrame(data=data, columns=columns)

results_reward, results_probability, results_fuel_cons, results_deviation = (
    template.copy() for _ in range(4)
)

In [15]:
from tqdm import trange
from IPython.display import clear_output

# simulations
# Rows are labelled before the loop and written by label so the cell stays
# re-runnable: integer .loc keys on frames that already carry the
# "collision_<i>" labels would append new rows instead of overwriting them.
collisions = ["collision_{}".format(i) for i in range(n_situations)]
for table in (results_reward, results_probability, results_fuel_cons, results_deviation):
    table.index = collisions

for i in trange(n_situations):
    collision = collisions[i]
    env_path = save_dir_env + "/" + collision + ".env"
    env = read_environment(env_path)

    for name in columns:
        clear_output(True)
        if name == "without maneuvers":
            # Reference run: a TableAgent constructed without an action table.
            agent = TableAgent()
        else:
            actions_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
            agent = get_agent("table", actions_path)

        simulator = Simulator(agent, env, step)
        r = simulator.run()

        # Metrics are read from the environment after the run, before it is reset.
        results_reward.loc[collision, name] = r
        results_probability.loc[collision, name] = env.get_total_collision_probability()
        results_fuel_cons.loc[collision, name] = env.get_fuel_consumption()
        results_deviation.loc[collision, name] = env.get_trajectory_deviation()

        # Reset so the next agent is simulated on the same environment object.
        env.reset()

100%|██████████| 10/10 [00:01<00:00,  9.26it/s]


In [16]:
# First five situations: value returned by Simulator.run() for each agent.
results_reward.head(n=5)

Unnamed: 0,without maneuvers,CE,ES,MCTS,baseline
collision_0,-93.260168,-4.090741,-55.849232,-17.865304,-1.135087
collision_1,-0.159315,-2.190899,-44.443494,-0.159315,-1.183781
collision_2,-310.345074,-145.821036,-21.430439,-109.784236,-5.046067
collision_3,-22.141288,-3.04391,-10.415201,-20.198665,-3.039867
collision_4,-170.846069,-83.23627,-8.576483,-170.846069,-3.590721


In [17]:
# First five situations: env.get_total_collision_probability() after each run.
results_probability.head(n=5)

Unnamed: 0,without maneuvers,CE,ES,MCTS,baseline
collision_0,0.010226,0.0,0.0,0.002477,0.000129
collision_1,0.000816,0.000168,0.0,0.000816,0.000271
collision_2,0.031935,0.014918,4.390377e-09,0.011799,0.000342
collision_3,0.003114,0.00086,0.0,0.0029,8.3e-05
collision_4,0.017985,0.008676,0.0006959607,0.017985,9.1e-05


In [18]:
# First five situations: env.get_fuel_consumption() after each run.
results_fuel_cons.head(n=5)

Unnamed: 0,without maneuvers,CE,ES,MCTS,baseline
collision_0,0.0,0.344011,3.386798,0.243528,0.050505
collision_1,0.0,0.532242,4.620355,0.0,0.050505
collision_2,0.0,0.463966,3.610272,0.129861,0.252525
collision_3,0.0,0.560499,5.925263,0.020978,0.151515
collision_4,0.0,0.350619,1.715002,0.0,0.151515


In [19]:
# First five situations: env.get_trajectory_deviation() after each run.
results_deviation.head(n=5)

Unnamed: 0,without maneuvers,CE,ES,MCTS,baseline
collision_0,0.0,3.74673,52.462434,1.856463,1.084418
collision_1,0.0,1.658414,39.823139,0.0,1.132596
collision_2,0.0,5.172324,17.820122,0.661201,4.792154
collision_3,0.0,2.235847,4.489938,0.179263,2.888247
collision_4,0.0,5.125747,6.813665,0.0,3.439093


In [30]:
# Situations where at least one of CE, ES or MCTS got a higher reward than the baseline.
baseline_reward = results_reward["baseline"]
beats_baseline = (
    (results_reward["CE"] > baseline_reward)
    | (results_reward["ES"] > baseline_reward)
    | (results_reward["MCTS"] > baseline_reward)
)
results_reward[beats_baseline]

Unnamed: 0,without maneuvers,CE,ES,MCTS,baseline
collision_1,-0.159315,-2.190899,-44.443494,-0.159315,-1.183781
collision_5,-6.7e-05,-3.15056,-9.543171,-6.7e-05,-1.054749
collision_6,-1.190747,-1.093645,-11.56487,-1.190747,-1.128734
collision_9,-287.341829,-6.580076,-12.323495,-3.003584,-10.329886
