**If the sample environments and action tables already exist, the "Sample" and "Maneuvers" steps can be skipped.**

In [18]:
# TODO: create a new notebook that compares many model variants,
# e.g. "CE_collinear", "CE_reverse_complanar", ...
import os
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.ES import EvolutionStrategies
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.baseline import Baseline

In [44]:
# Experiment configuration.
n_situations = 20  # number of collision environments to generate and evaluate
step = 0.000001  # simulation step handed to every model and Simulator

# Models to train and compare. The keys are reused as file-name suffixes of
# the saved action tables and as column names of the result tables.
models = {
    "CE": CrossEntropy,
#     "ES": EvolutionStrategies,
#     "MCTS": DecisionTree,
    "baseline": Baseline,
}

# Output locations for the generated environments and trained action tables.
save_dir_env = "../../data/environments/generated_sample"
save_dir_maneuvers = "../../training/agents_tables/generated_sample"


# exist_ok=True creates a directory only when it is missing, which avoids the
# check-then-create race of a separate os.path.exists() test.
for directory in (save_dir_env, save_dir_maneuvers):
    os.makedirs(directory, exist_ok=True)

# Sample

In [45]:
# Generator settings shared by every sampled environment.
# Time window passed to Generator(start_time, end_time);
# presumably MJD2000 days — TODO confirm the unit.
start_time, end_time = 6600, 6602
# How many times add_debris() is called per generated environment.
n_debris = 2

In [46]:
# Build and save n_situations environments. Each one gets a single
# add_protected() call and n_debris add_debris() calls, and is written
# to save_dir_env as collision_<i>.env.
for i in range(n_situations):
    env_file = save_dir_env + "/collision_{}.env".format(i)

    generator = Generator(start_time, end_time)
    generator.add_protected()
    for _debris_idx in range(n_debris):
        generator.add_debris()

    generator.save_env(env_file)

# Maneuvers

In [47]:
# models and parameters
n_iterations = 100  # only referenced by the commented-out MCTS config below

# Per-model settings, keyed by the same names as `models`:
#   "init"      - handed to the model constructor,
#   "iteration" - unpacked as keyword arguments into model.train().
train_params = {}
# Idea to try: a small n_s (presumably n_sessions) but a large number of
# iterations (without shift and complanar).
train_params["CE"] = {
    "init": {
        "reverse": True,
        "first_maneuver_time": "early",
        "n_maneuvers": 2,
    },
    "iteration": {
        "n_sessions": 30,
        "dV_angle": "complanar",
        "early_stopping": True,
    },
}
# train_params["ES"] = {
#     "init": {
#         "reverse": True,
#     },
#     "iteration": {
#         "n_sessions": 200,
#     },
# }
# train_params["MCTS"] = {
#     "init": {},
#     "n_iterations": n_iterations,
# }
train_params["baseline"] = {
    "init": {
        # A real boolean, matching the CE entry: the former string "True"
        # only worked by being truthy, and "False" would be truthy as well.
        "reverse": True,
    },
    "iteration": {
        "n_sessions": 300,
    },
}

In [48]:
# training
from IPython.display import clear_output

# Train every selected model on every saved environment and store the
# resulting action table as collision_<i>_<model name>.csv.
for i in range(n_situations):
    env = read_environment(save_dir_env + "/collision_{}.env".format(i))
    for name, model_class in models.items():
        # wait=True postpones the clearing until new output is available.
        clear_output(wait=True)
        print("env: {} of {}\nmodel: {}".format(i+1, n_situations, name))

        params = train_params[name]
        # NOTE(review): the "init" dict is passed as ONE positional argument,
        # while "iteration" is unpacked with ** — confirm this matches the
        # model constructors' signatures.
        model = model_class(env, step, params["init"])
        model.train(**params["iteration"])

        table_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
        model.save_action_table(table_path)



  0%|          | 0/300 [00:00<?, ?it/s][A

env: 20 of 20
model: baseline



  1%|          | 2/300 [00:00<00:18, 16.25it/s][A
  1%|▏         | 4/300 [00:00<00:18, 16.14it/s][A
  2%|▏         | 6/300 [00:00<00:17, 16.42it/s][A
  3%|▎         | 8/300 [00:00<00:17, 16.59it/s][A
  3%|▎         | 10/300 [00:00<00:17, 16.75it/s][A
  4%|▍         | 12/300 [00:00<00:17, 16.88it/s][A
  5%|▍         | 14/300 [00:00<00:16, 16.85it/s][A
  5%|▌         | 16/300 [00:00<00:16, 16.77it/s][A
  6%|▌         | 18/300 [00:01<00:17, 16.55it/s][A
  7%|▋         | 20/300 [00:01<00:16, 16.53it/s][A
  7%|▋         | 22/300 [00:01<00:16, 16.48it/s][A
  8%|▊         | 24/300 [00:01<00:16, 16.38it/s][A
  9%|▊         | 26/300 [00:01<00:16, 16.43it/s][A
  9%|▉         | 28/300 [00:01<00:16, 16.47it/s][A
 10%|█         | 30/300 [00:01<00:16, 16.45it/s][A
 11%|█         | 32/300 [00:01<00:16, 16.47it/s][A
 11%|█▏        | 34/300 [00:02<00:16, 16.54it/s][A
 12%|█▏        | 36/300 [00:02<00:15, 16.57it/s][A
 13%|█▎        | 38/300 [00:02<00:15, 16.59it/s][A
 13%|█▎        

# Analysis of results

Вопросы:

* как лучше представить результаты?
* добавить пороговые значения

In [49]:
# One zero-filled float table per metric: a row per situation, and a column
# for the no-maneuver reference followed by one column per trained model.
columns = ["without maneuvers"]
columns.extend(models)  # iterating the dict yields the model names

data = np.zeros((n_situations, len(columns)))
template = pd.DataFrame(data=data, columns=columns)

# Independent copies, so filling one metric table never touches another.
results_reward, results_probability, results_fuel_cons = (
    template.copy() for _ in range(3)
)
#results_deviation = template.copy

In [50]:
from tqdm import trange
from IPython.display import clear_output

# Simulate every saved situation once per result column: first with a
# default TableAgent() ("without maneuvers"), then with each model's saved
# action table. Reward, collision probability and fuel use go into the
# corresponding result tables.
collisions = []
for i in trange(n_situations):
    collision_name = "collision_{}".format(i)
    collisions.append(collision_name)

    env_path = save_dir_env + "/" + collision_name + ".env"
    env = read_environment(env_path)

    for name in columns:
        clear_output(wait=True)
        if name != "without maneuvers":
            actions_path = save_dir_maneuvers + "/collision_{}_{}.csv".format(i, name)
            agent = get_agent("table", actions_path)
        else:
            agent = TableAgent()

        simulator = Simulator(agent, env, step)
        r = simulator.run()

        # The metrics are read from the environment after the run.
        results_reward.loc[i, name] = r
        results_probability.loc[i, name] = env.get_total_collision_probability()
        results_fuel_cons.loc[i, name] = env.get_fuel_consumption()
        #results_deviation.loc[i, name] = env.get_trajectory_deviation()

        # The same env object is reused for the next agent; presumably reset()
        # restores its initial state — TODO confirm.
        env.reset()

# Replace the positional row labels with the situation names.
for table in (results_reward, results_probability, results_fuel_cons):
    table.index = collisions
#results_deviation.index = collisions


100%|██████████| 20/20 [00:04<00:00,  4.62it/s][A
[A

In [51]:
# Preview the first rows of the reward table (values returned by simulator.run()).
results_reward.head()

Unnamed: 0,without maneuvers,CE,baseline
collision_0,-1477.421295,-0.858281,-1.540579
collision_1,-2477.843153,-0.051373,-0.198696
collision_2,-388.749433,-0.167346,-0.279617
collision_3,-1407.413053,-0.053533,-0.104706
collision_4,-1308.234854,-0.16344,-0.13934


In [52]:
# Preview the first rows of the env.get_total_collision_probability() table.
results_probability.head()

Unnamed: 0,without maneuvers,CE,baseline
collision_0,0.016505,0.0,1.028058e-05
collision_1,0.02762,3.656303e-08,1.919724e-06
collision_2,0.004408,0.0,5.118139e-11
collision_3,0.015727,7.925582e-10,3.83421e-07
collision_4,0.014625,0.0,1.851633e-08


In [53]:
# Preview the first rows of the env.get_fuel_consumption() table.
results_fuel_cons.head()

Unnamed: 0,without maneuvers,CE,baseline
collision_0,0.0,0.155824,0.234114
collision_1,0.0,0.442681,0.83612
collision_2,0.0,1.66985,0.033445
collision_3,0.0,0.498178,0.635452
collision_4,0.0,0.468232,0.234114


In [54]:
#results_deviation.head()

In [59]:
# Situations where the CE model earned a strictly higher reward than the
# baseline. To also count ES / MCTS wins, OR their own comparisons against
# the "baseline" column into the mask.
ce_beats_baseline = results_reward["CE"] > results_reward["baseline"]
beaten_baseline = results_reward.loc[ce_beats_baseline]
beaten_baseline

Unnamed: 0,without maneuvers,CE,baseline
collision_0,-1477.421295,-0.858281,-1.540579
collision_1,-2477.843153,-0.051373,-0.198696
collision_2,-388.749433,-0.167346,-0.279617
collision_3,-1407.413053,-0.053533,-0.104706
collision_5,-114.211298,-11.982464,-85.423511
collision_6,-80.954322,-0.614103,-0.820447
collision_7,-5.685711,-0.062223,-0.100794
collision_8,-2618.303212,-0.185197,-0.453718
collision_9,-2923.111121,-0.222433,-0.240118
collision_10,-8.007183,-0.245315,-0.390508


In [56]:
def print_collision_model_info(collision, model=None):
    """Simulate one saved collision environment and print the run report.

    Args:
        collision: Environment name without extension, e.g. "collision_0".
            The file "<collision>.env" is read from ``save_dir_env``.
        model: Optional model name, e.g. "CE". When given, the action table
            "<collision>_<model>.csv" from ``save_dir_maneuvers`` is loaded
            into a table agent. When omitted (or empty), a default
            ``TableAgent()`` is used instead.

    Returns:
        None. The report is printed by ``simulator.run(print_out=True)``.
    """
    env_path = os.path.join(save_dir_env, collision + ".env")
    env = read_environment(env_path)
    if model:
        actions_path = os.path.join(
            save_dir_maneuvers, collision + "_" + model + ".csv")
        agent = get_agent("table", actions_path)
    else:
        agent = TableAgent()
    simulator = Simulator(agent, env, step)
    simulator.run(print_out=True)

In [57]:
# Print the simulation report for situation "collision_0" driven by the CE action table.
print_collision_model_info("collision_0", "CE")

Simulation started.

Start time: 6600.0 	 End time: 6602.0 	 Simulation step:1e-06

Protected SpaceObject:
Planet Name: PROTECTED
Own gravity parameter: 0.10000000000000001
Central body gravity parameter: 398600441800000
Planet radius: 38.781791315186389
Planet safe radius: 38.781791315186389
Keplerian planet elements: 
Semi major axis (AU): 5.1690231882481188e-05
Eccentricity: 0.00059492203509212203
Inclination (deg.): 72.665986782873517
Big Omega (deg.): 21.991823164589434
Small omega (deg.): 276.35543266385383
Mean anomaly (deg.): 310.54494135543132
Elements reference epoch: 2018-Jan-26 00:00:00
Ephemerides type: Keplerian
r at ref. = [-4272763.2221870366, -3537608.0823124615, -5383122.6400755178]
v at ref. = [5408.5032851461456, 606.62585769039651, -4686.8986270615278]

Debris objects:

Planet Name: DEBRIS0
Own gravity parameter: 0.10000000000000001
Central body gravity parameter: 398600441800000
Planet radius: 0.74078715601596756
Planet safe radius: 0.74078715601596756
Keplerian p

# Tuning using CE