**If the sample environments and the action tables have already been generated, the "Sample" and "Maneuvers" steps can be skipped.**

___
TODO:

* class Collision? to utils?

* create a new notebook with many models, such as "CE_collinear", "CE_reverse_complanar"...

* look at different thresholds

* experiments with reward_function

* enable all models

In [1]:
import os
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.ES import EvolutionStrategies
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.baseline import Baseline

from IPython.display import clear_output
from tqdm import trange
from pprint import pprint

In [2]:
# Number of collision situations (environments) to generate and evaluate.
n_situations = 20
# Step value forwarded to every model constructor and to the Simulator.
step = 0.000001

# Models to train, keyed by the short name that is used in the action-table
# file names and in the result-table column names.
models = {
    "CE": CrossEntropy,
#     "ES": EvolutionStrategies,
#     "MCTS": DecisionTree,
    "baseline": Baseline,
}

# Output directories: generated environments and trained action tables.
save_dir_env = "../../data/environments/generated_sample"
save_dir_maneuvers = "../../training/agents_tables/generated_sample"

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
for directory in (save_dir_env, save_dir_maneuvers):
    os.makedirs(directory, exist_ok=True)

In [3]:
# Per-collision bookkeeping: the path of its environment file and, for every
# model, the path where that model's action table is saved.
collisions = {}
for i in range(n_situations):
    name = f"collision_{i}"
    collisions[name] = {
        "env_path": os.path.join(save_dir_env, name + ".env"),
        "models_path": {
            model_name: os.path.join(
                save_dir_maneuvers, f"{name}_{model_name}.env")
            for model_name in models
        },
    }

# Sample

In [4]:
# Generator settings: the (start_time, end_time) pair handed to Generator and
# the number of debris objects added to every generated environment.
start_time, end_time = 6600, 6602
n_debris = 2

In [5]:
# Generate one environment per collision (one protected object plus n_debris
# debris objects) and save it under the collision's "env_path".
for name, collision in collisions.items():
    generator = Generator(start_time, end_time)
    generator.add_protected()
    for _ in range(n_debris):
        generator.add_debris()
    target_path = collision["env_path"]
    generator.save_env(target_path)

# Maneuvers

In [7]:
# Models and parameters.
# For every model name, "init" is passed to the model constructor as a single
# dict and "train" is unpacked into keyword arguments of model.train().

train_params = {}
# TODO: try a small n_s but a large number of iterations
# (without shift and coplanar).
train_params["CE"] = {
    "init": {
        "reverse": True,
        "first_maneuver_time":"early",
        "n_maneuvers": 2,
    },
    "train": {
        "n_iterations": 100,
        "n_sessions": 30, 
        "dV_angle": "complanar",
        "early_stopping": True,
    },
}
# train_params["ES"] = {
#     "init": {
#         "reverse": "True",
#     },
#     "iteration": {
#         "n_sessions": 200,
#     },
# }
# NOTE(review): n_iterations is not defined in the cells above; define it
# before re-enabling the MCTS parameters.
# train_params["MCTS"] = {
#     "init": {},
#     "n_iterations": n_iterations,
# }
train_params["baseline"] = {
    "init": {
        # A real boolean, consistent with the CE settings. The previous string
        # "True" only worked by being truthy ("False" would be truthy as well).
        "reverse": True,
    },
    "train": {
        "n_sessions": 300,
    },
}

In [8]:
# Train every selected model on every collision environment and save the
# resulting action table to the path prepared for that (collision, model) pair.
n_collisions = len(collisions)
for i, (collision_name, collision) in enumerate(collisions.items()):
    env = read_environment(collision["env_path"])
    for model_name, model_class in models.items():
        # Replace the previous progress output instead of appending to it.
        clear_output(True)
        print("env: {} of {}\nmodel: {}".format(
            i+1, n_collisions, model_name))
        params = train_params[model_name]
        model = model_class(env, step, params["init"])
        model.train(**params["train"])
        model.save_action_table(collision["models_path"][model_name])


  0%|          | 0/300 [00:00<?, ?it/s]

env: 20 of 20
model: baseline


100%|██████████| 300/300 [00:17<00:00, 17.06it/s]


# Results

In [9]:
def collision_info(env, agent, model_name):
    """Run a simulation of ``agent`` in ``env`` and tabulate the outcome.

    Args:
        env: environment to simulate; it is reset before the function returns.
        agent: agent passed to the Simulator together with ``env``.
        model_name (str): prefix of the two result columns.

    Returns:
        pd.DataFrame: one row per metric (collision probability, fuel
            consumption, trajectory deviation components) and two columns,
            "<model_name> value" and "<model_name> reward".
    """
    index = [
        "coll prob", "fuel (|dV|)",
        "dev a (m)", "dev e", "dev i (rad)",
        "dev W (rad)", "dev w (rad)", "dev M (rad)",
    ]
    # The trajectory-deviation components fill every row after the first two.
    deviation_rows = index[2:]
    v = model_name + " value"
    r = model_name + " reward"
    df = pd.DataFrame(index=index, columns=[v, r])
    simulator = Simulator(agent, env, step)
    simulator.run()

    # Assign through a single .loc call: chained indexing (df[v][row] = x) may
    # write to a temporary object and is a no-op under pandas copy-on-write.
    df.loc["coll prob", v] = env.get_total_collision_probability()
    df.loc["fuel (|dV|)", v] = env.get_fuel_consumption()
    df.loc[deviation_rows, v] = env.get_trajectory_deviation()
    reward_components = env.get_reward_components()
    df.loc["coll prob", r] = reward_components["coll_prob"]
    df.loc["fuel (|dV|)", r] = reward_components["fuel"]
    df.loc[deviation_rows, r] = reward_components["traj_dev"]

    # Reset the environment after the run, before handing it back to the caller.
    env.reset()
    return df

In [10]:
def env_thresholds(env):
    """Collect the thresholds stored on ``env`` into a one-column table.

    Args:
        env: object exposing ``coll_prob_thr``, ``fuel_cons_thr`` and the
            iterable ``traj_dev_thr``.

    Returns:
        pd.DataFrame: a single "threshold" column with one row per metric.
            Missing thresholds are left as they are (no placeholder is
            substituted).
    """
    row_labels = [
        "coll prob", "fuel (|dV|)",
        "dev a (m)", "dev e", "dev i (rad)",
        "dev W (rad)", "dev w (rad)", "dev M (rad)",
    ]
    thresholds = [env.coll_prob_thr, env.fuel_cons_thr, *env.traj_dev_thr]
    return pd.DataFrame(thresholds, index=row_labels, columns=["threshold"])

In [11]:
# For every collision, build a table holding the thresholds followed by the
# value/reward columns of the "without maneuvers" run and of each saved model.
for collision in collisions.values():
    env = read_environment(collision["env_path"])
    thresholds = env_thresholds(env)
    # Default-constructed TableAgent, reported as "without maneuvers".
    empty_agent = TableAgent()
    result_table = thresholds.join(
        collision_info(env, empty_agent, "without maneuvers"))
    for model_name, model_path in collision["models_path"].items():
        agent = get_agent("table", model_path)
        model_columns = collision_info(env, agent, model_name)
        result_table = result_table.join(model_columns)
    collision["result_table"] = result_table



In [12]:
# Display the result table of the first collision as an example.
collisions["collision_0"]["result_table"]

Unnamed: 0,threshold,without maneuvers value,without maneuvers reward,CE value,CE reward,baseline value,baseline reward
coll prob,0.0001,0.010226,-912.342,8.00753e-08,-0.000800753,1.58675e-08,-0.000158675
fuel (|dV|),10.0,0.0,-0.0,0.0326906,-0.00326906,0.100334,-0.0100334
dev a (m),100.0,0.0,-0.0,9.71026,-0.0971026,17.7254,-0.177254
dev e,0.01,-0.0,-0.0,-1e-06,-0.0001,-1e-06,-0.0001
dev i (rad),0.01,0.0,-0.0,0.0,-0.0,0.0,-0.0
dev W (rad),0.01,0.0,-0.0,0.0,-0.0,0.0,-0.0
dev w (rad),0.01,0.0,-0.0,-0.000645,-0.0645,0.002342,-0.2342
dev M (rad),,-0.0,0.0,0.000616,0.0,-0.002214,0.0


# Analysis of results

### Rewards

In [13]:
# Total reward (sum over all reward components) of every model on every
# collision; after the transpose, rows are collisions and columns are models.
model_names = ["without maneuvers", *models]
results_reward = pd.DataFrame(index=model_names, columns=collisions)
for collision_name, collision in collisions.items():
    table = collision["result_table"]
    for model_name in model_names:
        total_reward = np.sum(table[model_name + " reward"])
        results_reward.loc[model_name, collision_name] = total_reward
results_reward = results_reward.T
results_reward.head()

Unnamed: 0,without maneuvers,CE,baseline
collision_0,-912.342,-0.165772,-0.421747
collision_1,-65.4681,-0.253552,-0.416452
collision_2,-2866.11,-0.932951,-3.12509
collision_3,-272.272,-0.149049,-0.978543
collision_4,-1610.61,-0.0855042,-11.6789


In [14]:
# Preview the first rows of the per-collision total rewards.
results_reward.head()

Unnamed: 0,without maneuvers,CE,baseline
collision_0,-912.342,-0.165772,-0.421747
collision_1,-65.4681,-0.253552,-0.416452
collision_2,-2866.11,-0.932951,-3.12509
collision_3,-272.272,-0.149049,-0.978543
collision_4,-1610.61,-0.0855042,-11.6789


In [15]:
# Collisions in which the RL model reaches a strictly higher total reward
# than the baseline.
rl_is_better = (
    (results_reward["baseline"] < results_reward["CE"])
#    | (results_reward["baseline"] < results_reward["ES"])
#    | (results_reward["baseline"] < results_reward["MCTS"])
)
beaten_baseline = results_reward[rl_is_better]
print(
    f"RL better than Baseline in {beaten_baseline.shape[0]} out of {len(collisions)} cases."
)
beaten_baseline

RL better than Baseline in 19 out of 20 cases.


Unnamed: 0,without maneuvers,CE,baseline
collision_0,-912.342,-0.165772,-0.421747
collision_1,-65.4681,-0.253552,-0.416452
collision_2,-2866.11,-0.932951,-3.12509
collision_3,-272.272,-0.149049,-0.978543
collision_4,-1610.61,-0.0855042,-11.6789
collision_5,-0.394703,-0.0699706,-0.394703
collision_6,-83.7167,-0.0295319,-0.308108
collision_7,-363.039,-0.118988,-0.220543
collision_8,-304.607,-0.097092,-0.337395
collision_9,-2659.08,-0.169165,-0.282439


### Thresholds

In [16]:
def appropriate_models(result_table):
    """Return the names of the models which stay within every threshold.

    Args:
        result_table (pd.DataFrame): table with a "threshold" column and any
            number of "<name> value" columns; other columns are ignored.

    Returns:
        list of str: the "<name>" part of every value column whose absolute
            values do not exceed the threshold in any row. A missing (NaN)
            threshold is treated as "no limit".
    """
    cols = [c for c in result_table.columns if c.split()[-1] == "value"]
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    # A missing threshold becomes +inf so that it can never be exceeded.
    thr = result_table[["threshold"]].fillna(value=np.inf).to_numpy(dtype=float)
    val = np.abs(result_table.loc[:, cols].to_numpy(dtype=float))
    # thr has shape (n_rows, 1) and broadcasts against every value column.
    within = np.all(thr >= val, axis=0)
    return [" ".join(c.split()[:-1]) for c, ok in zip(cols, within) if ok]

In [17]:
# Count the collisions for which at least one value column (the
# "without maneuvers" run included) stays within every threshold.
n_overcomed_thresholds = 0
for collision in collisions.values():
    m = appropriate_models(collision["result_table"])
    if m:
        n_overcomed_thresholds += 1
# Report the counter computed above; the message previously printed
# beaten_baseline.shape[0], a copy-paste slip from the rewards section.
print(
    f"The thresholds are overcomed in {n_overcomed_thresholds} out of {len(collisions)} cases."
)

The thresholds are overcomed in 19 out of 20 cases.


# Experiments

### Tuning using CE