___
TODO:

* create new notebook with a lot of models like "CE_collinear", "CE_reverse_complanar", "CE_complanar_after_collinear"...

* try different thresholds

* experiment with reward_function

In [1]:
import os
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.ES import EvolutionStrategies
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.baseline import Baseline

from IPython.display import clear_output
from tqdm import trange
from pprint import pprint

In [2]:
# Number of collision situations (environment files) handled by this notebook.
n_situations = 20
# Step handed to every model constructor and to Simulator
# (presumably the propagation time step -- TODO confirm units).
step = 0.000001

# Model name -> model class. The names are reused in action-table file names
# and in the column labels of the result tables.
models = {
    "CE": CrossEntropy,
    "MCTS": DecisionTree,
    "baseline": Baseline,
}

# Directories holding the environment files and the saved action tables.
save_dir_env = "../../data/environments/generated_sample"
save_dir_maneuvers = "../../training/agents_tables/generated_sample"

# exist_ok=True makes directory creation idempotent without a separate
# exists() check, and raises if the path exists but is not a directory
# instead of silently skipping it.
for directory in (save_dir_env, save_dir_maneuvers):
    os.makedirs(directory, exist_ok=True)

In [3]:
# Per-situation bookkeeping: path of the environment file plus, for every
# model, the path of its action table.
collisions = {}
for i in range(n_situations):
    name = f"collision_{i}"
    collisions[name] = {
        "env_path": os.path.join(save_dir_env, name + ".env"),
        "models_path": {
            model_name: os.path.join(
                save_dir_maneuvers, f"{name}_{model_name}.csv")
            for model_name in models
        },
    }

**If the sample environments and the action tables already exist, the "Sample" and "Maneuvers" steps can be skipped.**

# Sample

In [4]:
# generator parameters

# start_time = 6600
# end_time = 6602
# n_debris = 2

In [5]:
# sample of environments

# for name in collisions:
#     generator = Generator(start_time, end_time)
#     generator.add_protected()
#     for _ in range(n_debris):
#         generator.add_debris()
#     generator.save_env(
#         collisions[name]["env_path"])

# Maneuvers

In [6]:
# parameters

# For every model name: keyword arguments for the model constructor ("init")
# and for its train() call ("train").
train_params = {
    "CE": {
        "init": dict(
            reverse=True,
            first_maneuver_time="early",
            n_maneuvers=2,
        ),
        "train": dict(
            n_iterations=100,
            n_sessions=30,
            dV_angle="complanar",
            early_stopping=True,
        ),
    },
    "MCTS": {
        "init": dict(max_time_to_req=1),
        "train": dict(n_iterations=100, n_steps_ahead=1),
    },
    "baseline": {
        "init": dict(reverse=True),
        "train": dict(n_sessions=1000),
    },
}

In [7]:
# training

# for i, collision_name in enumerate(collisions):
#     env = read_environment(collisions[collision_name]["env_path"])
#     for model_name in models:
#         clear_output(True)
#         print("env: {} of {}\nmodel: {}".format(
#             i+1, len(collisions), model_name))
#         model = models[model_name](env, step, **train_params[model_name]["init"])
#         model.train(**train_params[model_name]["train"])
#         model.save_action_table(
#             collisions[collision_name]["models_path"][model_name])

# Results

In [8]:
def collision_info(env, agent, model_name):
    """Simulate `agent` in `env` and tabulate the resulting metrics.

    Runs ``Simulator(agent, env, step)`` (``step`` is the notebook-level
    setting), reads the collision probability, fuel consumption, trajectory
    deviation and reward components from `env`, then resets `env`.

    Args:
        env: environment to simulate in; it is reset before returning.
        agent: agent whose actions are simulated.
        model_name: label used as prefix of the two column names.

    Returns:
        pd.DataFrame indexed by metric name with the columns
        "<model_name> value" and "<model_name> reward" (in that order).
    """
    index = [
        "coll prob", "fuel (|dV|)",
        "dev a (m)", "dev e", "dev i (rad)",
        "dev W (rad)", "dev w (rad)", "dev M (rad)",
    ]
    # Rows after the first two hold the per-element trajectory deviation.
    deviation_rows = index[2:]
    v = model_name + " value"
    r = model_name + " reward"
    df = pd.DataFrame(index=index, columns=[v, r])
    simulator = Simulator(agent, env, step)
    simulator.run()

    # Assign through .loc: chained indexing (df[col][row] = ...) writes to an
    # intermediate object and is not guaranteed to update `df` itself.
    df.loc["coll prob", v] = env.get_total_collision_probability()
    df.loc["fuel (|dV|)", v] = env.get_fuel_consumption()
    df.loc[deviation_rows, v] = env.get_trajectory_deviation()
    reward_components = env.get_reward_components()
    df.loc["coll prob", r] = reward_components["coll_prob"]
    df.loc["fuel (|dV|)", r] = reward_components["fuel"]
    df.loc[deviation_rows, r] = reward_components["traj_dev"]

    env.reset()
    return df

In [9]:
def env_thresholds(env):
    """Return the thresholds of `env` as a one-column DataFrame.

    The single column is named "threshold". Rows are the collision
    probability threshold, the fuel consumption threshold and then the
    elements of ``env.traj_dev_thr``, labelled like the result tables.
    """
    labels = ["coll prob", "fuel (|dV|)"]
    labels += ["dev a (m)", "dev e", "dev i (rad)"]
    labels += ["dev W (rad)", "dev w (rad)", "dev M (rad)"]
    thresholds = [env.coll_prob_thr, env.fuel_cons_thr, *env.traj_dev_thr]
    return pd.DataFrame(data=thresholds, index=labels, columns=["threshold"])

In [10]:
# Build one result table per situation: the thresholds column, then
# value/reward columns for an empty TableAgent ("without maneuvers"), then
# value/reward columns for every saved action table.
for collision in collisions.values():
    env = read_environment(collision["env_path"])
    table = env_thresholds(env).join(
        collision_info(env, TableAgent(), "without maneuvers")
    )
    for model_name, model_path in collision["models_path"].items():
        model_columns = collision_info(
            env, get_agent("table", model_path), model_name)
        table = table.join(model_columns)
    collision["result_table"] = table



In [11]:
# Display the result table stored for collision_0.
collisions["collision_0"]["result_table"]

Unnamed: 0,threshold,without maneuvers value,without maneuvers reward,CE value,CE reward,MCTS value,MCTS reward,baseline value,baseline reward
coll prob,0.0001,0.010226,-912.342,0.0,-0.0,0.0,-0.0,1.86969e-07,-0.00186969
fuel (|dV|),10.0,0.0,-0.0,0.233197,-0.0233197,0.289525,-0.0289525,0.0900901,-0.00900901
dev a (m),100.0,0.0,-0.0,13.1463,-0.131463,75.4434,-0.754434,-15.9117,-0.159117
dev e,0.01,-0.0,-0.0,-1e-06,-0.0001,-2e-06,-0.0002,0.0,-0.0
dev i (rad),0.01,0.0,-0.0,3e-06,-0.0003,-3.1e-05,-0.0031,0.0,-0.0
dev W (rad),0.01,0.0,-0.0,-4e-06,-0.0004,-6.5e-05,-0.0065,0.0,-0.0
dev w (rad),0.01,0.0,-0.0,0.003559,-0.3559,0.01098,-1.882,-0.002103,-0.2103
dev M (rad),,-0.0,0.0,-0.003802,0.0,-0.011078,0.0,0.001988,0.0


# Analysis of results

### Rewards

In [12]:
# Total reward (sum of all reward components) per situation and model.
# Rows are collision situations, columns are model names.
model_names = ["without maneuvers"] + list(models)
# Built in one go with an explicit float dtype: an empty frame filled cell by
# cell stays object-dtype, which some pandas versions reject in reductions
# such as idxmax(). The comprehension variables also avoid rebinding the
# notebook-level `model_name`.
results_reward = pd.DataFrame(
    [
        [collision["result_table"][name + " reward"].sum() for name in model_names]
        for collision in collisions.values()
    ],
    index=list(collisions),
    columns=model_names,
    dtype=float,
)
results_reward.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_0,-912.342,-0.511483,-2.67519,-0.380296
collision_1,-65.4681,-0.653607,-5.39899,-0.293119
collision_2,-2866.11,-0.662706,-20.0759,-2.89897
collision_3,-272.272,-0.437669,-30.9589,-0.972916
collision_4,-1610.61,-0.52788,-0.979101,-11.594


In [13]:
# best model for collision situation
# For every row, the column label (model name) with the largest total reward.
best_model = results_reward.idxmax(axis="columns")
print("Best model:", best_model, sep="\n\n")

Best model:

collision_0     baseline
collision_1     baseline
collision_2           CE
collision_3           CE
collision_4           CE
collision_5           CE
collision_6     baseline
collision_7     baseline
collision_8     baseline
collision_9           CE
collision_10    baseline
collision_11    baseline
collision_12    baseline
collision_13          CE
collision_14    baseline
collision_15    baseline
collision_16    baseline
collision_17    baseline
collision_18          CE
collision_19    baseline
dtype: object


In [14]:
# where RL is better than baseline
# Keep the situations whose best model is anything other than "baseline".
not_baseline = best_model != "baseline"
idx = best_model.index[not_baseline.values]
beaten_baseline = results_reward.loc[idx]
n_beaten = len(beaten_baseline)
print(f"RL better is than Baseline in {n_beaten} out of {len(collisions)} cases.")
beaten_baseline

RL better is than Baseline in 7 out of 20 cases.


Unnamed: 0,without maneuvers,CE,MCTS,baseline
collision_2,-2866.11,-0.662706,-20.0759,-2.89897
collision_3,-272.272,-0.437669,-30.9589,-0.972916
collision_4,-1610.61,-0.52788,-0.979101,-11.594
collision_5,-0.394703,-0.112809,-0.394703,-0.394703
collision_9,-2659.08,-0.221765,-59.5613,-0.273986
collision_13,-104.042,-0.715547,-25.4164,-1.02039
collision_18,-1595.5,-1.07864,-87.2993,-28.2579


### Thresholds

In [15]:
def appropriate_models(result_table):
    """Return the names of the models whose values satisfy every threshold.

    A model qualifies when, for each row of `result_table`, the absolute
    value in its "<name> value" column does not exceed the row's
    "threshold". Rows with a missing threshold are treated as unconstrained.

    Args:
        result_table: DataFrame with a "threshold" column and any number of
            "<name> value" columns (other columns are ignored).

    Returns:
        List of model names, in column order.
    """
    cols = [c for c in result_table.columns if c.split()[-1] == "value"]
    # `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    # Double brackets keep the thresholds as an (n_rows, 1) column so the
    # comparison broadcasts across all value columns.
    thr = result_table[["threshold"]].astype(float).fillna(np.inf).values
    val = np.abs(result_table.loc[:, cols].astype(float).values)
    within = np.all(thr >= val, axis=0)
    return [" ".join(c.split()[:-1]) for c, ok in zip(cols, within) if ok]

In [16]:
# List, per situation, the models that satisfy all thresholds and count the
# situations where at least one model does.
print("Thresholds are overcomed:\n")
passing_models = {
    name: appropriate_models(collision["result_table"])
    for name, collision in collisions.items()
}
for name, m in passing_models.items():
    print(f"{name}: {m}")
n_overcomed_thresholds = sum(1 for m in passing_models.values() if m)
print(
    f"\nThe thresholds are overcomed in {n_overcomed_thresholds} out of {len(collisions)} cases."
)

Thresholds are overcomed:

collision_0: ['CE', 'baseline']
collision_1: ['CE', 'baseline']
collision_2: ['CE']
collision_3: ['CE', 'baseline']
collision_4: ['CE', 'MCTS']
collision_5: ['without maneuvers', 'CE', 'MCTS', 'baseline']
collision_6: ['CE', 'baseline']
collision_7: ['CE', 'baseline']
collision_8: ['MCTS', 'baseline']
collision_9: ['CE', 'baseline']
collision_10: ['MCTS', 'baseline']
collision_11: ['baseline']
collision_12: []
collision_13: ['CE', 'baseline']
collision_14: ['baseline']
collision_15: ['MCTS', 'baseline']
collision_16: ['CE', 'baseline']
collision_17: ['baseline']
collision_18: ['CE']
collision_19: ['CE', 'baseline']

The thresholds are overcomed in 19 out of 20 cases.


# Experiments - tuning using CE

In [17]:
# Register, for every situation, the action-table path of the baseline
# tuned by CE.
model_name = "baseline_tuned_by_CE"
for collision_name in collisions:
    tuned_path = os.path.join(
        save_dir_maneuvers, f"{collision_name}_{model_name}.csv"
    )
    collisions[collision_name]["models_path"][model_name] = tuned_path

### For one collision situation to observe the process

In [18]:
# parameters
# Keyword arguments for the CE model constructor ("init") and for its
# train() call ("train") used in the tuning experiment.
CE_tuning_params = {
    "init": dict(
        reverse=True,
        first_maneuver_time="early",
        n_maneuvers=2,
        percentile=90,
    ),
    "train": dict(
        n_iterations=100,
        print_out=True,
        n_sessions=30,
        sigma_decay=0.9,
        percentile_growth=1.01,
        dV_angle="complanar",
        early_stopping=True,
    ),
}

In [19]:
# for collision_0 

# env = read_environment(collisions["collision_0"]["env_path"])
# CE_model = models["CE"](env, step, **CE_tuning_params["init"])
# CE_model.set_action_table_from_path(
#     collisions["collision_0"]["models_path"]["baseline"]
# )
# CE_model.train(**CE_tuning_params["train"])
# CE_model.save_action_table(collisions["collision_0"]["models_path"][model_name])

### for others to observe the results

In [20]:
# Turn off the training print-out for the remaining situations.
CE_tuning_params["train"].update(print_out=False)

In [21]:
# for collision_name, collision in collisions.items():
#     if collision_name != "collision_0":
#         env = read_environment(collision["env_path"])
#         clear_output(True)
#         print(collision_name)
#         CE_model = models["CE"](env, step, **CE_tuning_params["init"])
#         CE_model.set_action_table_from_path(
#             collision["models_path"]["baseline"]
#         )
#         CE_model.train(**CE_tuning_params["train"])
#         CE_model.save_action_table(collision["models_path"][model_name])

### Results

In [22]:
# add to result table
# Adds (or overwrites, when the cell is re-run) the value/reward columns of
# the tuned model in every situation's result table.
tuned_columns = [f"{model_name} value", f"{model_name} reward"]
for collision in collisions.values():
    env = read_environment(collision["env_path"])
    agent = get_agent("table", collision["models_path"][model_name])
    # Pass the variable, not the literal string "model_name": the returned
    # frame then carries the intended column labels, and the assignment no
    # longer depends on positional column matching.
    tuned_info = collision_info(env, agent, model_name)
    collision["result_table"][tuned_columns] = tuned_info[tuned_columns]

In [23]:
# Display the result table stored for collision_0.
collisions["collision_0"]["result_table"]

Unnamed: 0,threshold,without maneuvers value,without maneuvers reward,CE value,CE reward,MCTS value,MCTS reward,baseline value,baseline reward,baseline_tuned_by_CE value,baseline_tuned_by_CE reward
coll prob,0.0001,0.010226,-912.342,0.0,-0.0,0.0,-0.0,1.86969e-07,-0.00186969,5.89408e-07,-0.00589408
fuel (|dV|),10.0,0.0,-0.0,0.233197,-0.0233197,0.289525,-0.0289525,0.0900901,-0.00900901,0.0148666,-0.00148666
dev a (m),100.0,0.0,-0.0,13.1463,-0.131463,75.4434,-0.754434,-15.9117,-0.159117,-10.2029,-0.102029
dev e,0.01,-0.0,-0.0,-1e-06,-0.0001,-2e-06,-0.0002,0.0,-0.0,1e-06,-0.0001
dev i (rad),0.01,0.0,-0.0,3e-06,-0.0003,-3.1e-05,-0.0031,0.0,-0.0,0.0,-0.0
dev W (rad),0.01,0.0,-0.0,-4e-06,-0.0004,-6.5e-05,-0.0065,0.0,-0.0,-0.0,-0.0
dev w (rad),0.01,0.0,-0.0,0.003559,-0.3559,0.01098,-1.882,-0.002103,-0.2103,-0.0,-0.0
dev M (rad),,-0.0,0.0,-0.003802,0.0,-0.011078,0.0,0.001988,0.0,-4e-06,0.0


### Analysis of results

##### Rewards

In [24]:
# Total reward (sum of all reward components) per situation and model,
# including the model named by the notebook-level `model_name`.
# Rows are collision situations, columns are model names.
model_names = ["without maneuvers"] + list(models) + [model_name]
# Built in one go with an explicit float dtype: an empty frame filled cell by
# cell stays object-dtype, which some pandas versions reject in reductions
# such as idxmax(). The comprehension variable is deliberately not called
# `model_name`, so the global read on the line above is never rebound here.
results_reward = pd.DataFrame(
    [
        [collision["result_table"][name + " reward"].sum() for name in model_names]
        for collision in collisions.values()
    ],
    index=list(collisions),
    columns=model_names,
    dtype=float,
)
results_reward.head()

Unnamed: 0,without maneuvers,CE,MCTS,baseline,baseline_tuned_by_CE
collision_0,-912.342,-0.511483,-2.67519,-0.380296,-0.10951
collision_1,-65.4681,-0.653607,-5.39899,-0.293119,-0.293119
collision_2,-2866.11,-0.662706,-20.0759,-2.89897,-0.525088
collision_3,-272.272,-0.437669,-30.9589,-0.972916,-0.108728
collision_4,-1610.61,-0.52788,-0.979101,-11.594,-0.0788089


In [25]:
# best model for collision situation
# For every row, the column label (model name) with the largest total reward.
best_model = results_reward.idxmax(axis="columns")
print("Best model:", best_model, sep="\n\n")

Best model:

collision_0     baseline_tuned_by_CE
collision_1                 baseline
collision_2     baseline_tuned_by_CE
collision_3     baseline_tuned_by_CE
collision_4     baseline_tuned_by_CE
collision_5     baseline_tuned_by_CE
collision_6     baseline_tuned_by_CE
collision_7     baseline_tuned_by_CE
collision_8     baseline_tuned_by_CE
collision_9     baseline_tuned_by_CE
collision_10    baseline_tuned_by_CE
collision_11    baseline_tuned_by_CE
collision_12    baseline_tuned_by_CE
collision_13    baseline_tuned_by_CE
collision_14    baseline_tuned_by_CE
collision_15    baseline_tuned_by_CE
collision_16    baseline_tuned_by_CE
collision_17    baseline_tuned_by_CE
collision_18    baseline_tuned_by_CE
collision_19                baseline
dtype: object


In [26]:
# where RL is better than baseline
# Keep the situations whose best model is anything other than "baseline".
not_baseline = best_model != "baseline"
idx = best_model.index[not_baseline.values]
beaten_baseline = results_reward.loc[idx]
n_beaten = len(beaten_baseline)
print(f"RL better is than Baseline in {n_beaten} out of {len(collisions)} cases.")
beaten_baseline

RL better is than Baseline in 18 out of 20 cases.


Unnamed: 0,without maneuvers,CE,MCTS,baseline,baseline_tuned_by_CE
collision_0,-912.342,-0.511483,-2.67519,-0.380296,-0.10951
collision_2,-2866.11,-0.662706,-20.0759,-2.89897,-0.525088
collision_3,-272.272,-0.437669,-30.9589,-0.972916,-0.108728
collision_4,-1610.61,-0.52788,-0.979101,-11.594,-0.0788089
collision_5,-0.394703,-0.112809,-0.394703,-0.394703,-0.0500911
collision_6,-83.7167,-0.934126,-55.2713,-0.307321,-0.0283811
collision_7,-363.039,-1.12661,-12.6959,-0.190779,-0.110487
collision_8,-304.607,-4.2767,-0.909025,-0.321205,-0.0926842
collision_9,-2659.08,-0.221765,-59.5613,-0.273986,-0.169156
collision_10,-1759.78,-5.1454,-0.927768,-0.175366,-0.0770405


In [27]:
# where baseline tuned by CE is the best
# Keep the situations whose best model is "baseline_tuned_by_CE".
tuned_wins = best_model == "baseline_tuned_by_CE"
idx = best_model.index[tuned_wins.values]
tuned_baseline_is_best = results_reward.loc[idx]
n_tuned_best = len(tuned_baseline_is_best)
print(
    f"Baseline tuned by CE is better than others in {n_tuned_best} out of {len(collisions)} cases."
)
tuned_baseline_is_best

Baseline tuned by CE is better than others in 18 out of 20 cases.


Unnamed: 0,without maneuvers,CE,MCTS,baseline,baseline_tuned_by_CE
collision_0,-912.342,-0.511483,-2.67519,-0.380296,-0.10951
collision_2,-2866.11,-0.662706,-20.0759,-2.89897,-0.525088
collision_3,-272.272,-0.437669,-30.9589,-0.972916,-0.108728
collision_4,-1610.61,-0.52788,-0.979101,-11.594,-0.0788089
collision_5,-0.394703,-0.112809,-0.394703,-0.394703,-0.0500911
collision_6,-83.7167,-0.934126,-55.2713,-0.307321,-0.0283811
collision_7,-363.039,-1.12661,-12.6959,-0.190779,-0.110487
collision_8,-304.607,-4.2767,-0.909025,-0.321205,-0.0926842
collision_9,-2659.08,-0.221765,-59.5613,-0.273986,-0.169156
collision_10,-1759.78,-5.1454,-0.927768,-0.175366,-0.0770405


### Thresholds

In [28]:
# List, per situation, the models that satisfy all thresholds and count the
# situations where at least one model does.
print("Thresholds are overcomed:\n")
passing_models = {
    name: appropriate_models(collision["result_table"])
    for name, collision in collisions.items()
}
for name, m in passing_models.items():
    print(f"{name}: {m}")
n_overcomed_thresholds = sum(1 for m in passing_models.values() if m)
print(
    f"\nThe thresholds are overcomed in {n_overcomed_thresholds} out of {len(collisions)} cases."
)

Thresholds are overcomed:

collision_0: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_1: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_2: ['CE', 'baseline_tuned_by_CE']
collision_3: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_4: ['CE', 'MCTS', 'baseline_tuned_by_CE']
collision_5: ['without maneuvers', 'CE', 'MCTS', 'baseline', 'baseline_tuned_by_CE']
collision_6: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_7: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_8: ['MCTS', 'baseline', 'baseline_tuned_by_CE']
collision_9: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_10: ['MCTS', 'baseline', 'baseline_tuned_by_CE']
collision_11: ['baseline', 'baseline_tuned_by_CE']
collision_12: ['baseline_tuned_by_CE']
collision_13: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_14: ['baseline', 'baseline_tuned_by_CE']
collision_15: ['MCTS', 'baseline', 'baseline_tuned_by_CE']
collision_16: ['CE', 'baseline', 'baseline_tuned_by_CE']
collision_17: ['baseline'