In [1]:
import os
import numpy as np
import pandas as pd

from space_navigator.generator import Generator
from space_navigator.simulator import Simulator
from space_navigator.api import Environment
from space_navigator.agent.table_agent import TableAgent
from space_navigator.utils import read_environment, get_agent

from space_navigator.models.CE import CrossEntropy
from space_navigator.models.MCTS import DecisionTree
from space_navigator.models.collinear_GS import CollinearGridSearch
from space_navigator.models.baseline import Baseline

from IPython.display import clear_output
from tqdm import trange
from pprint import pprint

In [2]:
# Number of collision situations in the sample, and the `step` value that is
# handed to every model constructor and to the Simulator further down.
n_situations = 20
step = 0.000001

# Model name -> model class. The names are also used as suffixes of the
# action-table file names.
models = {
    "baseline": Baseline,
    "CE": CrossEntropy,
    "MCTS": DecisionTree,
    "collinear_GS": CollinearGridSearch,
}

save_dir_env = "../../../data/environments/generated_sample_1_day"
save_dir_maneuvers = "../../../training/agents_tables/generated_sample_1_day_without_reverse"

# exist_ok=True makes the creation idempotent without a separate
# "exists?" check, which is racy and was duplicated per directory.
for directory in (save_dir_env, save_dir_maneuvers):
    os.makedirs(directory, exist_ok=True)

In [3]:
# Registry of collision situations: for each one, the path of its environment
# file and the path of the action table of every model.
collisions = {}
for i in range(n_situations):
    name = f"collision_{i}"
    models_path = {}
    for model_name in models:
        models_path[model_name] = os.path.join(
            save_dir_maneuvers, f"{name}_{model_name}.csv")
    collisions[name] = {
        "env_path": os.path.join(save_dir_env, name + ".env"),
        "models_path": models_path,
    }

**If the sample and the action tables already exist, the "Sample" and "Maneuvers" steps can be skipped.**

# Sample

In [4]:
# generator parameters

# start_time = 6600
# end_time = 6601
# n_debris = 3

In [5]:
# sample of environments

# for name in collisions:
#     generator = Generator(start_time, end_time)
#     generator.add_protected()
#     for _ in range(n_debris):
#         generator.add_debris()
#     generator.save_env(
#         collisions[name]["env_path"])

# Maneuvers

In [6]:
# parameters
#
# Per model: "init" holds the keyword arguments for the model constructor and
# "train" the keyword arguments for its train() call (see the training cell).
train_params = {
    "baseline": {
        "init": {"reverse": False},
        "train": {"n_sessions": 1000},
    },
    "CE": {
        "init": {
            "reverse": False,
            "first_maneuver_time": "early",
            "n_maneuvers": 2,
        },
        "train": {
            "n_iterations": 100,
            "n_sessions": 30,
            "dV_angle": "complanar",
            "early_stopping": True,
        },
    },
    "MCTS": {
        "init": {"max_time_to_req": 1},
        "train": {"n_iterations": 100, "n_steps_ahead": 1},
    },
    "collinear_GS": {
        "init": {"reverse": False},
        "train": {"n_sessions": 1000},
    },
}

In [7]:
# training

# print_out = False

# for i, collision_name in enumerate(collisions):
#     env = read_environment(collisions[collision_name]["env_path"])
#     for model_name in models:
#         clear_output(True)
#         print("env: {} of {}\nmodel: {}".format(
#             i+1, len(collisions), model_name))
#         model = models[model_name](env, step, **train_params[model_name]["init"])
#         model.train(print_out=print_out, **train_params[model_name]["train"])
#         model.save_action_table(
#             collisions[collision_name]["models_path"][model_name])

# Results

In [8]:
def collision_info(env, agent, model_name):
    """Simulate `agent` in `env` and tabulate the resulting metrics and rewards.

    Runs ``Simulator(agent, env, step)`` (``step`` is the notebook-level
    setting), reads the outcome from ``env`` and resets ``env`` before
    returning, so the same environment object can be reused for another agent.

    Args:
        env: environment to simulate in; it is reset before the function returns.
        agent: agent handed to the simulator.
        model_name: label used as the prefix of the two result columns.

    Returns:
        pd.DataFrame with eight rows (collision probability, fuel and six
        trajectory-deviation components) and the columns
        "<model_name> value" and "<model_name> reward".
    """
    index = [
        "coll prob", "fuel (|dV|)",
        "dev a (m)", "dev e", "dev i (rad)",
        "dev W (rad)", "dev w (rad)", "dev M (rad)",
    ]
    deviation_rows = index[2:]
    value_col = model_name + " value"
    reward_col = model_name + " reward"
    df = pd.DataFrame(index=index, columns=[value_col, reward_col])

    simulator = Simulator(agent, env, step)
    simulator.run()

    # Assign through a single .loc call. Chained indexing (df[col][row] = x)
    # writes into a temporary object and is silently lost when pandas
    # Copy-on-Write is active.
    df.loc["coll prob", value_col] = env.get_total_collision_probability()
    df.loc["fuel (|dV|)", value_col] = env.get_fuel_consumption()
    # list(...) keeps the assignment positional, whatever sequence type is returned.
    df.loc[deviation_rows, value_col] = list(env.get_trajectory_deviation())

    reward_components = env.get_reward_components()
    df.loc["coll prob", reward_col] = reward_components["coll_prob"]
    df.loc["fuel (|dV|)", reward_col] = reward_components["fuel"]
    df.loc[deviation_rows, reward_col] = list(reward_components["traj_dev"])

    env.reset()
    return df

In [9]:
def env_thresholds(env):
    """Collect the thresholds stored on `env` into a one-column table.

    Args:
        env: object exposing ``coll_prob_thr``, ``fuel_cons_thr`` and the
            iterable ``traj_dev_thr``.

    Returns:
        pd.DataFrame with a single "threshold" column whose rows are the
        collision probability, the fuel and the trajectory-deviation
        components, in that order.
    """
    row_labels = [
        "coll prob", "fuel (|dV|)",
        "dev a (m)", "dev e", "dev i (rad)",
        "dev W (rad)", "dev w (rad)", "dev M (rad)",
    ]
    thresholds = [env.coll_prob_thr, env.fuel_cons_thr, *env.traj_dev_thr]
    return pd.DataFrame(data=thresholds, index=row_labels, columns=["threshold"])

In [10]:
# For every collision situation build one table: the thresholds first, then a
# value/reward column pair for the no-maneuver run and for each trained model.
for collision in collisions.values():
    env = read_environment(collision["env_path"])
    # A TableAgent constructed without arguments serves as the
    # "without maneuvers" reference run.
    result_table = env_thresholds(env).join(
        collision_info(env, TableAgent(), "without maneuvers")
    )
    for model_name, table_path in collision["models_path"].items():
        model_info = collision_info(env, get_agent("table", table_path), model_name)
        result_table = result_table.join(model_info)
    collision["result_table"] = result_table



In [11]:
# Display the result table of the first collision situation as an example.
collisions["collision_0"]["result_table"]

Unnamed: 0,threshold,without maneuvers value,without maneuvers reward,baseline value,baseline reward,CE value,CE reward,MCTS value,MCTS reward,collinear_GS value,collinear_GS reward
coll prob,0.0001,0.01519,-1359.1,5.55067e-07,-0.00555067,6.0715e-10,-6.0715e-06,0.000132342,-3.91082,2.02843e-05,-0.202843
fuel (|dV|),10.0,0.0,-0.0,0.0900901,-0.00900901,0.741542,-0.0741542,5.5741,-0.55741,0.0700701,-0.00700701
dev a (m),100.0,0.0,-0.0,-183.192,-8.48728,-99.2641,-0.992641,-75.5559,-0.755559,-142.484,-4.82353
dev e,0.01,-0.0,-0.0,0.0,-0.0,0.000102,-0.0102,0.000205,-0.0205,0.0,-0.0
dev i (rad),0.01,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.000268,-0.0268,0.0,-0.0
dev W (rad),0.01,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.001286,-0.1286,0.0,-0.0
dev w (rad),0.01,0.0,-0.0,0.013314,-3.9826,0.001834,-0.1834,-0.011016,-1.9144,0.010356,-1.3204
dev M (rad),,-0.0,0.0,-0.013386,0.0,-0.001435,0.0,0.006701,0.0,-0.010412,0.0


# Analysis of results

### Reward

In [12]:
# Total reward (sum over all reward components) per collision situation (rows)
# and per model (columns).
model_names = ["without maneuvers", *models]
total_rewards = [
    [
        np.sum(collisions[collision_name]["result_table"][name + " reward"])
        for name in model_names
    ]
    for collision_name in collisions
]
results_reward = pd.DataFrame(
    total_rewards, index=list(collisions), columns=model_names
).astype('float32')
results_reward.head(10)

Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS
collision_0,-1359.101196,-12.484435,-1.260401,-7.314085,-6.353776
collision_1,-502.399963,-1.556013,-3.416666,-65.966537,-1.556013
collision_2,-1689.974854,-722.629517,-0.734805,-101.721466,-640.503296
collision_3,-2073.405518,-1.554243,-2.002636,-2.374381,-1.214648
collision_4,-105.46225,-1.802612,-1.01228,-6.923814,-5.14629
collision_5,-3303.089844,-1.341808,-0.165636,-4.998756,-1.341809
collision_6,-2840.884521,-1.358161,-0.771382,-1.443773,-1.358161
collision_7,-9.633049,-0.413886,-0.384088,-9.633049,-0.413886
collision_8,-4138.464355,-15.942473,-0.283148,-41.306671,-15.942467
collision_9,-0.349337,-0.349337,-0.349337,-0.349337,-0.349337


In [13]:
# For each collision situation, the model (column) with the highest total reward.
best_model = results_reward.idxmax(axis="columns")
print("Best model:\n", best_model, sep="\n")

Best model:

collision_0                    CE
collision_1              baseline
collision_2                    CE
collision_3          collinear_GS
collision_4                    CE
collision_5                    CE
collision_6                    CE
collision_7                    CE
collision_8                    CE
collision_9     without maneuvers
collision_10         collinear_GS
collision_11             baseline
collision_12             baseline
collision_13                   CE
collision_14                   CE
collision_15                   CE
collision_16             baseline
collision_17         collinear_GS
collision_18         collinear_GS
collision_19                   CE
dtype: object


In [14]:
# Situations where the best-scoring model is not the baseline.
# NOTE(review): this counts every non-baseline winner as "RL", including
# "without maneuvers" and "collinear_GS", and ties go to whichever column
# idxmax returns first - confirm this is the intended comparison.
idx = best_model.index[(best_model != "baseline").values]
beaten_baseline = results_reward.loc[idx]
n_beaten, n_total = len(beaten_baseline), len(collisions)
print(f"RL is better than Baseline in {n_beaten} out of {n_total} cases.")
beaten_baseline

RL is better than Baseline in 16 out of 20 cases.


Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS
collision_0,-1359.101196,-12.484435,-1.260401,-7.314085,-6.353776
collision_2,-1689.974854,-722.629517,-0.734805,-101.721466,-640.503296
collision_3,-2073.405518,-1.554243,-2.002636,-2.374381,-1.214648
collision_4,-105.46225,-1.802612,-1.01228,-6.923814,-5.14629
collision_5,-3303.089844,-1.341808,-0.165636,-4.998756,-1.341809
collision_6,-2840.884521,-1.358161,-0.771382,-1.443773,-1.358161
collision_7,-9.633049,-0.413886,-0.384088,-9.633049,-0.413886
collision_8,-4138.464355,-15.942473,-0.283148,-41.306671,-15.942467
collision_9,-0.349337,-0.349337,-0.349337,-0.349337,-0.349337
collision_10,-1807.059448,-3.342343,-7.59129,-45.26339,-3.34232


In [15]:
# Situations where the baseline has the highest total reward.
idx = best_model.index[(best_model == "baseline").values]
beaten_RL = results_reward.loc[idx]
n_baseline_wins, n_total = len(beaten_RL), len(collisions)
print(f"Baseline is better than RL in {n_baseline_wins} out of {n_total} cases.")
beaten_RL

Baseline is better than RL in 4 out of 20 cases.


Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS
collision_1,-502.399963,-1.556013,-3.416666,-65.966537,-1.556013
collision_11,-1245.774902,-1.238609,-4.953611,-1.338538,-1.238609
collision_12,-254.408829,-0.763456,-0.805045,-1.458652,-0.763456
collision_16,-17.366779,-0.345102,-1.347845,-4.802671,-0.345102


### Thresholds

In [16]:
def appropriate_models(result_table):
    """Return the names of the models whose metrics all stay within the thresholds.

    Args:
        result_table: DataFrame with a "threshold" column and one
            "<model> value" column per model. Other columns (for example
            "<model> reward") are ignored.

    Returns:
        list of str: model names (the column name without its trailing
        "value" word), in column order, for which ``|value| <= threshold``
        holds in every row. A missing (NaN) threshold means "no limit".
    """
    cols = [c for c in result_table.columns if c.split()[-1] == "value"]
    # to_numpy replaces DataFrame.as_matrix, which was deprecated and then
    # removed in pandas 1.0. Missing thresholds become +inf so they never fail.
    thr = result_table[["threshold"]].fillna(value=np.inf).to_numpy(dtype=float)
    val = np.abs(result_table.loc[:, cols].to_numpy(dtype=float))
    # thr has shape (rows, 1) and broadcasts against val (rows, models).
    within = np.all(thr >= val, axis=0)
    return [' '.join(c.split()[:-1]) for c, ok in zip(cols, within) if ok]

In [17]:
print("Thresholds are overcome:\n")
# Models that satisfy every threshold, per collision situation.
passing_models = {
    name: appropriate_models(collision["result_table"])
    for name, collision in collisions.items()
}
for name, passing in passing_models.items():
    print(f"{name}: {passing}")
# A situation counts as solved when at least one model satisfies all thresholds.
n_overcome_thresholds = sum(1 for passing in passing_models.values() if passing)
print(
    f"\nThe thresholds are overcome in {n_overcome_thresholds} out of {len(collisions)} cases."
)

Thresholds are overcome:

collision_0: ['CE']
collision_1: ['baseline', 'collinear_GS']
collision_2: ['CE']
collision_3: ['baseline']
collision_4: ['baseline', 'CE']
collision_5: ['CE']
collision_6: ['baseline', 'CE', 'MCTS', 'collinear_GS']
collision_7: ['baseline', 'CE', 'collinear_GS']
collision_8: ['CE']
collision_9: ['without maneuvers', 'baseline', 'CE', 'MCTS', 'collinear_GS']
collision_10: []
collision_11: ['baseline', 'MCTS', 'collinear_GS']
collision_12: ['baseline', 'CE', 'MCTS', 'collinear_GS']
collision_13: ['CE']
collision_14: []
collision_15: ['baseline', 'CE', 'MCTS', 'collinear_GS']
collision_16: ['baseline', 'CE', 'collinear_GS']
collision_17: ['baseline', 'CE', 'MCTS', 'collinear_GS']
collision_18: ['baseline', 'collinear_GS']
collision_19: ['CE']

The thresholds are overcome in 18 out of 20 cases.


  after removing the cwd from sys.path.
  """


# Experiments - tuning using CE

In [18]:
# Register the action-table path of the CE-tuned collinear grid search model
# for every collision situation.
model_name = "collinear_GS_tuned_by_CE"
for collision_name in collisions:
    table_file = f"{collision_name}_{model_name}.csv"
    collisions[collision_name]["models_path"][model_name] = os.path.join(
        save_dir_maneuvers, table_file
    )

In [19]:
# parameters
#
# "init" holds the keyword arguments for the CE model constructor and "train"
# the keyword arguments for its train() call (see the training cell below).
CE_tuning_params = dict(
    init=dict(
        reverse=False,
        first_maneuver_time="early",
        n_maneuvers=2,
        percentile=90,
    ),
    train=dict(
        n_iterations=100,
        n_sessions=30,
        sigma_decay=0.9,
        percentile_growth=1.01,
        dV_angle="complanar",
        early_stopping=True,
    ),
)

In [20]:
# train

# print_out = False

# for collision_name, collision in collisions.items():
#     env = read_environment(collision["env_path"])
#     clear_output(True)
#     print(collision_name)
#     CE_model = models["CE"](env, step, **CE_tuning_params["init"])
#     CE_model.set_action_table_from_path(
#         collision["models_path"]["collinear_GS"]
#     )
#     CE_model.train(print_out=print_out, **CE_tuning_params["train"])
#     CE_model.save_action_table(collision["models_path"][model_name])

### Results

In [21]:
# add to result table
value_col, reward_col = f"{model_name} value", f"{model_name} reward"
for collision in collisions.values():
    env = read_environment(collision["env_path"])
    agent = get_agent("table", collision["models_path"][model_name])
    # Pass the model name itself, not the string literal "model_name", so the
    # returned columns already carry the right labels. The assignment then
    # no longer depends on the positional order of the returned columns.
    info = collision_info(env, agent, model_name)
    collision["result_table"][[value_col, reward_col]] = info[[value_col, reward_col]]

In [22]:
# Display the first collision's result table, now including the tuned model's columns.
collisions["collision_0"]["result_table"]

Unnamed: 0,threshold,without maneuvers value,without maneuvers reward,baseline value,baseline reward,CE value,CE reward,MCTS value,MCTS reward,collinear_GS value,collinear_GS reward,collinear_GS_tuned_by_CE value,collinear_GS_tuned_by_CE reward
coll prob,0.0001,0.01519,-1359.1,5.55067e-07,-0.00555067,6.0715e-10,-6.0715e-06,0.000132342,-3.91082,2.02843e-05,-0.202843,4.65269e-06,-0.0465269
fuel (|dV|),10.0,0.0,-0.0,0.0900901,-0.00900901,0.741542,-0.0741542,5.5741,-0.55741,0.0700701,-0.00700701,0.348495,-0.0348495
dev a (m),100.0,0.0,-0.0,-183.192,-8.48728,-99.2641,-0.992641,-75.5559,-0.755559,-142.484,-4.82353,-64.9359,-0.649359
dev e,0.01,-0.0,-0.0,0.0,-0.0,0.000102,-0.0102,0.000205,-0.0205,0.0,-0.0,4.9e-05,-0.0049
dev i (rad),0.01,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.000268,-0.0268,0.0,-0.0,0.0,-0.0
dev W (rad),0.01,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.001286,-0.1286,0.0,-0.0,0.0,-0.0
dev w (rad),0.01,0.0,-0.0,0.013314,-3.9826,0.001834,-0.1834,-0.011016,-1.9144,0.010356,-1.3204,-3.9e-05,-0.0039
dev M (rad),,-0.0,0.0,-0.013386,0.0,-0.001435,0.0,0.006701,0.0,-0.010412,0.0,0.000113,0.0


### Analysis of results

##### Reward

In [23]:
# Total reward per collision situation (rows) and per model (columns), now
# including the CE-tuned collinear grid search.
model_names = ["without maneuvers", *models, model_name]
total_rewards = [
    [
        np.sum(collisions[collision_name]["result_table"][name + " reward"])
        for name in model_names
    ]
    for collision_name in collisions
]
results_reward = pd.DataFrame(
    total_rewards, index=list(collisions), columns=model_names
).astype('float32')
results_reward.head()

Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS,collinear_GS_tuned_by_CE
collision_0,-1359.101196,-12.484435,-1.260401,-7.314085,-6.353776,-0.739535
collision_1,-502.399963,-1.556013,-3.416666,-65.966537,-1.556013,-0.933865
collision_2,-1689.974854,-722.629517,-0.734805,-101.721466,-640.503296,-173.443207
collision_3,-2073.405518,-1.554243,-2.002636,-2.374381,-1.214648,-0.775696
collision_4,-105.46225,-1.802612,-1.01228,-6.923814,-5.14629,-0.392247


In [24]:
# For each collision situation, the model (column) with the highest total reward.
best_model = results_reward.idxmax(axis="columns")
print("Best model:\n", best_model, sep="\n")

Best model:

collision_0     collinear_GS_tuned_by_CE
collision_1     collinear_GS_tuned_by_CE
collision_2                           CE
collision_3     collinear_GS_tuned_by_CE
collision_4     collinear_GS_tuned_by_CE
collision_5                           CE
collision_6                           CE
collision_7     collinear_GS_tuned_by_CE
collision_8                           CE
collision_9            without maneuvers
collision_10    collinear_GS_tuned_by_CE
collision_11    collinear_GS_tuned_by_CE
collision_12    collinear_GS_tuned_by_CE
collision_13                          CE
collision_14    collinear_GS_tuned_by_CE
collision_15                          CE
collision_16    collinear_GS_tuned_by_CE
collision_17    collinear_GS_tuned_by_CE
collision_18    collinear_GS_tuned_by_CE
collision_19                          CE
dtype: object


In [25]:
# Situations where the best-scoring model is not the baseline.
# NOTE(review): every non-baseline winner is counted as "RL" here, including
# "without maneuvers" - confirm this is the intended comparison.
idx = best_model[best_model != "baseline"].index
beaten_baseline = results_reward.loc[idx]
# Message wording fixed: it previously read "RL better is than Baseline".
print(
    f"RL is better than Baseline in {beaten_baseline.shape[0]} out of {len(collisions)} cases."
)
beaten_baseline

RL better is than Baseline in 20 out of 20 cases.


Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS,collinear_GS_tuned_by_CE
collision_0,-1359.101196,-12.484435,-1.260401,-7.314085,-6.353776,-0.739535
collision_1,-502.399963,-1.556013,-3.416666,-65.966537,-1.556013,-0.933865
collision_2,-1689.974854,-722.629517,-0.734805,-101.721466,-640.503296,-173.443207
collision_3,-2073.405518,-1.554243,-2.002636,-2.374381,-1.214648,-0.775696
collision_4,-105.46225,-1.802612,-1.01228,-6.923814,-5.14629,-0.392247
collision_5,-3303.089844,-1.341808,-0.165636,-4.998756,-1.341809,-0.890573
collision_6,-2840.884521,-1.358161,-0.771382,-1.443773,-1.358161,-0.931782
collision_7,-9.633049,-0.413886,-0.384088,-9.633049,-0.413886,-0.320748
collision_8,-4138.464355,-15.942473,-0.283148,-41.306671,-15.942467,-2.889336
collision_9,-0.349337,-0.349337,-0.349337,-0.349337,-0.349337,-0.349337


In [26]:
# Situations where the CE-tuned collinear grid search has the highest total reward.
idx = best_model.index[(best_model == "collinear_GS_tuned_by_CE").values]
tuned_collinear_GS_is_best = results_reward.loc[idx]
n_tuned_wins, n_total = len(tuned_collinear_GS_is_best), len(collisions)
print(
    f"Collinear GS tuned by CE is better than others in {n_tuned_wins} out of {n_total} cases."
)
tuned_collinear_GS_is_best

Collinear GS tuned by CE is better than others in 12 out of 20 cases.


Unnamed: 0,without maneuvers,baseline,CE,MCTS,collinear_GS,collinear_GS_tuned_by_CE
collision_0,-1359.101196,-12.484435,-1.260401,-7.314085,-6.353776,-0.739535
collision_1,-502.399963,-1.556013,-3.416666,-65.966537,-1.556013,-0.933865
collision_3,-2073.405518,-1.554243,-2.002636,-2.374381,-1.214648,-0.775696
collision_4,-105.46225,-1.802612,-1.01228,-6.923814,-5.14629,-0.392247
collision_7,-9.633049,-0.413886,-0.384088,-9.633049,-0.413886,-0.320748
collision_10,-1807.059448,-3.342343,-7.59129,-45.26339,-3.34232,-0.536408
collision_11,-1245.774902,-1.238609,-4.953611,-1.338538,-1.238609,-1.191068
collision_12,-254.408829,-0.763456,-0.805045,-1.458652,-0.763456,-0.66639
collision_14,-2300.181152,-45.496563,-5.027085,-23.913877,-45.496563,-1.088137
collision_16,-17.366779,-0.345102,-1.347845,-4.802671,-0.345102,-0.166532


### Thresholds

In [27]:
print("Thresholds are overcome:\n")
# Models that satisfy every threshold, per collision situation.
passing_by_collision = [
    (name, appropriate_models(collision["result_table"]))
    for name, collision in collisions.items()
]
for name, passing in passing_by_collision:
    print(f"{name}: {passing}")
# A situation counts as solved when at least one model satisfies all thresholds.
n_overcome_thresholds = len([name for name, passing in passing_by_collision if passing])
print(
    f"\nThe thresholds are overcome in {n_overcome_thresholds} out of {len(collisions)} cases."
)

Thresholds are overcome:

collision_0: ['CE', 'collinear_GS_tuned_by_CE']
collision_1: ['baseline', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_2: ['CE']
collision_3: ['baseline', 'collinear_GS_tuned_by_CE']
collision_4: ['baseline', 'CE', 'collinear_GS_tuned_by_CE']
collision_5: ['CE', 'collinear_GS_tuned_by_CE']
collision_6: ['baseline', 'CE', 'MCTS', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_7: ['baseline', 'CE', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_8: ['CE', 'collinear_GS_tuned_by_CE']
collision_9: ['without maneuvers', 'baseline', 'CE', 'MCTS', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_10: ['collinear_GS_tuned_by_CE']
collision_11: ['baseline', 'MCTS', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_12: ['baseline', 'CE', 'MCTS', 'collinear_GS', 'collinear_GS_tuned_by_CE']
collision_13: ['CE']
collision_14: ['collinear_GS_tuned_by_CE']
collision_15: ['baseline', 'CE', 'MCTS', 'collinear_GS', 'collinear_GS_tuned_by_CE']
colli

  after removing the cwd from sys.path.
  """
