diff --git a/.gitignore b/.gitignore index c379545b0..6ea9113c0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ safe_control_gym/envs/gym_control/assets/cartpole.urdf experiments/figure6/trained_gp_model/bak_best_model_*.pth experiments/figure7/safe_exp_results/ experiments/figure8/unsafe_ppo_temp_data/ +experiments/figure8/unsafe_ppo_model/bak_unsafe_ppo_model_30000.pt temp-data/ z_docstring.py TODOs.md diff --git a/experiments/figure6/config_overrides/gp_mpc_quad_training.yaml b/experiments/figure6/config_overrides/gp_mpc_quad_training.yaml index 8262c63d5..4c6758394 100644 --- a/experiments/figure6/config_overrides/gp_mpc_quad_training.yaml +++ b/experiments/figure6/config_overrides/gp_mpc_quad_training.yaml @@ -101,4 +101,3 @@ task_config: distrib: 'uniform' low: -0.1 high: 0.1 - diff --git a/experiments/figure6/gp_mpc_experiment.py b/experiments/figure6/gp_mpc_experiment.py index 9b1e80b2b..5f2efe52e 100644 --- a/experiments/figure6/gp_mpc_experiment.py +++ b/experiments/figure6/gp_mpc_experiment.py @@ -15,7 +15,14 @@ from safe_control_gym.utils.configuration import ConfigFactory -def plot_xz_comparison_diag_constraint(prior_run, run, init_ind, dir=None): +def plot_xz_comparison_diag_constraint(prior_run, + run, + init_ind, + dir=None + ): + """ + + """ state_inds = [0,2] goal = [0, 1] ax = plot_2D_comparison_with_prior(state_inds, prior_run, run, goal, init_ind, dir=dir) @@ -34,7 +41,15 @@ def plot_xz_comparison_diag_constraint(prior_run, run, init_ind, dir=None): plt.tight_layout() -def plot_2D_comparison_with_prior(state_inds, prior_run, run, goal, init_ind, dir=None): +def plot_2D_comparison_with_prior(state_inds, + prior_run, + run, goal, + init_ind, + dir=None + ): + """ + + """ horizon_cov = run.state_horizon_cov[init_ind] horizon_states = run.horizon_states[init_ind] prior_horizon_states = prior_run.horizon_states[init_ind] @@ -94,7 +109,14 @@ def plot_2D_comparison_with_prior(state_inds, prior_run, run, goal, init_ind, di return ax -def add_2d_cov_ellipse(position, cov, ax, legend=False): +def add_2d_cov_ellipse(position, + cov, + ax, + legend=False + ): + """ + + """ evals, evecs = np.linalg.eig(cov) major_axis_ind = np.argmax(evals) minor_axis_ind = 0 if major_axis_ind == 1 else 1 @@ -127,7 +149,6 @@ def add_2d_cov_ellipse(position, cov, ax, legend=False): fac = ConfigFactory() fac.add_argument("--train_only", type=bool, default=False, help="True if only training is performed.") config = fac.merge() - # Create environment. 
env_func = partial(make, config.task, diff --git a/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole.yaml b/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole.yaml index 9b8429289..ef4fa8928 100644 --- a/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole.yaml +++ b/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole.yaml @@ -2,7 +2,6 @@ algo_config: pretraining: False pretrained: null constraint_slack: 0.05 - log_interval: 1000 save_interval: 1000 num_checkpoints: 10 diff --git a/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole_pretrain.yaml b/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole_pretrain.yaml index 8c39ebda7..6a96cf27e 100644 --- a/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole_pretrain.yaml +++ b/experiments/figure7/config_overrides/safe_explorer_ppo_cartpole_pretrain.yaml @@ -10,7 +10,6 @@ algo_config: constraint_eval_interval: 5 constraint_buffer_size: 1000000 constraint_slack: 0.05 - log_interval: 10 save_interval: 10 num_checkpoints: 5 @@ -27,4 +26,3 @@ task_config: constraint_input_type: 'STATE' active_dims: 0 done_on_violation: True - diff --git a/experiments/figure7/create_fig7.sh b/experiments/figure7/create_fig7.sh index ea7b94124..c20405239 100755 --- a/experiments/figure7/create_fig7.sh +++ b/experiments/figure7/create_fig7.sh @@ -1,3 +1,4 @@ #!/bin/bash +# Safe Explorer vs PPO baselines. python3 ./safe_exp_plots.py --plot_dir ./safe_exp_results diff --git a/experiments/figure7/create_safe_exp_results.sh b/experiments/figure7/create_safe_exp_results.sh index fd716a025..a23b01b24 100755 --- a/experiments/figure7/create_safe_exp_results.sh +++ b/experiments/figure7/create_safe_exp_results.sh @@ -1,30 +1,34 @@ #!/bin/bash -# proper cleanup for background processes + +# Allow proper cleanup of background processes. trap "exit" INT TERM ERR trap "kill 0" EXIT +# Remove previous results. rm -r -f ./safe_exp_results/ +# Writing paths. OUTPUT_DIR="./" TAG_ROOT="safe_exp_results" + +# Configuration files path. CONFIG_PATH_ROOT="./config_overrides" +# Options. seeds=(2 22 222 2222 22222 9 90 998 9999 90001) thread=1 -########################## ppo +# PPO. TAG="ppo" CONFIG_PATH="${CONFIG_PATH_ROOT}/ppo_cartpole.yaml" - for seed in "${seeds[@]}" do python3 ../main.py --algo ppo --task cartpole --overrides $CONFIG_PATH --output_dir ${OUTPUT_DIR} --tag $TAG_ROOT/$TAG --thread $thread --seed $seed done -########################## ppo + reward shaping +# PPO with reward shaping. TAG="ppo_rs" CONFIG_PATH="${CONFIG_PATH_ROOT}/ppo_rs_cartpole.yaml" - tolerances=(0.15 0.2) for tolerance in "${tolerances[@]}" do @@ -34,19 +38,16 @@ do done done -########################## pretrain safe explorer +# Safe Explorer pre-training. TAG="safe_exp_pretrain" CONFIG_PATH="${CONFIG_PATH_ROOT}/safe_explorer_ppo_cartpole_pretrain.yaml" - train_seed=88890 python3 ../main.py --algo safe_explorer_ppo --task cartpole --overrides $CONFIG_PATH --output_dir ${OUTPUT_DIR} --tag $TAG_ROOT/$TAG --thread $thread --seed $train_seed -########################## train safe explorer +# Safe Explorer. 
PRETRAINED_PATH=(${OUTPUT_DIR}/$TAG_ROOT/$TAG/seed${train_seed}*) - TAG="safe_exp_" CONFIG_PATH="${CONFIG_PATH_ROOT}/safe_explorer_ppo_cartpole.yaml" - slacks=(0.15 0.2) for slack in "${slacks[@]}" do diff --git a/experiments/figure7/safe_exp_plots.py b/experiments/figure7/safe_exp_plots.py index 2b9627127..371d35b4d 100755 --- a/experiments/figure7/safe_exp_plots.py +++ b/experiments/figure7/safe_exp_plots.py @@ -21,9 +21,6 @@ from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, set_device_from_config, set_seed_from_config, save_video -# ----------------------------------------------------------------------------------- -# Plot Funcs -# ----------------------------------------------------------------------------------- def plot_from_exps(legend_dir_specs, out_path="temp.jpg", @@ -35,7 +32,9 @@ def plot_from_exps(legend_dir_specs, x_num_max=None, num_std=1, use_median_quantile=False): - """Plots 1 stat figure at a time.""" + """Plots 1 stat figure at a time. + + """ # Get all stats. stats = defaultdict(list) for l, dirs in legend_dir_specs.items(): @@ -43,13 +42,11 @@ def plot_from_exps(legend_dir_specs, # Pick from either log source (tensorboard or log text files). path = os.path.join(d, "logs", scalar_name + ".log") _, x, _, y = load_from_log_file(path) - # Smoothing. x, y = np.asarray(x), np.asarray(y) if window: x, y = window_func(x, y, window, np.mean) stats[l].append([x, y]) - # Post-processing. x_max = float("inf") for _, runs in stats.items(): @@ -58,19 +55,16 @@ def plot_from_exps(legend_dir_specs, x_max = min(x_max, len(x)) if x_num_max: x_max = min(x_max, x_num_max) - processed_stats = {} for name, runs in stats.items(): # Use same x for all runs to an algo. x = np.array([x[:x_max] for x, _ in runs])[0] - # Different y for different runs. y = np.stack([y[:x_max] for _, y in runs]) y_mean = np.mean(y, axis=0) y_std = np.std(y, axis=0) y_median = np.median(y, axis=0) y_quantiles = np.quantile(y, [0.25, 0.75], axis=0) - # Record stats. processed_stats[name] = { "x": x, @@ -79,33 +73,28 @@ def plot_from_exps(legend_dir_specs, "median": y_median, "quantiles": y_quantiles, } - # Actual plot. fig = plt.figure(figsize=(10, 5)) plt.clf() for i, name in enumerate(sorted(processed_stats.keys())): color = COLORS[i] x = processed_stats[name]["x"] - if use_median_quantile: y_median = processed_stats[name]["median"] y_quantiles = processed_stats[name]["quantiles"] y_quant_1st = y_quantiles[0] y_quant_3rd = y_quantiles[1] - plt.plot(x, y_median, label=name, color=color) plt.fill_between(x, y_quant_3rd, y_quant_1st, alpha=0.3, color=color) else: y_mean = processed_stats[name]["mean"] y_std = processed_stats[name]["std"] - plt.plot(x, y_mean, label=name, color=color) plt.fill_between(x, y_mean + num_std * y_std, y_mean - num_std * y_std, alpha=0.3, color=color) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=4) - plt.tight_layout() plt.savefig(out_path) plt.show() @@ -113,7 +102,9 @@ def plot_from_exps(legend_dir_specs, def reward_to_cost(y): - """Converts RL reward to control cost (for plotting).""" + """Converts RL reward to control cost (for plotting). + + """ return 250 - y @@ -121,7 +112,6 @@ def load_stats(legend_dir_specs, scalar_names=[], window=None, x_num_max=None): """Gets all processed stats for multiple scalars.""" scalar_stats = {} for scalar_name in scalar_names: - # Get all stats. 
stats = defaultdict(list) for l, dirs in legend_dir_specs.items(): @@ -129,7 +119,6 @@ def load_stats(legend_dir_specs, scalar_names=[], window=None, x_num_max=None): # Pick from either log source (tensorboard or log text files). path = os.path.join(d, "logs", scalar_name + ".log") _, x, _, y = load_from_log_file(path) - # Smoothing. x, y = np.asarray(x), np.asarray(y) if "return" in scalar_name.lower() or "reward" in scalar_name.lower(): @@ -137,7 +126,6 @@ def load_stats(legend_dir_specs, scalar_names=[], window=None, x_num_max=None): if window: x, y = window_func(x, y, window, np.mean) stats[l].append([x, y]) - # Post-processing. x_max = float("inf") for _, runs in stats.items(): @@ -146,19 +134,16 @@ def load_stats(legend_dir_specs, scalar_names=[], window=None, x_num_max=None): x_max = min(x_max, len(x)) if x_num_max: x_max = min(x_max, x_num_max) - processed_stats = {} for name, runs in stats.items(): # Use same x for all runs to an algo. x = np.array([x[:x_max] for x, _ in runs])[0] - # Different y for different runs. y = np.stack([y[:x_max] for _, y in runs]) y_mean = np.mean(y, axis=0) y_std = np.std(y, axis=0) y_median = np.median(y, axis=0) y_quantiles = np.quantile(y, [0.25, 0.75], axis=0) - # Record stats. processed_stats[name] = { "x": x, @@ -167,10 +152,8 @@ def load_stats(legend_dir_specs, scalar_names=[], window=None, x_num_max=None): "median": y_median, "quantiles": y_quantiles, } - # Copy over stats. scalar_stats[scalar_name] = deepcopy(processed_stats) - return scalar_stats @@ -186,18 +169,17 @@ def plot_from_exps2(legend_dir_specs, num_std=1, use_median_quantile=False, cols_per_row=3): - """Plots 1 stat figure at a time.""" + """Plots 1 stat figure at a time. + + """ # Get all stats. scalar_stats = load_stats(legend_dir_specs, scalar_names=scalar_names, window=window, x_num_max=x_num_max) - # Make plots. num_plots = len(scalar_stats) num_rows = math.ceil(num_plots / cols_per_row) num_cols = min(num_plots, cols_per_row) - fig = plt.figure(figsize=(12, 6)) axes = fig.subplots(nrows=num_rows, ncols=num_cols) - for idx, scalar_name in enumerate(scalar_names): row_idx = idx // num_rows col_idx = idx % num_cols @@ -206,38 +188,30 @@ def plot_from_exps2(legend_dir_specs, else: ax = axes[col_idx] processed_stats = scalar_stats[scalar_name] - for i, name in enumerate(sorted(processed_stats.keys())): color_i = (i + 0) % len(COLORS) color = COLORS[color_i] - line_i = (i + 0) % len(LINE_STYLES) linestyle = LINE_STYLES[line_i][-1] - x = processed_stats[name]["x"] - if use_median_quantile: y_median = processed_stats[name]["median"] y_quantiles = processed_stats[name]["quantiles"] y_quant_1st = y_quantiles[0] y_quant_3rd = y_quantiles[1] - ax.plot(x, y_median, label=name, color=color, alpha=0.7, linestyle=linestyle) ax.fill_between(x, y_quant_3rd, y_quant_1st, alpha=0.1, color=color) else: y_mean = processed_stats[name]["mean"] y_std = processed_stats[name]["std"] - ax.plot(x, y_mean, label=name, color=color, alpha=0.7, linestyle=linestyle) ax.fill_between(x, y_mean + num_std * y_std, y_mean - num_std * y_std, alpha=0.1, color=color) ax.set_xlabel(xlabel) ax.set_ylabel(ylabels[idx]) ax.set_ylim((-10, None)) - # Postprocess plot. 
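An aside on the aggregation performed by `load_stats`/`plot_from_exps2` above: each algorithm's seed runs are truncated to a common length and reduced to a median curve with an inter-quartile band. A minimal, self-contained sketch of that reduction, with synthetic run data standing in for the logged scalars:

```python
import numpy as np
import matplotlib.pyplot as plt

# Illustrative data: 5 seed runs of one logged scalar over 100 common steps.
rng = np.random.default_rng(0)
x = np.arange(100)
runs = np.stack([np.cumsum(rng.normal(1.0, 0.5, size=100)) for _ in range(5)])  # (n_seeds, n_steps)

# Truncate all runs to the shortest common length (mirrors the x_max logic above).
x_max = min(len(x), runs.shape[1])
y = runs[:, :x_max]

# Median and inter-quartile band across seeds, as used when use_median_quantile=True.
y_median = np.median(y, axis=0)
y_quartiles = np.quantile(y, [0.25, 0.75], axis=0)

plt.plot(x[:x_max], y_median, label="median over seeds")
plt.fill_between(x[:x_max], y_quartiles[1], y_quartiles[0], alpha=0.3, label="inter-quartile range")
plt.xlabel("Step")
plt.legend()
plt.show()
```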
fig.suptitle(title) fig.subplots_adjust(bottom=0.15) - lines = [] labels = [] for ax in fig.axes: @@ -245,21 +219,19 @@ def plot_from_exps2(legend_dir_specs, lines.extend(axLine) labels.extend(axLabel) break - fig.legend( - lines, - labels, - loc='lower center', - fancybox=True, - shadow=True, - borderaxespad=0.1, - ncol=7) + fig.legend(lines, + labels, + loc='lower center', + fancybox=True, + shadow=True, + borderaxespad=0.1, + ncol=7) plt.savefig(out_path) plt.show() return scalar_stats xabs_legend_map = { - # baseline "ppo": "ppo", "ppo_rs_0.15": "ppo_rs_slack0.15", "ppo_rs_0.2": "ppo_rs_slack0.2", @@ -267,16 +239,17 @@ def plot_from_exps2(legend_dir_specs, "safe_exp_slack0.2": "se_0.2", } + xlh2_legend_map = { - # baseline "ppo": "ppo", } + thetalh2_legend_map = { - # baseline "ppo": "ppo", } + legend_maps = { "xabs": xabs_legend_map, "xlh2": xlh2_legend_map, @@ -284,26 +257,23 @@ def plot_from_exps2(legend_dir_specs, } xabs_name_map = { - # baseline "ppo": "PPO", - # reward shaping "ppo_rs_slack0.15": "PPO+cost shaping 0.15", "ppo_rs_slack0.2": "PPO+cost shaping 0.2", - # se "se_0.15": "PPO+safety layer 0.15", "se_0.2": "PPO+safety layer 0.2", } def plot(config): - """Central plot function.""" - legend_map = legend_maps[config.constraint] + """Central plot function. + """ + legend_map = legend_maps[config.constraint] # Collect results. dirs = os.listdir(config.plot_dir) - # remove pretrain dirs + # Remove pretrain dirs. dirs = sorted([d for d in dirs if d in legend_map]) - # Make spec. spec = {} for d in dirs: @@ -311,32 +281,27 @@ def plot(config): legend = legend_map[d_name] seed_dirs = os.listdir(os.path.join(config.plot_dir, d)) spec[legend] = [os.path.join(config.plot_dir, d, sd) for sd in seed_dirs] - # Collective plot (mean only). - scalar_stats = plot_from_exps2( - spec, - out_path=os.path.join(config.plot_dir, "performance.jpg"), - scalar_names=["stat/ep_return", "stat/constraint_violation"], - title="Learning Curves", - sub_titles=["Average Costs", "Total Constraint Violations"], - xlabel="Step", - ylabels=["Cost", "Constraint Violations"], - window=10, - x_num_max=None, - num_std=0, - use_median_quantile=True, - cols_per_row=3) - - # Save stats to csv. + scalar_stats = plot_from_exps2(spec, + out_path=os.path.join(config.plot_dir, "performance.jpg"), + scalar_names=["stat/ep_return", "stat/constraint_violation"], + title="Learning Curves", + sub_titles=["Average Costs", "Total Constraint Violations"], + xlabel="Step", + ylabels=["Cost", "Constraint Violations"], + window=10, + x_num_max=None, + num_std=0, + use_median_quantile=True, + cols_per_row=3) + # Save stats to CSV. curves = ["median", "top_quartile", "bottom_quartile"] header = [] stat_rows = [] x_already = False - for scalar_name in scalar_stats: stats = scalar_stats[scalar_name] true_scalar_name = {"stat/ep_return": "Cost", "stat/constraint_violation": "Constraint Violations"}[scalar_name] - # Collect stats. for algo_name in sorted(stats.keys()): true_algo_name = xabs_name_map[algo_name] @@ -351,27 +316,21 @@ def plot(config): stat_rows.append(stat["median"]) stat_rows.append(stat["quantiles"][1]) stat_rows.append(stat["quantiles"][0]) - # Make rows. stat_mtx = np.array(stat_rows).transpose() rows = stat_mtx.tolist() - # Write to csv. 
csv_path = os.path.join(config.plot_dir, "performance.csv") with open(csv_path, "w") as f: writer = csv.writer(f) writer.writerow(header) writer.writerows(rows) - print("Plotting done.") -# ----------------------------------------------------------------------------------- -# Main -# ----------------------------------------------------------------------------------- - MAIN_FUNCS = {"plot": plot} + if __name__ == "__main__": # Make config. fac = ConfigFactory() diff --git a/experiments/figure8/create_fig8.sh b/experiments/figure8/create_fig8.sh index 8ff3aaa3e..07d2ac910 100755 --- a/experiments/figure8/create_fig8.sh +++ b/experiments/figure8/create_fig8.sh @@ -1,3 +1,4 @@ #!/bin/bash +# Model-predictive safety certification of unsafe PPO controller. python3 ./mpsc_experiment.py --task cartpole --algo mpsc --overrides ./config_overrides/config_mpsc_cartpole.yaml diff --git a/experiments/figure8/create_unsafe_ppo_model.sh b/experiments/figure8/create_unsafe_ppo_model.sh index 5b614e630..66bd309f1 100755 --- a/experiments/figure8/create_unsafe_ppo_model.sh +++ b/experiments/figure8/create_unsafe_ppo_model.sh @@ -14,4 +14,4 @@ cp ./unsafe_ppo_model/unsafe_ppo_model_30000.pt ./unsafe_ppo_model/bak_unsafe_pp mv ./unsafe_ppo_temp_data/seed2_*/model_latest.pt ./unsafe_ppo_model/unsafe_ppo_model_30000.pt # Removed the temporary data used to train the new unsafe PPO model. -rm -r -f ./unsafe_ppo_temp_data/ \ No newline at end of file +rm -r -f ./unsafe_ppo_temp_data/ diff --git a/experiments/main.py b/experiments/main.py index 1c10870dd..d371d11b4 100644 --- a/experiments/main.py +++ b/experiments/main.py @@ -1,4 +1,4 @@ -"""Vanilla training/plotting/testing script. +"""Template training/plotting/testing script. """ import os @@ -15,27 +15,22 @@ from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, set_device_from_config, set_seed_from_config, save_video -# ----------------------------------------------------------------------------------- -# Funcs -# ----------------------------------------------------------------------------------- - def train(config): - """General training template. + """Training template. Usage: * to start training, use with `--func train`. * to restore from a previous training, additionally use `--restore {dir_path}` where `dir_path` is the output folder from previous training. + """ # Experiment setup. if not config.restore: set_dir_from_config(config) set_seed_from_config(config) set_device_from_config(config) - # Define function to create task/env. env_func = partial(make, config.task, output_dir=config.output_dir, **config.task_config) - # Create the controller/control_agent. control_agent = make(config.algo, env_func, @@ -48,7 +43,6 @@ def train(config): control_agent.reset() if config.restore: control_agent.load(os.path.join(config.restore, "model_latest.pt")) - # Training. control_agent.learn() control_agent.close() @@ -62,12 +56,12 @@ def make_plots(config): * use with `--func plot` and `--restore {dir_path}` where `dir_path` is the experiment folder containing the logs. * save figures under `dir_path/plots/`. + """ # Define source and target log locations. log_dir = os.path.join(config.output_dir, "logs") plot_dir = os.path.join(config.output_dir, "plots") mkdirs(plot_dir) - plot_from_logs(log_dir, plot_dir, window=3) print("Plotting done.") @@ -80,13 +74,12 @@ def test_policy(config): * to test policy from a trained model checkpoint, additionally use `--restore {dir_path}` where `dir_path` is folder to the trained model. 
* to test un-trained policy (e.g. non-learning based), use as it is. + """ # Evaluation setup. set_device_from_config(config) - # Define function to create task/env. env_func = partial(make, config.task, output_dir=config.output_dir, **config.task_config) - # Create the controller/control_agent. control_agent = make(config.algo, env_func, @@ -99,35 +92,31 @@ def test_policy(config): control_agent.reset() if config.restore: control_agent.load(os.path.join(config.restore, "model_latest.pt")) - # Test controller. - results = control_agent.run(n_episodes=config.algo_config.eval_batch_size, render=config.render, verbose=config.verbose, use_adv=config.use_adv) + results = control_agent.run(n_episodes=config.algo_config.eval_batch_size, + render=config.render, + verbose=config.verbose, + use_adv=config.use_adv) control_agent.close() - # Save evalution results. eval_path = os.path.join(config.output_dir, "eval", config.eval_output_path) eval_dir = os.path.dirname(eval_path) mkdirs(eval_dir) with open(eval_path, "wb") as f: pickle.dump(results, f) - ep_lengths = results["ep_lengths"] ep_returns = results["ep_returns"] msg = "eval_ep_length {:.2f} +/- {:.2f}\n".format(ep_lengths.mean(), ep_lengths.std()) msg += "eval_ep_return {:.3f} +/- {:.3f}\n".format(ep_returns.mean(), ep_returns.std()) print(msg) - if "frames" in results: save_video(os.path.join(eval_dir, "video.gif"), results["frames"]) print("Evaluation done.") -# ----------------------------------------------------------------------------------- -# Main -# ----------------------------------------------------------------------------------- - MAIN_FUNCS = {"train": train, "plot": make_plots, "test": test_policy} + if __name__ == "__main__": # Make config. fac = ConfigFactory() @@ -138,12 +127,10 @@ def test_policy(config): fac.add_argument("--use_adv", action="store_true", help="if to evaluate against adversary.") fac.add_argument("--eval_output_path", type=str, default="test_results.pkl", help="file path to save evaluation results.") config = fac.merge() - # System settings. if config.thread > 0: # E.g. set single thread for less context switching torch.set_num_threads(config.thread) - # Execute. func = MAIN_FUNCS.get(config.func, None) if func is None: diff --git a/safe_control_gym/__init__.py b/safe_control_gym/__init__.py index 7a1f443e2..e797863f6 100644 --- a/safe_control_gym/__init__.py +++ b/safe_control_gym/__init__.py @@ -1,4 +1,3 @@ -"""Register all configurable objects by importing.""" from safe_control_gym.controllers import * from safe_control_gym.envs import * from safe_control_gym.envs.env_wrappers import * diff --git a/safe_control_gym/controllers/__init__.py b/safe_control_gym/controllers/__init__.py index 2dfe5c151..90d6fa60d 100644 --- a/safe_control_gym/controllers/__init__.py +++ b/safe_control_gym/controllers/__init__.py @@ -1,7 +1,8 @@ -"""Register controllers.""" +"""Register controllers. 
+ +""" from safe_control_gym.utils.registration import register -####################### MPC register(id="mpc", entry_point="safe_control_gym.controllers.mpc.mpc:MPC", config_entry_point="safe_control_gym.controllers.mpc:mpc.yaml") @@ -10,22 +11,18 @@ entry_point="safe_control_gym.controllers.mpc.linear_mpc:LinearMPC", config_entry_point="safe_control_gym.controllers.mpc:linear_mpc.yaml") -####################### Learning-based MPC register(id="gp_mpc", entry_point="safe_control_gym.controllers.mpc.gp_mpc:GPMPC", config_entry_point="safe_control_gym.controllers.mpc:gp_mpc.yaml") -####################### Model Predictive Safety Certification register(id="mpsc", entry_point="safe_control_gym.controllers.mpsc.mpsc:MPSC", config_entry_point="safe_control_gym.controllers.mpsc:mpsc.yaml") -####################### RL Baselines register(id="ppo", entry_point="safe_control_gym.controllers.ppo.ppo:PPO", config_entry_point="safe_control_gym.controllers.ppo:ppo.yaml") -####################### Soft register(id="safe_explorer_ppo", entry_point="safe_control_gym.controllers.safe_explorer.safe_ppo:SafeExplorerPPO", config_entry_point="safe_control_gym.controllers.safe_explorer:safe_ppo.yaml") diff --git a/safe_control_gym/controllers/base_controller.py b/safe_control_gym/controllers/base_controller.py index 37e6d2e5b..627f11341 100644 --- a/safe_control_gym/controllers/base_controller.py +++ b/safe_control_gym/controllers/base_controller.py @@ -1,8 +1,12 @@ -"""Base classes.""" +"""Base classes. + +""" class BaseController: - """Template for controller/agent, implement the following methods as needed.""" + """Template for controller/agent, implement the following methods as needed. + + """ def __init__(self, env_func, @@ -11,7 +15,8 @@ def __init__(self, output_dir="temp", device="cpu", seed=0, - **kwargs): + **kwargs + ): """Initializes controller agent. Args: @@ -21,37 +26,54 @@ def __init__(self, output_dir (str): folder to write outputs. device (str): cpu or cuda. seed (int): random seed. + """ - # base args + # Base args. self.env_func = env_func self.training = training self.checkpoint_path = checkpoint_path self.output_dir = output_dir self.device = device self.seed = seed - - # algo specific args + # Algorithm specific args. for k, v in kwargs.items(): self.__dict__[k] = v def reset(self): - """Do initializations for training or evaluation.""" + """Do initializations for training or evaluation. + + """ pass def close(self): - """Shuts down and cleans up lingering resources.""" + """Shuts down and cleans up lingering resources. + + """ pass - def save(self, path): - """Saves model params and experiment state to checkpoint path.""" + def save(self, + path + ): + """Saves model params and experiment state to checkpoint path. + + """ pass - def load(self, path): - """Restores model and experiment given checkpoint path.""" + def load(self, + path + ): + """Restores model and experiment given checkpoint path. + + """ pass - def learn(self, env=None, **kwargs): - """Performs learning (pre-training, training, fine-tuning, etc).""" + def learn(self, + env=None, + **kwargs + ): + """Performs learning (pre-training, training, fine-tuning, etc). + + """ pass def run(self, @@ -59,6 +81,9 @@ def run(self, render=False, n_episodes=10, verbose=False, - **kwargs): - """Runs evaluation with current policy.""" + **kwargs + ): + """Runs evaluation with current policy. 
+ + """ pass diff --git a/safe_control_gym/controllers/mpc/gp_mpc.py b/safe_control_gym/controllers/mpc/gp_mpc.py index f854c2d6b..2248a0ac8 100644 --- a/safe_control_gym/controllers/mpc/gp_mpc.py +++ b/safe_control_gym/controllers/mpc/gp_mpc.py @@ -1,19 +1,30 @@ -"""Model Predictive Control with Gaussian Process - -Example: - run mpc on cartpole balance - - $ python tests/test_main.py --mode test_policy --exp_id mpc_cartpole \ - --algo mpc --task cartpole --env_wraps time_limit --max_episode_steps 200 +"""Model Predictive Control with a Gaussian Process model. + +Based on: + * L. Hewing, J. Kabzan and M. N. Zeilinger, "Cautious Model Predictive Control Using Gaussian Process Regression," + in IEEE Transactions on Control Systems Technology, vol. 28, no. 6, pp. 2736-2743, Nov. 2020, doi: 10.1109/TCST.2019.2949757. + +Implementation details: + 1. The previous time step MPC solution is used to compute the set constraints and GP dynamics rollout. + Here, the dynamics are rolled out using the Mean Equivelence method, the fastest, but least accurate. + 2. The GP is approximated using the Fully Independent Training Conditional (FITC) outlined in + * J. Quinonero-Candela, C. E. Rasmussen, and R. Herbrich, “A unifying view of sparse approximate Gaussian process regression,” + Journal of Machine Learning Research, vol. 6, pp. 1935–1959, 2005. + https://www.jmlr.org/papers/volume6/quinonero-candela05a/quinonero-candela05a.pdf + * E. Snelson and Z. Ghahramani, “Sparse gaussian processes using pseudo-inputs,” in Advances in Neural Information Processing + Systems, Y. Weiss, B. Scholkopf, and J. C. Platt, Eds., 2006, pp. 1257–1264. + and the inducing points are the previous MPC solution. + 3. Each dimension of the learned error dynamics is an independent Zero Mean SE Kernel GP. """ import scipy import numpy as np import casadi as cs -from copy import deepcopy import time import torch import gpytorch + +from copy import deepcopy from skopt.sampler import Lhs from functools import partial @@ -22,37 +33,9 @@ from safe_control_gym.controllers.mpc.gp_utils import GaussianProcessCollection, ZeroMeanIndependentGPModel, covSEard from safe_control_gym.envs.benchmark_env import Task -# ----------------------------------------------------------------------------------- -# Gaussian Process MPC -# ----------------------------------------------------------------------------------- - class GPMPC(MPC): - """MPC with Gaussian Process as dynamics residual. This implementation is based on the paper - - L. Hewing, J. Kabzan and M. N. Zeilinger, "Cautious Model Predictive Control Using Gaussian Process Regression," - in IEEE Transactions on Control Systems Technology, vol. 28, no. 6, pp. 2736-2743, Nov. 2020, - doi: 10.1109/TCST.2019.2949757. - (https://ieeexplore.ieee.org/abstract/document/8909368) or (https://arxiv.org/pdf/1705.10702.pdf). - - A Gaussian process learns the error dynamics, and state and input constraints are to ensure, with some - probability, that the error dynamics will remain within constraint boundaires. Note that there are a lot of - approximations made on the GPs, as outlined in the papers. Namely, - 1. The previous time step mpc solution is used to compute the set constraints and gp dynamics rollout. - Here, the dynamics are rolled out using the Mean Equivelence method, the fastest, but least accurate. - 2. The GP is approximated using the Fully Independent Training Conditional (FITC) outlined in - J. Quinonero-Candela, C. E. Rasmussen, and R. 
Herbrich, “A unifying ˜ - view of sparse approximate Gaussian process regression,” Journal of - Machine Learning Research, vol. 6, pp. 1935–1959, 2005. - (https://www.jmlr.org/papers/volume6/quinonero-candela05a/quinonero-candela05a.pdf) - and - E. Snelson and Z. Ghahramani, “Sparse gaussian processes using - pseudo-inputs,” in Advances in Neural Information Processing Systems, - Y. Weiss, B. Scholkopf, and J. C. Platt, Eds., 2006, pp. 1257–1264. - (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.514.540&rep=rep1&type=pdf) - The inducing points are the previous mpc solution. These approximations are critical to making GP-MPC - work. - 3. Each dimension of the learned error dynamics is an independent Zero Mean SE Kernel GP. + """MPC with Gaussian Process as dynamics residual. """ @@ -79,37 +62,36 @@ def __init__( inertial_prop: list = [1.0], prior_param_coeff: float = 1.0, output_dir: str = "results/temp", - **kwargs): - """ - Initialize GP-MPC. + **kwargs + ): + """Initialize GP-MPC. Args: - env_func (gym.Env): Functionalized initialization of the environment. - seed (int): Random seed (currently not used). - horizon (int): mpc planning horizon. + env_func (gym.Env): functionalized initialization of the environment. + seed (int): random seed. + horizon (int): MPC planning horizon. Q, R (np.array): cost weight matrix. use_prev_start (bool): Warmstart mpc with the previous solution. - train_iterations (int): The number of training examples to use for each dimension of the GP. - optimization_iterations (list): The number of optimization iterations for each dimension of the GP. - learning_rate (list): The learning rate for training each dimension of the GP. - normalize_training_data (bool): Normalize the training data (NOT WORKING WELL). - use_gpu (bool): Use GPU while training the gp. - gp_model_path (str): Path to a pretrained GP model. If None, will train a new one. - output_dir (str): Directory to store model and results. - prob (float): Desired probabilistic safety level. - inertial_prop (list): To initialize the inertial properties of the prior model. - prior_param_coeff (float): Constant multiplying factor to adjust the prior model intertial properties. - input_mask (list): List of which input dimensions to use in GP model. If None, all are used. - target_mask (list): List of which output dimensions to use in the GP model. If None, all are used. + train_iterations (int): the number of training examples to use for each dimension of the GP. + optimization_iterations (list): the number of optimization iterations for each dimension of the GP. + learning_rate (list): the learning rate for training each dimension of the GP. + normalize_training_data (bool): Normalize the training data. + use_gpu (bool): use GPU while training the gp. + gp_model_path (str): path to a pretrained GP model. If None, will train a new one. + output_dir (str): directory to store model and results. + prob (float): desired probabilistic safety level. + inertial_prop (list): to initialize the inertial properties of the prior model. + prior_param_coeff (float): constant multiplying factor to adjust the prior model intertial properties. + input_mask (list): list of which input dimensions to use in GP model. If None, all are used. + target_mask (list): list of which output dimensions to use in the GP model. If None, all are used. gp_approx (str): 'mean_eq' used mean equivalence rollout for the GP dynamics. Only one that works currently. 
- online_learning (bool): If true, GP kernel values will be updated using past trajectory values. - additional_constraints (list): List of Constraint objects defining additional constraints to be used. + online_learning (bool): if true, GP kernel values will be updated using past trajectory values. + additional_constraints (list): list of Constraint objects defining additional constraints to be used. """ self.prior_env_func = partial(env_func, inertial_prop=np.array(inertial_prop)*prior_param_coeff) self.prior_param_coeff = prior_param_coeff - # Initialize the method using linear MPC. self.prior_ctrl = LinearMPC( self.prior_env_func, @@ -117,36 +99,27 @@ def __init__( q_mpc=q_mpc, r_mpc=r_mpc, use_prev_start=use_prev_start, - # runner args - # shared/base args output_dir=output_dir, additional_constraints=additional_constraints, ) self.prior_ctrl.reset() - super().__init__( self.prior_env_func, - # model args horizon=horizon, q_mpc=q_mpc, r_mpc=r_mpc, use_prev_start=use_prev_start, - # runner args - # shared/base args output_dir=output_dir, additional_constraints=additional_constraints, **kwargs) - - # setup envs + # Setup environments. self.env_func = env_func self.env = env_func(randomized_init=False) self.env_training = env_func(randomized_init=True) - # No training data accumulated yet so keep the dynamics function as linear prior. self.train_data = None self.prior_dynamics_func = self.prior_ctrl.linear_dynamics_func - - # GP and training parameters + # GP and training parameters. self.gaussian_process = None self.train_iterations = train_iterations self.optimization_iterations = optimization_iterations @@ -156,7 +129,6 @@ def __init__( self.use_gpu = use_gpu self.seed = seed self.prob = prob - if input_mask is None: self.input_mask = np.arange(self.model.nx + self.model.nu).tolist() else: @@ -167,80 +139,72 @@ def __init__( self.target_mask = target_mask Bd = np.eye(self.model.nx) self.Bd = Bd[:, self.target_mask] - self.gp_approx = gp_approx self.online_learning = online_learning self.last_obs = None self.last_action = None - ## logging - #self.logger = ExperimentLogger(output_dir, log_file_out=True) - def setup_prior_dynamics(self): - """Computes the LQR gain used for propograting GP uncertainty from the prior model dynamics.""" - # Determine the LQR gain K to propogate the input uncertainty. - # It may be better to do this at each timestep, but that will increase comp complexity. + """Computes the LQR gain used for propograting GP uncertainty from the prior model dynamics. + + """ + # Determine the LQR gain K to propogate the input uncertainty (doing this at each timestep will increase complexity). A, B = discretize_linear_system(self.prior_ctrl.dfdx, self.prior_ctrl.dfdu, self.dt) Q_lqr = self.Q R_lqr = self.R P = scipy.linalg.solve_discrete_are(A, B, Q_lqr, R_lqr) btp = np.dot(B.T, P) - - # The sign below is the correct sign (with -). I have triple checked. self.lqr_gain = -np.dot(np.linalg.inv(self.R + np.dot(btp, B)), np.dot(btp, A)) self.discrete_dfdx = A self.discrete_dfdu = B def set_gp_dynamics_func(self): - """Updates symbolic dynamics with actual control frequency, - initialize GP model and add to the combined dynamics. + """Updates symbolic dynamics. + + With actual control frequency, initialize GP model and add to the combined dynamics. + """ self.setup_prior_dynamics() - # Compute the probabilistic constraint inverse CDF according to section III.D.b in (Hewing 2019) + # Compute the probabilistic constraint inverse CDF according to section III.D.b in Hewing 2019. 
self.inverse_cdf = scipy.stats.norm.ppf(1 - (1/self.model.nx - (self.prob + 1)/(2*self.model.nx))) - self.create_sparse_GP_machinery() def create_sparse_GP_machinery(self): - """This setups the gaussian process approximations for FITC formulation. """ - lengthscales, signal_var, noise_var, gp_K_plus_noise = self.gaussian_process.get_hyperparameters( - as_numpy=True) + """This setups the gaussian process approximations for FITC formulation. + + """ + lengthscales, signal_var, noise_var, gp_K_plus_noise = self.gaussian_process.get_hyperparameters(as_numpy=True) self.length_scales = lengthscales.squeeze() self.signal_var = signal_var.squeeze() self.noise_var = noise_var.squeeze() self.gp_K_plus_noise = gp_K_plus_noise - Nx = len(self.input_mask) Ny = len(self.target_mask) N = self.gaussian_process.n_training_samples - - # Create CasADI function for computing the kernel K_z_zind with parameters for z, z_ind, length scales - # and signal variance. The method used here is similar to that done in safe_control_gym.math_and_models.gp_functions - # build_gp(). - # We need the CasADI version of this so that it can by symbolically differentiated in in the mpc optimization. + # Create CasADI function for computing the kernel K_z_zind with parameters for z, z_ind, length scales and signal variance. + # We need the CasADI version of this so that it can by symbolically differentiated in in the MPC optimization. z1 = cs.SX.sym('z1', Nx) z2 = cs.SX.sym('z2', Nx) ell_s = cs.SX.sym('ell', Nx) sf2_s = cs.SX.sym('sf2') z_ind = cs.SX.sym('z_ind', self.T, Nx) - covSE = cs.Function('covSE', [z1, z2, ell_s, sf2_s], [covSEard(z1, z2, ell_s, sf2_s)]) - ks = cs.SX.zeros(1, self.T) for i in range(self.T): ks[i] = covSE(z1, z_ind[i, :], ell_s, sf2_s) ks_func = cs.Function('K_s', [z1, z_ind, ell_s, sf2_s], [ks]) - K_z_zind = cs.SX.zeros(Ny, self.T) for i in range(Ny): K_z_zind[i,:] = ks_func(z1, z_ind, self.length_scales[i,:], self.signal_var[i]) - - # This will be mulitplied by the mean_post_factor computed at every time step - # to compute the approximate mean. + # This will be mulitplied by the mean_post_factor computed at every time step to compute the approximate mean. self.K_z_zind_func = cs.Function('K_z_zind', [z1, z_ind],[K_z_zind],['z1', 'z2'],['K']) - def preprocess_training_data(self, x_seq, u_seq, x_next_seq): + def preprocess_training_data(self, + x_seq, + u_seq, + x_next_seq + ): """Converts trajectory data for GP trianing. Args: @@ -251,33 +215,29 @@ def preprocess_training_data(self, x_seq, u_seq, x_next_seq): Returns: np.array: inputs for GP training, (N, nx+nu). np.array: targets for GP training, (N, nx). + """ - # Get the predicted dynamics. Recall that this is a linear prior, which means - # we need to account for the fact that it is linearized about an eq using - # self.X_GOAL and self.U_GOAL. + # Get the predicted dynamics. This is a linear prior, thus we need to account for the fact that + # it is linearized about an eq using self.X_GOAL and self.U_GOAL. x_pred_seq = self.prior_dynamics_func(x0=x_seq.T - self.prior_ctrl.X_LIN[:, None], p=u_seq.T - self.prior_ctrl.U_LIN[:,None])['xf'].toarray() - - targets = (x_next_seq.T - (x_pred_seq+self.prior_ctrl.X_LIN[:,None])).transpose() # (N, nx) - #targets = x_next_seq - x_pred_seq.T # (N, nx) - inputs = np.hstack([x_seq, u_seq]) # (N, nx+nu) + targets = (x_next_seq.T - (x_pred_seq+self.prior_ctrl.X_LIN[:,None])).transpose() # (N, nx). + inputs = np.hstack([x_seq, u_seq]) # (N, nx+nu). 
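`preprocess_training_data` above builds the GP training set from trajectory data: inputs are (state, input) pairs and targets are the residual between the measured next state and the linear prior's prediction. A small numpy sketch of the same construction, assuming a hypothetical discrete-time prior (`A`, `B`, `x_lin`, `u_lin`, and the trajectory values are illustrative, not values from the repository):

```python
import numpy as np

# Illustrative discrete-time linear prior x_{k+1} = A (x_k - x_lin) + B (u_k - u_lin) + x_lin.
A = np.array([[1.0, 0.1], [0.0, 1.0]])
B = np.array([[0.005], [0.1]])
x_lin = np.zeros(2)
u_lin = np.zeros(1)

def prior_predict(x, u):
    """Prior prediction in absolute coordinates, mirroring the prior_dynamics_func usage."""
    return A @ (x - x_lin) + B @ (u - u_lin) + x_lin

# Trajectory data: states x_k, inputs u_k, and measured next states x_{k+1}.
x_seq = np.array([[0.0, 0.0], [0.01, 0.05]])          # (N, nx)
u_seq = np.array([[0.5], [0.4]])                       # (N, nu)
x_next_seq = np.array([[0.012, 0.06], [0.03, 0.11]])   # (N, nx)

# GP inputs are (state, input) pairs; targets are the unmodeled residual dynamics.
inputs = np.hstack([x_seq, u_seq])                                                     # (N, nx + nu)
targets = x_next_seq - np.array([prior_predict(x, u) for x, u in zip(x_seq, u_seq)])   # (N, nx)
print(inputs.shape, targets.shape)
```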
return inputs, targets - def precompute_probabilistic_limits(self, print_sets=True): + def precompute_probabilistic_limits(self, + print_sets=True + ): """This updates the constraint value limits to account for the uncertainty in the dynamics rollout. Args: print_sets (bool): True to print out the sets for debugging purposes. """ - # TODO: This should be divided into multiple functions. One for rolling out covaiance and another for - # computing the probabilistic constraint limits (at the least). nx, nu = self.model.nx, self.model.nu T = self.T - state_covariances = np.zeros((self.T+1, nx, nx)) input_covariances = np.zeros((self.T, nu, nu)) - # Initilize lists for the tightening of each constraint. state_constraint_set = [] for state_constraint in self.constraints.state_constraints: @@ -285,7 +245,6 @@ def precompute_probabilistic_limits(self, print_sets=True): input_constraint_set = [] for input_constraint in self.constraints.input_constraints: input_constraint_set.append(np.zeros((input_constraint.num_constraints, T))) - if self.x_prev is not None and self.u_prev is not None: cov_x = np.zeros((nx, nx)) for i in range(T): @@ -293,7 +252,6 @@ def precompute_probabilistic_limits(self, print_sets=True): cov_u = self.lqr_gain @ cov_x @ self.lqr_gain.T input_covariances[i] = cov_u cov_xu = cov_x @ self.lqr_gain.T - z = np.hstack((self.x_prev[:,i], self.u_prev[:,i])) if self.gp_approx == 'taylor': raise NotImplementedError("Taylor GP approximation is currently not working.") @@ -302,16 +260,13 @@ def precompute_probabilistic_limits(self, print_sets=True): cov_d = cov_d_tensor.detach().numpy() else: raise NotImplementedError('gp_approx method is incorrect or not implemented') - - # loop through input constraints and tighten by the required ammount + # Loop through input constraints and tighten by the required ammount. for ui, input_constraint in enumerate(self.constraints.input_constraints): input_constraint_set[ui][:, i] = -1*self.inverse_cdf * \ np.absolute(input_constraint.A) @ np.sqrt(np.diag(cov_u)) for si, state_constraint in enumerate(self.constraints.state_constraints): state_constraint_set[si][:, i] = -1*self.inverse_cdf * \ np.absolute(state_constraint.A) @ np.sqrt(np.diag(cov_x)) - - if self.gp_approx == 'taylor': raise NotImplementedError("Taylor GP rollout not implemented.") elif self.gp_approx == 'mean_eq': @@ -323,44 +278,38 @@ def precompute_probabilistic_limits(self, print_sets=True): self.Bd @ cov_d @ self.Bd.T else: raise NotImplementedError('gp_approx method is incorrect or not implemented') - # Udate Final covariance. 
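For reference, the uncertainty propagation and constraint tightening in `precompute_probabilistic_limits` can be summarized in a few lines. The sketch below is a simplification under the LQR feedback assumption u = K x, so the joint state-input covariance update collapses to a closed-loop form; all matrices and the probability level are illustrative placeholders, not repository values:

```python
import numpy as np
from scipy.stats import norm

# Illustrative prior model and LQR feedback gain (as computed in setup_prior_dynamics).
A = np.array([[1.0, 0.1], [0.0, 1.0]])
B = np.array([[0.005], [0.1]])
K = np.array([[-1.2, -0.8]])      # lqr_gain, so that u ~ K x
Bd = np.eye(2)                    # residual dynamics enter on all states here
cov_d = np.diag([1e-4, 2e-4])     # GP predictive covariance at the current rollout point

# One mean-equivalence step: with u = K x, the state covariance propagates as
# cov_next = (A + B K) cov_x (A + B K)^T + Bd cov_d Bd^T, and cov_u = K cov_x K^T.
cov_x = np.diag([1e-3, 1e-3])
cov_u = K @ cov_x @ K.T
cov_x_next = (A + B @ K) @ cov_x @ (A + B @ K).T + Bd @ cov_d @ Bd.T

# Constraint tightening: for a linear constraint A_con x <= b, shrink the bound by
# inverse_cdf * |A_con| sqrt(diag(cov_x)) so it holds with the desired probability.
prob, nx = 0.95, 2
inverse_cdf = norm.ppf(1 - (1 / nx - (prob + 1) / (2 * nx)))
A_con = np.array([[1.0, 0.0]])    # e.g. a bound on the first state
tightening = inverse_cdf * np.absolute(A_con) @ np.sqrt(np.diag(cov_x_next))
print(cov_u, cov_x_next, tightening)
```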
for si, state_constraint in enumerate(self.constraints.state_constraints): state_constraint_set[si][:,-1] = -1 * self.inverse_cdf * \ np.absolute(state_constraint.A) @ np.sqrt(np.diag(cov_x)) - state_covariances[-1] = cov_x if print_sets: print("Probabilistic State Constraint values along Horizon:") print(state_constraint_set) print("Probabilistic Input Constraint values along Horizon:") print(input_constraint_set) - self.results_dict['input_constraint_set'].append(input_constraint_set) self.results_dict['state_constraint_set'].append(state_constraint_set) self.results_dict['state_horizon_cov'].append(state_covariances) self.results_dict['input_horizon_cov'].append(input_covariances) - return state_constraint_set, input_constraint_set def precompute_sparse_gp_values(self): - """Uses the last MPC solution to precomupte values associated with the FITC GP approximation.""" - # TODO: Move to GP utils + """Uses the last MPC solution to precomupte values associated with the FITC GP approximation. + + """ n_data_points = self.gaussian_process.n_training_samples dim_gp_inputs = len(self.input_mask) dim_gp_outputs = len(self.target_mask) - - # Get the inducing points + # Get the inducing points. if self.x_prev is not None and self.u_prev is not None: - # Use the previous MPC soln (as per Hewing 2019) + # Use the previous MPC solution as in Hewing 2019. z_ind = np.hstack((self.x_prev[:,:-1].T, self.u_prev.T)) z_ind = z_ind[:,self.input_mask] else: - # If there is no previous solution. Choose T random training set points + # If there is no previous solution. Choose T random training set points. inds = self.env.np_random.choice(range(n_data_points), size=self.T) - z_ind = self.data_inputs[inds][:, self.input_mask] - K_zind_zind = self.gaussian_process.kernel(torch.Tensor(z_ind).double()) K_zind_zind_inv = self.gaussian_process.kernel_inv(torch.Tensor(z_ind).double()) K_x_zind = self.gaussian_process.kernel(torch.from_numpy(self.data_inputs[:, self.input_mask]).double(), @@ -373,44 +322,38 @@ def precompute_sparse_gp_values(self): for i in range(dim_gp_outputs): mean_post_factor[i] = Sigma[i] @ K_x_zind[i].T @ Gamma_inv[i] @ \ torch.from_numpy(self.data_targets[:,self.target_mask[i]]).double() - return mean_post_factor.detach().numpy(), Sigma.detach().numpy(), K_zind_zind_inv.detach().numpy(), z_ind def setup_gp_optimizer(self): - """Sets up nonlinear optimization problem including - cost objective, variable bounds and dynamics constraints. - """ + """Sets up nonlinear optimization problem including cost objective, variable bounds and dynamics constraints. + """ nx, nu = self.model.nx, self.model.nu T = self.T - - # define optimizer and variables + # Define optimizer and variables. opti = cs.Opti() - # states + # States. x_var = opti.variable(nx, T + 1) - # inputs + # Inputs. u_var = opti.variable(nu, T) - # initial state + # Initial state. x_init = opti.parameter(nx, 1) - # reference (equilibrium point or trajectory, last step for terminal cost) + # Reference (equilibrium point or trajectory, last step for terminal cost). x_ref = opti.parameter(nx, T + 1) - # Chance constraint limits + # Chance constraint limits. 
state_constraint_set = [] for state_constraint in self.constraints.state_constraints: state_constraint_set.append(opti.parameter(state_constraint.num_constraints, T+1)) input_constraint_set = [] for input_constraint in self.constraints.input_constraints: input_constraint_set.append(opti.parameter(input_constraint.num_constraints, T)) - - # Sparse GP mean postfactor matrix + # Sparse GP mean postfactor matrix. mean_post_factor = opti.parameter(len(self.target_mask), T) - # Sparse GP inducing points + # Sparse GP inducing points. z_ind = opti.parameter(T, len(self.input_mask)) - - # cost (cumulative) + # Cost (cumulative). cost = 0 cost_func = self.model.loss - for i in range(T): cost += cost_func(x=x_var[:, i], u=u_var[:, i], @@ -418,7 +361,7 @@ def setup_gp_optimizer(self): Ur=np.zeros((nu, 1)), Q=self.Q, R=self.R)["l"] - # terminal cost + # Terminal cost. cost += cost_func(x=x_var[:, -1], u=np.zeros((nu, 1)), Xr=x_ref[:, -1], @@ -426,35 +369,28 @@ def setup_gp_optimizer(self): Q=self.Q, R=self.R)["l"] opti.minimize(cost) - z = cs.vertcat(x_var[:,:-1], u_var) z = z[self.input_mask,:] for i in range(self.T): - # Dynamics constraints using the dynamics of the prior and the mean of the GP. - # This follows the tractable dynamics formulation in Section III.B in (Hewing 2019). + # This follows the tractable dynamics formulation in Section III.B in Hewing 2019. # Note that for the GP approximation, we are purposely using elementwise multiplication *. next_state = self.prior_dynamics_func(x0=x_var[:, i]-self.prior_ctrl.X_LIN[:,None], p=u_var[:, i]-self.prior_ctrl.U_LIN[:,None])['xf'] + \ self.prior_ctrl.X_LIN[:,None]+ self.Bd @ cs.sum2(self.K_z_zind_func(z1=z[:,i].T, z2=z_ind)['K'] * mean_post_factor) opti.subject_to(x_var[:, i + 1] == next_state) - - # Probabilistic state and input constraints according to Hewing 2019 constraint tightening + # Probabilistic state and input constraints according to Hewing 2019 constraint tightening. for s_i, state_constraint in enumerate(self.state_constraints_sym): opti.subject_to(state_constraint(x_var[:, i]) <= state_constraint_set[s_i][:,i]) for u_i, input_constraint in enumerate(self.input_constraints_sym): opti.subject_to(input_constraint(u_var[:, i]) <= input_constraint_set[u_i][:,i]) - - # final state constraints + # Final state constraints. for s_i, state_constraint in enumerate(self.state_constraints_sym): opti.subject_to(state_constraint(x_var[:, -1]) <= state_constraint_set[s_i][:,-1]) - - - # initial condition constraints + # Initial condition constraints. opti.subject_to(x_var[:, 0] == x_init) - - # create solver (IPOPT solver for now) + # Create solver (IPOPT solver in this version). opts = {"ipopt.print_level": 4, "ipopt.sb": "yes", "ipopt.max_iter": 100, @@ -473,8 +409,10 @@ def setup_gp_optimizer(self): "cost": cost } - def select_action_with_gp(self, obs): - """Solves nonlinear mpc problem to get next action. + def select_action_with_gp(self, + obs + ): + """Solves nonlinear MPC problem to get next action. Args: obs (np.array): current state/observation. @@ -494,39 +432,32 @@ def select_action_with_gp(self, obs): mean_post_factor = opti_dict["mean_post_factor"] z_ind = opti_dict["z_ind"] cost = opti_dict["cost"] - - # assign the initial state + # Assign the initial state. opti.set_value(x_init, obs) - - # assign reference trajectory within horizon + # Assign reference trajectory within horizon. 
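The GP contribution added to the prior dynamics above is `K(z, z_ind) @ mean_post_factor`, where `mean_post_factor` comes from the FITC approximation with the previous MPC solution as inducing points. Below is a hedged numpy sketch of those quantities, following the Quinonero-Candela & Rasmussen (2005) formulas cited in the module docstring; the kernel helper, data, and hyperparameters are toy stand-ins rather than the repository's gpytorch objects:

```python
import numpy as np

def se_kernel(X1, X2, ell, sf2):
    """Squared-exponential (ARD) kernel, the same form as covSEard."""
    d2 = ((X1[:, None, :] - X2[None, :, :]) ** 2 / ell**2).sum(-1)
    return sf2 * np.exp(-0.5 * d2)

rng = np.random.default_rng(1)
X = rng.uniform(-1, 1, size=(50, 3))               # training inputs (N, nx+nu)
y = np.sin(X[:, 0]) + 0.01 * rng.normal(size=50)   # one target dimension
Z = X[rng.choice(50, size=10, replace=False)]      # inducing points (e.g. previous MPC solution)
ell, sf2, noise = np.ones(3), 1.0, 1e-2

K_uu = se_kernel(Z, Z, ell, sf2) + 1e-8 * np.eye(len(Z))
K_fu = se_kernel(X, Z, ell, sf2)
K_uu_inv = np.linalg.inv(K_uu)

# FITC: diagonal correction Gamma and the posterior factor reused at every MPC step.
q_ff_diag = np.einsum("ij,jk,ik->i", K_fu, K_uu_inv, K_fu)   # diag of K_fu K_uu^-1 K_fu^T
gamma = sf2 - q_ff_diag + noise                              # diag of (K_ff - Q_ff) + noise
gamma_inv = np.diag(1.0 / gamma)
sigma = np.linalg.inv(K_uu + K_fu.T @ gamma_inv @ K_fu)
mean_post_factor = sigma @ K_fu.T @ gamma_inv @ y            # (M,)

# Approximate posterior mean at a query point z*: k(z*, Z) @ mean_post_factor.
z_star = np.zeros((1, 3))
mean_star = se_kernel(z_star, Z, ell, sf2) @ mean_post_factor
print(mean_star)
```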
goal_states = self.get_references() opti.set_value(x_ref, goal_states) if self.mode == "tracking": self.traj_step += 1 - - # set the probabilistic state and input constraint set limits + # Set the probabilistic state and input constraint set limits. state_constraint_set_prev, input_constraint_set_prev = self.precompute_probabilistic_limits() for si in range(len(self.constraints.state_constraints)): opti.set_value(state_constraint_set[si], state_constraint_set_prev[si]) for ui in range(len(self.constraints.input_constraints)): opti.set_value(input_constraint_set[ui], input_constraint_set_prev[ui]) - mean_post_factor_val, Sigma, K_zind_zind_inv, z_ind_val = self.precompute_sparse_gp_values() opti.set_value(mean_post_factor, mean_post_factor_val) opti.set_value(z_ind, z_ind_val) - - # initial guess for optim problem + # Initial guess for the optimization problem. if self.warmstart and self.x_prev is not None and self.u_prev is not None: # shift previous solutions by 1 step x_guess = deepcopy(self.x_prev) u_guess = deepcopy(self.u_prev) x_guess[:, :-1] = x_guess[:, 1:] u_guess[:-1] = u_guess[1:] - opti.set_initial(x_var, x_guess) opti.set_initial(u_var, u_guess) - - # solve the optimization problem + # Solve the optimization problem. try: sol = opti.solve() x_val, u_val = sol.value(x_var), sol.value(u_var) @@ -535,52 +466,49 @@ def select_action_with_gp(self, obs): u_val = np.atleast_2d(u_val) self.x_prev = x_val self.u_prev = u_val - self.results_dict['horizon_states'].append(deepcopy(self.x_prev)) self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev)) - zi = np.hstack((x_val[:,0], u_val[:,0])) zi = zi[self.input_mask] gp_contribution = np.sum(self.K_z_zind_func(z1=zi, z2=z_ind_val)['K'].toarray() * mean_post_factor_val,axis=1) print("GP Contribution: %s" % gp_contribution) - # take the first one from solved action sequence + # Take the first one from solved action sequence. if u_val.ndim > 1: action = u_val[:, 0] else: action = np.array([u_val[0]]) - self.prev_action = action, return action - def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): + def learn(self, + input_data=None, + target_data=None, + gp_model=None, + plot=False + ): """Performs GP training. Args: - input_data, target_data (np.array): (OPTIONAL) data to use for training - gp_model (str): If not None, this is the path to pretrained models to use instead of training new ones. - plot (bool): To plot validation trajectories or not. + input_data, target_data (optiona, np.array): data to use for training + gp_model (str): if not None, this is the path to pretrained models to use instead of training new ones. + plot (bool): to plot validation trajectories or not. Returns: training_results (dict): Dictionary of the training results. """ - if gp_model is None: gp_model = self.gp_model_path - - # TODO: Until sampling within constraints is implemented, do this and fix later self.prior_ctrl.remove_constraints(self.prior_ctrl.additional_constraints) self.reset() - if self.online_learning: input_data = np.zeros((self.train_iterations, len(self.input_mask))) target_data = np.zeros((self.train_iterations, len(self.target_mask))) - if input_data is None and target_data is None: train_inputs = [] train_targets = [] train_info = [] - # Use Latin Hypercube Sampling to generate states withing environment bounds + # Use Latin Hypercube Sampling to generate states withing environment bounds. 
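The training-data collection that follows draws initial states by Latin Hypercube Sampling within the environment's randomization bounds (the repository uses skopt's `Lhs` sampler). A dependency-free numpy sketch of classic LHS with made-up cartpole-like bounds, for illustration only:

```python
import numpy as np

def latin_hypercube(limits, n_samples, rng):
    """Classic Latin Hypercube Sampling within per-dimension (low, high) bounds."""
    dim = len(limits)
    # One stratum index per sample and dimension, permuted independently per dimension.
    strata = rng.permuted(np.tile(np.arange(n_samples), (dim, 1)), axis=1).T   # (n_samples, dim)
    u = (strata + rng.uniform(size=(n_samples, dim))) / n_samples              # one point per stratum
    lows = np.array([lo for lo, _ in limits])
    highs = np.array([hi for _, hi in limits])
    return lows + u * (highs - lows)

rng = np.random.default_rng(0)
# e.g. bounds on (x, x_dot, theta, theta_dot) initial states; values are illustrative.
limits = [(-0.1, 0.1), (-0.2, 0.2), (-0.05, 0.05), (-0.1, 0.1)]
init_state_samples = latin_hypercube(limits, n_samples=20, rng=rng)
print(init_state_samples.shape)  # (20, 4)
```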
lhs_sampler = Lhs(lhs_type='classic', criterion='maximin') limits = [(self.env.INIT_STATE_RAND_INFO[key].low, self.env.INIT_STATE_RAND_INFO[key].high) for key in self.env.INIT_STATE_RAND_INFO] @@ -593,7 +521,6 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): for i in range(self.train_iterations): # For random initial state training. init_state = init_state_samples[i,:] - # Collect data with prior controller. run_env = self.env_func(init_state=init_state, randomized_init=False) episode_results = self.prior_ctrl.run(env=run_env, max_steps=1) @@ -601,7 +528,6 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): x_obs = episode_results['obs'][-3:,:] u_seq = episode_results['action'][-1:,:] run_env.close() - x_seq = x_obs[:-1,:] x_next_seq = x_obs[1:,:] train_inputs_i, train_targets_i = self.preprocess_training_data(x_seq, u_seq, x_next_seq) @@ -610,12 +536,10 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): else: train_inputs = input_data train_targets = target_data - train_inputs = np.vstack(train_inputs) train_targets = np.vstack(train_targets) self.data_inputs = train_inputs self.data_targets = train_targets - train_inputs_tensor = torch.Tensor(train_inputs).double() train_targets_tensor = torch.Tensor(train_targets).double() if plot: @@ -626,7 +550,7 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): u_seq = validation_results['action'] x_seq = x_obs[:-1, :] x_next_seq = x_obs[1:, :] - # Define likelihood + # Define likelihood. likelihood = gpytorch.likelihoods.GaussianLikelihood( noise_constraint=gpytorch.constraints.GreaterThan(1e-9), ).double() @@ -649,22 +573,18 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): learning_rate=self.learning_rate, gpu=self.use_gpu, dir=self.output_dir) - # Plot Validation + # Plot validation. if plot: - validation_inputs, validation_targets = self.preprocess_training_data(x_seq, u_seq, x_next_seq) fig_count = 0 fig_count = self.gaussian_process.plot_trained_gp(torch.Tensor(validation_inputs).double(), torch.Tensor(validation_targets).double(), fig_count=fig_count) - self.set_gp_dynamics_func() self.setup_gp_optimizer() - # TODO: Until sampling within constraints is fixed, need to add constraint back in self.prior_ctrl.add_constraints(self.prior_ctrl.additional_constraints) self.prior_ctrl.reset() - - # collect training results + # Collect training results. training_results = {} training_results['train_targets'] = train_targets training_results['train_inputs'] = train_inputs @@ -672,21 +592,20 @@ def learn(self, input_data=None, target_data=None, gp_model=None, plot=False): training_results['info'] = train_info except UnboundLocalError: training_results['info'] = None - return training_results - - def select_action(self, obs): + def select_action(self, + obs + ): """Select the action based on the given observation. Args: - obs (np.array): Current observed state + obs (np.array): current observed state. Returns: - action (np.array): Desired policy action. + action (np.array): desired policy action. 
""" - # todo: Modify this to handle a passed in prior controller if self.gaussian_process is None: action = self.prior_ctrl.select_action(obs) else: @@ -695,7 +614,6 @@ def select_action(self, obs): exit() t1 = time.perf_counter() action = self.select_action_with_gp(obs) - #action = self.apply_linear_ancilliary_control(obs, action) t2 = time.perf_counter() print("GP SELECT ACTION TIME: %s" %(t2 - t1)) self.last_obs = obs @@ -703,12 +621,16 @@ def select_action(self, obs): return action def close(self): - """Cleans up resources.""" + """Clean up. + + """ self.env_training.close() self.env.close() - #self.logger.close() def reset_results_dict(self): + """ + + """ "Result the results_dict before running." super().reset_results_dict() self.results_dict['input_constraint_set'] = [] @@ -717,8 +639,10 @@ def reset_results_dict(self): self.results_dict['input_horizon_cov'] = [] def reset(self): - """Reset the controller before running.""" - # setup reference input + """Reset the controller before running. + + """ + # Setup reference input. if self.env.TASK == Task.STABILIZATION: self.mode = "stabilization" self.x_goal = self.env.X_GOAL @@ -726,15 +650,12 @@ def reset(self): self.mode = "tracking" self.traj = self.env.X_GOAL.T self.traj_step = 0 - - # dynamics model + # Dynamics model. if self.gaussian_process is not None: self.set_gp_dynamics_func() - - # casadi optimizer + # CasADi optimizer. self.setup_gp_optimizer() self.prior_ctrl.reset() - - # previously solved states & inputs, useful for warm start + # Previously solved states & inputs, useful for warm start. self.x_prev = None self.u_prev = None diff --git a/safe_control_gym/controllers/mpc/gp_mpc.yaml b/safe_control_gym/controllers/mpc/gp_mpc.yaml index a6c9d5256..cff27d17b 100644 --- a/safe_control_gym/controllers/mpc/gp_mpc.yaml +++ b/safe_control_gym/controllers/mpc/gp_mpc.yaml @@ -1,7 +1,7 @@ # Seed for LHS sampling seed: 1337 -# mpc args +# MPC args horizon: 10 r_mpc: - 1. @@ -30,7 +30,7 @@ inertial_prop: - 1.0 prior_param_coeff: 1. -# runner args +# Runner args deque_size: 10 eval_batch_size: 10 diff --git a/safe_control_gym/controllers/mpc/gp_utils.py b/safe_control_gym/controllers/mpc/gp_utils.py index 7dad37e2a..bf9a8b0e8 100644 --- a/safe_control_gym/controllers/mpc/gp_utils.py +++ b/safe_control_gym/controllers/mpc/gp_utils.py @@ -1,43 +1,60 @@ +"""Utility functions for Gaussian Processes. + +""" import os.path import numpy as np import gpytorch import torch import matplotlib.pyplot as plt -from copy import deepcopy import casadi as ca +from copy import deepcopy + from safe_control_gym.utils.utils import mkdirs -def covSEard(x, z, ell, sf2): - """ GP squared exponential kernel - This function is copied from GP-MPC library by - Copyright (c) 2018, Helge-André Langåker +def covSEard(x, + z, + ell, + sf2 + ): + """GP squared exponential kernel. - Args: - x (np.array or casadi.MX/SX): First vector. - z (np.array or casadi.MX/SX): Second vector. - ell (np.array or casadi.MX/SX): Length scales. - sf2 (float or casadi.MX/SX): output scale parameter. + This function is based on the 2018 GP-MPC library by Helge-André Langåker - Returns: - SE kernel (casadi.MX/SX): SE kernel. + Args: + x (np.array or casadi.MX/SX): First vector. + z (np.array or casadi.MX/SX): Second vector. + ell (np.array or casadi.MX/SX): Length scales. + sf2 (float or casadi.MX/SX): output scale parameter. + + Returns: + SE kernel (casadi.MX/SX): SE kernel. 
""" dist = ca.sum1((x - z)**2 / ell**2) return sf2 * ca.SX.exp(-.5 * dist) + class ZeroMeanIndependentMultitaskGPModel(gpytorch.models.ExactGP): - def __init__(self, train_x, train_y, likelihood, nx): - """ - Multidimension Gaussian Process model with zero mean function (or - constant mean...)and radial basis function kernel (SE). + """Multidimensional Gaussian Process model with zero mean function. + + Or constant mean and radial basis function kernel (SE). + + """ + + def __init__(self, + train_x, + train_y, + likelihood, + nx + ): + """Initialize a multidimensional Gaussian Process model with zero mean function. Args: - train_x (torch.Tensor): input training data (input_dim X N samples) - train_y (torch.Tensor): output training data (output dim x N samples) - likelihood (gpytorch.likelihood): Likelihood function - (gpytorch.likelihoods.MultitaskGaussianLikelihood) + train_x (torch.Tensor): input training data (input_dim X N samples). + train_y (torch.Tensor): output training data (output dim x N samples). + likelihood (gpytorch.likelihood): Likelihood function (gpytorch.likelihoods.MultitaskGaussianLikelihood). nx (int): dimension of the target output (output dim) """ @@ -54,7 +71,12 @@ def __init__(self, train_x, train_y, likelihood, nx): ard_num_dims=train_x.shape[1] ) - def forward(self, x): + def forward(self, + x + ): + """ + + """ mean_x = self.mean_module(x) covar_x = self.covar_module(x) return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn( @@ -63,16 +85,22 @@ def forward(self, x): class ZeroMeanIndependentGPModel(gpytorch.models.ExactGP): - def __init__(self, train_x, train_y, likelihood): - """ - Single dimensional output Gaussian Process model with zero mean function (or - constant mean...) and radial basis function kernel (SE). + """Single dimensional output Gaussian Process model with zero mean function. + + Or constant mean and radial basis function kernel (SE). + + """ + def __init__(self, + train_x, + train_y, + likelihood + ): + """Initialize a single dimensional Gaussian Process model with zero mean function. Args: - train_x (torch.Tensor): input training data (input_dim X N samples) - train_y (torch.Tensor): output training data (output dim x N samples) - likelihood (gpytorch.likelihood): Likelihood function - (gpytorch.likelihoods.GaussianLikelihood) + train_x (torch.Tensor): input training data (input_dim X N samples). + train_y (torch.Tensor): output training data (output dim x N samples). + likelihood (gpytorch.likelihood): Likelihood function (gpytorch.likelihoods.GaussianLikelihood). """ super().__init__(train_x, train_y, likelihood) @@ -84,29 +112,38 @@ def __init__(self, train_x, train_y, likelihood): ard_num_dims=train_x.shape[1] ) - def forward(self, x): + def forward(self, + x + ): + """ + + """ mean_x = self.mean_module(x) covar_x = self.covar_module(x) return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + class GaussianProcessCollection: - """Collection of GaussianProcesses for multioutput GPs""" + """Collection of GaussianProcesses for multioutput GPs. + + """ + def __init__(self, model_type, likelihood, target_dim, input_mask=None, target_mask=None, - normalize=False): - """ - Creates a single GaussianProcess for each output dimension + normalize=False + ): + """Creates a single GaussianProcess for each output dimension. 
Args: - model_type (gpytorch model class): Model class for the GP (ZeroMeanIndependentGPModel) - likelihood (gpytorch.likelihood): likelihood function + model_type (gpytorch model class): Model class for the GP (ZeroMeanIndependentGPModel). + likelihood (gpytorch.likelihood): likelihood function. target_dim (int): Dimension of the output (how many GPs to make). input_mask (list): Input dimensions to keep. If None, use all input dimensions. target_mask (list): Target dimensions to keep. If None, use all target dimensions. - normalize (bool): If True, scale all data between -1 and 1. (prototype and not fully operational) + normalize (bool): If True, scale all data between -1 and 1. """ self.gp_list = [] @@ -123,7 +160,10 @@ def __init__(self, model_type, input_mask=input_mask, normalize=normalize)) - def _init_properties(self, train_inputs, train_targets): + def _init_properties(self, + train_inputs, + train_targets + ): """Initialize useful properties. Args: @@ -135,7 +175,11 @@ def _init_properties(self, train_inputs, train_targets): self.output_dimension = target_dimension self.n_training_samples = train_inputs.shape[0] - def init_with_hyperparam(self, train_inputs, train_targets, path_to_statedicts): + def init_with_hyperparam(self, + train_inputs, + train_targets, + path_to_statedicts + ): """Load hyperparameters from a state_dict. Args: @@ -160,8 +204,12 @@ def init_with_hyperparam(self, train_inputs, train_targets, path_to_statedicts): gp_K_plus_noise = torch.stack(gp_K_plus_noise_list) self.K_plus_noise = gp_K_plus_noise - def get_hyperparameters(self, as_numpy=False): - """Get the outputscale and lengthscale from the kernel matrices of the GPs.""" + def get_hyperparameters(self, + as_numpy=False + ): + """Get the outputscale and lengthscale from the kernel matrices of the GPs. + + """ lengthscale_list = [] output_scale_list = [] noise_list = [] @@ -169,7 +217,6 @@ def get_hyperparameters(self, as_numpy=False): lengthscale_list.append(gp.model.covar_module.base_kernel.lengthscale.detach()) output_scale_list.append(gp.model.covar_module.outputscale.detach()) noise_list.append(gp.model.likelihood.noise.detach()) - lengthscale = torch.cat(lengthscale_list) outputscale = torch.Tensor(output_scale_list) noise = torch.Tensor(noise_list) @@ -178,11 +225,19 @@ def get_hyperparameters(self, as_numpy=False): else: return lengthscale, outputscale, noise, self.K_plus_noise - def train(self, train_x_raw, train_y_raw, n_train=[500], learning_rate=[0.01], gpu=False, dir='results'): - """ - Train the GP using Train_x and Train_y - train_x: Torch tensor ( N samples [rows] by input dim [cols]) - train_y: Torch tensor (N samples [rows] by target dim [cols] ) + def train(self, + train_x_raw, + train_y_raw, + n_train=[500], + learning_rate=[0.01], + gpu=False, + dir='results' + ): + """Train the GP using Train_x and Train_y. + + Args: + train_x: Torch tensor (N samples [rows] by input dim [cols]) + train_y: Torch tensor (N samples [rows] by target dim [cols]) """ self._init_properties(train_x_raw, train_y_raw) @@ -208,19 +263,26 @@ def train(self, train_x_raw, train_y_raw, n_train=[500], learning_rate=[0.01], g gp_K_plus_noise = torch.stack(gp_K_plus_noise_list) self.K_plus_noise = gp_K_plus_noise - def predict(self, x, requires_grad=False, return_pred=True): + def predict(self, + x, + requires_grad=False, + return_pred=True + ): """ - x : torch.Tensor (N_samples x input DIM) + + Args: + x : torch.Tensor (N_samples x input DIM). 
+ Return - Predicitons - mean : torch.tensor (nx X N_samples) - lower : torch.tensor (nx X N_samples) - upper : torch.tensor (nx X N_samples) + Predictions + mean : torch.tensor (nx X N_samples). + lower : torch.tensor (nx X N_samples). + upper : torch.tensor (nx X N_samples). + """ means_list = [] cov_list = [] pred_list = [] - for gp in self.gp_list: if return_pred: mean, cov, pred = gp.predict(x, requires_grad=requires_grad, return_pred=return_pred) @@ -236,45 +298,58 @@ def predict(self, x, requires_grad=False, return_pred=True): else: return means, cov - def prediction_jacobian(self, query): - """Return Jacobian""" + def prediction_jacobian(self, + query + ): + """Return Jacobian. + + """ return NotImplementedError - def plot_trained_gp(self, inputs, targets, fig_count=0): - """Plot the trained GP given the input and target data. Useful for validation. + def plot_trained_gp(self, + inputs, + targets, + fig_count=0 + ): + """Plot the trained GP given the input and target data. + """ for gp_ind, gp in enumerate(self.gp_list): fig_count = gp.plot_trained_gp(inputs, targets[:,gp_ind,None], fig_count) fig_count += 1 - def _kernel_list(self, x1, x2=None): + def _kernel_list(self, + x1, + x2=None + ): """Evaluate the kernel given vectors x1 and x2. Args: - x1 (torch.Tensor): First vector - x2 (torch.Tensor): Second vector + x1 (torch.Tensor): First vector. + x2 (torch.Tensor): Second vector. - Return: + Returns: list of LazyTensor Kernels. """ if x2 is None: x2 = x1 - k_list = [] for gp in self.gp_list: k_list.append(gp.model.covar_module(x1, x2)) - return k_list - def kernel(self, x1, x2=None): + def kernel(self, + x1, + x2=None + ): """Evaluate the kernel given vectors x1 and x2. Args: - x1 (torch.Tensor): First vector - x2 (torch.Tensor): Second vector + x1 (torch.Tensor): First vector. + x2 (torch.Tensor): Second vector. - Return: + Returns: Torch tensor of the non-lazy kernel matrices. """ @@ -282,14 +357,19 @@ def kernel(self, x1, x2=None): non_lazy_tensors = [k.evaluate() for k in k_list] return torch.stack(non_lazy_tensors) - def kernel_inv(self, x1, x2=None): - """Evaluate the inverse kernel given vectors x1 and x2. Only works for square kernel. + def kernel_inv(self, + x1, + x2=None + ): + """Evaluate the inverse kernel given vectors x1 and x2. + + Only works for square kernel. Args: - x1 (torch.Tensor): First vector - x2 (torch.Tensor): Second vector + x1 (torch.Tensor): First vector. + x2 (torch.Tensor): Second vector. - Return: + Returns: Torch tensor of the non-lazy inverse kernel matrices. """ @@ -298,19 +378,28 @@ def kernel_inv(self, x1, x2=None): assert x1.shape == x2.shape, ValueError("x1 and x2 need to have the same shape.") k_list = self._kernel_list(x1, x2) num_of_points = x1.shape[0] - # Efficient inversion is performed VIA inv_matmul on the laze tensor with Identity. non_lazy_tensors = [k.inv_matmul(torch.eye(num_of_points).double()) for k in k_list] return torch.stack(non_lazy_tensors) class GaussianProcess: - def __init__(self, model_type, likelihood, input_mask=None, target_mask=None, normalize=False): - """ - Gaussian Process decorator for gpytorch + """Gaussian Process decorator for gpytorch. + + """ + + def __init__(self, + model_type, + likelihood, + input_mask=None, + target_mask=None, + normalize=False + ): + """Initialize Gaussian Process. 
+ Args: - model_type (gpytorch model class): Model class for the GP (ZeroMeanIndependentMultitaskGPModel) - likelihood (gpytorch.likelihood): likelihood function - normalize (bool): If True, scale all data between -1 and 1. (prototype and not fully operational) + model_type (gpytorch model class): Model class for the GP (ZeroMeanIndependentMultitaskGPModel). + likelihood (gpytorch.likelihood): likelihood function. + normalize (bool): If True, scale all data between -1 and 1. (prototype and not fully operational). """ self.model_type = model_type @@ -321,8 +410,13 @@ def __init__(self, model_type, likelihood, input_mask=None, target_mask=None, no self.input_mask = input_mask self.target_mask = target_mask - def _init_model(self, train_inputs, train_targets): - """Init GP model from train inputs and train_targets""" + def _init_model(self, + train_inputs, + train_targets + ): + """Init GP model from train inputs and train_targets. + + """ if train_targets.ndim > 1: target_dimension = train_targets.shape[1] else: @@ -331,24 +425,32 @@ def _init_model(self, train_inputs, train_targets): self.model = self.model_type(train_inputs, train_targets, self.likelihood) - # Extract dimensions for external use. self.input_dimension = train_inputs.shape[1] self.output_dimension = target_dimension self.n_training_samples = train_inputs.shape[0] - def _compute_GP_covariances(self, train_x): - """Compute K(X,X) + sigma*I and it's inverse for later use.""" + def _compute_GP_covariances(self, + train_x + ): + """Compute K(X,X) + sigma*I and its inverse. - # compute inverse covariance plus noise for faster computation later + """ + # Pre-compute inverse covariance plus noise to speed-up computation. K_lazy = self.model.covar_module(train_x.double()) K_lazy_plus_noise = K_lazy.add_diag(self.model.likelihood.noise) n_samples = train_x.shape[0] self.model.K_plus_noise = K_lazy_plus_noise.matmul(torch.eye(n_samples).double()) self.model.K_plus_noise_inv = K_lazy_plus_noise.inv_matmul(torch.eye(n_samples).double()) - def init_with_hyperparam(self, train_inputs, train_targets, path_to_statedict): - """Load hyperparameters from a state_dict.""" + def init_with_hyperparam(self, + train_inputs, + train_targets, + path_to_statedict + ): + """Load hyperparameters from a state_dict. + + """ if self.input_mask is not None: train_inputs = train_inputs[:, self.input_mask] if self.target_mask is not None: @@ -360,19 +462,25 @@ def init_with_hyperparam(self, train_inputs, train_targets, path_to_statedict): self.model.double() # needed otherwise loads state_dict as float32 self._compute_GP_covariances(train_inputs) - def train(self, train_x_raw, train_y_raw, n_train=500, learning_rate=0.01, gpu=False, fname='best_model.pth'): - """ - Train the GP using Train_x and Train_y - train_x: Torch tensor ( N samples [rows] by input dim [cols]) - train_y: Torch tensor (N samples [rows] by target dim [cols] ) + def train(self, + train_x_raw, + train_y_raw, + n_train=500, + learning_rate=0.01, + gpu=False, + fname='best_model.pth' + ): + """Train the GP using Train_x and Train_y. 
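The _compute_GP_covariances step referenced here caches K(X, X) + sigma^2 * I and its inverse so later predictions avoid repeated solves. A small dense NumPy sketch of the same quantities (the repository builds them lazily through gpytorch's add_diag/inv_matmul):

    import numpy as np

    def se_ard_gram(X, ell, sf2):
        # Dense SE-ARD Gram matrix K(X, X) for a small data set.
        diff = X[:, None, :] - X[None, :, :]
        return sf2 * np.exp(-0.5 * np.sum(diff**2 / ell**2, axis=-1))

    X = np.random.rand(10, 3)            # 10 samples, 3 input dimensions
    ell = np.array([1.0, 0.7, 2.0])
    sf2, noise = 1.5, 1e-2

    K_plus_noise = se_ard_gram(X, ell, sf2) + noise * np.eye(len(X))
    K_plus_noise_inv = np.linalg.inv(K_plus_noise)  # computed once, reused at prediction time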
- """ + Args: + train_x: Torch tensor (N samples [rows] by input dim [cols]) + train_y: Torch tensor (N samples [rows] by target dim [cols]) + """ if self.input_mask is not None: train_x_raw = train_x_raw[:, self.input_mask] if self.target_mask is not None: train_y_raw = train_y_raw[:, self.target_mask] - if self.NORMALIZE: self.scale_normalization = 2/(train_x_raw.max(0)[0] - train_x_raw.min(0)[0]) self.scale_shift = (-train_x_raw.max(0)[0] - train_x_raw.min(0)[0])/(train_x_raw.max(0)[0] - @@ -382,19 +490,16 @@ def train(self, train_x_raw, train_y_raw, n_train=500, learning_rate=0.01, gpu=F else: train_x = train_x_raw train_y = train_y_raw - self._init_model(train_x, train_y) if gpu: train_x = train_x.cuda() train_y = train_y.cuda() self.model = self.model.cuda() self.likelihood = self.likelihood.cuda() - self.model.double() self.likelihood.double() self.model.train() self.likelihood.train() - # Uncomment two lines below to fix likelihood noise self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate) mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) last_loss = 99999999 @@ -415,7 +520,6 @@ def train(self, train_x_raw, train_y_raw, n_train=500, learning_rate=0.01, gpu=F best_epoch = i i+=1 - print("Training Complete") print("Lowest epoch: %s" %best_epoch) print("Lowest Loss: %s" % best_loss) @@ -426,28 +530,31 @@ def train(self, train_x_raw, train_y_raw, n_train=500, learning_rate=0.01, gpu=F self.model.load_state_dict(torch.load(fname)) self._compute_GP_covariances(train_x) - def predict(self, x, requires_grad=False, return_pred=True): - """ - x : torch.Tensor (N_samples x input DIM) - Return - Predicitons - mean : torch.tensor (nx X N_samples) - lower : torch.tensor (nx X N_samples) - upper : torch.tensor (nx X N_samples) + def predict(self, + x, + requires_grad=False, + return_pred=True + ): """ + Args: + x : torch.Tensor (N_samples x input DIM). + + Returns: + Predictions + mean : torch.tensor (nx X N_samples). + lower : torch.tensor (nx X N_samples). + upper : torch.tensor (nx X N_samples). 
+ """ self.model.eval() self.likelihood.eval() if type(x) is np.ndarray: x = torch.from_numpy(x).double() - if self.input_mask is not None: x = x[:,self.input_mask] - if self.NORMALIZE: x = self.normalize(x) - if requires_grad: predictions = self.likelihood(self.model(x)) mean = predictions.mean @@ -462,17 +569,21 @@ def predict(self, x, requires_grad=False, return_pred=True): else: return mean, cov - def prediction_jacobian(self, query): + def prediction_jacobian(self, + query + ): mean_der, cov_der = torch.autograd.functional.jacobian( lambda x: self.predict(x, requires_grad=True, return_pred=False), query.double()) return mean_der.detach().squeeze() - def plot_trained_gp(self, inputs, targets, fig_count=0): - + def plot_trained_gp(self, + inputs, + targets, + fig_count=0 + ): if self.target_mask is not None: targets = targets[:, self.target_mask] - means, covs, preds = self.predict(inputs) t = np.arange(inputs.shape[0]) lower, upper = preds.confidence_region() @@ -492,24 +603,34 @@ def plot_trained_gp(self, inputs, targets, fig_count=0): plt.xlabel('Time (s)') plt.ylabel('v') plt.show() - return fig_count - def normalize(self, vector): + def normalize(self, + vector + ): dim = vector.shape[1] return normalize(vector, self.scale_normalization[:dim], self.scale_shift[:dim]) - def unnormalize(self, vector): + def unnormalize(self, + vector + ): dim = vector.shape[1] return unnormalize(vector, self.scale_normalization[:dim], self.scale_shift[:dim]) -def normalize(vector, a, b): +def normalize(vector, + a, + b + ): return vector*a + b -def unnormalize(vector, a, b): + +def unnormalize(vector, + a, + b + ): return (vector - b)/a diff --git a/safe_control_gym/controllers/mpc/linear_mpc.py b/safe_control_gym/controllers/mpc/linear_mpc.py index ec7d21674..5694f2b7a 100644 --- a/safe_control_gym/controllers/mpc/linear_mpc.py +++ b/safe_control_gym/controllers/mpc/linear_mpc.py @@ -1,53 +1,33 @@ -"""Linear Model Predictive Control +"""Linear Model Predictive Control. -Example: - run linear mpc on cartpole balance:: - - $ python tests/test_main.py --mode test_policy --exp_id linear_mpc_cartpole \ - --algo linear_mpc --task cartpole_200 --horizon 50 --q_mpc 100 0 10 0 - - run linear mpc on cartpole balance:: - - $ python tests/test_main.py --mode test_policy --exp_id linear_mpc_quad \ - --algo linear_mpc --task quadrotor --ctrl_time_mult 10 --horizon 50 \ - --use_prev_start {--shooting} - -Todo: - * better initialization of operating points per mpc step. +Based on: + * https://people.eecs.berkeley.edu/~pabbeel/cs287-fa12/slides/LQR.pdf + * https://pythonrobotics.readthedocs.io/en/latest/modules/path_tracking.html#mpc-modeling + * https://github.com/AtsushiSakai/PythonRobotics/blob/master/PathTracking/model_predictive_speed_and_steer_control/model_predictive_speed_and_steer_control.py """ -from sys import platform import numpy as np import casadi as cs + +from sys import platform from copy import deepcopy from safe_control_gym.controllers.mpc.mpc import MPC from safe_control_gym.envs.benchmark_env import Task -# ----------------------------------------------------------------------------------- -# Linear MPC -# ----------------------------------------------------------------------------------- - class LinearMPC(MPC): """ Simple linear MPC. 
- - reference: - - https://people.eecs.berkeley.edu/~pabbeel/cs287-fa12/slides/LQR.pdf - - https://pythonrobotics.readthedocs.io/en/latest/modules/path_tracking.html#mpc-modeling - - https://github.com/AtsushiSakai/PythonRobotics/blob/d391cdbb8c82f9c3e643f4c2fd95e85d03b47c9e/PathTracking/model_predictive_speed_and_steer_control/model_predictive_speed_and_steer_control.py + """ def __init__( self, env_func, - # model args horizon=5, q_mpc=[1], r_mpc=[1], warmstart=True, - # runner args - # shared/base args output_dir="results/temp", additional_constraints=[], **kwargs): @@ -60,14 +40,13 @@ def __init__( r_mpc (list): diagonals of input/action cost weight. warmstart (bool): if to initialize from previous iteration. output_dir (str): output directory to write logs and results. - additional_constraints (list): List of constraints + additional_constraints (list): list of constraints. """ - # all params/args (lazy hack) + # Store all params/args. for k, v in locals().items(): if k != "self" and k != "kwargs" and "__" not in k: self.__dict__[k] = v - super().__init__( env_func, horizon=horizon, @@ -78,21 +57,19 @@ def __init__( additional_constraints=additional_constraints, **kwargs ) - - # todo: setup environment equilibrium self.X_LIN = np.atleast_2d(self.env.X_GOAL)[0,:].T self.U_LIN = np.atleast_2d(self.env.U_GOAL)[0,:] - def set_dynamics_func(self): - """Updates symbolic dynamics with actual control frequency.""" - # original version, used in shooting + """Updates symbolic dynamics with actual control frequency. + + """ + # Original version, used in shooting. dfdxdfdu = self.model.df_func(x=self.X_LIN, u=self.U_LIN) dfdx = dfdxdfdu['dfdx'].toarray() dfdu = dfdxdfdu['dfdu'].toarray() delta_x = cs.MX.sym('delta_x', self.model.nx,1) delta_u = cs.MX.sym('delta_u', self.model.nu,1) - x_dot_lin_vec = dfdx @ delta_x + dfdu @ delta_u self.linear_dynamics_func = cs.integrator( 'linear_discrete_dynamics', self.model.integration_algo, @@ -102,30 +79,28 @@ def set_dynamics_func(self): 'ode': x_dot_lin_vec }, {'tf': self.dt} ) - self.dfdx = dfdx self.dfdu = dfdu - def setup_optimizer(self): - """Sets up convex optimization problem including - cost objective, variable bounds and dynamics constraints + """Sets up convex optimization problem. + + Including cost objective, variable bounds and dynamics constraints. + """ nx, nu = self.model.nx, self.model.nu T = self.T - - # define optimizer and variables + # Define optimizer and variables. opti = cs.Opti() - # states + # States. x_var = opti.variable(nx, T + 1) - # inputs + # Inputs. u_var = opti.variable(nu, T) - # initial state + # Initial state. x_init = opti.parameter(nx, 1) - # reference (equilibrium point or trajectory, last step for terminal cost) + # Reference (equilibrium point or trajectory, last step for terminal cost). x_ref = opti.parameter(nx, T + 1) - - # cost (cumulative) + # Cost (cumulative). cost = 0 cost_func = self.model.loss for i in range(T): @@ -135,7 +110,7 @@ def setup_optimizer(self): Ur=np.zeros((nu, 1)), Q=self.Q, R=self.R)["l"] - # terminal cost + # Terminal cost. cost += cost_func(x=x_var[:, -1]+self.X_LIN[:,None], u=np.zeros((nu, 1))+self.U_LIN[:, None], Xr=x_ref[:, -1], @@ -143,26 +118,21 @@ def setup_optimizer(self): Q=self.Q, R=self.R)["l"] opti.minimize(cost) - for i in range(self.T): - # dynamics constraints + # Dynamics constraints. next_state = self.linear_dynamics_func(x0=x_var[:, i], p=u_var[:,i])['xf'] opti.subject_to(x_var[:, i + 1] == next_state) - - # State and input constraints + # State and input constraints. 
for state_constraint in self.state_constraints_sym: opti.subject_to(state_constraint(x_var[:,i] + self.X_LIN.T) < 0) for input_constraint in self.input_constraints_sym: opti.subject_to(input_constraint(u_var[:,i] + self.U_LIN.T) < 0) - - # final state constraints + # Final state constraints. for state_constraint in self.state_constraints_sym: opti.subject_to(state_constraint(x_var[:,-1] + self.X_LIN.T) < 0) - - # initial condition constraints + # Initial condition constraints. opti.subject_to(x_var[:, 0] == x_init) - - # create solver (IPOPT solver for now ) + # Create solver (IPOPT solver in this version). opts = {} if platform == "linux": opti.solver('sqpmethod', opts) @@ -181,18 +151,20 @@ def setup_optimizer(self): "cost": cost } - def select_action(self, obs): - """Solve nonlinear mpc problem to get next action + def select_action(self, + obs + ): + """Solve nonlinear mpc problem to get next action. Args: obs (np.array): current state/observation. Returns: action (np.array): input/action to the task/env. + """ nx, nu = self.model.nx, self.model.nu T = self.T - opti_dict = self.opti_dict opti = opti_dict["opti"] x_var = opti_dict["x_var"] @@ -200,30 +172,24 @@ def select_action(self, obs): x_init = opti_dict["x_init"] x_ref = opti_dict["x_ref"] cost = opti_dict["cost"] - - # assign the initial state + # Assign the initial state. opti.set_value(x_init, obs-self.X_LIN) - - # assign reference trajectory within horizon + # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) if self.env.TASK == Task.TRAJ_TRACKING: self.traj_step += 1 - if self.warmstart and self.u_prev is not None and self.x_prev is not None: opti.set_initial(x_var, self.x_prev) opti.set_initial(u_var, self.u_prev) - - # solve the optimization problem + # Solve the optimization problem. try: sol = opti.solve() x_val, u_val = sol.value(x_var), sol.value(u_var) self.x_prev = x_val self.u_prev = u_val - self.results_dict['horizon_states'].append(deepcopy(self.x_prev) + self.X_LIN[:, None]) self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev) + self.U_LIN[:, None]) - except RuntimeError as e: print(e) return_status = opti.return_status() @@ -233,14 +199,11 @@ def select_action(self, obs): elif return_status == 'Maximum_Iterations_Exceeded': self.terminate_loop = True u_val = opti.debug.value(u_var) - - # take first one from solved action sequence + # Take first one from solved action sequence. if u_val.ndim > 1: action = u_val[:, 0] else: action = np.array([u_val[0]]) action += self.U_LIN self.prev_action = action - return action - diff --git a/safe_control_gym/controllers/mpc/linear_mpc.yaml b/safe_control_gym/controllers/mpc/linear_mpc.yaml index 3bca31ecf..59663e3d7 100644 --- a/safe_control_gym/controllers/mpc/linear_mpc.yaml +++ b/safe_control_gym/controllers/mpc/linear_mpc.yaml @@ -1,4 +1,4 @@ -# mpc args +# MPC args horizon: 10 r_mpc: - 1. @@ -6,8 +6,6 @@ q_mpc: - 1. use_prev_start: True -# runner args +# Runner args deque_size: 10 eval_batch_size: 10 - - diff --git a/safe_control_gym/controllers/mpc/mpc.py b/safe_control_gym/controllers/mpc/mpc.py index a4b79d597..aca2c31c6 100644 --- a/safe_control_gym/controllers/mpc/mpc.py +++ b/safe_control_gym/controllers/mpc/mpc.py @@ -1,23 +1,9 @@ -"""Model Predictive Control +"""Model Predictive Control. 
-Example: - run mpc on cartpole balance:: - - $ python tests/test_main.py --mode test_policy --exp_id mpc_cartpole \ - --algo mpc --task cartpole_200 --horizon 50 --q_mpc 100 0 10 0 - - run mpc on quadrotor hovering:: - - $ python tests/test_main.py --mode test_policy --exp_id mpc_quad \ - --algo mpc --task quadrotor --ctrl_time_mult 10 --horizon 50 - -Todo: - * All for additional constraints to be added externally? - * Remove eval batch? - * Terminal Constraints? Talk to Lukas about how best to do this. """ import numpy as np import casadi as cs + from copy import deepcopy from safe_control_gym.controllers.base_controller import BaseController @@ -26,26 +12,22 @@ from safe_control_gym.envs.constraints import ConstraintList, GENERAL_CONSTRAINTS, create_ConstraintList_from_dict -# ----------------------------------------------------------------------------------- -# (Nonlinear) MPC -# ----------------------------------------------------------------------------------- +class MPC(BaseController): + """MPC with full nonlinear model. + """ -class MPC(BaseController): - """MPC with full nonlinear model.""" def __init__( self, env_func, - # model args horizon=5, q_mpc=[1], r_mpc=[1], warmstart=True, - # runner args - # shared/base args output_dir="results/temp", additional_constraints=None, - **kwargs): + **kwargs + ): """Creates task and controller. Args: @@ -61,10 +43,8 @@ def __init__( for k, v in locals().items(): if k != "self" and k != "kwargs" and "__" not in k: self.__dict__.update({k: v}) - - # task + # Task. self.env = env_func() - if additional_constraints is not None: additional_ConstraintsList = create_ConstraintList_from_dict(additional_constraints, GENERAL_CONSTRAINTS, @@ -74,23 +54,21 @@ def __init__( else: self.reset_constraints(self.env.constraints.constraints) self.additional_constraints = [] - - # model params + # Model parameters self.model = self.env.symbolic self.dt = self.model.dt - self.T = horizon self.Q = get_cost_weight_matrix(self.q_mpc, self.model.nx) self.R = get_cost_weight_matrix(self.r_mpc, self.model.nu) - # logging - #self.logger = ExperimentLogger(output_dir) - - def reset_constraints(self, constraints): + def reset_constraints(self, + constraints + ): """ Setup the constraints list. Args: - constraints (list): List of constraints controller is subject too. + constraints (list): List of constraints controller is subject to. + """ self.constraints = ConstraintList(constraints) self.state_constraints_sym = self.constraints.get_state_constraint_symbolic_models() @@ -98,93 +76,102 @@ def reset_constraints(self, constraints): if len(self.constraints.input_state_constraints) > 0: raise NotImplementedError('MPC cannot handle combined state input constraints yet.') - def add_constraints(self, constraints): - """Add the constraints in provided list to the system + def add_constraints(self, + constraints + ): + """Add the constraints (from a list) to the system. Args: constraints (list): List of constraints controller is subject too. + """ self.reset_constraints(constraints + self.constraints.constraints) - def remove_constraints(self, constraints): + def remove_constraints(self, + constraints + ): """Remove constraints from the current constraint list. Args: - constraints (list): list of Constraints to be removed + constraints (list): list of constraints to be removed. 
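The constructor above expands the q_mpc/r_mpc lists into diagonal weight matrices, and the optimizer then accumulates a quadratic tracking cost over the horizon. A plain NumPy sketch of that cost with hypothetical dimensions and weights:

    import numpy as np

    nx, nu, T = 4, 1, 10
    Q = np.diag([5., 0.1, 5., 0.1])  # stand-in for get_cost_weight_matrix(q_mpc, nx)
    R = np.diag([0.1])               # stand-in for get_cost_weight_matrix(r_mpc, nu)

    x = np.zeros((nx, T + 1))        # planned states over the horizon
    u = np.zeros((nu, T))            # planned inputs
    x_ref = np.ones((nx, T + 1))     # reference (goal state repeated, or a trajectory slice)

    cost = 0.0
    for i in range(T):               # stage costs
        dx = x[:, i] - x_ref[:, i]
        cost += dx @ Q @ dx + u[:, i] @ R @ u[:, i]
    dx_T = x[:, -1] - x_ref[:, -1]   # terminal cost
    cost += dx_T @ Q @ dx_T
    print(cost)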
+ """ old_constraints_list = self.constraints.constraints - for constraint in constraints: assert constraint in self.constraints.constraints,\ ValueError("This constraint is not in the current list of constraints") old_constraints_list.remove(constraint) self.reset_constraints(old_constraints_list) - def close(self): - """Cleans up resources.""" + """Cleans up resources. + + """ self.env.close() - #self.logger.close() def reset(self): - """Prepares for training or evaluation.""" - # setup reference input + """Prepares for training or evaluation. + + """ + # Setup reference input. if self.env.TASK == Task.STABILIZATION: self.mode = "stabilization" self.x_goal = self.env.X_GOAL elif self.env.TASK == Task.TRAJ_TRACKING: self.mode = "tracking" self.traj = self.env.X_GOAL.T - # step along the reference + # Step along the reference. self.traj_step = 0 - - # dynamics model + # Dynamics model. self.set_dynamics_func() - - # casadi optimizer + # CasADi optimizer. self.setup_optimizer() - # previously solved states & inputs, useful for warm start + # Previously solved states & inputs, useful for warm start. self.x_prev = None self.u_prev = None self.reset_results_dict() def set_dynamics_func(self): - """Updates symbolic dynamics with actual control frequency.""" - self.dynamics_func = cs.integrator('fd', self.model.integration_algo, { - 'x': self.model.x_sym, - 'p': self.model.u_sym, - 'ode': self.model.x_dot - }, {'tf': self.dt}) + """Updates symbolic dynamics with actual control frequency. + + """ + self.dynamics_func = cs.integrator('fd', self.model.integration_algo, + { + 'x': self.model.x_sym, + 'p': self.model.u_sym, + 'ode': self.model.x_dot + }, + {'tf': self.dt}) def setup_optimizer(self): - """Sets up nonlinear optimization problem.""" + """Sets up nonlinear optimization problem. + + """ nx, nu = self.model.nx, self.model.nu T = self.T - - # define optimizer and variables + # Define optimizer and variables. opti = cs.Opti() - # states + # States. x_var = opti.variable(nx, T + 1) - # inputs + # Inputs. u_var = opti.variable(nu, T) - # initial state + # Initial state. x_init = opti.parameter(nx, 1) - # reference (equilibrium point or trajectory, last step for terminal cost) + # Reference (equilibrium point or trajectory, last step for terminal cost). x_ref = opti.parameter(nx, T + 1) - - # cost (cumulative) + # Cost (cumulative). cost = 0 cost_func = self.model.loss for i in range(T): - # can ignore first state cost (since fist x_var == x_init) + # Can ignore the first state cost since fist x_var == x_init. cost += cost_func(x=x_var[:, i], u=u_var[:, i], Xr=x_ref[:, i], Ur=np.zeros((nu, 1)), Q=self.Q, R=self.R)["l"] - # terminal cost + # Terminal cost. cost += cost_func(x=x_var[:, -1], u=np.zeros((nu, 1)), Xr=x_ref[:, -1], @@ -192,28 +179,21 @@ def setup_optimizer(self): Q=self.Q, R=self.R)["l"] opti.minimize(cost) - - # constraints + # Constraints for i in range(self.T): - # dynamics constraints + # Dynamics constraints. next_state = self.dynamics_func(x0=x_var[:, i], p=u_var[:, i])['xf'] opti.subject_to(x_var[:, i + 1] == next_state) - for state_constraint in self.state_constraints_sym: opti.subject_to(state_constraint(x_var[:,i]) < 0) for input_constraint in self.input_constraints_sym: opti.subject_to(input_constraint(u_var[:,i]) < 0) - - # final state constraints - # todo: Make this optional Terminal State Constraint + # Final state constraints. 
for state_constraint in self.state_constraints_sym: opti.subject_to(state_constraint(x_var[:, i]) < 0) - - # initial condition constraints + # Initial condition constraints. opti.subject_to(x_var[:, 0] == x_init) - - # create solver (IPOPT solver for now) - # reference: https://groups.google.com/g/casadi-users/c/a9OGbF7R4ss + # Create solver (IPOPT solver in this version) opts = {"ipopt.print_level": 0, "ipopt.sb": "yes", "print_time": 0} opti.solver('ipopt', opts) self.opti_dict = { @@ -225,7 +205,9 @@ def setup_optimizer(self): "cost": cost } - def select_action(self, obs): + def select_action(self, + obs + ): """Solves nonlinear mpc problem to get next action. Args: @@ -242,37 +224,30 @@ def select_action(self, obs): x_init = opti_dict["x_init"] x_ref = opti_dict["x_ref"] cost = opti_dict["cost"] - - # assign the initial state + # Assign the initial state. opti.set_value(x_init, obs) - - # assign reference trajectory within horizon + # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) if self.mode == "tracking": self.traj_step += 1 - - # initial guess for optim problem + # Initial guess for optimization problem. if self.warmstart and self.x_prev is not None and self.u_prev is not None: # shift previous solutions by 1 step x_guess = deepcopy(self.x_prev) u_guess = deepcopy(self.u_prev) x_guess[:, :-1] = x_guess[:, 1:] u_guess[:-1] = u_guess[1:] - opti.set_initial(x_var, x_guess) opti.set_initial(u_var, u_guess) - - # solve the optimization problem + # Solve the optimization problem. sol = opti.solve() x_val, u_val = sol.value(x_var), sol.value(u_var) self.x_prev = x_val self.u_prev = u_val - self.results_dict['horizon_states'].append(deepcopy(self.x_prev)) self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev)) - - # take the first one from solved action sequence + # Take the first action from the solved action sequence. if u_val.ndim > 1: action = u_val[:, 0] else: @@ -281,30 +256,33 @@ def select_action(self, obs): return action def get_references(self): - """Constructs reference states along mpc horizon.(nx, T+1)""" + """Constructs reference states along mpc horizon.(nx, T+1). + + """ if self.env.TASK == Task.STABILIZATION: - # repeat goal state for horizon steps + # Repeat goal state for horizon steps. goal_states = np.tile(self.env.X_GOAL.reshape(-1, 1), (1, self.T + 1)) elif self.env.TASK == Task.TRAJ_TRACKING: - # slice trajectory for horizon steps, - # if not long enough, repeat last state for remaining steps + # Slice trajectory for horizon steps, if not long enough, repeat last state. start = min(self.traj_step, self.traj.shape[-1]) end = min(self.traj_step + self.T + 1, self.traj.shape[-1]) remain = max(0, self.T + 1 - (end - start)) - goal_states = np.concatenate([ self.traj[:, start:end], np.tile(self.traj[:, -1:], (1, remain)) ], -1) else: raise Exception("Reference for this mode is not implemented.") - return goal_states # (nx, T+1) + return goal_states # (nx, T+1). def reset_results_dict(self): + """ + + """ self.results_dict = { 'obs': [], 'reward': [], 'done': [], - 'info': [], # ignore initial info so we can save easier + 'info': [], 'action': [], 'horizon_inputs': [], 'horizon_states': [] @@ -314,7 +292,8 @@ def run(self, env=None, render=False, logging=False, - max_steps=100): + max_steps=100 + ): """Runs evaluation with current policy. 
Args: @@ -327,10 +306,8 @@ def run(self, """ if env is None: env = self.env - self.x_prev = None self.u_prev = None - obs, info = env.reset() print("Init State:") print(obs) @@ -338,7 +315,6 @@ def run(self, frames = [] self.reset_results_dict() self.results_dict['obs'].append(obs) - i = 0 if self.env.TASK == Task.STABILIZATION: MAX_STEPS = max_steps @@ -346,7 +322,6 @@ def run(self, MAX_STEPS = self.traj.shape[1] else: raise("Undefined Task") - self.terminate_loop = False while np.linalg.norm(obs - env.X_GOAL) > 1e-3 and\ i < MAX_STEPS and\ @@ -355,15 +330,13 @@ def run(self, if self.terminate_loop: print("Infeasible MPC Problem") break - - # repeat input for more efficient control + # Repeat input for more efficient control. obs, reward, done, info = env.step(action) self.results_dict['obs'].append(obs) self.results_dict['reward'].append(reward) self.results_dict['done'].append(done) self.results_dict['info'].append(info) self.results_dict['action'].append(action) - print(i, '-th step.') print(action) print(obs) @@ -371,14 +344,11 @@ def run(self, print(done) print(info) print() - if render: env.render() frames.append(env.render("rgb_array")) - i += 1 - - # collect evaluation results + # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) if logging: @@ -386,10 +356,7 @@ def run(self, msg += "eval_ep_length {:.2f} +/- {:.2f} | eval_ep_return {:.3f} +/- {:.3f}\n".format( ep_lengths.mean(), ep_lengths.std(), ep_returns.mean(), ep_returns.std()) - #self.logger.info(msg + "\n") - self.results_dict['obs'] = np.vstack(self.results_dict['obs']) self.results_dict['reward'] = np.vstack(self.results_dict['reward']) self.results_dict['action'] = np.vstack(self.results_dict['action']) - return deepcopy(self.results_dict) diff --git a/safe_control_gym/controllers/mpc/mpc.yaml b/safe_control_gym/controllers/mpc/mpc.yaml index 3bca31ecf..59663e3d7 100644 --- a/safe_control_gym/controllers/mpc/mpc.yaml +++ b/safe_control_gym/controllers/mpc/mpc.yaml @@ -1,4 +1,4 @@ -# mpc args +# MPC args horizon: 10 r_mpc: - 1. @@ -6,8 +6,6 @@ q_mpc: - 1. use_prev_start: True -# runner args +# Runner args deque_size: 10 eval_batch_size: 10 - - diff --git a/safe_control_gym/controllers/mpc/mpc_utils.py b/safe_control_gym/controllers/mpc/mpc_utils.py index 19c1a650e..25f70874f 100644 --- a/safe_control_gym/controllers/mpc/mpc_utils.py +++ b/safe_control_gym/controllers/mpc/mpc_utils.py @@ -1,14 +1,15 @@ -"""Module descripion. +"""General MPC utility functions. """ import numpy as np import scipy -def get_cost_weight_matrix(weights, dim): +def get_cost_weight_matrix(weights, + dim + ): """Gets weight matrix from input args. - """ if len(weights) == dim: W = np.diag(weights) @@ -19,28 +20,31 @@ def get_cost_weight_matrix(weights, dim): return W -def discretize_linear_system(A, B, dt, exact=False): - """ discretization of a linear system +def discretize_linear_system(A, + B, + dt, + exact=False + ): + """Discretize a linear system. - dx/dt = A x + B u + dx/dt = A x + B u --> xd[k+1] = Ad xd[k] + Bd ud[k] where xd[k] = x(k*dt) Args: - A: np.array, system transition matrix - B: np.array, input matrix - dt: scalar, step time interval - exact: bool, if to use exact discretization + A: np.array, system transition matrix. + B: np.array, input matrix. + dt: scalar, step time interval. + exact: bool, if to use exact discretization. Returns: - discretized matrices Ad, Bd + Discretized matrices Ad, Bd. 
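discretize_linear_system supports an exact matrix-exponential discretization and a forward-Euler fallback. A double-integrator check of both, using the block-matrix trick the exact branch relies on:

    import numpy as np
    from scipy.linalg import expm

    A = np.array([[0., 1.], [0., 0.]])  # double integrator: x = [position, velocity]
    B = np.array([[0.], [1.]])
    dt = 0.1

    # Exact: exponentiate the block matrix [[A, B], [0, 0]] * dt.
    M = np.zeros((3, 3))
    M[:2, :2] = A
    M[:2, 2:] = B
    Md = expm(M * dt)
    Ad_exact, Bd_exact = Md[:2, :2], Md[:2, 2:]

    # Forward Euler: first-order approximation.
    Ad_euler = np.eye(2) + A * dt
    Bd_euler = B * dt

    print(Ad_exact)  # [[1., 0.1], [0., 1.]]
    print(Bd_exact)  # [[0.005], [0.1]]; Euler gives [[0.], [0.1]]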
+ """ state_dim, input_dim = A.shape[1], B.shape[1] - if exact: M = np.zeros((state_dim + input_dim, state_dim + input_dim)) M[:state_dim, :state_dim] = A M[:state_dim, state_dim:] = B - Md = scipy.linalg.expm(M * dt) Ad = Md[:state_dim, :state_dim] Bd = Md[:state_dim, state_dim:] @@ -48,5 +52,4 @@ def discretize_linear_system(A, B, dt, exact=False): I = np.eye(state_dim) Ad = I + A * dt Bd = B * dt - return Ad, Bd diff --git a/safe_control_gym/controllers/mpsc/mpsc.py b/safe_control_gym/controllers/mpsc/mpsc.py index 9791d1323..a3e9f4908 100644 --- a/safe_control_gym/controllers/mpsc/mpsc.py +++ b/safe_control_gym/controllers/mpsc/mpsc.py @@ -1,21 +1,21 @@ -"""Model Predictive Safety Certification (MPSC) +"""Model Predictive Safety Certification (MPSC). -Based on -K.P. Wabsersich and M.N. Zeilinger "Linear model predictive safety certification for learning-based control" 2019 -https://arxiv.org/pdf/1803.08552.pdf - -The general idea is that any learning controller input can be certefied as safe, and if not safe, can be corrected +The core idea is that any learning controller input can be either certificated as safe or, if not safe, corrected using an MPC controller based on Tube MPC. +Based on + * K.P. Wabsersich and M.N. Zeilinger "Linear model predictive safety certification for learning-based control" 2019 + https://arxiv.org/pdf/1803.08552.pdf + """ +import numpy as np +import casadi as cs +import torch from copy import deepcopy from itertools import product -import numpy as np -import casadi as cs from munch import munchify from scipy.linalg import solve_discrete_are -import torch from safe_control_gym.controllers.base_controller import BaseController from safe_control_gym.controllers.mpc.mpc_utils import get_cost_weight_matrix, discretize_linear_system @@ -25,7 +25,10 @@ class MPSC(BaseController): - """Model Predictive Safety Certification Class.""" + """Model Predictive Safety Certification Class. + + """ + def __init__(self, env_func, rl_controller=None, @@ -37,8 +40,9 @@ def __init__(self, warmstart: bool = True, run_length: int = 200, additional_constraints: list = None, - **kwargs): - """Initialize the MPSC + **kwargs + ): + """Initialize the MPSC. Args: env_func (partial gym.Env): Environment for the task. @@ -51,12 +55,11 @@ def __init__(self, additional_constraints (list): List of additional constraints to consider. """ - # Setup the Environments + # Setup the Environments. self.env_func = env_func self.env = env_func(randomized_init=False) self.training_env = env_func(randomized_init=True) - - # Setup attributes + # Setup attributes. self.model = self.env.symbolic self.dt = self.model.dt self.Q = get_cost_weight_matrix(q_lin, self.model.nx) @@ -71,23 +74,23 @@ def __init__(self, self.tau = tau self.run_length = run_length self.rl_controller = rl_controller - self.omega_AABB_verts = None self.z_prev = None self.v_prev = None - if additional_constraints is None: additional_constraints = [] self.constraints, self.state_constraints_sym, self.input_constraints_sym = self.reset_constraints( self.env.constraints.constraints + - additional_constraints - ) + additional_constraints) - def reset_constraints(self, constraints): + def reset_constraints(self, + constraints + ): """ Setup the constraints list. Args: constraints (list): List of constraints controller is subject too. 
+ """ constraints_list = ConstraintList(constraints) state_constraints_sym = constraints_list.get_state_constraint_symbolic_models() @@ -97,14 +100,15 @@ def reset_constraints(self, constraints): return constraints_list, state_constraints_sym, input_constraints_sym def set_linear_dynamics(self): - """Compute the linear dynamics""" - # original version, used in shooting + """Compute the linear dynamics + + """ + # Original version, used in shooting. dfdxdfdu = self.model.df_func(x=self.X_LIN, u=self.U_LIN) dfdx = dfdxdfdu['dfdx'].toarray() dfdu = dfdxdfdu['dfdu'].toarray() delta_x = cs.MX.sym('delta_x', self.model.nx,1) delta_u = cs.MX.sym('delta_u', self.model.nu,1) - x_dot_lin_vec = dfdx @ delta_x + dfdu @ delta_u linear_dynamics_func = cs.integrator( 'linear_discrete_dynamics', self.model.integration_algo, @@ -114,12 +118,13 @@ def set_linear_dynamics(self): 'ode': x_dot_lin_vec }, {'tf': self.dt} ) - discrete_dfdx, discrete_dfdu = discretize_linear_system(dfdx, dfdu, self.dt) return linear_dynamics_func, discrete_dfdx, discrete_dfdu def compute_lqr_gain(self): - """Compute LQR gain by solving the DARE.""" + """Compute LQR gain by solving the DARE. + + """ P = solve_discrete_are(self.discrete_dfdx, self.discrete_dfdu, self.Q, @@ -129,7 +134,8 @@ def compute_lqr_gain(self): self.discrete_dfdx)) def learn(self, - env=None): + env=None + ): """Compute the Robust Positively Invariant (RPI) set that the MPSC used. Args: @@ -138,144 +144,118 @@ def learn(self, """ if env is None: env = self.training_env - - # create set of error residuals + # Create set of error residuals. w = np.zeros((self.model.nx, self.n_samples)) next_true_states = np.zeros((self.model.nx, self.n_samples)) next_pred_states = np.zeros((self.model.nx, self.n_samples)) actions = np.zeros((self.model.nu, self.n_samples)) - # Use uniform sampling of control inputs and states. for i in range(self.n_samples): init_state, info = env.reset() - u = env.action_space.sample() # will give a random action within action space + u = env.action_space.sample() # Will yield a random action within action space. actions[:,i] = u x_next_obs, _, _, _ = env.step(u) x_next_linear = self.linear_dynamics_func(x0=init_state - self.X_LIN, p=u - self.U_LIN)['xf'].toarray() - next_true_states[:,i] = x_next_obs next_pred_states[:,i] = x_next_linear[:,0] w[:,i] = x_next_obs - x_next_linear[:,0] - A_cl = self.discrete_dfdx + self.discrete_dfdu @ self.lqr_gain - P = compute_RPI_set(A_cl, w, self.tau) - self.learn_actions = actions self.learn_next_true_states = next_true_states self.learn_next_pred_states = next_pred_states self.w = w self.A_cl = A_cl self.omega_P = P - self.omega_AABB_verts = ellipse_bounding_box(P) self.tighten_state_and_input_constraints() self.omega_constraint = QuadraticContstraint(self.env, P, 1.0, constraint_input_type=ConstraintInputType.STATE) - - # Now that constraints are defined, setup the optimizer + # Now that constraints are defined, setup the optimizer. self.setup_optimizer() def tighten_state_and_input_constraints(self): - """Tigthen the state and input constraints based on the RPI.""" + """Tigthen the state and input constraints based on the RPI. + + """ if self.omega_AABB_verts is not None: K_omega_AABB_verts_raw = (self.lqr_gain @ self.omega_AABB_verts.T).T - # take the outermost values + # Take the outermost values. self.K_omega_AABB_verts = np.vstack((np.max(K_omega_AABB_verts_raw), np.min(K_omega_AABB_verts_raw))) - - # get the current input constraint vertices + # Get the current input constraint vertices. 
input_constraint = self.constraints.input_constraints if len(input_constraint) > 1: raise NotImplementedError("MPSC currently can't handle more than 1 constraint") input_constraint = input_constraint[0] self.U_vertices = np.array([[input_constraint.high, input_constraint.low]]).T - - self.tightened_input_constraint_verts, tightened_input_constr_func\ = pontryagin_difference_AABB( self.U_vertices, self.K_omega_AABB_verts) self.tightened_input_constraint = tightened_input_constr_func(env=self.env, - constraint_input_type=ConstraintInputType.INPUT) - # get the state constraint vertices + constraint_input_type=ConstraintInputType.INPUT) + # Get the state constraint vertices. state_constraints = self.constraints.state_constraints if len(state_constraints) > 1: raise NotImplementedError("MPSC currently can't handle more than 1 constraint") state_constraints = state_constraints[0] X_vertices_raw = [(state_constraints.high[i],state_constraints.low[i]) for i in range(self.model.nx)] self.X_vertices = np.clip(np.vstack(list(product(*X_vertices_raw))), -100, 100) - - self.tightened_state_constraint_verts, tightened_state_constraint_func = pontryagin_difference_AABB( - self.X_vertices, - self.omega_AABB_verts) + self.tightened_state_constraint_verts, tightened_state_constraint_func = pontryagin_difference_AABB(self.X_vertices, + self.omega_AABB_verts) self.tightened_state_constraint = tightened_state_constraint_func(env=self.env, constraint_input_type=ConstraintInputType.STATE) else: raise ValueError("") def setup_optimizer(self): - """Setup the certifying MPC problem.""" - # horizon is a parameter + """Setup the certifying MPC problem. + + """ + # Horizon parameter. horizon = self.horizon nx, nu = self.model.nx, self.model.nu - - # define optimizer and variables + # Define optimizer and variables. opti = cs.Opti() - # states + # States. z_var = opti.variable(nx, horizon + 1) - # inputs + # Inputs. v_var = opti.variable(nu, horizon) - # Certified Input + # Certified input. u_tilde = opti.variable(nu, 1) - # Desired Input + # Desired input. u_L = opti.parameter(nu, 1) - # Current observed state + # Current observed state. x = opti.parameter(nx, 1) - - # CONSTRAINTS - # Currently only handles a single constraint for state and input + # Constraints (currently only handles a single constraint for state and input). state_constraints = self.tightened_state_constraint.get_symbolic_model() input_constraints = self.tightened_input_constraint.get_symbolic_model() omega_constraint = self.omega_constraint.get_symbolic_model() - - # constraints for i in range(self.horizon): - # dynamics constraints eqn 5.b + # Dynamics constraints (eqn 5.b). next_state = self.linear_dynamics_func(x0=z_var[:,i], p=v_var[:,i])['xf'] opti.subject_to(z_var[:, i + 1] == next_state) - - # Eqn 5.c - # input constraints + # Input constraints (eqn 5.c). opti.subject_to( input_constraints(v_var[:,i]) <= 0) - # State Constraints opti.subject_to(state_constraints(z_var[:,i]) <= 0) - - # final state constraints (5.d) + # Final state constraints (5.d). opti.subject_to(z_var[:, -1] == 0 ) - - # Initial state constraints (5.e) + # Initial state constraints (5.e). opti.subject_to(omega_constraint(x - z_var[:, 0]) <= 0) - - # Real input (5.f) + # Real input (5.f). opti.subject_to(u_tilde == v_var[:,0] + self.lqr_gain @ (x - z_var[:,0])) - - ## COST - # Note: Using 2norm or sqrt makes this infeasible - cost = (u_L - u_tilde).T @ (u_L - u_tilde) # eqn 5.a + # Cost (# eqn 5.a, note: using 2norm or sqrt makes this infeasible). 
+ cost = (u_L - u_tilde).T @ (u_L - u_tilde) opti.minimize(cost) - - # create solver (IPOPT solver for now) + # Create solver (IPOPT solver as of this version). opts = {"ipopt.print_level": 4, "ipopt.sb": "yes", "ipopt.max_iter": 50, "print_time": 1} opti.solver('ipopt', opts) - - - self.opti_dict = { "opti": opti, "z_var": z_var, @@ -286,8 +266,13 @@ def setup_optimizer(self): "cost": cost } - def solve_optimization(self, obs, uncertified_input): - """Solve the MPC optimization problem for a given observation and uncertified input.""" + def solve_optimization(self, + obs, + uncertified_input + ): + """Solve the MPC optimization problem for a given observation and uncertified input. + + """ opti_dict = self.opti_dict opti = opti_dict["opti"] z_var = opti_dict["z_var"] @@ -296,33 +281,29 @@ def solve_optimization(self, obs, uncertified_input): u_L = opti_dict["u_L"] x = opti_dict["x"] cost = opti_dict["cost"] - opti.set_value(x, obs) opti.set_value(u_L, uncertified_input) - - # initial guess for optim problem + # Initial guess for optimization problem. if (self.warmstart and self.z_prev is not None and self.v_prev is not None and self.u_tilde_prev is not None): - # shift previous solutions by 1 step + # Shift previous solutions by 1 step. z_guess = deepcopy(self.x_prev) v_guess = deepcopy(self.u_prev) z_guess[:, :-1] = z_guess[:, 1:] v_guess[:-1] = v_guess[1:] - opti.set_initial(z_var, z_guess) opti.set_initial(v_var, v_guess) opti.set_initial(u_tilde, deepcopy(self.u_tilde_prev)) - - # solve the optimization problem + # Solve the optimization problem. try: sol = opti.solve() x_val, u_val, u_tilde_val = sol.value(z_var), sol.value(v_var), sol.value(u_tilde) self.z_prev = x_val self.v_prev = u_val self.u_tilde_prev = u_tilde_val - # take the first one from solved action sequence + # Take the first one from solved action sequence. if u_val.ndim > 1: action = u_tilde_val else: @@ -332,11 +313,15 @@ def solve_optimization(self, obs, uncertified_input): except RuntimeError: feasible = False action = None - return action, feasible - def certify_action(self, obs, u_L): - """Algorithm 1 from [1].""" + def certify_action(self, + obs, + u_L + ): + """Algorithm 1 from Wabsersich 2019. + + """ action, feasible = self.solve_optimization(obs, u_L) self.results_dict['feasible'].append(feasible) if feasible: @@ -356,7 +341,9 @@ def certify_action(self, obs, u_L): action = self.lqr_gain @ obs return action - def select_action(self, obs): + def select_action(self, + obs + ): """Selection feedback action. Args: @@ -369,22 +356,21 @@ def select_action(self, obs): """ if self.rl_controller is not None: with torch.no_grad(): - u_L, v, logp = self.rl_controller.agent.ac.step( - torch.FloatTensor(obs).to(self.rl_controller.device)) + u_L, v, logp = self.rl_controller.agent.ac.step(torch.FloatTensor(obs).to(self.rl_controller.device)) else: u_L = 2*np.sin(0.01*np.pi*self.time_step) + 0.5*np.sin(0.12*np.pi*self.time_step) self.results_dict['learning_actions'].append(u_L) action = self.certify_action(obs, u_L) action_diff = np.linalg.norm(u_L - action) self.results_dict['corrections'].append(action_diff) - return action, u_L def run(self, env=None, uncertified_env=None, run_length=None, - **kwargs): + **kwargs + ): """Run the simulation. 
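The certification objective above (eqn 5.a) keeps the learning-based input u_L whenever it is feasible and otherwise returns the closest feasible u_tilde. A toy one-dimensional illustration of that projection with cvxpy, ignoring the tube dynamics and using a single hypothetical input bound:

    import cvxpy as cp

    u_max = 1.0   # hypothetical tightened input bound
    u_L = 1.7     # desired (uncertified) input from the learning controller

    u_tilde = cp.Variable()
    problem = cp.Problem(cp.Minimize(cp.square(u_L - u_tilde)),
                         [u_tilde <= u_max, u_tilde >= -u_max])
    problem.solve()
    print(u_tilde.value)  # 1.0: projected onto the feasible set
    # If u_L were already inside the bounds, the minimizer would simply return u_L.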
Args: @@ -402,11 +388,9 @@ def run(self, run_length = self.run_length if uncertified_env is None: uncertified_env = self.env_func(randomized_init=False) - self.setup_results_dict() obs, _ = env.reset() self.results_dict['obs'].append(obs) - self.kinf = self.horizon - 1 self.time_step = 0 for i in range(run_length): @@ -415,7 +399,6 @@ def run(self, self.results_dict['obs'].append(obs) self.results_dict['actions'].append(action) self.time_step += 1 - uncertified_obs, _ = uncertified_env.reset() self.results_dict['uncertified_obs'].append(uncertified_obs) for i in range(run_length): @@ -427,13 +410,14 @@ def run(self, uncertified_obs, _, _, _ = uncertified_env.step(uncertified_action) self.results_dict['uncertified_actions'].append(uncertified_action) self.results_dict['uncertified_obs'].append(uncertified_obs) - self.close_results_dict() uncertified_env.close() return self.results_dict def setup_results_dict(self): - """Setup the results dictionary to store run information.""" + """Setup the results dictionary to store run information. + + """ self.results_dict = {} self.results_dict['obs'] = [] self.results_dict['actions'] = [] @@ -446,7 +430,9 @@ def setup_results_dict(self): self.results_dict['kinf'] = [] def close_results_dict(self): - """Cleanup the rtesults dict and munchify it.""" + """Cleanup the rtesults dict and munchify it. + + """ self.results_dict['obs'] = np.vstack(self.results_dict['obs']) self.results_dict['uncertified_obs'] = np.vstack(self.results_dict['uncertified_obs']) self.results_dict['uncertified_actions'] = np.vstack(self.results_dict['uncertified_actions']) @@ -457,12 +443,15 @@ def close_results_dict(self): self.results_dict = munchify(self.results_dict) def close(self): - """Cleans up resources.""" + """Cleans up resources. + + """ self.env.close() - #self.logger.close() def reset(self): - """Prepares for training or evaluation.""" + """Prepares for training or evaluation. + + """ # setup reference input if self.env.TASK == Task.STABILIZATION: self.mode = "stabilization" diff --git a/safe_control_gym/controllers/mpsc/mpsc.yaml b/safe_control_gym/controllers/mpsc/mpsc.yaml index b67af61be..11967bfcb 100644 --- a/safe_control_gym/controllers/mpsc/mpsc.yaml +++ b/safe_control_gym/controllers/mpsc/mpsc.yaml @@ -6,11 +6,11 @@ q_lin: - 1. warmstart: False -# safe set calculation +# Safe set calculation n_samples: 600 -# tau parameter for the calcuation of the RPI +# Tau parameter for the calcuation of the RPI tau: 0.95 -# how long to run the mpsc for +# How long to run the mpsc for run_length: 30 diff --git a/safe_control_gym/controllers/mpsc/mpsc_utils.py b/safe_control_gym/controllers/mpsc/mpsc_utils.py index fc56039b1..6e341f95d 100644 --- a/safe_control_gym/controllers/mpsc/mpsc_utils.py +++ b/safe_control_gym/controllers/mpsc/mpsc_utils.py @@ -1,21 +1,29 @@ -"""Utility functions for Model Predictive Safety Certification +"""Utility functions for Model Predictive Safety Certification. """ -from itertools import product -from functools import partial import cvxpy as cp import numpy as np import pytope as pt + +from itertools import product +from functools import partial from matplotlib.patches import Ellipse from safe_control_gym.envs.constraints import BoundedConstraint, LinearConstraint -def compute_RPI_set(Acl, w, tau): + +def compute_RPI_set(Acl, + w, + tau + ): """Compute a Robust positively invariant set (RPI). This follows the method in sec IV A from [1] to compute the robust positively invartiant set through a convex optimzation problem (LMI?) 
(equation 8a and 8b). + [1] K.P. Wabsersich and M.N. Zeilinger "Linear model predictive safety certification for learning-based control" + 2019. https://arxiv.org/pdf/1803.08552.pdf + Args: Acl (np.array): Closed loop gain matrix A+BK (nx by nx) w (np.array): Collection of dynamics error residuals with dim (nx by n_samples) @@ -24,9 +32,6 @@ def compute_RPI_set(Acl, w, tau): Returns: P (np.array): P from eqn 8 that defines the ellipsoidal RPI set. - [1] K.P. Wabsersich and M.N. Zeilinger "Linear model predictive safety certification for learning-based control" - 2019. https://arxiv.org/pdf/1803.08552.pdf - """ n_samples = w.shape[1] P = cp.Variable(Acl.shape, symmetric=True) @@ -41,24 +46,22 @@ def compute_RPI_set(Acl, w, tau): con_22 = w_i.T @ P @ w_i + tau - 1 constraints += [cp.bmat([[con_11, con_12], [con_21, con_22]]) << 0] - prob = cp.Problem(cp.Minimize(-cp.log_det(P)), constraints) try: results = prob.solve(solver='MOSEK', verbose=True) except cp.SolverError: print("[ERROR] RPI computation requires the MOSEK solver.") exit() - return P.value def ellipse_bounding_box(P): - """Finds the bounding box of an ellipse defined by x^T P x <= 1 + """Finds the bounding box of an ellipse defined by x^T P x <= 1. Args: P (np.array): n by n array defining the ellipse. Returns: - vertices (np.array): An vertical of the vertices (number of verts by dim of space) + vertices (np.array): An vertical of the vertices (number of verts by dim of space). """ c = np.eye(P.shape[0]) @@ -66,9 +69,7 @@ def ellipse_bounding_box(P): for i in range(P.shape[0]): extremes.append((np.sqrt(c[:,i, None].T @ np.linalg.inv(P) @ c[:,i, None])[0,0], -np.sqrt(c[:,i, None].T @ np.linalg.inv(P) @ c[:,i, None])[0,0])) - vertices = list(product(*extremes)) - return np.vstack(vertices) def get_ellipse_eig_decomp(P): @@ -87,15 +88,15 @@ def get_ellipse_eig_decomp(P): evals, evecs = np.linalg.eig(P) major_axis_ind = np.argmin(evals) minor_axis_ind = 0 if major_axis_ind == 1 else 1 - major_eval = evals[major_axis_ind] minor_eval = evals[minor_axis_ind] major_evec = evecs[:, major_axis_ind] minor_evec = evecs[:, minor_axis_ind] - return minor_eval, major_eval, minor_evec, major_evec -def get_ellipse_angle_rep(P, rads=True): +def get_ellipse_angle_rep(P, + rads=True + ): """Gets the angle representation of the ellipse which is required for plotting. Args: @@ -110,16 +111,18 @@ def get_ellipse_angle_rep(P, rads=True): """ minor_eval, major_eval, minor_evec, major_evec = get_ellipse_eig_decomp(P) alpha = np.arctan2(major_evec[1], major_evec[0]) - major_axis_length = 1/np.sqrt(major_eval) minor_axis_length = 1/np.sqrt(minor_eval) - if rads: return minor_axis_length, major_axis_length, alpha else: return minor_axis_length, major_axis_length, alpha*180/np.pi -def add_2d_ellipse(position, cov, ax, legend=None): +def add_2d_ellipse(position, + cov, + ax, + legend=None + ): """Add a 2D Ellipse patch to an axis. Args: @@ -130,7 +133,6 @@ def add_2d_ellipse(position, cov, ax, legend=None): """ minor_axis_length, major_axis_length, alpha = get_ellipse_angle_rep(cov, rads=False) - if legend: ellipse = Ellipse(position, 2*major_axis_length, @@ -144,16 +146,16 @@ def add_2d_ellipse(position, cov, ax, legend=None): 2*minor_axis_length, angle=alpha, alpha=0.5) - - ax.add_artist(ellipse) -def pontryagin_difference_AABB(verts1, verts2): - """Computre verts1 (-) verts2 +def pontryagin_difference_AABB(verts1, + verts2 + ): + """Computre verts1 (-) verts2. 
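ellipse_bounding_box uses the fact that, for the set x^T P x <= 1, the extent along coordinate axis i is sqrt((P^{-1})_{ii}), while the semi-axis lengths are 1 / sqrt(eig(P)). A small numeric check on a diagonal P where both values are easy to read off:

    import numpy as np
    from itertools import product

    P = np.diag([4.0, 1.0])  # the ellipse 4*x1^2 + x2^2 <= 1

    # Axis-aligned extents: sqrt of the diagonal of P^{-1} -> |x1| <= 0.5, |x2| <= 1.0.
    extents = np.sqrt(np.diag(np.linalg.inv(P)))
    vertices = np.vstack(list(product(*[(e, -e) for e in extents])))
    print(vertices)              # the four corners of the bounding box

    # Semi-axis lengths from the eigenvalues of P.
    evals, _ = np.linalg.eig(P)
    print(1.0 / np.sqrt(evals))  # [0.5, 1.0]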
Args: verts1, verts2 (np.array): Array of vertices ( n vertices by space dimension) stacked vertically - so that every vertix is a horizonal array + so that every vertix is a horizonal array: vertsi = [v1, v2, ..., @@ -164,7 +166,7 @@ def pontryagin_difference_AABB(verts1, verts2): """ if verts1.shape[1] > 1: - # If greater than 2 dimensions + # If greater than 2 dimensions. poly1 = pt.Polytope(verts1) poly2 = pt.Polytope(verts2) diff = poly1 - poly2 @@ -174,7 +176,7 @@ def pontryagin_difference_AABB(verts1, verts2): const_func = partial(LinearConstraint, A=A, b=b) return verts, const_func else: - # If 1D data. Only handles closed compact sets + # If 1D data. Only handles closed compact sets. vert2_range = np.ptp(verts2) vert_min = np.min(verts1) + vert2_range/2 vert_max = np.max(verts1) - vert2_range/2 diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 48872091e..8e166f465 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -1,36 +1,22 @@ """Proximal Policy Optimization (PPO) -Adapted from https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/ppo/ppo.py -Adapted hyperparameters from [here](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml) - -Reference papers & code: - * [Proximal Policy Optimization Algorithms](https://arxiv.org/pdf/1707.06347.pdf) - * [Implementation Matters in Deep Policy Gradients: A Case Study on PPO and TRPO](https://arxiv.org/pdf/2005.12729.pdf) - * [pytorch-a2c-ppo-acktr-gail](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail) - * [openai spinning up - ppo](https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ppo) - * [stable baselines3 - ppo](https://github.com/DLR-RM/stable-baselines3/tree/master/stable_baselines3/ppo) - -Example: - train on cartpole balance:: - - $ python mains/main.py --algo ppo --task cartpole --overrides ppo_cartpole.yaml - - test on cartpole balance:: - - $ python mains/main.py --func test --restore {...} - - train on quadrotor stabilization:: - - $ - -Todo: - * +Based on: + * https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/ppo/ppo.py + * (hyperparameters) https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml + +Additional references: + * Proximal Policy Optimization Algorithms - https://arxiv.org/pdf/1707.06347.pdf + * Implementation Matters in Deep Policy Gradients: A Case Study on PPO and TRPO - https://arxiv.org/pdf/2005.12729.pdf + * pytorch-a2c-ppo-acktr-gail - https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail + * openai spinning up - ppo - https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ppo + * stable baselines3 - ppo - https://github.com/DLR-RM/stable-baselines3/tree/master/stable_baselines3/ppo """ import os import time import numpy as np import torch + from collections import defaultdict from safe_control_gym.utils.logging import ExperimentLogger @@ -43,24 +29,32 @@ class PPO(BaseController): - """proximal policy optimization.""" - - def __init__(self, env_func, training=True, checkpoint_path="model_latest.pt", output_dir="temp", device="cpu", seed=0, **kwargs): + """Proximal policy optimization. + + """ + + def __init__(self, + env_func, + training=True, + checkpoint_path="model_latest.pt", + output_dir="temp", + device="cpu", + seed=0, + **kwargs + ): super().__init__(env_func, training, checkpoint_path, output_dir, device, seed, **kwargs) - - # task + # Task. 
if self.training: - # training (+ evaluation) + # Training and testing. self.env = make_vec_envs(env_func, None, self.rollout_batch_size, self.num_workers, seed) self.env = VecRecordEpisodeStatistics(self.env, self.deque_size) self.eval_env = env_func() self.eval_env = RecordEpisodeStatistics(self.eval_env, self.deque_size) else: - # testing only + # Testing only. self.env = env_func() self.env = RecordEpisodeStatistics(self.env) - - # agent + # Agent. self.agent = PPOAgent(self.env.observation_space, self.env.action_space, hidden_dim=self.hidden_dim, @@ -73,52 +67,55 @@ def __init__(self, env_func, training=True, checkpoint_path="model_latest.pt", o opt_epochs=self.opt_epochs, mini_batch_size=self.mini_batch_size) self.agent.to(device) - - # pre-/post-processing + # Pre-/post-processing. self.obs_normalizer = BaseNormalizer() if self.norm_obs: self.obs_normalizer = MeanStdNormalizer(shape=self.env.observation_space.shape, clip=self.clip_obs, epsilon=1e-8) - self.reward_normalizer = BaseNormalizer() if self.norm_reward: self.reward_normalizer = RewardStdNormalizer(gamma=self.gamma, clip=self.clip_reward, epsilon=1e-8) - - # logging + # Logging. if self.training: log_file_out = True use_tensorboard = self.tensorboard else: - # disable logging to texts and tfboard for evaluation + # Disable logging to file and tfboard for evaluation. log_file_out = False use_tensorboard = False self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) def reset(self): - """Do initializations for training or evaluation.""" + """Do initializations for training or evaluation. + + """ if self.training: # set up stats tracking self.env.add_tracker("constraint_violation", 0) self.eval_env.add_tracker("constraint_values", 0, mode="queue") - self.total_steps = 0 obs, _ = self.env.reset() self.obs = self.obs_normalizer(obs) else: - # add eposodic stats to be tracked + # Add episodic stats to be tracked. self.env.add_tracker("constraint_values", 0, mode="queue") def close(self): - """Shuts down and cleans up lingering resources.""" + """Shuts down and cleans up lingering resources. + + """ self.env.close() if self.training: self.eval_env.close() self.logger.close() - def save(self, path): - """Saves model params and experiment state to checkpoint path.""" + def save(self, + path + ): + """Saves model params and experiment state to checkpoint path. + + """ path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) - state_dict = { "agent": self.agent.state_dict(), "obs_normalizer": self.obs_normalizer.state_dict(), @@ -134,16 +131,18 @@ def save(self, path): state_dict.update(exp_state) torch.save(state_dict, path) - def load(self, path): - """Restores model and experiment given checkpoint path.""" - state = torch.load(path) + def load(self, + path + ): + """Restores model and experiment given checkpoint path. - # restore policy + """ + state = torch.load(path) + # Restore policy. self.agent.load_state_dict(state["agent"]) self.obs_normalizer.load_state_dict(state["obs_normalizer"]) self.reward_normalizer.load_state_dict(state["reward_normalizer"]) - - # restore experiment state + # Restore experiment state. 
if self.training: self.total_steps = state["total_steps"] self.obs = state["obs"] @@ -151,22 +150,25 @@ def load(self, path): self.env.set_env_random_state(state["env_random_state"]) self.logger.load(self.total_steps) - def learn(self, env=None, **kwargs): - """Performs learning (pre-training, training, fine-tuning, etc).""" + def learn(self, + env=None, + **kwargs + ): + """Performs learning (pre-training, training, fine-tuning, etc). + + """ while self.total_steps < self.max_env_steps: results = self.train_step() - - # checkpoint + # Checkpoint. if self.total_steps >= self.max_env_steps or (self.save_interval and self.total_steps % self.save_interval == 0): - # latest/final checkpoint + # Latest/final checkpoint. self.save(self.checkpoint_path) self.logger.info("Checkpoint | {}".format(self.checkpoint_path)) if self.num_checkpoints and self.total_steps % (self.max_env_steps // self.num_checkpoints) == 0: - # intermediate checkpoint + # Intermediate checkpoint. path = os.path.join(self.output_dir, "checkpoints", "model_{}.pt".format(self.total_steps)) self.save(path) - - # eval + # Evaluation. if self.eval_interval and self.total_steps % self.eval_interval == 0: eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) results["eval"] = eval_results @@ -174,19 +176,26 @@ def learn(self, env=None, **kwargs): eval_results["ep_lengths"].std(), eval_results["ep_returns"].mean(), eval_results["ep_returns"].std())) - # save best model + # Save best model. eval_score = eval_results["ep_returns"].mean() eval_best_score = getattr(self, "eval_best_score", -np.infty) if self.eval_save_best and eval_best_score < eval_score: self.eval_best_score = eval_score self.save(os.path.join(self.output_dir, "model_best.pt")) - - # logging + # Logging. if self.log_interval and self.total_steps % self.log_interval == 0: self.log_step(results) - def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): - """Runs evaluation with current policy.""" + def run(self, + env=None, + render=False, + n_episodes=10, + verbose=False, + **kwargs + ): + """Runs evaluation with current policy. + + """ self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -194,66 +203,59 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): else: if not is_wrapped(env, RecordEpisodeStatistics): env = RecordEpisodeStatistics(env, n_episodes) - # add eposodic stats to be tracked + # Add eposodic stats to be tracked. env.add_tracker("constraint_values", 0, mode="queue") - obs, info = env.reset() obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] frames = [] - while len(ep_returns) < n_episodes: with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) action = self.agent.ac.act(obs) - obs, reward, done, info = env.step(action) if render: env.render() frames.append(env.render("rgb_array")) if verbose: print("obs {} | act {}".format(obs, action)) - if done: assert "episode" in info ep_returns.append(info["episode"]["r"]) ep_lengths.append(info["episode"]["l"]) obs, _ = env.reset() obs = self.obs_normalizer(obs) - - # collect evaluation results + # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) eval_results = {"ep_returns": ep_returns, "ep_lengths": ep_lengths} if len(frames) > 0: eval_results["frames"] = frames - # other episodic stats from evaluation env + # Other episodic stats from evaluation env. 
if len(env.queued_stats) > 0: queued_stats = {k: np.asarray(v) for k, v in env.queued_stats.items()} eval_results.update(queued_stats) return eval_results def train_step(self): - """Performs a training/fine-tuning step.""" + """Performs a training/fine-tuning step. + + """ self.agent.train() self.obs_normalizer.unset_read_only() rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) obs = self.obs start = time.time() - for step in range(self.rollout_steps): with torch.no_grad(): act, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) next_obs, rew, done, info = self.env.step(act) - next_obs = self.obs_normalizer(next_obs) rew = self.reward_normalizer(rew, done) mask = 1 - done.astype(float) - - # time truncation is not true termination + # Time truncation is not the same as true termination. terminal_v = np.zeros_like(v) for idx, inf in enumerate(info["n"]): - # if "TimeLimit.truncated" in inf and inf["TimeLimit.truncated"]: if "terminal_info" not in inf: continue inff = inf["terminal_info"] @@ -262,14 +264,11 @@ def train_step(self): terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().numpy() terminal_v[idx] = terminal_val - rollouts.push({"obs": obs, "act": act, "rew": rew, "mask": mask, "v": v, "logp": logp, "terminal_v": terminal_v}) obs = next_obs - self.obs = obs self.total_steps += self.rollout_batch_size * self.rollout_steps - - # learn from rollout batch + # Learn from rollout batch. last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().numpy() ret, adv = compute_returns_and_advantages(rollouts.rew, rollouts.v, @@ -280,15 +279,18 @@ def train_step(self): use_gae=self.use_gae, gae_lambda=self.gae_lambda) rollouts.ret = ret - # prevent divide-by-0 for repetitive tasks + # Prevent divide-by-0 for repetitive tasks. rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) - results = self.agent.update(rollouts, self.device) results.update({"step": self.total_steps, "elapsed_time": time.time() - start}) return results - def log_step(self, results): - """Does logging after a training step.""" + def log_step(self, + results + ): + """Does logging after a training step. + + """ step = results["step"] # runner stats self.logger.add_scalars( @@ -301,11 +303,9 @@ def log_step(self, results): prefix="time", write=False, write_tb=False) - - # learning stats + # Learning stats. self.logger.add_scalars({k: results[k] for k in ["policy_loss", "value_loss", "entropy_loss", "approx_kl"]}, step, prefix="loss") - - # performance stats + # Performance stats. ep_lengths = np.asarray(self.env.length_queue) ep_returns = np.asarray(self.env.return_queue) self.logger.add_scalars( @@ -316,11 +316,9 @@ def log_step(self, results): }, step, prefix="stat") - - # total constraint violation during learning + # Total constraint violation during learning. total_violations = self.env.accumulated_stats["constraint_violation"] self.logger.add_scalars({"constraint_violation": total_violations}, step, prefix="stat") - if "eval" in results: eval_ep_lengths = results["eval"]["ep_lengths"] eval_ep_returns = results["eval"]["ep_returns"] @@ -332,21 +330,19 @@ def log_step(self, results): }, step, prefix="stat_eval") - - # each episodic constraint + # Each episodic constraint. 
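Aside: a small numeric sketch of the time-truncation bootstrap used in train_step() above. When an episode is cut by the time limit rather than by a true failure, gamma * V(terminal_obs) is folded into the reward at the cutoff so the discounted return does not treat the truncation as zero future reward; all numbers below are made up.

import numpy as np

gamma = 0.99
rews = np.array([1.0, 1.0, 1.0])   # rewards of a 3-step episode cut by the time limit (made up)
terminal_value = 50.0              # hypothetical critic estimate V(terminal_obs)

# Fold the bootstrap into the last reward, as done per-step via terminal_v above.
rews_bootstrapped = rews.copy()
rews_bootstrapped[-1] += gamma * terminal_value

def discounted_return(r, gamma):
    return sum(gamma ** t * r_t for t, r_t in enumerate(r))

print(discounted_return(rews, gamma))               # pretends no reward exists past the cutoff
print(discounted_return(rews_bootstrapped, gamma))  # accounts for the value beyond the cutoff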
if "constraint_values" in results["eval"]: con_dict = {} con_vals = results["eval"]["constraint_values"] if len(con_vals.shape) == 2: - # multiple constraints + # Multiple constraints. for i in range(con_vals.shape[-1]): con_name = "constraint_value_{}".format(i) con_dict[con_name] = con_vals[:, i].mean() else: - # single constraint + # Single constraint. con_name = "constraint_value_0" con_dict[con_name] = con_vals.mean() self.logger.add_scalars(con_dict, step, prefix="stat_eval") - - # print summary table - self.logger.dump_scalars() \ No newline at end of file + # Print summary table + self.logger.dump_scalars() diff --git a/safe_control_gym/controllers/ppo/ppo.yaml b/safe_control_gym/controllers/ppo/ppo.yaml index 277170a3e..a5567a791 100644 --- a/safe_control_gym/controllers/ppo/ppo.yaml +++ b/safe_control_gym/controllers/ppo/ppo.yaml @@ -1,11 +1,11 @@ -# model args +# Model args hidden_dim: 64 norm_obs: False norm_reward: False clip_obs: 10. clip_reward: 10. -# loss args +# Loss args gamma: 0.99 use_gae: False gae_lambda: 0.95 @@ -14,14 +14,14 @@ clip_param: 0.2 target_kl: 0.01 entropy_coef: 0.01 -# optim args +# Optim args opt_epochs: 10 mini_batch_size: 64 actor_lr: 0.0003 critic_lr: 0.001 max_grad_norm: 0.5 -# runner args +# Runner args max_env_steps: 1000000 num_workers: 1 rollout_batch_size: 4 @@ -29,7 +29,7 @@ rollout_steps: 100 deque_size: 10 eval_batch_size: 10 -# misc +# Misc log_interval: 0 save_interval: 0 num_checkpoints: 0 diff --git a/safe_control_gym/controllers/ppo/ppo_utils.py b/safe_control_gym/controllers/ppo/ppo_utils.py index 086a8cfaa..baeb25d90 100644 --- a/safe_control_gym/controllers/ppo/ppo_utils.py +++ b/safe_control_gym/controllers/ppo/ppo_utils.py @@ -1,20 +1,22 @@ -from collections import defaultdict -from copy import deepcopy +"""PPO utilities. + +""" import numpy as np import torch import torch.nn as nn + from gym.spaces import Box +from collections import defaultdict +from copy import deepcopy from safe_control_gym.math_and_models.neural_networks import MLP, CNN, RNN, init_ from safe_control_gym.math_and_models.distributions import Normal, Categorical -# ----------------------------------------------------------------------------------- -# Agent -# ----------------------------------------------------------------------------------- - class PPOAgent: - """A PPO class that encapsulates model, optimizer and update functions.""" + """A PPO class that encapsulates models, optimizers and update functions. + + """ def __init__(self, obs_space, @@ -28,83 +30,94 @@ def __init__(self, critic_lr=0.001, opt_epochs=10, mini_batch_size=64, - **kwargs): - # params + **kwargs + ): + # Parameters. self.obs_space = obs_space self.act_space = act_space - self.use_clipped_value = use_clipped_value self.clip_param = clip_param self.target_kl = target_kl self.entropy_coef = entropy_coef - self.opt_epochs = opt_epochs self.mini_batch_size = mini_batch_size - - # model + # Model. self.ac = MLPActorCritic(obs_space, act_space, hidden_dims=[hidden_dim] * 2, activation="tanh") - - # optimizers + # Optimizers. self.actor_opt = torch.optim.Adam(self.ac.actor.parameters(), actor_lr) - self.critic_opt = torch.optim.Adam(self.ac.critic.parameters(), - critic_lr) + self.critic_opt = torch.optim.Adam(self.ac.critic.parameters(), critic_lr) - def to(self, device): - """Puts agent to device.""" + def to(self, + device + ): + """Puts agent to device. + + """ self.ac.to(device) def train(self): - """Sets training mode.""" + """Sets training mode. 
+ + """ self.ac.train() def eval(self): - """Sets evaluation mode.""" + """Sets evaluation mode. + + """ self.ac.eval() def state_dict(self): - """Snapshots agent state.""" + """Snapshots agent state. + + """ return { "ac": self.ac.state_dict(), "actor_opt": self.actor_opt.state_dict(), "critic_opt": self.critic_opt.state_dict() } - def load_state_dict(self, state_dict): - """Restores agent state.""" + def load_state_dict(self, + state_dict + ): + """Restores agent state. + + """ self.ac.load_state_dict(state_dict["ac"]) self.actor_opt.load_state_dict(state_dict["actor_opt"]) self.critic_opt.load_state_dict(state_dict["critic_opt"]) - def compute_policy_loss(self, batch): - """Returns policy loss(es) given batch of data.""" - obs, act, logp_old, adv = batch["obs"], batch["act"], batch[ - "logp"], batch["adv"] - dist, logp = self.ac.actor(obs, act) + def compute_policy_loss(self, + batch + ): + """Returns policy loss(es) given batch of data. - # policy + """ + obs, act, logp_old, adv = batch["obs"], batch["act"], batch["logp"], batch["adv"] + dist, logp = self.ac.actor(obs, act) + # Policy. ratio = torch.exp(logp - logp_old) - clip_adv = torch.clamp(ratio, 1 - self.clip_param, - 1 + self.clip_param) * adv + clip_adv = torch.clamp(ratio, 1 - self.clip_param, 1 + self.clip_param) * adv policy_loss = -torch.min(ratio * adv, clip_adv).mean() - - # entropy + # Entropy. entropy_loss = -dist.entropy().mean() - - # kl/trust region + # KL/trust region. approx_kl = (logp_old - logp).mean() return policy_loss, entropy_loss, approx_kl - def compute_value_loss(self, batch): - """Returns value loss(es) given batch of data.""" + def compute_value_loss(self, + batch + ): + """Returns value loss(es) given batch of data. + + """ obs, ret, v_old = batch["obs"], batch["ret"], batch["v"] v_cur = self.ac.critic(obs) - if self.use_clipped_value: - v_old_clipped = v_old + (v_cur - v_old).clamp( - -self.clip_param, self.clip_param) + v_old_clipped = v_old + (v_cur - v_old).clamp(-self.clip_param, self.clip_param) v_loss = (v_cur - ret).pow(2) v_loss_clipped = (v_old_clipped - ret).pow(2) value_loss = 0.5 * torch.max(v_loss, v_loss_clipped).mean() @@ -112,62 +125,60 @@ def compute_value_loss(self, batch): value_loss = 0.5 * (v_cur - ret).pow(2).mean() return value_loss - def update(self, rollouts, device="cpu"): - """Updates model parameters based on current training batch.""" + def update(self, + rollouts, + device="cpu" + ): + """Updates model parameters based on current training batch. + + """ results = defaultdict(list) num_mini_batch = rollouts.max_length * rollouts.batch_size // self.mini_batch_size - for i in range(self.opt_epochs): p_loss_epoch, v_loss_epoch, e_loss_epoch, kl_epoch = 0, 0, 0, 0 - for batch in rollouts.sampler(self.mini_batch_size, device): - # actor update - policy_loss, entropy_loss, approx_kl = self.compute_policy_loss( - batch) - # update only when no kl constraint or constraint is satisfied - if (not self.target_kl > 0) or (self.target_kl > 0 and approx_kl - <= 1.5 * self.target_kl): + # Actor update. + policy_loss, entropy_loss, approx_kl = self.compute_policy_loss(batch) + # Update only when no KL constraint or constraint is satisfied. + if (not self.target_kl > 0) or (self.target_kl > 0 and approx_kl <= 1.5 * self.target_kl): self.actor_opt.zero_grad() (policy_loss + self.entropy_coef * entropy_loss).backward() self.actor_opt.step() - - # critic update + # Critic update. 
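Aside: a minimal PyTorch sketch of the clipped surrogate objective and the approximate KL used in compute_policy_loss() and update() above, on made-up log-probabilities and advantages.

import torch

clip_param = 0.2
logp_old = torch.tensor([-1.0, -0.5, -2.0])   # made-up old log-probabilities
logp = torch.tensor([-0.7, -0.9, -1.5])       # made-up new log-probabilities
adv = torch.tensor([1.0, -0.5, 2.0])          # made-up advantages

# Clipped surrogate: pessimistic minimum of the unclipped and clipped objectives.
ratio = torch.exp(logp - logp_old)
clip_adv = torch.clamp(ratio, 1 - clip_param, 1 + clip_param) * adv
policy_loss = -torch.min(ratio * adv, clip_adv).mean()

# Approximate KL used above to skip actor updates once approx_kl > 1.5 * target_kl.
approx_kl = (logp_old - logp).mean()
print(policy_loss.item(), approx_kl.item())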
value_loss = self.compute_value_loss(batch) self.critic_opt.zero_grad() value_loss.backward() self.critic_opt.step() - p_loss_epoch += policy_loss.item() v_loss_epoch += value_loss.item() e_loss_epoch += entropy_loss.item() kl_epoch += approx_kl.item() - results["policy_loss"].append(p_loss_epoch / num_mini_batch) results["value_loss"].append(v_loss_epoch / num_mini_batch) results["entropy_loss"].append(e_loss_epoch / num_mini_batch) results["approx_kl"].append(kl_epoch / num_mini_batch) - results = {k: sum(v) / len(v) for k, v in results.items()} return results -# ----------------------------------------------------------------------------------- -# Models -# ----------------------------------------------------------------------------------- - - class MLPActor(nn.Module): + """Actor MLP model. + + """ def __init__(self, obs_dim, act_dim, hidden_dims, activation, - discrete=False): + discrete=False + ): + """ + + """ super().__init__() self.pi_net = MLP(obs_dim, act_dim, hidden_dims, activation) - - # construct output action distribution + # Construct output action distribution. self.discrete = discrete if discrete: self.dist_fn = lambda x: Categorical(logits=x) @@ -175,7 +186,13 @@ def __init__(self, self.logstd = nn.Parameter(-0.5 * torch.ones(act_dim)) self.dist_fn = lambda x: Normal(x, self.logstd.exp()) - def forward(self, obs, act=None): + def forward(self, + obs, + act=None + ): + """ + + """ dist = self.dist_fn(self.pi_net(obs)) logp_a = None if act is not None: @@ -184,12 +201,27 @@ def forward(self, obs, act=None): class MLPCritic(nn.Module): + """Critic MLP model. + + """ + + def __init__(self, + obs_dim, + hidden_dims, + activation + ): + """ - def __init__(self, obs_dim, hidden_dims, activation): + """ super().__init__() self.v_net = MLP(obs_dim, 1, hidden_dims, activation) - def forward(self, obs): + def forward(self, + obs + ): + """ + + """ return self.v_net(obs) @@ -197,17 +229,21 @@ class MLPActorCritic(nn.Module): """Model for the actor-critic agent. Attributes: - actor (MLPActor): policy network. - critic (MLPCritic): value network. + actor (MLPActor): policy network. + critic (MLPCritic): value network. + """ def __init__(self, obs_space, act_space, hidden_dims=(64, 64), - activation="tanh"): - super().__init__() + activation="tanh" + ): + """ + """ + super().__init__() obs_dim = obs_space.shape[0] if isinstance(act_space, Box): act_dim = act_space.shape[0] @@ -215,31 +251,34 @@ def __init__(self, else: act_dim = act_space.n discrete = True - - # policy - self.actor = MLPActor(obs_dim, act_dim, hidden_dims, activation, - discrete) - # value function + # Policy. + self.actor = MLPActor(obs_dim, act_dim, hidden_dims, activation, discrete) + # Value function. self.critic = MLPCritic(obs_dim, hidden_dims, activation) - def step(self, obs): + def step(self, + obs + ): + """ + + """ dist, _ = self.actor(obs) a = dist.sample() logp_a = dist.log_prob(a) v = self.critic(obs) return a.numpy(), v.numpy(), logp_a.numpy() - def act(self, obs): + def act(self, + obs + ): + """ + + """ dist, _ = self.actor(obs) a = dist.mode() return a.cpu().numpy() -# ----------------------------------------------------------------------------------- -# Storage -# ----------------------------------------------------------------------------------- - - class PPOBuffer(object): """Storage for a batch of episodes during training. @@ -248,20 +287,24 @@ class PPOBuffer(object): batch_size (int): number of episodes per batch. scheme (dict): describs shape & other info of data to be stored. 
keys (list): names of all data from scheme. + """ - def __init__(self, obs_space, act_space, max_length, batch_size): + def __init__(self, + obs_space, + act_space, + max_length, + batch_size + ): super().__init__() self.max_length = max_length self.batch_size = batch_size - T, N = max_length, batch_size obs_dim = obs_space.shape if isinstance(act_space, Box): act_dim = act_space.shape[0] else: act_dim = act_space.n - self.scheme = { "obs": { "vshape": (T, N, *obs_dim) @@ -296,30 +339,37 @@ def __init__(self, obs_space, act_space, max_length, batch_size): self.reset() def reset(self): - """Allocates space for containers.""" + """Allocates space for containers. + + """ for k, info in self.scheme.items(): - assert "vshape" in info, "Scheme must define vshape for {}".format( - k) + assert "vshape" in info, "Scheme must define vshape for {}".format(k) vshape = info["vshape"] dtype = info.get("dtype", np.float32) init = info.get("init", np.zeros) self.__dict__[k] = init(vshape, dtype=dtype) - self.t = 0 - def push(self, batch): - """Inserts transition step data (as dict) to storage.""" + def push(self, + batch + ): + """Inserts transition step data (as dict) to storage. + + """ for k, v in batch.items(): assert k in self.keys shape = self.scheme[k]["vshape"][1:] dtype = self.scheme[k].get("dtype", np.float32) v_ = np.asarray(deepcopy(v), dtype=dtype).reshape(shape) self.__dict__[k][self.t] = v_ - self.t = (self.t + 1) % self.max_length - def get(self, device="cpu"): - """Returns all data.""" + def get(self, + device="cpu" + ): + """Returns all data. + + """ batch = {} for k, info in self.scheme.items(): shape = info["vshape"][2:] @@ -327,20 +377,28 @@ def get(self, device="cpu"): batch[k] = torch.as_tensor(data, device=device) return batch - def sample(self, indices): - """Returns partial data.""" + def sample(self, + indices + ): + """Returns partial data. + + """ batch = {} for k, info in self.scheme.items(): shape = info["vshape"][2:] batch[k] = self.__dict__[k].reshape(-1, *shape)[indices] return batch - def sampler(self, mini_batch_size, device="cpu", drop_last=True): - """Makes sampler to loop through all data.""" - total_steps = self.max_length * self.batch_size - sampler = random_sample(np.arange(total_steps), mini_batch_size, - drop_last) + def sampler(self, + mini_batch_size, + device="cpu", + drop_last=True + ): + """Makes sampler to loop through all data. + """ + total_steps = self.max_length * self.batch_size + sampler = random_sample(np.arange(total_steps), mini_batch_size, drop_last) for indices in sampler: batch = self.sample(indices) batch = { @@ -349,13 +407,13 @@ def sampler(self, mini_batch_size, device="cpu", drop_last=True): yield batch -# ----------------------------------------------------------------------------------- -# Misc -# ----------------------------------------------------------------------------------- - +def random_sample(indices, + batch_size, + drop_last=True + ): + """Returns index batches to iterate over. -def random_sample(indices, batch_size, drop_last=True): - """Returns index batches to iterave over""" + """ indices = np.asarray(np.random.permutation(indices)) batches = indices[:len(indices) // batch_size * batch_size].reshape( -1, batch_size) @@ -374,17 +432,18 @@ def compute_returns_and_advantages(rews, last_val=0, gamma=0.99, use_gae=False, - gae_lambda=0.95): - """Useful for policy-gradient algos.""" + gae_lambda=0.95 + ): + """Useful for policy-gradient algorithms. 
+ + """ T, N = rews.shape[:2] rets, advs = np.zeros((T, N, 1)), np.zeros((T, N, 1)) ret, adv = last_val, np.zeros((N, 1)) vals = np.concatenate([vals, last_val[np.newaxis, ...]], 0) - - # compensate for time truncation + # Compensate for time truncation. rews += gamma * terminal_vals - - # cumulative discounted sums + # Cumulative discounted sums. for i in reversed(range(T)): ret = rews[i] + gamma * masks[i] * ret if not use_gae: @@ -394,5 +453,4 @@ def compute_returns_and_advantages(rews, adv = adv * gae_lambda * gamma * masks[i] + td_error rets[i] = deepcopy(ret) advs[i] = deepcopy(adv) - return rets, advs diff --git a/safe_control_gym/controllers/safe_explorer/safe_explorer_utils.py b/safe_control_gym/controllers/safe_explorer/safe_explorer_utils.py index 26d5f2356..8c480f9e2 100644 --- a/safe_control_gym/controllers/safe_explorer/safe_explorer_utils.py +++ b/safe_control_gym/controllers/safe_explorer/safe_explorer_utils.py @@ -1,23 +1,25 @@ -from gym.spaces import Box -from collections import defaultdict -from copy import deepcopy +"""Utility function for a generic safe explorer. + +""" import numpy as np import torch import torch.nn as nn import torch.nn.functional as F +from gym.spaces import Box +from collections import defaultdict +from copy import deepcopy + from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env_utils import _flatten_obs from safe_control_gym.math_and_models.neural_networks import MLP, CNN, RNN, init_ from safe_control_gym.math_and_models.distributions import Normal, Categorical from safe_control_gym.controllers.ppo.ppo_utils import random_sample -# ----------------------------------------------------------------------------------- -# Safety Layer -# ----------------------------------------------------------------------------------- - class SafetyLayer: - """Layer to learn constraint models and to impose action projection.""" + """Layer to learn constraint models and to impose action projection. + + """ def __init__(self, obs_space, @@ -27,61 +29,76 @@ def __init__(self, lr=0.001, slack=None, **kwargs): - # params + # Parameters. self.num_constraints = num_constraints - - # seperate model per constraint + # Seperate model per constraint. input_dim = obs_space.shape[0] output_dim = act_space.shape[0] self.constraint_models = nn.ModuleList([ MLP(input_dim, output_dim, hidden_dims=[hidden_dim]) for _ in range(self.num_constraints) ]) - # constraint slack variables/values + # Constraint slack variables/values. assert slack is not None and isinstance(slack, (int, float, list)) if isinstance(slack, (int, float)): slack = [slack] self.slack = np.array(slack) - - # optimizers + # Optimizers. self.optimizers = [ torch.optim.Adam(model.parameters(), lr=lr) for model in self.constraint_models ] - def to(self, device): - """Puts agent to device.""" + def to(self, + device + ): + """Puts agent to device. + + """ self.constraint_models.to(device) def train(self): - """Sets training mode.""" + """Sets training mode. + + """ self.constraint_models.train() def eval(self): - """Sets evaluation mode.""" + """Sets evaluation mode. + + """ self.constraint_models.eval() def state_dict(self): - """Snapshots agent state.""" + """Snapshots agent state. + + """ return { "constraint_models": self.constraint_models.state_dict(), "optimizers": [opt.state_dict() for opt in self.optimizers] } - def load_state_dict(self, state_dict): - """Restores agent state.""" + def load_state_dict(self, + state_dict + ): + """Restores agent state. 
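Aside: a single-environment numpy sketch of the return/advantage recursion in compute_returns_and_advantages() above, with made-up rewards, values and masks; with use_gae the advantage follows A_t = delta_t + gamma*lambda*mask_t*A_{t+1}, where delta_t = r_t + gamma*mask_t*V_{t+1} - V_t.

import numpy as np

gamma, gae_lambda = 0.99, 0.95
rews = np.array([0.0, 1.0, 0.0, 1.0])    # made-up rewards
vals = np.array([0.5, 0.6, 0.7, 0.8])    # made-up value predictions V(s_t)
masks = np.array([1.0, 1.0, 1.0, 0.0])   # last step truly terminates the episode
last_val = 0.9                           # critic value at the final observation (masked out here)

T = len(rews)
vals_ext = np.append(vals, last_val)
rets, advs = np.zeros(T), np.zeros(T)
ret, adv = last_val, 0.0
for t in reversed(range(T)):
    ret = rews[t] + gamma * masks[t] * ret
    td_error = rews[t] + gamma * masks[t] * vals_ext[t + 1] - vals_ext[t]
    adv = td_error + gamma * gae_lambda * masks[t] * adv
    rets[t], advs[t] = ret, adv
print(rets, advs)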
+ + """ self.constraint_models.load_state_dict(state_dict["constraint_models"]) for i, opt_state_dict in enumerate(state_dict["optimizers"]): self.optimizers[i].load_state_dict(opt_state_dict) - def compute_loss(self, batch): - """Gets constraint value L2 loss for each constraint.""" + def compute_loss(self, + batch + ): + """Gets constraint value L2 loss for each constraint. + + """ obs, act = batch["obs"], batch["act"] c, c_next = batch["c"], batch["c_next"] gs = [model(obs) for model in self.constraint_models] - - # each is (N,1,A) x (N,A,1) -> (N,), so [(N,)]_{n_constriants} + # Each is (N,1,A) x (N,A,1) -> (N,), so [(N,)]_{n_constriants} c_next_pred = [ c[:, i] + torch.bmm(g.view(g.shape[0], 1, -1), act.view(act.shape[0], -1, 1)).view(-1) @@ -94,26 +111,30 @@ def compute_loss(self, batch): return losses def update(self, batch): - """Updates the constraint models from data batch.""" - losses = self.compute_loss(batch) + """Updates the constraint models from data batch. + """ + losses = self.compute_loss(batch) for loss, opt in zip(losses, self.optimizers): opt.zero_grad() loss.backward() opt.step() - - resutls = { + results = { "constraint_{}_loss".format(i): loss.item() for i, loss in enumerate(losses) } - return resutls + return results - def get_safe_action(self, obs, act, c): + def get_safe_action(self, + obs, + act, + c + ): """Does action projection with the trained safety layer. - According to the paper, this simple projection works when only 1 constraint - is active at a time; for multiple active constriants, either consult in-graph - QP solver like in OptLayer or consider cvxpylayers (https://github.com/cvxgrp/cvxpylayers) + According to Dalal 2018, this simple projection works when only 1 constraint at a time + is active; for multiple active constriants, either resort to in-graph QP solver such as + OptLayer or see cvxpylayers (https://github.com/cvxgrp/cvxpylayers). Args: obs (torch.FloatTensor): observations, shape (B,O). @@ -127,7 +148,6 @@ def get_safe_action(self, obs, act, c): self.eval() # [(B,A)]_C g = [model(obs) for model in self.constraint_models] - # Find the lagrange multipliers [(B,)]_C multipliers = [] for i in range(len(g)): @@ -138,12 +158,11 @@ def get_safe_action(self, obs, act, c): act.unsqueeze(2)).view(-1) + c_i + self.slack[i] denomin = torch.bmm(g_i.unsqueeze(1), g_i.unsqueeze(2)).view(-1) + 1e-8 - # formula (5) from paper + # Equation (5) from Dalal 2018. mult = F.relu(numer / denomin) # (B,) multipliers.append(mult) multipliers = torch.stack(multipliers, -1) # (B,C) - - # Calculate correction, formula (6) from paper + # Calculate correction, equation (6) from Dalal 2018. max_mult, max_idx = torch.topk(multipliers, 1, dim=-1) # (B,1) max_idx = max_idx.view(-1).tolist() # []_B # [(A,)]_B -> (B,A) @@ -154,11 +173,6 @@ def get_safe_action(self, obs, act, c): return action_new -# ----------------------------------------------------------------------------------- -# Storage -# ----------------------------------------------------------------------------------- - - class ConstraintBuffer(object): """Storage for replay buffer during training. @@ -167,6 +181,7 @@ class ConstraintBuffer(object): batch_size (int): number of samples (steps) per batch. scheme (dict): describs shape & other info of data to be stored. keys (list): names of all data from scheme. 
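Aside: a minimal sketch of the closed-form projection in get_safe_action() above (equations (5) and (6) of Dalal et al. 2018) for a single linearized constraint c_next ~ c + g^T a, using made-up numbers for g, a, c and slack.

import numpy as np

g = np.array([0.8, -0.3])   # made-up learned constraint gradient w.r.t. the action
a = np.array([0.5, 0.2])    # made-up unconstrained policy action
c, slack = 0.1, 0.05        # made-up current constraint value and slack margin

# Equation (5): Lagrange multiplier of the single active constraint.
multiplier = max(0.0, (g @ a + c + slack) / (g @ g + 1e-8))
# Equation (6): correct the action along the constraint gradient.
a_safe = a - multiplier * g

# The predicted next constraint value c + g^T a_safe is pushed down to -slack here.
print(c + g @ a_safe, a_safe)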
+ """ def __init__(self, @@ -174,17 +189,16 @@ def __init__(self, act_space, num_constraints, max_size, - batch_size=None): + batch_size=None + ): super().__init__() self.max_size = max_size self.batch_size = batch_size - obs_dim = obs_space.shape if isinstance(act_space, Box): act_dim = act_space.shape[0] else: act_dim = act_space.n - N = max_size self.scheme = { "obs": { @@ -204,7 +218,9 @@ def __init__(self, self.reset() def reset(self): - """Allocate space for containers.""" + """Allocate space for containers. + + """ for k, info in self.scheme.items(): assert "vshape" in info, "Scheme must define vshape for {}".format( k) @@ -212,16 +228,19 @@ def reset(self): dtype = info.get("dtype", np.float32) init = info.get("init", np.zeros) self.__dict__[k] = init(vshape, dtype=dtype) - self.pos = 0 self.buffer_size = 0 def __len__(self): - """Returns current size of the buffer.""" + """Returns current size of the buffer. + + """ return self.buffer_size def state_dict(self): - """Returns a snapshot of current buffer.""" + """Returns a snapshot of current buffer. + + """ state = dict( pos=self.pos, buffer_size=self.buffer_size, @@ -231,47 +250,61 @@ def state_dict(self): state[k] = v return state - def load_state_dict(self, state): - """Restores buffer from previous state.""" + def load_state_dict(self, + state + ): + """Restores buffer from previous state. + + """ for k, v in state.items(): self.__dict__[k] = v - def push(self, batch): - """Inserts transition step data (as dict) to storage.""" - # batch size + def push(self, + batch + ): + """Inserts transition step data (as dict) to storage. + + """ + # Batch size. k = list(batch.keys())[0] n = batch[k].shape[0] - for k, v in batch.items(): shape = self.scheme[k]["vshape"][1:] dtype = self.scheme[k].get("dtype", np.float32) v_ = np.asarray(v, dtype=dtype).reshape((n,) + shape) - if self.pos + n <= self.max_size: self.__dict__[k][self.pos:self.pos + n] = v_ else: - # wrap around + # Wrap. remain_n = self.pos + n - self.max_size self.__dict__[k][self.pos:self.max_size] = v_[:-remain_n] self.__dict__[k][:remain_n] = v_[-remain_n:] - self.pos = (self.pos + n) % self.max_size if self.buffer_size < self.max_size: self.buffer_size = min(self.max_size, self.pos + n) - def sample(self, indices): - """Returns partial data.""" + def sample(self, + indices + ): + """Returns partial data. + + """ batch = {} for k, info in self.scheme.items(): shape = info["vshape"][1:] batch[k] = self.__dict__[k].reshape(-1, *shape)[indices] return batch - def sampler(self, batch_size, device="cpu", drop_last=True): - """Makes sampler to loop through all data.""" + def sampler(self, + batch_size, + device="cpu", + drop_last=True + ): + """Makes sampler to loop through all data. + + """ total_steps = len(self) sampler = random_sample(np.arange(total_steps), batch_size, drop_last) - for indices in sampler: batch = self.sample(indices) batch = { diff --git a/safe_control_gym/controllers/safe_explorer/safe_ppo.py b/safe_control_gym/controllers/safe_explorer/safe_ppo.py index 3d728bf04..011644958 100644 --- a/safe_control_gym/controllers/safe_explorer/safe_ppo.py +++ b/safe_control_gym/controllers/safe_explorer/safe_ppo.py @@ -1,9 +1,11 @@ -"""[summary] +"""PPO-based safe explorer. 
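Aside: a small sketch of the wrap-around insertion in ConstraintBuffer.push() above, with made-up sizes; when an incoming batch does not fit before the end of the storage, its tail wraps to the front and the write position advances modulo max_size.

import numpy as np

max_size, obs_dim = 5, 1
storage = np.zeros((max_size, obs_dim))
pos = 3                                                  # current write position (made up)
batch = np.arange(4, dtype=float).reshape(4, obs_dim)    # 4 new samples, only 2 slots left

n = batch.shape[0]
if pos + n <= max_size:
    storage[pos:pos + n] = batch
else:
    remain_n = pos + n - max_size
    storage[pos:max_size] = batch[:-remain_n]   # fill up to the end of the storage
    storage[:remain_n] = batch[-remain_n:]      # wrap the tail to the front
pos = (pos + n) % max_size

print(storage.ravel(), pos)   # [2. 3. 0. 0. 1.] and pos == 2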
+ """ import os import time import numpy as np import torch + from collections import defaultdict from safe_control_gym.utils.logging import ExperimentLogger @@ -12,7 +14,6 @@ from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env_utils import _flatten_obs from safe_control_gym.envs.env_wrappers.record_episode_statistics import RecordEpisodeStatistics, VecRecordEpisodeStatistics from safe_control_gym.math_and_models.normalization import BaseNormalizer, MeanStdNormalizer, RewardStdNormalizer - from safe_control_gym.controllers.base_controller import BaseController from safe_control_gym.controllers.ppo.ppo_utils import compute_returns_and_advantages from safe_control_gym.controllers.safe_explorer.safe_explorer_utils import SafetyLayer, ConstraintBuffer @@ -20,26 +21,34 @@ class SafeExplorerPPO(BaseController): - """Safety layer for constraint satisfaction for RL.""" - - def __init__(self, env_func, training=True, checkpoint_path="model_latest.pt", output_dir="temp", device="cpu", seed=0, **kwargs): + """Safety layer for constraint satisfaction for RL. + + """ + + def __init__(self, + env_func, + training=True, + checkpoint_path="model_latest.pt", + output_dir="temp", + device="cpu", + seed=0, + **kwargs + ): super().__init__(env_func, training, checkpoint_path, output_dir, device, seed, **kwargs) - - # task + # Task. if self.training: - # training (+ evaluation) + # Training and testing. self.env = make_vec_envs(env_func, None, self.rollout_batch_size, self.num_workers, seed) self.env = VecRecordEpisodeStatistics(self.env, self.deque_size) self.eval_env = env_func() self.eval_env = RecordEpisodeStatistics(self.eval_env, self.deque_size) self.num_constraints = self.env.envs[0].num_constraints else: - # testing only + # Testing only. self.env = env_func() self.env = RecordEpisodeStatistics(self.env) self.num_constraints = self.env.num_constraints - - # safety layer + # Safety layer. self.safety_layer = SafetyLayer(self.env.observation_space, self.env.action_space, hidden_dim=self.constraint_hidden_dim, @@ -47,8 +56,7 @@ def __init__(self, env_func, training=True, checkpoint_path="model_latest.pt", o lr=self.constraint_lr, slack=self.constraint_slack) self.safety_layer.to(device) - - # agent + # Agent. self.agent = SafePPOAgent( self.env.observation_space, self.env.action_space, @@ -64,66 +72,67 @@ def __init__(self, env_func, training=True, checkpoint_path="model_latest.pt", o action_modifier=self.safety_layer.get_safe_action, ) self.agent.to(device) - - # pre-/post-processing + # Pre-/post-processing. self.obs_normalizer = BaseNormalizer() if self.norm_obs: self.obs_normalizer = MeanStdNormalizer(shape=self.env.observation_space.shape, clip=self.clip_obs, epsilon=1e-8) - self.reward_normalizer = BaseNormalizer() if self.norm_reward: self.reward_normalizer = RewardStdNormalizer(gamma=self.gamma, clip=self.clip_reward, epsilon=1e-8) - - # logging + # Logging. if self.training: log_file_out = True use_tensorboard = self.tensorboard else: - # disable logging to texts and tfboard for evaluation + # Disable logging to texts and tfboard for evaluation. log_file_out = False use_tensorboard = False self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) def reset(self): - """Do initializations for training or evaluation.""" + """Do initializations for training or evaluation. 
+ + """ if self.training: if self.pretraining: self.constraint_buffer = ConstraintBuffer(self.env.observation_space, self.env.action_space, self.num_constraints, self.constraint_buffer_size) else: - # load safety layer for 2nd stage training + # Load safety layer for 2nd stage training. assert self.pretrained, "Must provide a pre-trained model for adaptation." if os.path.isdir(self.pretrained): self.pretrained = os.path.join(self.pretrained, "model_latest.pt") state = torch.load(self.pretrained) self.safety_layer.load_state_dict(state["safety_layer"]) - - # set up stats tracking + # Set up stats tracking. self.env.add_tracker("constraint_violation", 0) - self.total_steps = 0 obs, info = self.env.reset() self.obs = self.obs_normalizer(obs) self.c = np.array([inf["constraint_values"] for inf in info["n"]]) def close(self): - """Shuts down and cleans up lingering resources.""" + """Shuts down and cleans up lingering resources. + + """ self.env.close() if self.training: self.eval_env.close() self.logger.close() - def save(self, path): - """Saves model params and experiment state to checkpoint path.""" + def save(self, + path + ): + """Saves model params and experiment state to checkpoint path. + + """ path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) - state_dict = { "agent": self.agent.state_dict(), "safety_layer": self.safety_layer.state_dict(), "obs_normalizer": self.obs_normalizer.state_dict(), "reward_normalizer": self.reward_normalizer.state_dict(), } - if self.training: exp_state = { "total_steps": self.total_steps, @@ -137,17 +146,19 @@ def save(self, path): state_dict["constraint_buffer"] = self.constraint_buffer.state_dict() torch.save(state_dict, path) - def load(self, path): - """Restores model and experiment given checkpoint path.""" - state = torch.load(path) + def load(self, + path + ): + """Restores model and experiment given checkpoint path. - # restore policy + """ + state = torch.load(path) + # Restore policy. self.agent.load_state_dict(state["agent"]) self.safety_layer.load_state_dict(state["safety_layer"]) self.obs_normalizer.load_state_dict(state["obs_normalizer"]) self.reward_normalizer.load_state_dict(state["reward_normalizer"]) - - # restore experiment state + # Restore experiment state. if self.training: self.total_steps = state["total_steps"] self.obs = state["obs"] @@ -158,34 +169,35 @@ def load(self, path): if self.pretraining: self.constraint_buffer.load_state_dict(state["constraint_buffer"]) - def learn(self, env=None, **kwargs): - """Performs learning (pre-training, training, fine-tuning, etc).""" + def learn(self, + env=None, + **kwargs + ): + """Performs learning (pre-training, training, fine-tuning, etc). + + """ if self.pretraining: final_step = self.constraint_epochs train_func = self.pretrain_step else: final_step = self.max_env_steps train_func = self.train_step - while self.total_steps < final_step: results = train_func() - - # checkpoint + # Checkpoint. if self.total_steps >= final_step or (self.save_interval and self.total_steps % self.save_interval == 0): - # latest/final checkpoint + # Latest/final checkpoint. self.save(self.checkpoint_path) self.logger.info("Checkpoint | {}".format(self.checkpoint_path)) if self.num_checkpoints and self.total_steps % (final_step // self.num_checkpoints) == 0: - # intermediate checkpoint + # Intermediate checkpoint. path = os.path.join(self.output_dir, "checkpoints", "model_{}.pt".format(self.total_steps)) self.save(path) - - # eval + # Evaluation. 
if self.eval_interval and self.total_steps % self.eval_interval == 0: if self.pretraining: eval_results = self.eval_constraint_models() results["eval"] = eval_results - # TODO: no single criterion to determine best eval model else: eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) results["eval"] = eval_results @@ -193,19 +205,26 @@ def learn(self, env=None, **kwargs): eval_results["ep_lengths"].std(), eval_results["ep_returns"].mean(), eval_results["ep_returns"].std())) - # save best model + # Save the best model. eval_score = eval_results["ep_returns"].mean() eval_best_score = getattr(self, "eval_best_score", -np.infty) if self.eval_save_best and eval_best_score < eval_score: self.eval_best_score = eval_score self.save(os.path.join(self.output_dir, "model_best.pt")) - - # logging + # Logging. if self.log_interval and self.total_steps % self.log_interval == 0: self.log_step(results) - def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): - """Runs evaluation with current policy.""" + def run(self, + env=None, + render=False, + n_episodes=10, + verbose=False, + **kwargs + ): + """Runs evaluation with current policy. + + """ self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -213,26 +232,22 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): else: if not is_wrapped(env, RecordEpisodeStatistics): env = RecordEpisodeStatistics(env) - obs, info = env.reset() obs = self.obs_normalizer(obs) c = info["constraint_values"] ep_returns, ep_lengths = [], [] frames = [] - while len(ep_returns) < n_episodes: with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) c = torch.FloatTensor(c).to(self.device) action = self.agent.ac.act(obs, c=c) - obs, reward, done, info = env.step(action) if render: env.render() frames.append(env.render("rgb_array")) if verbose: print("obs {} | act {}".format(obs, action)) - if done: assert "episode" in info ep_returns.append(info["episode"]["r"]) @@ -240,8 +255,7 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): obs, info = env.reset() obs = self.obs_normalizer(obs) c = info["constraint_values"] - - # collect evaluation results + # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) eval_results = {"ep_returns": ep_returns, "ep_lengths": ep_lengths} @@ -250,50 +264,50 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): return eval_results def pretrain_step(self): - """Performs a pre-trianing step.""" + """Performs a pre-trianing step. + + """ results = defaultdict(list) start = time.time() self.safety_layer.train() self.obs_normalizer.unset_read_only() - - # Just sample episodes for the whole epoch + # Just sample episodes for the whole epoch. self.collect_constraint_data(self.constraint_steps_per_epoch) self.total_steps += 1 - - # Do the update from memory + # Do the update from memory. for batch in self.constraint_buffer.sampler(self.constraint_batch_size): res = self.safety_layer.update(batch) for k, v in res.items(): results[k].append(v) - - self.constraint_buffer.reset() # TODO: check perf. w/ & wo/ it. - + self.constraint_buffer.reset() results = {k: sum(v) / len(v) for k, v in results.items()} results.update({"step": self.total_steps, "elapsed_time": time.time() - start}) return results def train_step(self): - """Performs a training/fine-tuning step.""" + """Performs a training/fine-tuning step. 
+ + """ self.agent.train() self.obs_normalizer.unset_read_only() - rollouts = SafePPOBuffer(self.env.observation_space, self.env.action_space, self.num_constraints, self.rollout_steps, self.rollout_batch_size) + rollouts = SafePPOBuffer(self.env.observation_space, + self.env.action_space, + self.num_constraints, + self.rollout_steps, + self.rollout_batch_size) obs = self.obs c = self.c start = time.time() - for step in range(self.rollout_steps): with torch.no_grad(): act, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device), c=torch.FloatTensor(c).to(self.device)) next_obs, rew, done, info = self.env.step(act) - next_obs = self.obs_normalizer(next_obs) rew = self.reward_normalizer(rew, done) mask = 1 - done.astype(float) - - # time truncation is not true termination + # Time truncation is not the same as the true termination. terminal_v = np.zeros_like(v) for idx, inf in enumerate(info["n"]): - # if "TimeLimit.truncated" in inf and inf["TimeLimit.truncated"]: if "terminal_info" not in inf: continue inff = inf["terminal_info"] @@ -302,7 +316,6 @@ def train_step(self): terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().numpy() terminal_v[idx] = terminal_val - rollouts.push({ "obs": obs, "act": act, @@ -315,12 +328,10 @@ def train_step(self): }) obs = next_obs c = np.array([inf["constraint_values"] for inf in info["n"]]) - self.obs = obs self.c = c self.total_steps += self.rollout_batch_size * self.rollout_steps - - # learn from rollout batch + # Learn from rollout batch. last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().numpy() ret, adv = compute_returns_and_advantages(rollouts.rew, rollouts.v, @@ -331,84 +342,82 @@ def train_step(self): use_gae=self.use_gae, gae_lambda=self.gae_lambda) rollouts.ret = ret - # prevent divide-by-0 for repetitive tasks + # Prevent divide-by-0 for repetitive tasks. rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) - results = self.agent.update(rollouts, self.device) results.update({"step": self.total_steps, "elapsed_time": time.time() - start}) return results - def log_step(self, results): - """Does logging after a training step.""" + def log_step(self, + results + ): + """Does logging after a training step. + + """ step = results["step"] final_step = self.constraint_epochs if self.pretraining else self.max_env_steps - # runner stats + # Runner stats. self.logger.add_scalars({ - "step": step, - "time": results["elapsed_time"], - "progress": step / final_step - }, + "step": step, + "time": results["elapsed_time"], + "progress": step / final_step + }, step, prefix="time", write=False, write_tb=False) - if self.pretraining: - # constraint learning stats + # Constraint learning stats. for i in range(self.safety_layer.num_constraints): name = "constraint_{}_loss".format(i) self.logger.add_scalars({name: results[name]}, step, prefix="constraint_loss") if "eval" in results: self.logger.add_scalars({name: results["eval"][name]}, step, prefix="constraint_loss_eval") else: - # learning stats + # Learning stats. self.logger.add_scalars({k: results[k] for k in ["policy_loss", "value_loss", "entropy_loss", "approx_kl"]}, step, prefix="loss") - - # performance stats + # Performance stats. 
ep_lengths = np.asarray(self.env.length_queue) ep_returns = np.asarray(self.env.return_queue) self.logger.add_scalars({ - "ep_length": ep_lengths.mean(), - "ep_return": ep_returns.mean(), - "ep_reward": (ep_returns / ep_lengths).mean() - }, + "ep_length": ep_lengths.mean(), + "ep_return": ep_returns.mean(), + "ep_reward": (ep_returns / ep_lengths).mean() + }, step, prefix="stat") - - # total constraint violation during learning + # Total constraint violation during learning. total_violations = self.env.accumulated_stats["constraint_violation"] self.logger.add_scalars({"constraint_violation": total_violations}, step, prefix="stat") - if "eval" in results: eval_ep_lengths = results["eval"]["ep_lengths"] eval_ep_returns = results["eval"]["ep_returns"] - self.logger.add_scalars( - { - "ep_length": eval_ep_lengths.mean(), - "ep_return": eval_ep_returns.mean(), - "ep_reward": (eval_ep_returns / eval_ep_lengths).mean() - }, - step, - prefix="stat_eval") - - # print summary table + self.logger.add_scalars({ + "ep_length": eval_ep_lengths.mean(), + "ep_return": eval_ep_returns.mean(), + "ep_reward": (eval_ep_returns / eval_ep_lengths).mean() + }, + step, + prefix="stat_eval") + + # Print summary table. self.logger.dump_scalars() - def collect_constraint_data(self, num_steps): - """Uses random policy to collect data for pre-training constriant models.""" + def collect_constraint_data(self, + num_steps + ): + """Uses random policy to collect data for pre-training constriant models. + + """ step = 0 obs, info = self.env.reset() obs = self.obs_normalizer(obs) c = np.array([inf["constraint_values"] for inf in info["n"]]) - while step < num_steps: - # action = self.env.action_space.sample() action_spaces = self.env.get_attr("action_space") action = np.array([space.sample() for space in action_spaces]) - obs_next, _, done, info = self.env.step(action) obs_next = self.obs_normalizer(obs_next) - c_next = [] for i, d in enumerate(done): if d: @@ -417,27 +426,24 @@ def collect_constraint_data(self, num_steps): c_next_i = info["n"][i]["constraint_values"] c_next.append(c_next_i) c_next = np.array(c_next) - self.constraint_buffer.push({"act": action, "obs": obs, "c": c, "c_next": c_next}) - obs = obs_next c = np.array([inf["constraint_values"] for inf in info["n"]]) step += self.rollout_batch_size def eval_constraint_models(self): - """Runs evaluation for the constraint models.""" + """Runs evaluation for the constraint models. + + """ eval_resutls = defaultdict(list) self.safety_layer.eval() self.obs_normalizer.set_read_only() - - # collect evaluation data + # Collect evaluation data. self.collect_constraint_data(self.constraint_eval_steps) - for batch in self.constraint_buffer.sampler(self.constraint_batch_size): losses = self.safety_layer.compute_loss(batch) for i, loss in enumerate(losses): eval_resutls["constraint_{}_loss".format(i)].append(loss.item()) - - self.constraint_buffer.reset() # TODO: use a different buffer for evaluation + self.constraint_buffer.reset() eval_resutls = {k: sum(v) / len(v) for k, v in eval_resutls.items()} return eval_resutls diff --git a/safe_control_gym/controllers/safe_explorer/safe_ppo.yaml b/safe_control_gym/controllers/safe_explorer/safe_ppo.yaml index 56ab772d5..c82b355b2 100644 --- a/safe_control_gym/controllers/safe_explorer/safe_ppo.yaml +++ b/safe_control_gym/controllers/safe_explorer/safe_ppo.yaml @@ -1,11 +1,11 @@ -# model args +# Model args hidden_dim: 64 norm_obs: False norm_reward: False clip_obs: 10. clip_reward: 10. 
-# safety layer args +# Safety layer args pretraining: True pretrained: null constraint_hidden_dim: 10 @@ -18,7 +18,7 @@ constraint_eval_interval: 5 constraint_buffer_size: 1000000 constraint_slack: null -# loss args +# Loss args gamma: 0.99 use_gae: False gae_lambda: 0.95 @@ -27,14 +27,14 @@ clip_param: 0.2 target_kl: 0.01 entropy_coef: 0.01 -# optim args +# Optim args opt_epochs: 10 mini_batch_size: 64 actor_lr: 0.0003 critic_lr: 0.001 max_grad_norm: 0.5 -# runner args +# Runner args max_env_steps: 1000000 num_workers: 1 rollout_batch_size: 4 @@ -42,7 +42,7 @@ rollout_steps: 100 deque_size: 10 eval_batch_size: 10 -# misc +# Misc log_interval: 0 save_interval: 0 num_checkpoints: 0 diff --git a/safe_control_gym/controllers/safe_explorer/safe_ppo_utils.py b/safe_control_gym/controllers/safe_explorer/safe_ppo_utils.py index fb0fcb10b..79bf561c7 100644 --- a/safe_control_gym/controllers/safe_explorer/safe_ppo_utils.py +++ b/safe_control_gym/controllers/safe_explorer/safe_ppo_utils.py @@ -1,20 +1,21 @@ -from gym.spaces import Box +"""Utility function for the PPO-based safe explorer. + +""" import numpy as np import torch import torch.nn as nn +from gym.spaces import Box + from safe_control_gym.math_and_models.neural_networks import MLP, CNN, RNN, init_ from safe_control_gym.math_and_models.distributions import Normal, Categorical - import safe_control_gym.controllers.ppo.ppo_utils as ppo_utils -# ----------------------------------------------------------------------------------- -# Agent -# ----------------------------------------------------------------------------------- - class SafePPOAgent(ppo_utils.PPOAgent): - """A PPO class that encapsulates model, optimizer and update functions.""" + """A PPO class that encapsulates models, optimizers and update functions. + + """ def __init__(self, obs_space, @@ -29,57 +30,48 @@ def __init__(self, opt_epochs=10, mini_batch_size=64, action_modifier=None, - **kwargs): - # params + **kwargs + ): + # Parameters. self.obs_space = obs_space self.act_space = act_space - self.use_clipped_value = use_clipped_value self.clip_param = clip_param self.target_kl = target_kl self.entropy_coef = entropy_coef - self.opt_epochs = opt_epochs self.mini_batch_size = mini_batch_size - - # model + # Model. self.ac = MLPActorCritic(obs_space, act_space, hidden_dims=[hidden_dim] * 2, activation="tanh", action_modifier=action_modifier) - - # optimizers + # Optimizers. self.actor_opt = torch.optim.Adam(self.ac.actor.parameters(), actor_lr) - self.critic_opt = torch.optim.Adam(self.ac.critic.parameters(), - critic_lr) + self.critic_opt = torch.optim.Adam(self.ac.critic.parameters(), critic_lr) def compute_policy_loss(self, batch): - """Returns policy loss(es) given batch of data.""" - obs, act, logp_old, adv, c = batch["obs"], batch["act"], batch[ - "logp"], batch["adv"], batch["c"] - dist, logp = self.ac.actor(obs, act, c=c) + """Returns policy loss(es) given batch of data. - # policy + """ + obs, act, logp_old, adv, c = batch["obs"], batch["act"], batch["logp"], batch["adv"], batch["c"] + dist, logp = self.ac.actor(obs, act, c=c) + # Policy. ratio = torch.exp(logp - logp_old) - clip_adv = torch.clamp(ratio, 1 - self.clip_param, - 1 + self.clip_param) * adv + clip_adv = torch.clamp(ratio, 1 - self.clip_param, 1 + self.clip_param) * adv policy_loss = -torch.min(ratio * adv, clip_adv).mean() - - # entropy + # Entropy. entropy_loss = -dist.entropy().mean() - - # kl/trust region + # KL/trust region. 
approx_kl = (logp_old - logp).mean() return policy_loss, entropy_loss, approx_kl -# ----------------------------------------------------------------------------------- -# Models -# ----------------------------------------------------------------------------------- - - class MLPActor(nn.Module): + """Actor model. + + """ def __init__(self, obs_dim, @@ -87,36 +79,44 @@ def __init__(self, hidden_dims, activation, discrete=False, - action_modifier=None): + action_modifier=None + ): + """ + + """ super().__init__() self.pi_net = MLP(obs_dim, act_dim, hidden_dims, activation) - - # construct output action distribution + # Construct output action distribution. self.discrete = discrete if discrete: self.dist_fn = lambda x: Categorical(logits=x) else: self.logstd = nn.Parameter(-0.5 * torch.ones(act_dim)) self.dist_fn = lambda x: Normal(x, self.logstd.exp()) - - # safety filter + # Safety filter. self.action_modifier = action_modifier - def forward(self, obs, act=None, c=None): + def forward(self, + obs, + act=None, + c=None + ): + """ + + """ mu = self.pi_net(obs) - # filter action if needed + # Filter action if needed. if self.action_modifier: if len(mu.shape) == 1: - # during evalution or single env runs + # During evalution or single env runs. mu_safe = self.action_modifier(obs.unsqueeze(0), mu.unsqueeze(0), c.unsqueeze(0)).view(-1) else: - # during training or vectorized runs + # During training or vectorized runs. mu_safe = self.action_modifier(obs, mu, c) else: mu_safe = mu - dist = self.dist_fn(mu_safe) logp_a = None if act is not None: @@ -130,6 +130,7 @@ class MLPActorCritic(ppo_utils.MLPActorCritic): Attributes: actor (MLPActor): policy network. critic (MLPCritic): value network. + """ def __init__(self, @@ -137,9 +138,12 @@ def __init__(self, act_space, hidden_dims=(64, 64), activation="tanh", - action_modifier=None): - nn.Module.__init__(self) + action_modifier=None + ): + """ + """ + nn.Module.__init__(self) obs_dim = obs_space.shape[0] if isinstance(act_space, Box): act_dim = act_space.shape[0] @@ -147,31 +151,36 @@ def __init__(self, else: act_dim = act_space.n discrete = True - - # policy - self.actor = MLPActor(obs_dim, act_dim, hidden_dims, activation, - discrete, action_modifier) - # value function + # Policy. + self.actor = MLPActor(obs_dim, act_dim, hidden_dims, activation, discrete, action_modifier) + # Value function. self.critic = ppo_utils.MLPCritic(obs_dim, hidden_dims, activation) - def step(self, obs, c=None): + def step(self, + obs, + c=None + ): + """ + + """ dist, _ = self.actor(obs, c=c) a = dist.sample() logp_a = dist.log_prob(a) v = self.critic(obs) return a.numpy(), v.numpy(), logp_a.numpy() - def act(self, obs, c=None): + def act(self, + obs, + c=None + ): + """ + + """ dist, _ = self.actor(obs, c=c) a = dist.mode() return a.numpy() -# ----------------------------------------------------------------------------------- -# Storage -# ----------------------------------------------------------------------------------- - - class SafePPOBuffer(ppo_utils.PPOBuffer): """Storage for a batch of episodes during training. @@ -180,20 +189,27 @@ class SafePPOBuffer(ppo_utils.PPOBuffer): batch_size (int): number of episodes per batch. scheme (dict): describs shape & other info of data to be stored. keys (list): names of all data from scheme. 
+ """ - def __init__(self, obs_space, act_space, num_constraints, max_length, - batch_size): + def __init__(self, + obs_space, + act_space, + num_constraints, + max_length, + batch_size + ): + """ + + """ self.max_length = max_length self.batch_size = batch_size - T, N = max_length, batch_size obs_dim = obs_space.shape if isinstance(act_space, Box): act_dim = act_space.shape[0] else: act_dim = act_space.n - self.scheme = { "obs": { "vshape": (T, N, *obs_dim) @@ -228,4 +244,4 @@ def __init__(self, obs_space, act_space, num_constraints, max_length, }, } self.keys = list(self.scheme.keys()) - self.reset() \ No newline at end of file + self.reset() diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py index 39989f4b4..e9779da83 100644 --- a/safe_control_gym/envs/benchmark_env.py +++ b/safe_control_gym/envs/benchmark_env.py @@ -1,7 +1,6 @@ """Base environment class module. -This module also contains enumerations for cost functions, tasks, -disturbances, and quadrotor types. +This module also contains enumerations for cost functions, tasks, disturbances, and quadrotor types. """ import os @@ -14,14 +13,18 @@ class Cost(str, Enum): - """Reward/cost functions enumeration class.""" + """Reward/cost functions enumeration class. + + """ RL_REWARD = "rl_reward" # Default RL reward function. QUADRATIC = "quadratic" # Quadratic cost. class Task(str, Enum): - """Environment tasks enumeration class.""" + """Environment tasks enumeration class. + + """ STABILIZATION = "stabilization" # Stabilization task. TRAJ_TRACKING = "traj_tracking" # Trajectory tracking task. @@ -32,16 +35,17 @@ class BenchmarkEnv(gym.Env): Attributes: id (int): unique identifier of the current env instance (among other instances). + """ - # class variable, count env instance in current process - _count = 0 + _count = 0 # Class variable, count env instance in current process. def __init__(self, output_dir=None, seed: int = 0, info_in_reset: bool = False, episode_len_sec: int = 5, - cost: Cost = Cost.RL_REWARD): + cost: Cost = Cost.RL_REWARD + ): """Initialization method for BenchmarkEnv. Args: @@ -50,32 +54,28 @@ def __init__(self, environment's symbolic model. episode_len_sec (int, optional): Maximum episode duration in seconds. cost (Cost, optional): Cost function choice used to compute the reward in .step(). + """ - # assign unique ID based on env instance count + # Assign unique ID based on env instance count. self.id = self.__class__._count self.__class__._count += 1 - - # directory to save any env output + # Directory to save any env output. if output_dir is None: output_dir = os.getcwd() self.output_dir = output_dir - - # default seed None means pure randomness/no seeding + # Default seed None means pure randomness/no seeding. self.seed(seed) self.INFO_IN_RESET = info_in_reset self.initial_reset = False - - # maximum episode length in seconds + # Maximum episode length in seconds. self.EPISODE_LEN_SEC = episode_len_sec - - # define cost-related quantities + # Define cost-related quantities. self.COST = Cost(cost) # Default Q and R matrices for quadratic cost. if self.COST == Cost.QUADRATIC: self.Q = np.eye(self.observation_space.shape[0]) self.R = np.eye(self.action_space.shape[0]) - - # custom setups + # Custom setups. self._setup_symbolic() self._setup_disturbances() self._setup_constraints() @@ -87,7 +87,10 @@ def _check_initial_reset(self): "[ERROR] You must call env.reset() at least once before using env.step()." 
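Note on the `Cost.QUADRATIC` branch above: it only instantiates identity `Q`/`R`; the cost such weights imply is the usual x^T Q x + u^T R u form. A tiny numeric sketch (the `quadratic_cost` helper is hypothetical, not an env method, and the env's actual reward computation is not shown in this hunk):

# Hypothetical helper illustrating the quadratic cost weights set up above
# (Q and R default to identity matrices of the obs/act dimensions).
import numpy as np

def quadratic_cost(x, u, Q, R, x_goal=None):
    # Penalize deviation from a goal state plus control effort.
    x_err = x if x_goal is None else x - x_goal
    return float(x_err.T @ Q @ x_err + u.T @ R @ u)

Q = np.eye(4)        # e.g. cart-pole observation dimension
R = np.eye(1)        # single control input
x = np.array([0.1, 0.0, 0.05, 0.0])
u = np.array([0.2])
print(quadratic_cost(x, u, Q, R))  # 0.1^2 + 0.05^2 + 0.2^2 = 0.0525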
) - def _randomize_values_by_info(self, original_values, randomization_info): + def _randomize_values_by_info(self, + original_values, + randomization_info + ): """Randomizes a list of values according to desired distributions. Args: @@ -97,30 +100,33 @@ def _randomize_values_by_info(self, original_values, randomization_info): Returns: dict: A dict of randomized values. + """ - randomized_values = copy.deepcopy( - original_values) # Start from a copy of the original values - rand_info_copy = copy.deepcopy( - randomization_info) # Copy the info dict to parse it with "pop" - # Randomized and replace values for which randomization info are given + # Start from a copy of the original values. + randomized_values = copy.deepcopy(original_values) + # Copy the info dict to parse it with "pop". + rand_info_copy = copy.deepcopy(randomization_info) + # Randomized and replace values for which randomization info are given. for key in original_values: if key in rand_info_copy: - # Get distribution removing it from info dict + # Get distribution removing it from info dict. distrib = getattr(self.np_random, rand_info_copy[key].pop("distrib")) - # Pop positional args + # Pop positional args. d_args = rand_info_copy[key].pop("args", []) - # Keyword args are just anything left + # Keyword args are just anything left. d_kwargs = rand_info_copy[key] - # Randomize + # Randomize. randomized_values[key] = distrib(*d_args, **d_kwargs) return randomized_values - def seed(self, seed=None): + def seed(self, + seed=None + ): """Sets up a random number generator for a given seed. - Current repo convention: non-positive seed same as None - (might not be the best, subject to discussion). + Current convention: non-positive seed same as None + """ if seed is not None: seed = seed if seed > 0 else None @@ -167,8 +173,7 @@ def _generate_trajectory(self, # Get trajectory type. valid_traj_type = ["circle", "square", "figure8"] if traj_type not in valid_traj_type: - raise ValueError( - "Trajectory type should be one of [circle, square, figure8].") + raise ValueError("Trajectory type should be one of [circle, square, figure8].") traj_period = traj_length / num_cycles direction_list = ["x", "y", "z"] # Get coordinates indexes. @@ -177,9 +182,7 @@ def _generate_trajectory(self, coord_index_a = direction_list.index(traj_plane[0]) coord_index_b = direction_list.index(traj_plane[1]) else: - raise ValueError("Trajectory plane should be in form of " - "ab" - ", where a and b can be {x, y, z}.") + raise ValueError("Trajectory plane should be in form of ab, where a and b can be {x, y, z}.") # Generate time stamps. times = np.arange(0, traj_length, sample_time) pos_ref_traj = np.zeros((len(times), 3)) @@ -187,15 +190,27 @@ def _generate_trajectory(self, speed_traj = np.zeros((len(times), 1)) # Compute trajectory points. 
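The distribution-spec pattern used by `_randomize_values_by_info` above (look up a distribution by name on the RNG, pop positional args, pass the remainder as keyword args) also works as a standalone helper; a sketch with a hypothetical function name and example entries:

# Standalone sketch of the spec format parsed by _randomize_values_by_info:
# {"distrib": <np_random method>, "args": [...], <remaining keyword args>}.
import copy
import numpy as np

def randomize_by_info(values, randomization_info, rng):
    randomized = copy.deepcopy(values)
    info = copy.deepcopy(randomization_info)   # parsed destructively with pop()
    for key in values:
        if key in info:
            distrib = getattr(rng, info[key].pop("distrib"))
            d_args = info[key].pop("args", [])
            d_kwargs = info[key]               # whatever is left are keyword args
            randomized[key] = distrib(*d_args, **d_kwargs)
    return randomized

rng = np.random.RandomState(0)
nominal = {"pole_length": 1.0, "cart_mass": 1.0}
rand_info = {
    "pole_length": {"distrib": "choice", "args": [[1, 5, 10]]},
    "cart_mass": {"distrib": "uniform", "low": 0.5, "high": 1.5},
}
print(randomize_by_info(nominal, rand_info, rng))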
for t in enumerate(times): - pos_ref_traj[t[0]], vel_ref_traj[t[0]] = self._get_coordinates( - t[1], traj_type, traj_period, coord_index_a, coord_index_b, - position_offset[0], position_offset[1], scaling) + pos_ref_traj[t[0]], vel_ref_traj[t[0]] = self._get_coordinates(t[1], + traj_type, + traj_period, + coord_index_a, + coord_index_b, + position_offset[0], + position_offset[1], + scaling) speed_traj[t[0]] = np.linalg.norm(vel_ref_traj[t[0]]) return pos_ref_traj, vel_ref_traj, speed_traj - def _get_coordinates(self, t, traj_type, traj_period, coord_index_a, - coord_index_b, position_offset_a, position_offset_b, - scaling): + def _get_coordinates(self, + t, + traj_type, + traj_period, + coord_index_a, + coord_index_b, + position_offset_a, + position_offset_b, + scaling + ): """Computes the coordinates of a specified trajectory at time t. Args: @@ -233,7 +248,11 @@ def _get_coordinates(self, t, traj_type, traj_period, coord_index_a, vel_ref[coord_index_b] = coords_b_dot return pos_ref, vel_ref - def _figure8(self, t, traj_period, scaling): + def _figure8(self, + t, + traj_period, + scaling + ): """Computes the coordinates of a figure8 trajectory at time t. Args: @@ -252,11 +271,14 @@ def _figure8(self, t, traj_period, scaling): coords_a = scaling * np.sin(traj_freq * t) coords_b = scaling * np.sin(traj_freq * t) * np.cos(traj_freq * t) coords_a_dot = scaling * traj_freq * np.cos(traj_freq * t) - coords_b_dot = scaling * traj_freq * (np.cos(traj_freq * t)**2 - - np.sin(traj_freq * t)**2) + coords_b_dot = scaling * traj_freq * (np.cos(traj_freq * t)**2 - np.sin(traj_freq * t)**2) return coords_a, coords_b, coords_a_dot, coords_b_dot - def _circle(self, t, traj_period, scaling): + def _circle(self, + t, + traj_period, + scaling + ): """Computes the coordinates of a circle trajectory at time t. Args: @@ -278,7 +300,11 @@ def _circle(self, t, traj_period, scaling): coords_b_dot = scaling * traj_freq * np.cos(traj_freq * t) return coords_a, coords_b, coords_a_dot, coords_b_dot - def _square(self, t, traj_period, scaling): + def _square(self, + t, + traj_period, + scaling + ): """Computes the coordinates of a square trajectory at time t. Args: @@ -330,8 +356,15 @@ def _square(self, t, traj_period, scaling): coords_b_dot = 0.0 return coords_a, coords_b, coords_a_dot, coords_b_dot - def _plot_trajectory(self, traj_type, traj_plane, traj_length, num_cycles, - pos_ref_traj, vel_ref_traj, speed_traj): + def _plot_trajectory(self, + traj_type, + traj_plane, + traj_length, + num_cycles, + pos_ref_traj, + vel_ref_traj, + speed_traj + ): """Plots a trajectory along x, y, z, and in a 3D projection. Args: @@ -350,8 +383,7 @@ def _plot_trajectory(self, traj_type, traj_plane, traj_length, num_cycles, print("Trajectory length: %s sec" % traj_length) print("Number of cycles: %d" % num_cycles) print("Trajectory period: %.2f sec" % (traj_length / num_cycles)) - print("Angular speed: %.2f rad/sec" % (2.0 * np.pi / - (traj_length / num_cycles))) + print("Angular speed: %.2f rad/sec" % (2.0 * np.pi / (traj_length / num_cycles))) print( "Position bounds: x [%.2f, %.2f] m, y [%.2f, %.2f] m, z [%.2f, %.2f] m" % (min(pos_ref_traj[:, 0]), max(pos_ref_traj[:, 0]), @@ -382,7 +414,7 @@ def _plot_trajectory(self, traj_type, traj_plane, traj_length, num_cycles, axs[2, 1].set_ylabel('vel z (m)') axs[2, 1].set_xlabel('time (s)') plt.show() - # Plot in 3D + # Plot in 3D. 
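For reference, the figure-8 branch of `_get_coordinates`/`_figure8` above reduces to a short closed form; a standalone sketch (hypothetical function name, example period and scaling):

# Closed-form figure-8 reference, mirroring the _figure8 equations above:
# a(t) = A sin(w t), b(t) = A sin(w t) cos(w t), with analytic velocities.
import numpy as np

def figure8_point(t, traj_period, scaling):
    w = 2.0 * np.pi / traj_period
    coords_a = scaling * np.sin(w * t)
    coords_b = scaling * np.sin(w * t) * np.cos(w * t)
    coords_a_dot = scaling * w * np.cos(w * t)
    coords_b_dot = scaling * w * (np.cos(w * t) ** 2 - np.sin(w * t) ** 2)
    return coords_a, coords_b, coords_a_dot, coords_b_dot

times = np.arange(0.0, 6.0, 0.02)   # e.g. 6 s sampled at 50 Hz
points = np.array([figure8_point(t, traj_period=3.0, scaling=0.5) for t in times])
print(points.shape)                  # (300, 4): a, b, a_dot, b_dot per step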
fig = plt.figure() ax = fig.gca(projection='3d') ax.plot(pos_ref_traj[:, 0], pos_ref_traj[:, 1], pos_ref_traj[:, 2]) diff --git a/safe_control_gym/envs/constraints.py b/safe_control_gym/envs/constraints.py index 33554e8de..a0d6f8a56 100644 --- a/safe_control_gym/envs/constraints.py +++ b/safe_control_gym/envs/constraints.py @@ -1,23 +1,20 @@ """Constraints module. -A base class for constraints and a class of a list of constraints. +Classes for constraints and lists of constraints. -Reference implementations: - * github.com/google-research/realworldrl_suite/blob/master/docs/README.md - * github.com/google-research/realworldrl_suite/blob/be7a51cffa7f5f9cb77a387c16bad209e0f851f8/realworldrl_suite/environments/cartpole.py#L38 - -TODO: - * Allow for states/inputs to be randomly sampled within constraints limits - * Constraint List have combined representation, and vertex representation """ import numpy as np -from enum import Enum import casadi as cs + +from enum import Enum + from safe_control_gym.envs.benchmark_env import BenchmarkEnv class ConstraintInputType(str, Enum): - """Allowable constraint type specifiers.""" + """Allowable constraint type specifiers. + + """ STATE = 'STATE' # Constraints who are a function of the state X. INPUT = 'INPUT' # Constraints who are a function of the input U. @@ -32,19 +29,26 @@ class Constraint: Attributes: dim (int): The number of inequalities representing the constraint. + """ dim = 0 # todo: why is this a class attribute? - def __init__(self, env: BenchmarkEnv, constraint_input_type: ConstraintInputType, active_dims=None, tolerance=None, **kwargs): + def __init__(self, + env: BenchmarkEnv, + constraint_input_type: ConstraintInputType, + active_dims=None, + tolerance=None, + **kwargs + ): """Defines params (e.g. bounds) and state. Args: env (safe_control_gym.envs.bechmark_env.BenchmarkEnv): The environment the constraint is for. constraint_input_type (ConstraintInputType): Specifies the input type to the constraint as a constraint - that acts on the state, input, or both - active_dims (list of ints): Filters the constraint to only act only select certian dimensions - tolerance (list or np.array): The distance from the constraint at which is_almost_active returns True + that acts on the state, input, or both. + active_dims (list of ints): Filters the constraint to only act only select certian dimensions. + tolerance (list or np.array): The distance from the constraint at which is_almost_active returns True. """ self.constraint_input_type = ConstraintInputType(constraint_input_type) @@ -54,8 +58,7 @@ def __init__(self, env: BenchmarkEnv, constraint_input_type: ConstraintInputType self.dim = env.action_space.shape[0] else: self.dim = env.observation_space.shape[0] + env.action_space.shape[0] - - # Only want to select specific dimensions. Implemented via a filter matrix + # Only want to select specific dimensions, implemented via a filter matrix. if active_dims is not None: if isinstance(active_dims, int): active_dims = [active_dims] @@ -63,20 +66,22 @@ def __init__(self, env: BenchmarkEnv, constraint_input_type: ConstraintInputType self.dim = len(active_dims) else: self.constraint_filter = np.eye(self.dim) - if tolerance is not None: self.tolerance = np.array(tolerance, ndmin=1) assert self.tolerance.shape[0] == len(active_dims) else: self.tolerance = np.array(np.zeros(self.dim), ndmin=1) - self.num_input_constraints = None def reset(self): - """Clears up the constraint state (if any).""" + """Clears up the constraint state (if any). 
+ + """ pass - def get_symbolic_model(self, env): + def get_symbolic_model(self, + env + ): """Gets the symbolic form of the constraint function. Args: @@ -84,10 +89,13 @@ def get_symbolic_model(self, env): Returns: obj: The symbolic form of the constraint. + """ return NotImplementedError - def get_value(self, env): + def get_value(self, + env + ): """Gets the constraint function value. Args: @@ -100,7 +108,9 @@ def get_value(self, env): env_value = self.get_env_constraint_var(env) return np.squeeze(self.sym_func(np.array(env_value, ndmin=1))) - def is_violated(self, env): + def is_violated(self, + env + ): """Checks if constraint is violated. Args: @@ -108,25 +118,30 @@ def is_violated(self, env): Returns: bool: Whether the constraint was violeted. + """ c_value = self.get_value(env) flag = np.any(np.greater(c_value, 0.)) return bool(flag) - def is_almost_active(self, env): + def is_almost_active(self, + env + ): """Checks if constraint is nearly violated. This is checked by using a slack variable (from init args). This can be used for reward shaping/constraint penalty in RL methods. - """ + """ if not hasattr(self, "tolerance") or self.tolerance is None: return False c_value = self.get_value(env) flag = np.any(np.greater(c_value + self.tolerance, 0.)) return bool(flag) - def get_env_constraint_var(self, env: BenchmarkEnv): + def get_env_constraint_var(self, + env: BenchmarkEnv + ): if self.constraint_input_type == ConstraintInputType.STATE: return env.state elif self.constraint_input_type == ConstraintInputType.INPUT: @@ -137,11 +152,19 @@ def get_env_constraint_var(self, env: BenchmarkEnv): raise NotImplementedError("Constraint input type not implemented.") -# TODO: Add these constraints to benchmark constraints class QuadraticContstraint(Constraint): - """Constraint class for constraints of the form x.T @ P @ x <= b.""" + """Constraint class for constraints of the form x.T @ P @ x <= b. + + """ - def __init__(self, env: BenchmarkEnv, P: np.ndarray, b: float, constraint_input_type: ConstraintInputType, active_dims=None, tolerance=None): + def __init__(self, + env: BenchmarkEnv, + P: np.ndarray, + b: float, + constraint_input_type: ConstraintInputType, + active_dims=None, + tolerance=None + ): """Initializes the class. An example of how to specify in YAML, one would add: @@ -160,9 +183,9 @@ def __init__(self, env: BenchmarkEnv, P: np.ndarray, b: float, constraint_input_ P (np.array): The square matrix representing the quadratic. b (float): The scalar limit for the quadatic constraint. constraint_input_type (ConstraintInputType): Specifies the input type to the constraint as a constraint - that acts on the state, input, or both - active_dims (list of ints): Filters the constraint to only act only select certian dimensions - tolerance (list or np.array): The distance from the constraint at which is_almost_active returns True + that acts on the state, input, or both. + active_dims (list of ints): Filters the constraint to only act only select certian dimensions. + tolerance (list or np.array): The distance from the constraint at which is_almost_active returns True. """ super().__init__(env, constraint_input_type, active_dims=active_dims, tolerance=tolerance) @@ -170,8 +193,7 @@ def __init__(self, env: BenchmarkEnv, P: np.ndarray, b: float, constraint_input_ self.P = P assert isinstance(b, float), ValueError("b is not a scalar!") self.b = b - self.num_constraints = 1 # always scalar - + self.num_constraints = 1 # Always scalar. 
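Numerically, `QuadraticContstraint.get_value()`/`is_violated()` above amount to evaluating c(x) = (F x)^T P (F x) - b and checking its sign, with F the `active_dims` row-selection filter. A plain-NumPy sketch (the CasADi symbolic path is omitted, and the helper name is hypothetical):

# Minimal numeric version of the quadratic constraint check:
# c(x) = (F x)^T P (F x) - b <= 0, with F selecting the active dimensions.
import numpy as np

def quadratic_constraint_value(x, P, b, active_dims=None):
    dim = x.shape[0]
    F = np.eye(dim) if active_dims is None else np.atleast_2d(np.eye(dim)[active_dims])
    z = F @ x
    return float(z.T @ P @ z - b)

x = np.array([0.3, 1.2, -0.1, 0.0])
P = np.eye(2)                       # constrain only dims 0 and 2
value = quadratic_constraint_value(x, P, b=0.25, active_dims=[0, 2])
print(value, value > 0.0)           # -0.15 False -> not violated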
self.sym_func = lambda x: x.T @ self.constraint_filter.T @ self.P @ self.constraint_filter @ x - self.b def get_symbolic_model(self): @@ -185,12 +207,21 @@ def get_symbolic_model(self): class LinearConstraint(Constraint): - """Constraint class for constraints of the form A @ x <= b""" + """Constraint class for constraints of the form A @ x <= b. + + """ - def __init__(self, env: BenchmarkEnv, A: np.ndarray, b: np.ndarray, constraint_input_type: ConstraintInputType, active_dims=None, tolerance=None): + def __init__(self, + env: BenchmarkEnv, + A: np.ndarray, + b: np.ndarray, + constraint_input_type: ConstraintInputType, + active_dims=None, + tolerance=None + ): """Initialize the class. - An example of how to specify in YAML, one would add: + An example of how to specify this in YAML: constraints: linear_constraint: @@ -206,7 +237,7 @@ def __init__(self, env: BenchmarkEnv, A: np.ndarray, b: np.ndarray, constraint_i env (BenchmarkEnv): The environment to constraint. A (np.array or list): A matrix of the constraint (self.num_constraints by self.dim). b (np.array or list): b matrix of the constraint (1D array self.num_constraints) - constraint_input_type (ConstraintInputType): Type of constraint. + constraint_input_type (ConstraintInputType): Type of constraint. active_dims (list or int): List specifying which dimensions the constraint is active for. tolerance (float): The distance at which is_almost_active(env) triggers. @@ -219,7 +250,6 @@ def __init__(self, env: BenchmarkEnv, A: np.ndarray, b: np.ndarray, constraint_i assert b.shape[0] == A.shape[0], ValueError("Dimension 0 of b does not match A!") self.b = b self.num_constraints = A.shape[0] - self.sym_func = lambda x: self.A @ self.constraint_filter @ x - self.b def get_symbolic_model(self): @@ -233,7 +263,9 @@ def get_symbolic_model(self): class BoundedConstraint(LinearConstraint): - """ Class for bounded constraints lb <= x <= ub as polytopic constraints -Ix + b <= 0 and Ix - b <= 0 """ + """ Class for bounded constraints lb <= x <= ub as polytopic constraints -Ix + b <= 0 and Ix - b <= 0. + + """ def __init__(self, env: BenchmarkEnv, @@ -244,7 +276,7 @@ def __init__(self, tolerance=None): """Initialize the constraint. - An example of how to specify in YAML, one would add: + An example of how to specify this in YAML: constraints: bounded_constraint: @@ -276,22 +308,28 @@ def __init__(self, def get_symbolic_constraint_models(constraint_list): - """Create list of symbolic models from list of constraints.""" + """Create list of symbolic models from list of constraints. + + """ symbolic_models = [con.get_symbolic_model() for con in constraint_list] return symbolic_models class ConstraintList: - """Collates a (ordered) list of constraints.""" + """Collates a (ordered) list of constraints. - def __init__(self, constraints): + """ + + def __init__(self, + constraints + ): """Initialize the constraint list. Args: constraints: The list of constraints. 
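Box bounds lb <= x <= ub reduce to the `LinearConstraint` form by stacking the two one-sided inequalities; a hedged sketch of that standard reduction (not necessarily byte-for-byte what `BoundedConstraint` builds internally):

# lb <= x <= ub rewritten as A x - b <= 0 with A = [[-I], [I]], b = [-lb, ub].
import numpy as np

def bounds_to_linear(lower, upper):
    lower, upper = np.asarray(lower, dtype=float), np.asarray(upper, dtype=float)
    dim = lower.shape[0]
    A = np.vstack([-np.eye(dim), np.eye(dim)])
    b = np.concatenate([-lower, upper])
    return A, b

A, b = bounds_to_linear(lower=[-2.0, -0.2], upper=[2.0, 0.2])
x = np.array([1.5, 0.1])
c = A @ x - b                        # all entries <= 0 -> x is inside the box
print(c, bool(np.any(c > 0.0)))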
""" - self.constraints = constraints # Todo: this could probably be names better since we often see constraints.constraints + self.constraints = constraints self.num_constraints = sum([con.num_constraints for con in self.constraints]) self.state_constraints = [con for con in self.constraints if con.constraint_input_type == ConstraintInputType.STATE] self.num_state_constraints = sum([con.num_constraints for con in self.state_constraints]) @@ -310,23 +348,30 @@ def __len__(self): return len(self.constraints) def get_all_symbolic_models(self): - """Return all the symbolic models the constraints.""" + """Return all the symbolic models the constraints. + + """ return get_symbolic_constraint_models(self.constraints) def get_state_constraint_symbolic_models(self): - """Return only the constraints that act on the state.""" + """Return only the constraints that act on the state. + + """ return get_symbolic_constraint_models(self.state_constraints) def get_input_constraint_symbolic_models(self): - """Return only the constraints that act on the input.""" + """Return only the constraints that act on the input. + + """ return get_symbolic_constraint_models(self.input_constraints) def get_input_and_state_constraint_symbolic_models(self): - """Return only the constraints that act on both state and inputs simultaneously.""" + """Return only the constraints that act on both state and inputs simultaneously. + + """ return get_symbolic_constraint_models(self.input_state_constraints) def get_stacked_symbolic_model(self, env): - # Todo: Can this be removed? """Gets the symbolic form of all constraints. Args: @@ -334,16 +379,19 @@ def get_stacked_symbolic_model(self, env): Returns: obj: The symbolic form of the constraint. + """ symbolic_models = [con.get_symbolic_model() for con in self.constraints] - X = env.symbolic.x_sym U = env.symbolic.u_sym stack_c_sym = cs.vertcat(*[func(X, U) for func in symbolic_models]) sym_func = cs.Function("constraints", [X, U], [stack_c_sym]) return sym_func - def get_values(self, env, only_state=False): + def get_values(self, + env, + only_state=False + ): """Gets all constraint function values. Args: @@ -359,7 +407,10 @@ def get_values(self, env, only_state=False): con_values = np.concatenate([con.get_value(env) for con in self.constraints]) return con_values - def get_violations(self, env, only_state=False): + def get_violations(self, + env, + only_state=False + ): """Gets all constraint violations. Args: @@ -375,7 +426,9 @@ def get_violations(self, env, only_state=False): flags = [con.is_violated(env) for con in self.constraints] return flags - def is_violated(self, env): + def is_violated(self, + env + ): """Checks if any of the constraints is violated. Args: @@ -388,11 +441,14 @@ def is_violated(self, env): flag = any([con.is_violated(env) for con in self.constraints]) return flag - def is_almost_active(self, env): + def is_almost_active(self, + env + ): """Checks if constraint is nearly violated. - This is checked by using a slack variable (from init args). - This can be used for reward shaping/constraint penalty in RL methods. + This is checked by using a slack variable (from init args) and can be used + for reward shaping/constraint penalty in RL methods. 
+ """ flag = any([con.is_almost_active(env) for con in self.constraints]) return flag @@ -405,18 +461,21 @@ def is_almost_active(self, env): } -def create_ConstraintList_from_dict(constraint_dict, available_constraints, env): +def create_ConstraintList_from_dict(constraint_dict, + available_constraints, + env + ): """Create a constraint list from a dict (or YAML output). Args: - constraint_dict (dict): Dict specifying the constraint parameters - available_constraints (dict): Dict of the constraints that are available - env (BenchmarkEnv): Env for which the constraints will be applied + constraint_dict (dict): Dict specifying the constraint parameters. + available_constraints (dict): Dict of the constraints that are available. + env (BenchmarkEnv): Env for which the constraints will be applied. + """ constraint_list = [] for name, cfg in constraint_dict.items(): assert name in available_constraints, "[ERROR]. constraint not in list of available constraints" con_class = available_constraints[name] constraint_list.append(con_class(env, **cfg)) - return ConstraintList(constraint_list) diff --git a/safe_control_gym/envs/disturbances.py b/safe_control_gym/envs/disturbances.py index f5412ea6e..f4d2d6506 100644 --- a/safe_control_gym/envs/disturbances.py +++ b/safe_control_gym/envs/disturbances.py @@ -1,13 +1,21 @@ """Disturbances. + """ import numpy as np from enum import Enum class Disturbance: - """Base class for disturbance or noise applied to inputs or dyanmics.""" + """Base class for disturbance or noise applied to inputs or dyanmics. + + """ - def __init__(self, env, dim, mask=None, **kwargs): + def __init__(self, + env, + dim, + mask=None, + **kwargs + ): self.np_random = env.np_random self.dim = dim self.mask = mask @@ -15,29 +23,50 @@ def __init__(self, env, dim, mask=None, **kwargs): self.mask = np.asarray(mask) assert self.dim == len(self.mask) - def reset(self, env): - """Default nothing.""" + def reset(self, + env + ): pass - def apply(self, target, env): - """Default is identity.""" + def apply(self, + target, + env + ): + """Default is identity. + + """ return target class DisturbanceList: - """Combine list of disturbances as one.""" + """Combine list of disturbances as one. + + """ + + def __init__(self, + disturbances + ): + """Initialization of the list of disturbances. - def __init__(self, disturbances): - """Initialization of the list of disturbances.""" + """ self.disturbances = disturbances - def reset(self, env): - """Sequentially reset disturbances.""" + def reset(self, + env + ): + """Sequentially reset disturbances. + + """ for disturb in self.disturbances: disturb.reset(env) - def apply(self, target, env): - """Sequentially (in order) apply disturbances.""" + def apply(self, + target, + env + ): + """Sequentially apply disturbances. + + """ disturbed = target for disturb in self.disturbances: disturbed = disturb.apply(disturbed, env) @@ -47,31 +76,46 @@ def apply(self, target, env): class ImpulseDisturbance(Disturbance): """Impulse applied during a short time interval. 
- Example impulses: - * single step, square (duration=1, decay_rate=1): ______|-|_______ - * multiple step, square (duration>1, decay_rate=1): ______|-----|_____ - * multiple step, triangle (duration>1, decay_rate<1): ______/\_____ + Examples: + * single step, square (duration=1, decay_rate=1): ______|-|_______ + * multiple step, square (duration>1, decay_rate=1): ______|-----|_____ + * multiple step, triangle (duration>1, decay_rate<1): ______/\_____ + """ - def __init__(self, env, dim, mask=None, magnitude=1, step_offset=None, duration=1, decay_rate=1, **kwargs): + def __init__(self, + env, + dim, + mask=None, + magnitude=1, + step_offset=None, + duration=1, + decay_rate=1, + **kwargs + ): super().__init__(env, dim, mask) self.magnitude = magnitude self.step_offset = step_offset self.max_step = int(env.EPISODE_LEN_SEC / env.CTRL_TIMESTEP) - # specify shape of impulse + # Specify shape of the impulse. assert duration > 1 assert decay_rate > 0 and decay_rate <= 1 self.duration = duration self.decay_rate = decay_rate - def reset(self, env): + def reset(self, + env + ): if self.step_offset is None: self.current_step_offset = self.np_random.randint(self.max_step) else: self.current_step_offset = self.step_offset self.current_peak_step = int(self.current_step_offset + self.duration / 2) - def apply(self, target, env): + def apply(self, + target, + env + ): noise = 0 if env.control_step_counter > self.step_offset: peak_offset = np.abs(env.control_step_counter - self.current_peak_step) @@ -89,22 +133,35 @@ def apply(self, target, env): class StepDisturbance(Disturbance): """Constant disturbance at all time steps (but after offset). - disturbance is applied after offset step (which can be randomized or given): _______|--------- + Applied after offset step (randomized or given): _______|--------- + """ - def __init__(self, env, dim, mask=None, magnitude=1, step_offset=None, **kwargs): + def __init__(self, + env, + dim, + mask=None, + magnitude=1, + step_offset=None, + **kwargs + ): super().__init__(env, dim, mask) self.magnitude = magnitude self.step_offset = step_offset self.max_step = int(env.EPISODE_LEN_SEC / env.CTRL_TIMESTEP) - def reset(self, env): + def reset(self, + env + ): if self.step_offset is None: self.current_step_offset = self.np_random.randint(self.max_step) else: self.current_step_offset = self.step_offset - def apply(self, target, env): + def apply(self, + target, + env + ): noise = 0 if env.control_step_counter > self.step_offset: noise = self.magnitude @@ -115,21 +172,31 @@ def apply(self, target, env): class WhiteNoise(Disturbance): - """i.i.d Gaussian noise per time step.""" + """I.i.d Gaussian noise per time step. + + """ - def __init__(self, env, dim, mask=None, std=1.0, **kwargs): + def __init__(self, + env, + dim, + mask=None, + std=1.0, + **kwargs + ): super().__init__(env, dim, mask) - # i.i.d gaussian variance + # I.i.d gaussian variance. if isinstance(std, float): self.std = np.asarray([std] * self.dim) elif isinstance(std, list): self.std = np.asarray(std) else: raise ValueError("[ERROR] WhiteNoise.__init__(): std must be specified as a float or list.") - assert self.dim == len(self.std), "std shape should be the same as dim." - def apply(self, target, env): + def apply(self, + target, + env + ): noise = self.np_random.normal(0, self.std, size=self.dim) if self.mask is not None: noise *= self.mask @@ -138,22 +205,36 @@ def apply(self, target, env): class BrownianNoise(Disturbance): - """Simple random walk noise.""" + """Simple random walk noise. 
+ + """ def __init__(self): super().__init__() class PeriodicNoise(Disturbance): - """Sinuisodal noise.""" + """Sinuisodal noise. + + """ - def __init__(self, env, dim, mask=None, scale=1.0, frequency=1.0, **kwargs): + def __init__(self, + env, + dim, + mask=None, + scale=1.0, + frequency=1.0, + **kwargs + ): super().__init__(env, dim) - # sin function parameters + # Sine function parameters. self.scale = scale self.frequency = frequency - def apply(self, target, env): + def apply(self, + target, + env + ): phase = self.np_random.uniform(low=-np.pi, high=np.pi, size=self.dim) t = env.step_counter noise = self.scale * self.np.sin(self.frequency * t + phase) @@ -167,9 +248,15 @@ class StateDependentDisturbance(Disturbance): """Time varying and state varying, e.g. friction. Here to provide an explicit form, can also enable friction in simulator directly. + """ - def __init__(self, env, dim, mask=None, **kwargs): + def __init__(self, + env, + dim, + mask=None, + **kwargs + ): super().__init__() diff --git a/safe_control_gym/envs/env_wrappers/record_episode_statistics.py b/safe_control_gym/envs/env_wrappers/record_episode_statistics.py index b80988d57..dbb8aa173 100644 --- a/safe_control_gym/envs/env_wrappers/record_episode_statistics.py +++ b/safe_control_gym/envs/env_wrappers/record_episode_statistics.py @@ -1,39 +1,48 @@ -from collections import deque -from copy import deepcopy import time - import gym import numpy as np +from collections import deque +from copy import deepcopy + from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnvWrapper class RecordEpisodeStatistics(gym.Wrapper): """ Keep track of episode length and returns per instantiated env - adapted from https://github.com/openai/gym/blob/38a1f630dc9815a567aaf299ae5844c8f8b9a6fa/gym/wrappers/record_episode_statistics.py + + Based on OpenAI's Gym wrapper record_episode_statistics.py + """ - def __init__(self, env, deque_size=None, **kwargs): + def __init__(self, + env, + deque_size=None, + **kwargs + ): super(RecordEpisodeStatistics, self).__init__(env) self.deque_size = deque_size - self.t0 = time.time() self.episode_return = 0.0 self.episode_length = 0 self.return_queue = deque(maxlen=deque_size) self.length_queue = deque(maxlen=deque_size) - - # other tracked stats + # Other tracked stats. self.episode_stats = {} self.accumulated_stats = {} self.queued_stats = {} - def add_tracker(self, name, init_value, mode="accumulate"): + def add_tracker(self, + name, + init_value, + mode="accumulate" + ): """Adds a specific stat to be tracked (accumulate|queue). - Different modes to track stats - - accumulate: rolling sum, e.g. total # of constraint violations during training. - - queue: finite, individual storage, e.g. returns, lengths, constraint costs. + Modes to track stats + * accumulate: rolling sum, e.g. total # of constraint violations during training. + * queue: finite, individual storage, e.g. returns, lengths, constraint costs. + """ self.episode_stats[name] = init_value if mode == "accumulate": @@ -43,30 +52,33 @@ def add_tracker(self, name, init_value, mode="accumulate"): else: raise Exception("Tracker mode not implemented.") - def reset(self, **kwargs): + def reset(self, + **kwargs + ): self.episode_return = 0.0 self.episode_length = 0 - # reset other tracked stats + # Reset other tracked stats. 
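The two `add_tracker` modes above are just two aggregation policies over per-episode stats; a compact standalone sketch of the same idea (the `StatTracker` class is hypothetical, not the wrapper itself):

# Two tracker modes, as in add_tracker(): "accumulate" keeps a rolling sum
# across episodes, "queue" keeps the last N per-episode values.
from collections import deque

class StatTracker:
    def __init__(self, deque_size=10):
        self.accumulated = {}
        self.queued = {}
        self.deque_size = deque_size

    def add_tracker(self, name, init_value, mode="accumulate"):
        if mode == "accumulate":
            self.accumulated[name] = init_value
        elif mode == "queue":
            self.queued[name] = deque(maxlen=self.deque_size)
        else:
            raise Exception("Tracker mode not implemented.")

    def end_episode(self, episode_stats):
        for name, value in episode_stats.items():
            if name in self.accumulated:
                self.accumulated[name] += value
            elif name in self.queued:
                self.queued[name].append(value)

tracker = StatTracker()
tracker.add_tracker("constraint_violation", 0, mode="accumulate")
tracker.add_tracker("constraint_values", 0.0, mode="queue")
tracker.end_episode({"constraint_violation": 3, "constraint_values": 0.7})
tracker.end_episode({"constraint_violation": 1, "constraint_values": 0.2})
print(tracker.accumulated, list(tracker.queued["constraint_values"]))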
for key in self.episode_stats: self.episode_stats[key] *= 0 return self.env.reset(**kwargs) - def step(self, action): + def step(self, + action + ): observation, reward, done, info = self.env.step(action) self.episode_return += reward self.episode_length += 1 - # add other tracked stats + # Add other tracked stats. for key in self.episode_stats: if key in info: self.episode_stats[key] += info[key] - if done: info['episode'] = {'r': self.episode_return, 'l': self.episode_length, 't': round(time.time() - self.t0, 6)} self.return_queue.append(self.episode_return) self.length_queue.append(self.episode_length) self.episode_return = 0.0 self.episode_length = 0 - # other tracked stats + # Other tracked stats. for key in self.episode_stats: info['episode'][key] = deepcopy(self.episode_stats[key]) if key in self.accumulated_stats: @@ -74,31 +86,40 @@ def step(self, action): elif key in self.queued_stats: self.queued_stats[key].append(deepcopy(self.episode_stats[key])) self.episode_stats[key] *= 0 - return observation, reward, done, info class VecRecordEpisodeStatistics(VecEnvWrapper): - """ A vectorized wrapper that records eposodic statistics - e.g. episode lengths, returns, constraint violations + """ A vectorized wrapper that records eposodic statistics. + + E.g. episode lengths, returns, constraint violations. + """ - def __init__(self, venv, deque_size=None, **kwargs): + def __init__(self, + venv, + deque_size=None, + **kwargs + ): super(VecRecordEpisodeStatistics, self).__init__(venv) self.deque_size = deque_size - self.episode_return = np.zeros(self.num_envs) self.episode_length = np.zeros(self.num_envs) self.return_queue = deque(maxlen=deque_size) self.length_queue = deque(maxlen=deque_size) - - # other tracked stats + # Other tracked stats. self.episode_stats = {} self.accumulated_stats = {} self.queued_stats = {} - def add_tracker(self, name, init_value, mode="accumulate"): - """Adds a specific stat to be tracked (accumulated).""" + def add_tracker(self, + name, + init_value, + mode="accumulate" + ): + """Adds a specific stat to be tracked (accumulated). + + """ self.episode_stats[name] = [init_value for _ in range(self.num_envs)] if mode == "accumulate": self.accumulated_stats[name] = init_value @@ -107,10 +128,12 @@ def add_tracker(self, name, init_value, mode="accumulate"): else: raise Exception("Tracker mode not implemented.") - def reset(self, **kwargs): + def reset(self, + **kwargs + ): self.episode_return = np.zeros(self.num_envs) self.episode_length = np.zeros(self.num_envs) - # reset other tracked stats + # Reset other tracked stats. for key in self.episode_stats: for i in range(self.num_envs): self.episode_stats[key][i] *= 0 @@ -121,7 +144,7 @@ def step_wait(self): for i, (r, d) in enumerate(zip(reward, done)): self.episode_return[i] += r self.episode_length[i] += 1 - # add other tracked stats + # Add other tracked stats. for key in self.episode_stats: if d: inf = info["n"][i]["terminal_info"] @@ -129,14 +152,13 @@ def step_wait(self): inf = info["n"][i] if key in inf: self.episode_stats[key][i] += inf[key] - if d: info["n"][i]['episode'] = {'r': self.episode_return[i], 'l': self.episode_length[i]} self.return_queue.append(deepcopy(self.episode_return[i])) self.length_queue.append(deepcopy(self.episode_length[i])) self.episode_return[i] = 0 self.episode_length[i] = 0 - # other tracked stats + # Other tracked stats. 
for key in self.episode_stats: info["n"][i]['episode'][key] = deepcopy(self.episode_stats[key][i]) if key in self.accumulated_stats: diff --git a/safe_control_gym/envs/env_wrappers/vectorized_env/__init__.py b/safe_control_gym/envs/env_wrappers/vectorized_env/__init__.py index 6c4e3b810..d4d01472f 100644 --- a/safe_control_gym/envs/env_wrappers/vectorized_env/__init__.py +++ b/safe_control_gym/envs/env_wrappers/vectorized_env/__init__.py @@ -1,5 +1,3 @@ -""" -""" import numpy as np import random import torch @@ -8,40 +6,45 @@ from safe_control_gym.envs.env_wrappers.vectorized_env.subproc_vec_env import SubprocVecEnv -def make_env_fn(env_func, env_config, seed, rank): - """higher-order function for env init func +def make_env_fn(env_func, + env_config, + seed, + rank): + """Higher-order function for env init func. Args: - env_func (function): partial function that can accept args - env_config (dict): env init args - seed (int): base seed - rank (int): unique seed for each parallel env + env_func (function): partial function that can accept args. + env_config (dict): env init args. + seed (int): base seed. + rank (int): unique seed for each parallel env. Returns: - _thunk: env-constructing func + _thunk: env-constructing func. + """ def _thunk(): - # do not set seed i if 0 (e.g. for evaluation) + # Do not set seed i if 0 (e.g. for evaluation). if seed > 0: e_seed = seed + rank - random.seed(e_seed) np.random.seed(e_seed) torch.manual_seed(e_seed) - env = env_func(**env_config) env.seed(e_seed) env.action_space.seed(e_seed) else: env = env_func(**env_config) return env - return _thunk -def make_vec_envs(env_func, env_configs=None, batch_size=1, n_processes=1, seed=0): - """Produce envs with parallel rollout abilities +def make_vec_envs(env_func, + env_configs=None, + batch_size=1, + n_processes=1, + seed=0): + """Produce envs with parallel rollout abilities. Args: env_func (function): partial function that can accept args. @@ -51,7 +54,8 @@ def make_vec_envs(env_func, env_configs=None, batch_size=1, n_processes=1, seed= seed (int): base seed for the run. Returns: - VecEnv: (wrapped) parallel envs + VecEnv: (wrapped) parallel envs. + """ if env_configs is None: env_configs = [{}] * batch_size @@ -59,5 +63,5 @@ def make_vec_envs(env_func, env_configs=None, batch_size=1, n_processes=1, seed= if n_processes > 1: return SubprocVecEnv(env_fns, n_workers=n_processes) else: - # e.g. can use in evaluation (with seed -1) - return DummyVecEnv(env_fns) \ No newline at end of file + # E.g. can use in evaluation (with seed -1). + return DummyVecEnv(env_fns) diff --git a/safe_control_gym/envs/env_wrappers/vectorized_env/dummy_vec_env.py b/safe_control_gym/envs/env_wrappers/vectorized_env/dummy_vec_env.py index dccefd714..51727f37d 100644 --- a/safe_control_gym/envs/env_wrappers/vectorized_env/dummy_vec_env.py +++ b/safe_control_gym/envs/env_wrappers/vectorized_env/dummy_vec_env.py @@ -1,32 +1,40 @@ import copy import numpy as np -from safe_control_gym.utils.utils import get_random_state, set_random_state from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnv from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env_utils import _flatten_list, _flatten_obs +from safe_control_gym.utils.utils import get_random_state, set_random_state class DummyVecEnv(VecEnv): - """Single thread env (allow multiple envs sequentially) + """Single thread env (allow multiple envs sequentially). 
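The higher-order `make_env_fn` above exists so each parallel env gets a distinct but reproducible seed (`seed + rank`); a trimmed sketch of the same closure pattern, with a hypothetical `DummyEnv` standing in for `env_func`:

# Closure pattern from make_env_fn: bind (seed + rank) per worker so parallel
# envs are reproducible but decorrelated. DummyEnv is only a stand-in.
import random
import numpy as np

class DummyEnv:
    def __init__(self):
        self.rng = None
    def seed(self, s):
        self.rng = np.random.RandomState(s)
    def reset(self):
        return self.rng.uniform(-0.05, 0.05, size=4)

def make_env_fn(env_func, seed, rank):
    def _thunk():
        e_seed = seed + rank
        random.seed(e_seed)
        np.random.seed(e_seed)
        env = env_func()
        env.seed(e_seed)
        return env
    return _thunk

envs = [make_env_fn(DummyEnv, seed=2, rank=r)() for r in range(4)]
print([env.reset().round(3) for env in envs])   # distinct, reproducible initial states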
- original version from baseline is bad - adapted from https://github.com/ShangtongZhang/DeepRL/blob/master/deep_rl/component/envs.py """ - def __init__(self, env_fns): + def __init__(self, + env_fns + ): + """ + + """ self.envs = [fn() for fn in env_fns] env = self.envs[0] - - VecEnv.__init__(self, len(env_fns), env.observation_space, - env.action_space) - + VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space) self.actions = None self.closed = False - def step_async(self, actions): + def step_async(self, + actions + ): + """ + + """ self.actions = actions def step_wait(self): + """ + + """ results = [] for i in range(self.num_envs): obs, rew, done, info = self.envs[i].step(self.actions[i]) @@ -37,11 +45,13 @@ def step_wait(self): info["terminal_observation"] = end_obs info["terminal_info"] = end_info results.append([obs, rew, done, info]) - obs, rews, dones, infos = zip(*results) return _flatten_obs(obs), np.array(rews), np.array(dones), {"n": infos} def reset(self): + """ + + """ results = [] for env in self.envs: results.append(env.reset()) @@ -49,6 +59,9 @@ def reset(self): return _flatten_obs(obs), {"n": infos} def close(self): + """ + + """ for env in self.envs: env.close() if self.viewer is not None: @@ -56,27 +69,54 @@ def close(self): self.closed = True def get_images(self): + """ + + """ return [env.render(mode='rgb_array') for env in self.envs] - def render(self, mode='human'): + def render(self, + mode='human' + ): + """ + + """ if self.num_envs == 1: return self.envs[0].render(mode=mode) else: return super().render(mode=mode) def get_env_random_state(self): + """ + + """ return [get_random_state()] - def set_env_random_state(self, worker_random_states): + def set_env_random_state(self, + worker_random_states + ): + """ + + """ set_random_state(worker_random_states[0]) - def get_attr(self, attr_name, indices=None): - """Return attribute from vectorized environment (see base class).""" + def get_attr(self, + attr_name, + indices=None + ): + """Return attribute from vectorized environment (see base class). + + """ target_envs = self._get_target_envs(indices) return [getattr(env_i, attr_name) for env_i in target_envs] - def set_attr(self, attr_name, values, indices=None): - """Set attribute inside vectorized environments (see base class).""" + def set_attr(self, + attr_name, + values, + indices=None + ): + """Set attribute inside vectorized environments (see base class). + + """ target_envs = self._get_target_envs(indices) assert len(target_envs) == len(values) for env_i, val_i in zip(target_envs, values): @@ -87,7 +127,9 @@ def env_method(self, method_args=None, method_kwargs=None, indices=None): - """Call instance methods of vectorized environments.""" + """Call instance methods of vectorized environments. 
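The auto-reset logic in `step_wait` above (on `done`, stash the final observation in `info["terminal_observation"]` and immediately reset) is the part downstream bootstrapping relies on; a condensed sketch with a toy env (names are illustrative):

# Auto-reset on done: the true final observation is kept in
# info["terminal_observation"] while the env itself is reset.
import copy
import numpy as np

class ToyEnv:
    def __init__(self, horizon=3):
        self.horizon, self.t = horizon, 0
    def reset(self):
        self.t = 0
        return np.array([0.0]), {}
    def step(self, action):
        self.t += 1
        obs, done = np.array([float(self.t)]), self.t >= self.horizon
        return obs, 1.0, done, {}

def step_and_autoreset(env, action):
    obs, rew, done, info = env.step(action)
    if done:
        end_obs = copy.deepcopy(obs)
        obs, _ = env.reset()
        info["terminal_observation"] = end_obs
    return obs, rew, done, info

env = ToyEnv()
env.reset()
for _ in range(3):
    obs, rew, done, info = step_and_autoreset(env, action=None)
print(obs, done, info)   # obs is post-reset; info carries the terminal observation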
+ + """ target_envs = self._get_target_envs(indices) if method_args is None: method_args = [[]] * len(target_envs) @@ -100,7 +142,12 @@ def env_method(self, in zip(target_envs, method_args, method_kwargs) ] - def _get_target_envs(self, indices): + def _get_target_envs(self, + indices + ): + """ + + """ assert indices is None or sorted( indices) == indices, "Indices must be ordered" indices = self._get_indices(indices) diff --git a/safe_control_gym/envs/env_wrappers/vectorized_env/subproc_vec_env.py b/safe_control_gym/envs/env_wrappers/vectorized_env/subproc_vec_env.py index 88b85c021..ba9f1d755 100644 --- a/safe_control_gym/envs/env_wrappers/vectorized_env/subproc_vec_env.py +++ b/safe_control_gym/envs/env_wrappers/vectorized_env/subproc_vec_env.py @@ -1,3 +1,10 @@ +"""Subprocess vectorized environments. + +See also: + * https://github.com/openai/baselines/blob/master/baselines/common/vec_env/subproc_vec_env.py + * https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/subproc_vec_env.py + +""" import copy import numpy as np import multiprocessing as mp @@ -8,24 +15,18 @@ class SubprocVecEnv(VecEnv): - """ Multiprocess envs - - references - - https://github.com/openai/baselines/blob/master/baselines/common/vec_env/subproc_vec_env.py - - https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/subproc_vec_env.py + """Multiprocess envs. + """ def __init__(self, env_fns, spaces=None, context="spawn", n_workers=1): self.waiting = False self.closed = False - nenvs = len(env_fns) self.n_workers = n_workers assert nenvs % n_workers == 0, "Number of envs must be divisible by number of workers to run in series" env_fns = np.array_split(env_fns, self.n_workers) - - # context is necessary for multiprocessing with cuda - # see https://pytorch.org/docs/stable/notes/multiprocessing.html + # Context is necessary for multiprocessing with CUDA, see pytorch.org/docs/stable/notes/multiprocessing.html ctx = mp.get_context(context) self.remotes, self.work_remotes = zip( *[ctx.Pipe() for _ in range(self.n_workers)]) @@ -36,16 +37,14 @@ def __init__(self, env_fns, spaces=None, context="spawn", n_workers=1): env_fn) in zip(self.work_remotes, self.remotes, env_fns) ] for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang + p.daemon = True # If the main process crashes, we should not cause things to hang. with clear_mpi_env_vars(): p.start() for remote in self.work_remotes: remote.close() - self.remotes[0].send(('get_spaces_spec', None)) observation_space, action_space = self.remotes[0].recv().x self.viewer = None - VecEnv.__init__(self, nenvs, observation_space, action_space) def step_async(self, actions): @@ -73,7 +72,9 @@ def reset(self): return _flatten_obs(obs), {"n": infos} def get_images(self): - """Called by parent `render` to support tiling images""" + """Called by parent `render` to support tiling images. + + """ self._assert_not_closed() for pipe in self.remotes: pipe.send(('render', None)) @@ -110,14 +111,18 @@ def set_env_random_state(self, worker_random_states): res = [remote.recv() for remote in self.remotes] def get_attr(self, attr_name, indices=None): - """Return attribute from vectorized environment (see base class).""" + """Return attribute from vectorized environment (see base class). 
+ + """ target_remotes, remote_env_indices = self._get_target_envs(indices) for remote, env_indices in zip(target_remotes, remote_env_indices): remote.send(("get_attr", (env_indices, attr_name))) return _flatten_list([remote.recv() for remote in target_remotes]) def set_attr(self, attr_name, values, indices=None): - """Set attribute inside vectorized environments (see base class).""" + """Set attribute inside vectorized environments (see base class). + + """ target_remotes, remote_env_indices, splits = self._get_target_envs( indices) value_splits = [] @@ -137,9 +142,10 @@ def env_method(self, method_args=None, method_kwargs=None, indices=None): - """Call instance methods of vectorized environments.""" - target_remotes, remote_env_indices, splits = self._get_target_envs( - indices) + """Call instance methods of vectorized environments. + + """ + target_remotes, remote_env_indices, splits = self._get_target_envs(indices) method_arg_splits, method_kwarg_splits = [], [] for i in range(len(splits) - 1): start, end = splits[i], splits[i + 1] @@ -161,6 +167,7 @@ def env_method(self, def _get_target_envs(self, indices): """ + Example: n_workers: 3 current envs: [0,1,2,3,4,5] @@ -169,28 +176,26 @@ def _get_target_envs(self, indices): remote_indices: [0,0,1,1] -> [0,1] splits: [0,2] -> [0,2,4] - remote_env_indices: [1,1,0,1] -> [1,1], [0,1] + remote_env_indices: [1,1,0,1] -> [1,1], [0,1] + """ assert indices is None or sorted( indices) == indices, "Indices must be ordered" indices = self._get_indices(indices) - remote_indices = [idx // self.n_workers for idx in indices] remote_env_indices = [idx % self.n_workers for idx in indices] - - remote_indices, splits = np.unique(np.array(remote_indices), - return_index=True) + remote_indices, splits = np.unique(np.array(remote_indices), return_index=True) target_remotes = [self.remotes[idx] for idx in remote_indices] remote_env_indices = np.split(np.array(remote_env_indices), splits[1:]) remote_env_indices = remote_env_indices.tolist() - splits = np.append(splits, [len(indices)]) return target_remotes, remote_env_indices, splits def worker(remote, parent_remote, env_fn_wrappers): - """worker func to execute vec_env commands""" + """Worker func to execute vec_env commands. + """ def step_env(env, action): ob, reward, done, info = env.step(action) if done: @@ -200,13 +205,12 @@ def step_env(env, action): info["terminal_observation"] = end_obs info["terminal_info"] = end_info return ob, reward, done, info - parent_remote.close() envs = [env_fn_wrapper() for env_fn_wrapper in env_fn_wrappers.x] try: while True: cmd, data = remote.recv() - # branch out for requests + # Branch out for requests. if cmd == 'step': remote.send( [step_env(env, action) for env, action in zip(envs, data)]) @@ -225,7 +229,7 @@ def step_env(env, action): remote.send(CloudpickleWrapper(get_random_state())) elif cmd == "set_random_state": set_random_state(data) - # just a placeholder to return something + # Placeholder for the return. remote.send(True) elif cmd == "get_attr": env_indices, attr_name = data diff --git a/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env.py b/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env.py index 3125329e0..449ea0658 100644 --- a/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env.py +++ b/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env.py @@ -1,12 +1,13 @@ -"""Adapted from OpenAI Baselines code +"""Adapted from OpenAI Baselines. 
-references -- https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_env.py -- https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/base_vec_env.py +See also: + * https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_env.py + * https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/base_vec_env.py """ import os import numpy as np + from abc import ABC, abstractmethod from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env_utils import tile_images @@ -15,35 +16,42 @@ class VecEnv(ABC): """An abstract asynchronous, vectorized environment. - Used to batch data from multiple copies of an environment, - so that each observation becomes an batch of observations, - and expected action is a batch of actions to be applied per-environment. + Used to batch data from multiple copies of an environment, so that each observation becomes a + batch of observations, and expected action is a batch of actions to be applied per-environment. + """ closed = False viewer = None metadata = {'render.modes': ['human', 'rgb_array']} - def __init__(self, num_envs, observation_space, action_space): + def __init__(self, + num_envs, + observation_space, + action_space + ): self.num_envs = num_envs self.observation_space = observation_space self.action_space = action_space @abstractmethod def reset(self): - """Reset all the environments and return an array of observations, - or a dict of observation arrays. + """Reset all the environments and return an array of observations, or a dict of observation arrays. + + If step_async is still doing work, that work will be cancelled and step_wait() should not + be called until step_async() is invoked again. - If step_async is still doing work, that work will be cancelled - and step_wait() should not be called until step_async() is invoked again. """ pass @abstractmethod - def step_async(self, actions): + def step_async(self, + actions + ): """Tell all the environments to start taking a step with the given actions. Call step_wait() to get the results of the step. You should not call this if a step_async run is already pending. + """ pass @@ -52,15 +60,18 @@ def step_wait(self): """Wait for the step taken with step_async(). Returns (obs, rews, dones, infos): - - obs: an array of observations, or a dict of arrays of observations. - - rews: an array of rewards - - dones: an array of "episode done" booleans - - infos: a sequence of info objects + - obs: an array of observations, or a dict of arrays of observations. + - rews: an array of rewards + - dones: an array of "episode done" booleans + - infos: a sequence of info objects + """ pass def close_extras(self): - """Clean up the extra resources. Only runs when not self.closed.""" + """Clean up the extra resources. Only runs when not self.closed. + + """ pass def close(self): @@ -71,13 +82,21 @@ def close(self): self.close_extras() self.closed = True - def step(self, actions): - """Step the environments synchronously.""" + def step(self, + actions + ): + """Step the environments synchronously. + + """ self.step_async(actions) return self.step_wait() - def render(self, mode='human'): - """Display environment via a viewer""" + def render(self, + mode='human' + ): + """Display environment via a viewer. 
+ + """ imgs = self.get_images() bigimg = tile_images(imgs) if mode == 'human': @@ -89,7 +108,9 @@ def render(self, mode='human'): raise NotImplementedError def get_images(self): - """Return RGB images from each environment""" + """Return RGB images from each environment. + + """ raise NotImplementedError @property @@ -107,12 +128,16 @@ def get_viewer(self): @abstractmethod def get_attr(self, attr_name, indices=None): - """Return attribute from vectorized environment.""" + """Return attribute from vectorized environment. + + """ pass @abstractmethod def set_attr(self, attr_name, values, indices=None): - """Set attribute inside vectorized environments.""" + """Set attribute inside vectorized environments. + + """ pass @abstractmethod @@ -121,10 +146,14 @@ def env_method(self, method_args=None, method_kwargs=None, indices=None): - """Call instance methods of vectorized environments.""" + """Call instance methods of vectorized environments. + + """ raise NotImplementedError() - def _get_indices(self, indices): + def _get_indices(self, + indices + ): """Convert a flexibly-typed reference to environment indices to an implied list of indices.""" if indices is None: indices = range(self.num_envs) @@ -136,11 +165,14 @@ def _get_indices(self, indices): class VecEnvWrapper(VecEnv): """An environment wrapper that applies to an entire batch of environments at once.""" - def __init__(self, venv, observation_space=None, action_space=None): + def __init__(self, + venv, + observation_space=None, + action_space=None + ): self.venv = venv super().__init__(num_envs=venv.num_envs, - observation_space=observation_space or - venv.observation_space, + observation_space=observation_space or venv.observation_space, action_space=action_space or venv.action_space) def step_async(self, actions): @@ -163,16 +195,25 @@ def render(self, mode='human'): def get_images(self): return self.venv.get_images() - def __getattr__(self, name): + def __getattr__(self, + name + ): if name.startswith('_'): raise AttributeError( "attempted to get missing private attribute '{}'".format(name)) return getattr(self.venv, name) - def get_attr(self, attr_name, indices=None): + def get_attr(self, + attr_name, + indices=None + ): return self.venv.get_attr(attr_name, indices) - def set_attr(self, attr_name, values, indices=None): + def set_attr(self, + attr_name, + values, + indices=None + ): return self.venv.set_attr(attr_name, values, indices) def env_method(self, diff --git a/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env_utils.py b/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env_utils.py index e719476c6..a59a663b1 100644 --- a/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env_utils.py +++ b/safe_control_gym/envs/env_wrappers/vectorized_env/vec_env_utils.py @@ -4,8 +4,8 @@ class CloudpickleWrapper(object): - """ Uses cloudpickle to serialize contents - (otherwise multiprocessing tries to use pickle) + """Uses cloudpickle to serialize contents and stop multiprocessing from using pickle. + """ def __init__(self, x): @@ -22,15 +22,14 @@ def __setstate__(self, ob): @contextlib.contextmanager def clear_mpi_env_vars(): - """ - from mpi4py import MPI will call MPI_Init by default. + """From mpi4py import MPI will call MPI_Init by default. - If the child process has MPI environment variables, - MPI will think that the child process is an MPI process - just like the parent and do bad things such as hang. 
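The env-var scrubbing that `clear_mpi_env_vars` performs is a generic stash-and-restore context manager; a sketch of the same pattern (the `scrub_env_vars` name and the exact prefix tuple are assumptions, though OMPI_/PMI_ are the usual MPI prefixes):

# Generic "temporarily remove matching environment variables" context manager,
# the same stash/restore pattern clear_mpi_env_vars uses.
import contextlib
import os

@contextlib.contextmanager
def scrub_env_vars(prefixes=("OMPI_", "PMI_")):
    removed = {}
    for key, value in list(os.environ.items()):
        if any(key.startswith(p) for p in prefixes):
            removed[key] = value
            del os.environ[key]
    try:
        yield
    finally:
        os.environ.update(removed)   # always restore, even on error

os.environ["OMPI_FAKE_VAR"] = "1"
with scrub_env_vars():
    print("inside:", "OMPI_FAKE_VAR" in os.environ)   # False
print("after:", os.environ["OMPI_FAKE_VAR"])           # restored to "1"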
+ If the child process has MPI environment variables, MPI will think that the child process + is an MPI process just like the parent and do bad things such as hang. - This context manager is a hacky way to clear those environment variables - temporarily such as when we are starting multiprocessing Processes. + This context manager is a hacky way to clear those environment variables temporarily such + as when we are starting multiprocessing Processes. + """ removed_environment = {} for k, v in list(os.environ.items()): @@ -45,16 +44,17 @@ def clear_mpi_env_vars(): def tile_images(img_nhwc): - """Tile N images into one big PxQ image + """Tile N images into one big PxQ image. (P,Q) are chosen to be as close as possible, and if N is square, then P=Q. Args: - img_nhwc: list or array of images, ndim=4 once turned into array - n = batch index, h = height, w = width, c = channel + img_nhwc: list or array of images, ndim=4 once turned into array, + n = batch index, h = height, w = width, c = channel. Returns: - img_Hh_Ww_c: ndarray with ndim=3 + img_Hh_Ww_c: ndarray with ndim=3. + """ img_nhwc = np.asarray(img_nhwc) N, h, w, c = img_nhwc.shape @@ -69,6 +69,9 @@ def tile_images(img_nhwc): def _flatten_obs(obs): + """ + + """ assert isinstance(obs, (list, tuple)) assert len(obs) > 0 @@ -80,11 +83,12 @@ def _flatten_obs(obs): def _unflatten_obs(obs): + """ + + """ assert isinstance(obs, (np.ndarray, dict)) - def split_batch(data): return [d[0] for d in np.split(data, len(data))] - if isinstance(obs, dict): keys = list(obs.keys()) unflat_obs = [split_batch(obs[k]) for k in keys] @@ -96,8 +100,10 @@ def split_batch(data): def _flatten_list(l): + """ + + """ assert isinstance(l, (list, tuple)) assert len(l) > 0 assert all([len(l_) > 0 for l_ in l]) - return [l__ for l_ in l for l__ in l_] diff --git a/safe_control_gym/envs/gym_control/cartpole.py b/safe_control_gym/envs/gym_control/cartpole.py index a81a2bc0c..bfc22f82d 100644 --- a/safe_control_gym/envs/gym_control/cartpole.py +++ b/safe_control_gym/envs/gym_control/cartpole.py @@ -3,12 +3,9 @@ Classic cart-pole system implemented by Rich Sutton et al. * http://incompleteideas.net/sutton/book/code/pole.c -Reference implementations: - * https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py - * https://github.com/bulletphysics/bullet3/blob/master/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py - -Todo: - * Should environment record all past values up to the last reset? 
+Also see: + * github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py + * github.com/bulletphysics/bullet3/blob/master/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py """ import os @@ -23,8 +20,7 @@ from safe_control_gym.envs.benchmark_env import BenchmarkEnv, Cost, Task from safe_control_gym.envs.disturbances import DisturbanceList, DISTURBANCE_TYPES from safe_control_gym.envs.constraints import create_ConstraintList_from_dict, GENERAL_CONSTRAINTS -from safe_control_gym.envs.gym_control.cartpole_constraints import CartPoleStateConstraint, CartPoleInputConstraint, \ - CartPoleSymmetricStateConstraint, CartPoleBoundConstraint +from safe_control_gym.envs.gym_control.cartpole_constraints import CartPoleStateConstraint, CartPoleInputConstraint, CartPoleSymmetricStateConstraint, CartPoleBoundConstraint from safe_control_gym.math_and_models.symbolic_systems import SymbolicModel @@ -78,16 +74,16 @@ class CartPole(BenchmarkEnv): DISTURBANCE_MODES = {"observation": {"dim": 4}, "action": {"dim": 1}, "dynamics": {"dim": 2}} INERTIAL_PROP_RAND_INFO = { - "pole_length": { # 1 + "pole_length": { # Nominal: 1 "distrib": "choice", "args": [[1, 5, 10]] }, - "cart_mass": { # 1 + "cart_mass": { # Nominal: 1 "distrib": "uniform", "low": 0.5, "high": 1.5 }, - "pole_mass": { # 0.1 + "pole_mass": { # Nominal: 0.1 "distrib": "uniform", "low": 0.05, "high": 0.15 @@ -198,12 +194,10 @@ def __init__(self, raise ValueError("[ERROR] in CartPole.__init__(), pyb_freq is not divisible by env_freq.") self.CTRL_TIMESTEP = 1. / self.CTRL_FREQ self.PYB_TIMESTEP = 1. / self.PYB_FREQ - # Set GUI and rendering constants. self.GUI = gui self.RENDER_HEIGHT = int(200) self.RENDER_WIDTH = int(320) - # Set the initial state. if init_state is None: self.INIT_X, self.INIT_X_DOT, self.INIT_THETA, self.INIT_THETA_DOT = np.zeros(4) @@ -220,11 +214,9 @@ def __init__(self, self.RANDOMIZED_INIT = randomized_init if init_state_randomization_info is not None: self.INIT_STATE_RAND_INFO = init_state_randomization_info - # Get physical properties from URDF (as default parameters). self.GRAVITY_ACC = 9.8 EFFECTIVE_POLE_LENGTH, POLE_MASS, CART_MASS = self._parse_urdf_parameters(self.URDF_PATH) - # Store prior parameters. if prior_prop is None: self.PRIOR_EFFECTIVE_POLE_LENGTH = EFFECTIVE_POLE_LENGTH @@ -236,7 +228,6 @@ def __init__(self, self.PRIOR_CART_MASS = prior_prop.get("cart_mass", CART_MASS) else: raise ValueError("[ERROR] in CartPole.__init__(), prior_prop incorrect format.") - # Store ground truth parameters. if inertial_prop is None: self.EFFECTIVE_POLE_LENGTH = EFFECTIVE_POLE_LENGTH @@ -252,20 +243,16 @@ def __init__(self, self.RANDOMIZED_INERTIAL_PROP = randomized_inertial_prop if inertial_prop_randomization_info is not None: self.INERTIAL_PROP_RAND_INFO = inertial_prop_randomization_info - # Store disturbance info. self.DISTURBANCES = disturbances self.adversary_disturbance = adversary_disturbance self.adversary_disturbance_scale = adversary_disturbance_scale - # Store constraint info self.CONSTRAINTS = constraints self.DONE_ON_VIOLATION = done_on_violation self.use_constraint_penalty = use_constraint_penalty self.constraint_penalty = constraint_penalty - self.VERBOSE = verbose - # Set up action and observation space. self._set_action_space() self._set_observation_space() @@ -277,10 +264,8 @@ def __init__(self, self.PYB_CLIENT = p.connect(p.DIRECT) # disable urdf caching for randomization via reloading urdf p.setPhysicsEngineParameter(enableFileCaching=0) - # Call BenchmarkEnv constructor. 
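For readers skimming this hunk, the INERTIAL_PROP_RAND_INFO table above drives domain randomization at reset. A minimal standalone sketch of how such a table can be sampled is given below; the real `_randomize_values_by_info` helper lives in BenchmarkEnv and may differ, so the function name and details here are illustrative only and not part of this patch.

    import numpy as np

    def sample_randomized_values(nominal, rand_info, rng=None):
        """Re-sample entries of `nominal` according to a randomization-info table."""
        rng = rng or np.random.default_rng()
        sampled = dict(nominal)
        for key, info in rand_info.items():
            if key not in sampled:
                continue
            if info["distrib"] == "uniform":
                sampled[key] = rng.uniform(info["low"], info["high"])
            elif info["distrib"] == "choice":
                sampled[key] = rng.choice(*info["args"])  # e.g. args=[[1, 5, 10]] for pole_length.
            else:
                raise ValueError("Unknown distribution: {}".format(info["distrib"]))
        return sampled

    # Example with the nominal values noted in the comments above:
    # sample_randomized_values({"pole_length": 1, "cart_mass": 1, "pole_mass": 0.1},
    #                          CartPole.INERTIAL_PROP_RAND_INFO)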
super().__init__(seed=seed, output_dir=output_dir, info_in_reset=info_in_reset, episode_len_sec=episode_len_sec, cost=cost) - # Create X_GOAL and U_GOAL references for the assigned task. self.TASK = Task(task) if task_info is not None: @@ -301,7 +286,7 @@ def __init__(self, ) self.X_GOAL = np.vstack([ - POS_REF[:, 0], # TODO Add offset? + POS_REF[:, 0], # TODO Add offset. VEL_REF[:, 0], np.zeros(POS_REF.shape[0]), np.zeros(VEL_REF.shape[0]) @@ -309,7 +294,9 @@ def __init__(self, @property def control_step_counter(self): - """Smiliar to `step_counter` but on control frequency.""" + """Similar to `step_counter` but at the control frequency. + + """ return self.step_counter // (self.PYB_FREQ // self.CTRL_FREQ) def step(self, action): @@ -363,40 +350,33 @@ def reset(self): self.current_preprocessed_action = None if self.adversary_disturbance is not None: self.adv_action = None - # PyBullet simulation reset. p.resetSimulation(physicsClientId=self.PYB_CLIENT) p.setGravity(0, 0, -self.GRAVITY_ACC, physicsClientId=self.PYB_CLIENT) p.setTimeStep(self.PYB_TIMESTEP, physicsClientId=self.PYB_CLIENT) p.setRealTimeSimulation(0, physicsClientId=self.PYB_CLIENT) - # Choose randomized or deterministic inertial properties. prop_values = {"pole_length": self.EFFECTIVE_POLE_LENGTH, "cart_mass": self.CART_MASS, "pole_mass": self.POLE_MASS} if self.RANDOMIZED_INERTIAL_PROP: prop_values = self._randomize_values_by_info(prop_values, self.INERTIAL_PROP_RAND_INFO) if any(phy_quantity < 0 for phy_quantity in prop_values.values()): raise ValueError("[ERROR] in CartPole.reset(), negative randomized inertial properties.") - self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH = prop_values["pole_length"] self.OVERRIDDEN_CART_MASS = prop_values["cart_mass"] self.OVERRIDDEN_POLE_MASS = prop_values["pole_mass"] - - # refer to `slender rod` in https://en.wikipedia.org/wiki/List_of_moments_of_inertia + # See `slender rod`, https://en.wikipedia.org/wiki/List_of_moments_of_inertia. OVERRIDDEN_POLE_INERTIA = (1 / 12) * self.OVERRIDDEN_POLE_MASS * (2 * self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH)**2 - # Load the cartpole with new urdf. override_urdf_tree = self._create_urdf(self.URDF_PATH, length=self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH, inertia=OVERRIDDEN_POLE_INERTIA) self.override_path = os.path.join(self.output_dir, "pid-{}_id-{}_cartpole.urdf".format(os.getpid(), self.id)) override_urdf_tree.write(self.override_path) - self.CARTPOLE_ID = p.loadURDF( self.override_path, basePosition=[0, 0, 0], # flags = p.URDF_USE_INERTIA_FROM_FILE, physicsClientId=self.PYB_CLIENT) - # remove cache file after loading into pybullet + # Remove cache file after loading it into PyBullet. os.remove(self.override_path) - # Cartpole settings. for i in [-1, 0, 1]: # Slider, cart, and pole. p.changeDynamics(self.CARTPOLE_ID, linkIndex=i, linearDamping=0, angularDamping=0, physicsClientId=self.PYB_CLIENT) @@ -413,17 +393,14 @@ def reset(self): linkIndex=1, # Pole. mass=self.OVERRIDDEN_POLE_MASS, physicsClientId=self.PYB_CLIENT) - # Randomize initial state. init_values = {"init_x": self.INIT_X, "init_x_dot": self.INIT_X_DOT, "init_theta": self.INIT_THETA, "init_theta_dot": self.INIT_THETA_DOT} if self.RANDOMIZED_INIT: init_values = self._randomize_values_by_info(init_values, self.INIT_STATE_RAND_INFO) - OVERRIDDEN_INIT_X = init_values["init_x"] OVERRIDDEN_INIT_X_DOT = init_values["init_x_dot"] OVERRIDDEN_INIT_THETA = init_values["init_theta"] OVERRIDDEN_INIT_THETA_DOT = init_values["init_theta_dot"] - p.resetJointState( self.CARTPOLE_ID, jointIndex=0, # Slider-to-cart joint.
@@ -436,15 +413,12 @@ def reset(self): targetValue=OVERRIDDEN_INIT_THETA, targetVelocity=OVERRIDDEN_INIT_THETA_DOT, physicsClientId=self.PYB_CLIENT) - # Compute state (x, x_dot, theta, theta_dot). self.state = np.hstack( (p.getJointState(self.CARTPOLE_ID, jointIndex=0, physicsClientId=self.PYB_CLIENT)[0:2], p.getJointState(self.CARTPOLE_ID, jointIndex=1, physicsClientId=self.PYB_CLIENT)[0:2])) - # Debug visualization if GUI enabled self.line = None - # Return either an observation and dictionary or just the observation. if self.INFO_IN_RESET: return self._get_observation(), self._get_reset_info() @@ -483,20 +457,26 @@ def render(self, mode='human'): return np.reshape(rgb, (h, w, 4)) def close(self): - """Clean up the environment and PyBullet connection.""" + """Clean up the environment and PyBullet connection. + + """ if self.PYB_CLIENT >= 0: p.disconnect(physicsClientId=self.PYB_CLIENT) self.PYB_CLIENT = -1 def set_adversary_control(self, action): - """Sets disturbance by an adversary controller, called before (each) step().""" + """Sets disturbance by an adversary controller, called before (each) step(). + + """ if self.adversary_disturbance is not None: clipped_adv_action = np.clip(action, self.adversary_action_space.low, self.adversary_action_space.high) self.adv_action = clipped_adv_action * self.adversary_disturbance_scale def _setup_disturbances(self): - """Sets up scaling and actions space for an adversarial disturbance.""" - # default no passive disturbance + """Sets up scaling and actions space for an adversarial disturbance. + + """ + # Default: no passive disturbances. self.disturbances = {} if self.DISTURBANCES is not None: @@ -504,44 +484,41 @@ def _setup_disturbances(self): assert mode in self.DISTURBANCE_MODES, "[ERROR] in Cartpole._setup_disturbances(), disturbance mode not available." disturb_list = [] shared_args = self.DISTURBANCE_MODES[mode] - - # each disturbance for this mode + # Each disturbance for the mode. for name, cfg in disturbs.items(): assert name in DISTURBANCE_TYPES, "[ERROR] in Cartpole._setup_disturbances(), disturbance type not available." disturb_cls = DISTURBANCE_TYPES[name] disturb = disturb_cls(self, **shared_args, **cfg) disturb_list.append(disturb) - - # combine as one for this mode + # Combine as one for the mode. self.disturbances[mode] = DisturbanceList(disturb_list) - - # adversary disturbance (set from outside of env, non-passive) + # Adversary disturbance (set from outside of env, active/non-passive). if self.adversary_disturbance is not None: assert self.adversary_disturbance in self.DISTURBANCE_MODES, "[ERROR] in Cartpole._setup_disturbances()" - shared_args = self.DISTURBANCE_MODES[self.adversary_disturbance] dim = shared_args["dim"] - # TODO: symmetric space can be problematic for obs ? self.adversary_action_space = spaces.Box(low=-1, high=1, shape=(dim,)) - # adversary obs same as protagonist + # Adversary obs are the same as those of the protagonist. self.adversary_observation_space = self.observation_space def _setup_constraints(self): - """Sets up a list (ConstraintList) of constraints.""" + """Sets up a list (ConstraintList) of constraints. 
+ + """ self.constraints = None self.num_constraints = 0 - if self.CONSTRAINTS is not None: self.constraints = create_ConstraintList_from_dict(self.CONSTRAINTS, self.AVAILABLE_CONSTRAINTS, self) self.num_constraints = self.constraints.num_constraints def _setup_symbolic(self): - """Creates symbolic (CasADi) models for dynamics, observation, and cost.""" + """Creates symbolic (CasADi) models for dynamics, observation, and cost. + + """ l, m, M = self.PRIOR_EFFECTIVE_POLE_LENGTH, self.PRIOR_POLE_MASS, self.PRIOR_CART_MASS Mm, ml = m + M, m * l g = self.GRAVITY_ACC dt = self.CTRL_TIMESTEP - # Input variables. x = cs.MX.sym('x') x_dot = cs.MX.sym('x_dot') @@ -551,42 +528,40 @@ def _setup_symbolic(self): U = cs.MX.sym('U') nx = 4 nu = 1 - # Dynamics. temp_factor = (U + ml * theta_dot**2 * cs.sin(theta)) / Mm theta_dot_dot = ((g * cs.sin(theta) - cs.cos(theta) * temp_factor) / (l * (4.0 / 3.0 - m * cs.cos(theta)**2 / Mm))) X_dot = cs.vertcat(x_dot, temp_factor - ml * theta_dot_dot * cs.cos(theta) / Mm, theta_dot, theta_dot_dot) - # Observation. Y = cs.vertcat(x, x_dot, theta, theta_dot) - # Define cost (quadratic form). Q = cs.MX.sym('Q', nx, nx) R = cs.MX.sym('R', nu, nu) Xr = cs.MX.sym('Xr', nx, 1) Ur = cs.MX.sym('Ur', nu, 1) cost_func = 0.5 * (X - Xr).T @ Q @ (X - Xr) + 0.5 * (U - Ur).T @ R @ (U - Ur) - # Define dynamics and cost dictionaries. dynamics = {"dyn_eqn": X_dot, "obs_eqn": Y, "vars": {"X": X, "U": U}} cost = {"cost_func": cost_func, "vars": {"X": X, "U": U, "Xr": Xr, "Ur": Ur, "Q": Q, "R": R}} - # Setup symbolic model. self.symbolic = SymbolicModel(dynamics=dynamics, cost=cost, dt=dt) def _set_action_space(self): - """Returns the action space of the environment.""" + """Returns the action space of the environment. + + """ self.action_scale = 10 self.action_threshold = 1 if self.NORMALIZED_RL_ACTION_SPACE else self.action_scale self.action_space = spaces.Box(low=-self.action_threshold, high=self.action_threshold, shape=(1,)) def _set_observation_space(self): - """Returns the observation space of the environment.""" + """Returns the observation space of the environment. + + """ # Angle at which to fail the episode (0.20943951023931953). self.theta_threshold_radians = 12 * (2 * math.pi / 360) # NOTE: different value in PyBullet gym (0.4) and OpenAI gym (2.4). self.x_threshold = 2.4 - # Limit set to 2x: i.e. a failing observation is still within bounds. OBS_BOUND = np.array([self.x_threshold * 2, np.finfo(np.float32).max, self.theta_threshold_radians * 2, np.finfo(np.float32).max]) self.observation_space = spaces.Box(-OBS_BOUND, OBS_BOUND, dtype=np.float32) @@ -606,14 +581,12 @@ def _preprocess_control(self, action): print("[WARNING]: action was clipped in CartPole._preprocess_control().") if self.NORMALIZED_RL_ACTION_SPACE: force = self.action_scale * force - - # apply disturbances + # Apply disturbances. if "action" in self.disturbances: force = self.disturbances["action"].apply(force, self) if self.adversary_disturbance == "action": force = force + self.adv_action - - # only use the scalar value. + # Only use the scalar value. force = force[0] return force @@ -627,7 +600,7 @@ def _advance_simulation(self, force): """ tab_force = None - # determine disturbance force + # Determine the disturbance force. 
passive_disturb = "dynamics" in self.disturbances adv_disturb = self.adversary_disturbance == "dynamics" if passive_disturb or adv_disturb: @@ -636,13 +609,12 @@ def _advance_simulation(self, force): tab_force = self.disturbances["dynamics"].apply(tab_force, self) if adv_disturb and self.adv_action is not None: tab_force = tab_force + self.adv_action - # clear adversary action, wait for next one + # Clear adversary's action, wait for the next one. self.adv_action = None - for _ in range(int(self.PYB_FREQ / self.CTRL_FREQ)): - # apply disturbance (by tabbing pole on x-z plane) + # Apply disturbance (by tapping the pole on the x-z plane). if tab_force is not None: - # convert 2D force to 3D for pybullet + # Convert 2D force to 3D for PyBullet. tab_force_3d = [float(tab_force[0]), 0.0, float(tab_force[1])] p.applyExternalForce( self.CARTPOLE_ID, @@ -654,7 +626,6 @@ def _advance_simulation(self, force): physicsClientId=self.PYB_CLIENT)[0], # exert force on pole center flags=p.WORLD_FRAME, physicsClientId=self.PYB_CLIENT) - # Debug visualization if self.GUI: center = np.asarray(p.getLinkState(self.CARTPOLE_ID, linkIndex=1, physicsClientId=self.PYB_CLIENT)[0]) @@ -663,16 +634,14 @@ def _advance_simulation(self, force): self.line = p.addUserDebugLine(center.tolist(), (center - ff).tolist(), lineColorRGB=[0, 0, 0], lineWidth=1) else: p.addUserDebugLine(center.tolist(), (center - ff).tolist(), lineColorRGB=[0, 0, 0], lineWidth=1, replaceItemUniqueId=self.line) - - # apply main control + # Apply control. p.setJointMotorControl2( self.CARTPOLE_ID, jointIndex=0, # Slider-to-cart joint. controlMode=p.TORQUE_CONTROL, force=force, physicsClientId=self.PYB_CLIENT) - - # step simulation and counter. + # Step simulation and counter. p.stepSimulation(physicsClientId=self.PYB_CLIENT) self.step_counter += 1 @@ -685,7 +654,7 @@ def _get_observation(self): """ if not np.array_equal(self.state, np.clip(self.state, self.observation_space.low, self.observation_space.high)) and self.VERBOSE: print("[WARNING]: observation was clipped in CartPole._get_observation().") - # apply observation disturbance + # Apply observation disturbance. obs = deepcopy(self.state) if "observation" in self.disturbances: obs = self.disturbances["observation"].apply(obs, self) @@ -701,11 +670,8 @@ def _get_reward(self): if self.COST == Cost.RL_REWARD: if self.constraints is not None and self.use_constraint_penalty and self.constraints.is_almost_active(self): return self.constraint_penalty - # Constant reward if episode not done (pole stays upright).
return 1.0 - - # # a more control-oriented rl rewarc # state = self.state # x, theta = state[0], state[2] # length = self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH @@ -715,7 +681,6 @@ def _get_reward(self): # # shape (*,) # reward = np.exp(-np.sum(np.square(ee_pos - goal_pos) * np.asarray([1.0, 1.0]), -1) / length**2) # return reward - if self.COST == Cost.QUADRATIC: if self.TASK == Task.STABILIZATION: return float(-1 * self.symbolic.loss(x=self.state, Xr=self.X_GOAL, u=self.current_preprocessed_action, Ur=self.U_GOAL, Q=self.Q, R=self.R)["l"]) @@ -752,10 +717,7 @@ def _get_info(self): info["constraint_values"] = self.constraints.get_values(self) violation = np.any(np.greater(info["constraint_values"], 0.)) info["constraint_violation"] = int(violation) - - # HACK: adding Timilimit flag if self.step_counter / self.PYB_FREQ >= self.EPISODE_LEN_SEC: - # should be `not done` instead of True, for hack just True x, _, theta, _ = self.state done = bool(x < -self.x_threshold or x > self.x_threshold or theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians) if self.constraints is not None: @@ -776,7 +738,7 @@ def _get_reset_info(self): info["x_reference"] = self.X_GOAL info["u_reference"] = self.U_GOAL if self.constraints is not None: - # NOTE: Cannot evaluate constraints on reset because there are no inputs. How to handle this? + # NOTE: Cannot evaluate constraints on reset/without inputs. info["constraint_values"] = self.constraints.get_values(self, only_state=True) return info @@ -793,11 +755,9 @@ def _parse_urdf_parameters(self, file_name): """ URDF_TREE = (etxml.parse(file_name)).getroot() - EFFECTIVE_POLE_LENGTH = 0.5 * float(URDF_TREE[3][0][0][0].attrib["size"].split(" ")[-1]) # Note: HALF length of pole. POLE_MASS = float(URDF_TREE[3][1][1].attrib["value"]) CART_MASS = float(URDF_TREE[1][2][0].attrib["value"]) - return EFFECTIVE_POLE_LENGTH, POLE_MASS, CART_MASS def _create_urdf(self, file_name, length=None, inertia=None): @@ -814,32 +774,30 @@ def _create_urdf(self, file_name, length=None, inertia=None): """ tree = etxml.parse(file_name) root = tree.getroot() - - # overwrite pod length + # Overwrite pod length. if length is not None: - # pole visual geometry box + # Pole visual geometry box. out = root[3][0][0][0].attrib["size"] out = " ".join(out.split(" ")[:-1] + [str(2 * length)]) root[3][0][0][0].attrib["size"] = out - # pole visual origin + # Pole visual origin. out = root[3][0][1].attrib["xyz"] out = " ".join(out.split(" ")[:-1] + [str(length)]) root[3][0][1].attrib["xyz"] = out - # pole inertial origin + # Pole inertial origin. out = root[3][1][0].attrib["xyz"] out = " ".join(out.split(" ")[:-1] + [str(length)]) root[3][1][0].attrib["xyz"] = out - # pole inertia + # Pole inertia. root[3][1][2].attrib["ixx"] = str(inertia) root[3][1][2].attrib["iyy"] = str(inertia) root[3][1][2].attrib["izz"] = str(0.0) - # pole collision geometry box + # Pole collision geometry box. out = root[3][2][0][0].attrib["size"] out = " ".join(out.split(" ")[:-1] + [str(2 * length)]) root[3][2][0][0].attrib["size"] = out - # pole collision origin + # Pole collision origin. 
out = root[3][2][1].attrib["xyz"] out = " ".join(out.split(" ")[:-1] + [str(length)]) root[3][2][1].attrib["xyz"] = out - return tree diff --git a/safe_control_gym/envs/gym_control/cartpole.yaml b/safe_control_gym/envs/gym_control/cartpole.yaml index a295b4e2f..0f5cd107a 100644 --- a/safe_control_gym/envs/gym_control/cartpole.yaml +++ b/safe_control_gym/envs/gym_control/cartpole.yaml @@ -3,26 +3,26 @@ ctrl_freq: 50 pyb_freq: 50 gui: False normalized_rl_action_space: False -# state initialization +# State initialization init_state: null randomized_init: True init_state_randomization_info: null -# randomization +# Randomization inertial_prop: null randomized_inertial_prop: False inertial_prop_randomization_info: null -# task +# Task task: stabilization task_info: null cost: rl_reward -# disturbance +# Disturbance disturbances: null adversary_disturbance: null adversary_disturbance_scale: 0.01 -# constraints +# Constraints constraints: null done_on_violation: False use_constraint_penalty: False constraint_penalty: -1 -# misc +# Misc verbose: False \ No newline at end of file diff --git a/safe_control_gym/envs/gym_control/cartpole_constraints.py b/safe_control_gym/envs/gym_control/cartpole_constraints.py index 87279d719..20238c6ff 100644 --- a/safe_control_gym/envs/gym_control/cartpole_constraints.py +++ b/safe_control_gym/envs/gym_control/cartpole_constraints.py @@ -6,6 +6,7 @@ from safe_control_gym.envs.constraints import BoundedConstraint, ConstraintInputType, Constraint + class CartPoleStateConstraint(BoundedConstraint): """Constrain the cart's state to the observation space bounds. @@ -20,25 +21,22 @@ def __init__(self, Args: env (BenchmarkEnv): The environment to constrain. - low (list): to overwrite the environment minimums - high (list): To overwrite the environment maximums + low (list): to overwrite the environment minimums. + high (list): To overwrite the environment maximums. """ if high is None: self.high = np.array([env.x_threshold * 2, # Limit set to 2x: i.e. a failing observation is still within bounds. np.finfo(np.float32).max, - env.theta_threshold_radians * 2, # Limit set to 2x: i.e. a failing observation is - # still within bounds. + env.theta_threshold_radians * 2, # Limit set to 2x: i.e. a failing observation is still within bounds. np.finfo(np.float32).max]) else: assert len(high) == env.observation_space.shape[0] self.high = high - if low is None: self.low = -1 * np.array([env.x_threshold * 2, # Limit set to 2x: i.e. a failing observation is still within bounds. np.finfo(np.float32).max, - env.theta_threshold_radians * 2, # Limit set to 2x: i.e. a failing - # observation is still within bounds. + env.theta_threshold_radians * 2, # Limit set to 2x: i.e. a failing observation is still within bounds. np.finfo(np.float32).max]) else: assert len(low) == env.observation_space.shape[0] @@ -96,7 +94,6 @@ def __init__(self, else: assert isinstance(high, float) self.high = high - if low is None: self.low = -1 * env.action_threshold else: @@ -130,13 +127,11 @@ def is_violated(self, env): flag = np.any(np.greater(c_value, 0.)) return flag -# TODO: Can this be formulated as a BoundedConstraint type? + class CartPoleSymmetricStateConstraint(BoundedConstraint): """Symmetric state bound constraint. - Note: This only counts as a single constraint? - # todo: A bounded constraint is technically 2 cosntraints, but is considered a single constraints in se_ppo...how to handle? 
- """ + """ def __init__(self, env, @@ -144,7 +139,8 @@ def __init__(self, constraint_input_type, active_dims=None, tolerance=None, - **kwrags): + **kwrags + ): assert bound is not None self.bound = np.array(bound, ndmin=1) super().__init__(env, @@ -154,14 +150,13 @@ def __init__(self, active_dims=active_dims, tolerance=tolerance, **kwrags) - self.num_constraints = self.bound.shape[0] # todo: how to fix this? + self.num_constraints = self.bound.shape[0] def get_value(self, env): c_value = np.abs(self.constraint_filter @ env.state) - self.bound return c_value -# TODO: Can this be formulated as a bounded constraint type? class CartPoleBoundConstraint(Constraint): """Implements bound constraint for state or input as c(x, u) <= 0. @@ -181,35 +176,28 @@ def __init__(self, slack=None ): super().__init__() - assert var_name == "state" or var_name == "input", ( - "Must specify state or input to constrain.") - assert low is not None or high is not None, ( - "Must specify either lower or upper bound.") - - # which variable (state, input) to constrain + assert var_name == "state" or var_name == "input", ("Must specify state or input to constrain.") + assert low is not None or high is not None, ("Must specify either lower or upper bound.") + # Which variable (state, input) to constrain. self.var_name = var_name if var_name == "state": var_dim = env.observation_space.shape[0] else: var_dim = env.action_space.shape[0] - # which fields of the variable to bound + # Which fields of the variable to bound. self.index = index - - # construct A matrix + # Construct A matrix. weight = np.eye(var_dim) if isinstance(index, int): weight = weight[[index]] elif isinstance(index, list): weight = weight[index] - self.low = np.array(low, ndmin=1) self.high = np.array(high, ndmin=1) - - # construct A', b' + # Construct A', b'. dim = 0 full_weight = [] full_threshold = [] - if self.low is not None: dim += len(self.low) full_weight.append(-1 * weight) @@ -218,13 +206,15 @@ def __init__(self, dim += len(self.high) full_weight.append(weight) full_threshold.append(-1 * self.high) - self.dim = dim self.full_weight = np.concatenate(full_weight) self.full_threshold = np.concatenate(full_threshold) self.slack = np.array(slack, ndmin=1) def get_symbolic_model(self, env): + """ + + """ X = env.symbolic.x_sym U = env.symbolic.u_sym if self.var_name == "state": @@ -238,7 +228,9 @@ def get_symbolic_model(self, env): return sym_func def get_value(self, env): + """ + """ if self.var_name == "state": var = env.state else: diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py index 21f7789dc..a6cb61944 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py +++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py @@ -37,7 +37,7 @@ class Physics(Enum): """ - PYB = "pyb" # Base PyBullet physics update + PYB = "pyb" # Base PyBullet physics update. DYN = "dyn" # Update with an explicit model of the dynamics. PYB_GND = "pyb_gnd" # PyBullet physics update with ground effect. PYB_DRAG = "pyb_drag" # PyBullet physics update with drag. 
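As a quick aside on CartPoleBoundConstraint above: assuming get_value evaluates full_weight @ v + full_threshold as the constructor suggests, a two-sided bound low <= v <= high is stacked into the form A'v + b' <= 0 with A' = [-I; I] and b' = [low; -high]. A small standalone check, not part of this patch:

    import numpy as np

    low, high = np.array([-2.4]), np.array([2.4])        # e.g. the cart position bound.
    weight = np.eye(1)
    full_weight = np.concatenate([-1 * weight, weight])  # A' = [-I; I]
    full_threshold = np.concatenate([low, -1 * high])    # b' = [low; -high]

    for v in (np.array([0.0]), np.array([2.5])):
        c_value = full_weight @ v + full_threshold
        print(v, np.any(np.greater(c_value, 0.)))        # Violated only when v is outside [-2.4, 2.4].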
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 8e5eb6036..ce0023e13 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -26,14 +26,12 @@ class Quadrotor(BaseAviary): Including symbolic model, constraints, randomization, adversarial disturbances, multiple cost functions, stabilization and trajectory tracking references. - TODO: - * Handling prior model param still out of sync with cartpole. """ AVAILABLE_CONSTRAINTS = { "quadrotor_state": QuadrotorStateConstraint, "quadrotor_input": QuadrotorInputConstraint, - "quadrotor_diag": QuadrotorDiagConstraint # Todo: Delete this. + "quadrotor_diag": QuadrotorDiagConstraint } AVAILABLE_CONSTRAINTS.update(deepcopy(GENERAL_CONSTRAINTS)) @@ -50,12 +48,12 @@ class Quadrotor(BaseAviary): } INERTIAL_PROP_RAND_INFO = { - "M": { # 0.27 + "M": { # Nominal: 0.27 'distrib': "uniform", 'low': 0.22, 'high': 0.32 }, - "Iyy": { # 1.4e-5 + "Iyy": { # Nominal: 1.4e-5 'distrib': "uniform", 'low': 1.3e-5, 'high': 1.5e-5 @@ -181,14 +179,11 @@ def __init__(self, ) self.CTRL_TIMESTEP = 1. / self.CTRL_FREQ self.PYB_TIMESTEP = 1. / self.PYB_FREQ - # Store initial state info. if init_state is None: - self.INIT_X, self.INIT_X_DOT, self.INIT_Z, self.INIT_Z_DOT, self.INIT_THETA, self.INIT_THETA_DOT = np.zeros( - 6) + self.INIT_X, self.INIT_X_DOT, self.INIT_Z, self.INIT_Z_DOT, self.INIT_THETA, self.INIT_THETA_DOT = np.zeros(6) elif self.QUAD_TYPE == QuadType.ONE_D: - self.INIT_X, self.INIT_X_DOT, self.INIT_THETA, self.INIT_THETA_DOT = np.zeros( - 4) + self.INIT_X, self.INIT_X_DOT, self.INIT_THETA, self.INIT_THETA_DOT = np.zeros(4) if isinstance(init_state, np.ndarray): self.INIT_Z, self.INIT_Z_DOT = init_state elif isinstance(init_state, dict): @@ -218,11 +213,8 @@ def __init__(self, self.INIT_STATE_RAND_INFO = init_state_randomization_info # Do NOT randomize x, x_dot, theta, theta_dot for the 1D quadrotor. if self.QUAD_TYPE == QuadType.ONE_D: - for init_name in [ - "init_x", "init_x_dot", "init_theta", "init_theta_dot" - ]: + for init_name in ["init_x", "init_x_dot", "init_theta", "init_theta_dot"]: self.INIT_STATE_RAND_INFO.pop(init_name, None) - # Decide whether to randomize the inertial properties and how (see info dictionary). self.RANDOMIZED_INERTIAL_PROP = randomized_inertial_prop if inertial_prop_randomization_info is not None: @@ -230,7 +222,6 @@ def __init__(self, # Do NOT randomize J for the 1D quadrotor. if self.QUAD_TYPE == QuadType.ONE_D: self.INERTIAL_PROP_RAND_INFO.pop("Iyy", None) - # Store disturbance info. self.DISTURBANCES = disturbances self.adversary_disturbance = adversary_disturbance @@ -240,11 +231,9 @@ def __init__(self, self.DISTURBANCE_MODES["observation"]["dim"] = 2 self.DISTURBANCE_MODES["action"]["dim"] = 1 self.DISTURBANCE_MODES["dynamics"]["dim"] = 1 - # Store constraint info self.CONSTRAINTS = constraints self.DONE_ON_VIOLATION = done_on_violation - self.VERBOSE = verbose # Call BaseAviary constructor. super().__init__(seed=seed, @@ -253,10 +242,8 @@ def __init__(self, cost=Cost(cost), gui=gui, freq=self.PYB_FREQ, - aggregate_phy_steps=int(self.PYB_FREQ - / self.CTRL_FREQ), + aggregate_phy_steps=int(self.PYB_FREQ / self.CTRL_FREQ), physics=Physics(physics)) - # Override inertial properties of passed as arguments. if inertial_prop is None: pass @@ -308,14 +295,14 @@ def __init__(self, # if self.QUAD_TYPE == QuadType.ONE_D: self.X_GOAL = np.vstack([ - POS_REF[:, 2], # TODO Add offset? 
+ POS_REF[:, 2], # TODO Add offset VEL_REF[:, 2] ]).transpose() elif self.QUAD_TYPE == QuadType.TWO_D: self.X_GOAL = np.vstack([ - POS_REF[:, 0], # TODO Add offset? + POS_REF[:, 0], # TODO Add offset VEL_REF[:, 0], - POS_REF[:, 2], # TODO Add offset? + POS_REF[:, 2], # TODO Add offset VEL_REF[:, 2], np.zeros(POS_REF.shape[0]), np.zeros(VEL_REF.shape[0]) @@ -362,7 +349,6 @@ def reset(self): self.current_preprocessed_action = None if self.adversary_disturbance is not None: self.adv_force = None - # Choose randomized or deterministic inertial properties. prop_values = { "M": self.MASS, @@ -373,11 +359,8 @@ def reset(self): prop_values, self.INERTIAL_PROP_RAND_INFO) if any(phy_quantity < 0 for phy_quantity in prop_values.values()): raise ValueError("[ERROR] in CartPole.reset(), negative randomized inertial properties.") - self.OVERRIDDEN_QUAD_MASS = prop_values["M"] - self.OVERRIDDEN_QUAD_INERTIA = [ - self.J[0, 0], prop_values["Iyy"], self.J[2, 2] - ] + self.OVERRIDDEN_QUAD_INERTIA = [self.J[0, 0], prop_values["Iyy"], self.J[2, 2]] # Override inertial properties. p.changeDynamics( self.DRONE_IDS[0], @@ -385,7 +368,6 @@ def reset(self): mass=self.OVERRIDDEN_QUAD_MASS, localInertiaDiagonal=self.OVERRIDDEN_QUAD_INERTIA, physicsClientId=self.PYB_CLIENT) - # Randomize initial state. init_values = { "init_x": self.INIT_X, @@ -396,28 +378,22 @@ def reset(self): "init_theta_dot": self.INIT_THETA_DOT, } if self.RANDOMIZED_INIT: - init_values = self._randomize_values_by_info( - init_values, self.INIT_STATE_RAND_INFO) - + init_values = self._randomize_values_by_info(init_values, self.INIT_STATE_RAND_INFO) OVERRIDDEN_INIT_X = init_values["init_x"] OVERRIDDEN_INIT_X_DOT = init_values["init_x_dot"] OVERRIDDEN_INIT_Z = init_values["init_z"] OVERRIDDEN_INIT_Z_DOT = init_values["init_z_dot"] OVERRIDDEN_INIT_THETA = init_values["init_theta"] OVERRIDDEN_INIT_THETA_DOT = init_values["init_theta_dot"] - - p.resetBasePositionAndOrientation( - self.DRONE_IDS[0], [OVERRIDDEN_INIT_X, 0, OVERRIDDEN_INIT_Z], - p.getQuaternionFromEuler([0, OVERRIDDEN_INIT_THETA, 0]), - physicsClientId=self.PYB_CLIENT) + p.resetBasePositionAndOrientation(self.DRONE_IDS[0], [OVERRIDDEN_INIT_X, 0, OVERRIDDEN_INIT_Z], + p.getQuaternionFromEuler([0, OVERRIDDEN_INIT_THETA, 0]), + physicsClientId=self.PYB_CLIENT) p.resetBaseVelocity(self.DRONE_IDS[0], [OVERRIDDEN_INIT_X_DOT, 0, OVERRIDDEN_INIT_Z_DOT], [0, OVERRIDDEN_INIT_THETA_DOT, 0], physicsClientId=self.PYB_CLIENT) - # Update BaseAviary internal variables before calling self._get_observation(). self._update_and_store_kinematic_information() - # Return either an observation and dictionary or just the observation. if self.INFO_IN_RESET: return self._get_observation(), self._get_reset_info() @@ -433,15 +409,14 @@ def render(self, mode='human'): ndarray: A multidimensional array with the RGB frame captured by PyBullet's camera. 
""" - [w, h, rgb, dep, seg - ] = p.getCameraImage(width=self.RENDER_WIDTH, - height=self.RENDER_HEIGHT, - shadow=1, - viewMatrix=self.CAM_VIEW, - projectionMatrix=self.CAM_PRO, - renderer=p.ER_TINY_RENDERER, - flags=p.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX, - physicsClientId=self.PYB_CLIENT) + [w, h, rgb, dep, seg] = p.getCameraImage(width=self.RENDER_WIDTH, + height=self.RENDER_HEIGHT, + shadow=1, + viewMatrix=self.CAM_VIEW, + projectionMatrix=self.CAM_PRO, + renderer=p.ER_TINY_RENDERER, + flags=p.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX, + physicsClientId=self.PYB_CLIENT) # Image.fromarray(np.reshape(rgb, (h, w, 4)), 'RGBA').show() return np.reshape(rgb, (h, w, 4)) @@ -467,48 +442,42 @@ def set_adversary_control(self, action): self.adv_action = clipped_adv_action * self.adversary_disturbance_scale def _setup_disturbances(self): - """Sets up scaling and actions space for an adversarial disturbance.""" - # default no passive disturbance + """Sets up scaling and actions space for an adversarial disturbance. + + """ + # Default: no passive disturbances. self.disturbances = {} if self.DISTURBANCES is not None: for mode, disturbs in self.DISTURBANCES.items(): - assert mode in self.DISTURBANCE_MODES, "[ERROR] in Quadrotor._setup_disturbances(), disturbance mode " \ - "not available." + assert mode in self.DISTURBANCE_MODES, "[ERROR] in Quadrotor._setup_disturbances(), disturbance mode not available." disturb_list = [] shared_args = self.DISTURBANCE_MODES[mode] - - # each disturbance for this mode + # Each disturbance for this mode. for name, cfg in disturbs.items(): - assert name in DISTURBANCE_TYPES, "[ERROR] in Quadrotor._setup_disturbances(), disturbance " \ - "type not available." + assert name in DISTURBANCE_TYPES, "[ERROR] in Quadrotor._setup_disturbances(), disturbance type not available." disturb_cls = DISTURBANCE_TYPES[name] disturb = disturb_cls(self, **shared_args, **cfg) disturb_list.append(disturb) - - # combine as one for this mode + # Combine as one for this mode. self.disturbances[mode] = DisturbanceList(disturb_list) - - # adversary disturbance (set from outside of env, non-passive) + # Adversary disturbance (set from outside of env, active/non-passive). if self.adversary_disturbance is not None: assert self.adversary_disturbance in self.DISTURBANCE_MODES, "[ERROR] in Cartpole._setup_disturbances()" - shared_args = self.DISTURBANCE_MODES[self.adversary_disturbance] dim = shared_args["dim"] - # TODO: symmetric space can be problematic for obs ? self.adversary_action_space = spaces.Box(low=-1, high=1, shape=(dim,)) - # adversary obs same as protagonist + # Adversary obs are the same as those of the protagonist. self.adversary_observation_space = self.observation_space def _setup_constraints(self): """Sets up a list (ConstraintList) of constraints. - # todo: Can we put this in BenchmarkEnv? + """ self.constraints = None self.num_constraints = 0 - if self.CONSTRAINTS is not None: self.constraints = create_ConstraintList_from_dict( self.CONSTRAINTS, self.AVAILABLE_CONSTRAINTS, self) @@ -536,7 +505,7 @@ def _setup_symbolic(self): U = cs.vertcat(T) # Define dynamics equations. X_dot = cs.vertcat(z_dot, T / m - g) - # Define observation equation + # Define observation equation. 
Y = cs.vertcat(z, z_dot) elif self.QUAD_TYPE == QuadType.TWO_D: nx, nu = 6, 2 @@ -562,8 +531,7 @@ def _setup_symbolic(self): R = cs.MX.sym('R', nu, nu) Xr = cs.MX.sym('Xr', nx, 1) Ur = cs.MX.sym('Ur', nu, 1) - cost_func = 0.5 * (X - Xr).T @ Q @ (X - Xr) + 0.5 * (U - Ur).T @ R @ ( - U - Ur) + cost_func = 0.5 * (X - Xr).T @ Q @ (X - Xr) + 0.5 * (U - Ur).T @ R @ (U - Ur) # Define dynamics and cost dictionaries. dynamics = {"dyn_eqn": X_dot, "obs_eqn": Y, "vars": {"X": X, "U": U}} cost = { @@ -577,8 +545,6 @@ def _setup_symbolic(self): "R": R } } - # todo: Add bound constraint attribute here from ConstraintList? - # Setup symbolic model. self.symbolic = SymbolicModel(dynamics=dynamics, cost=cost, dt=dt) @@ -586,8 +552,7 @@ def _set_action_space(self): """Returns the action space of the environment. Returns: - gym.spaces: The quadrotor environment's action space, - of size 1 or 2 depending on QUAD_TYPE. + gym.spaces: The quadrotor environment's action space, of size 1 or 2 depending on QUAD_TYPE. """ if self.NORMALIZED_RL_ACTION_SPACE: @@ -604,8 +569,7 @@ def _set_observation_space(self): """Returns the observation space of the environment. Returns: - gym.spaces: The bounded observation (state) space, - of size 2 or 6 depending on QUAD_TYPE. + gym.spaces: The bounded observation (state) space, of size 2 or 6 depending on QUAD_TYPE. """ self.x_threshold = 2 @@ -641,23 +605,19 @@ def _preprocess_control(self, action): """ if self.NORMALIZED_RL_ACTION_SPACE: - action = (1 + (0.1 * action)) * ( - (self.GRAVITY_ACC * self.MASS) / int(self.QUAD_TYPE)) + action = (1 + (0.1 * action)) * ((self.GRAVITY_ACC * self.MASS) / int(self.QUAD_TYPE)) thrust = np.clip(action, self.action_space.low, self.action_space.high) if not np.array_equal(thrust, np.array(action)) and self.VERBOSE: print( "[WARNING]: action was clipped in Quadrotor._preprocess_control()." ) self.current_preprocessed_action = thrust - - # apply disturbances + # Apply disturbances. if "action" in self.disturbances: thrust = self.disturbances["action"].apply(thrust, self) if self.adversary_disturbance == "action": thrust = thrust + self.adv_force - - pwm = cmd2pwm(thrust, self.PWM2RPM_SCALE, self.PWM2RPM_CONST, self.KF, - self.MIN_PWM, self.MAX_PWM) + pwm = cmd2pwm(thrust, self.PWM2RPM_SCALE, self.PWM2RPM_CONST, self.KF, self.MIN_PWM, self.MAX_PWM) rpm = pwm2rpm(pwm, self.PWM2RPM_SCALE, self.PWM2RPM_CONST) return rpm @@ -677,8 +637,7 @@ def _advance_simulation(self, action): """ disturb_force = None - - # determine disturbance force + # Determine disturbance force. passive_disturb = "dynamics" in self.disturbances adv_disturb = self.adversary_disturbance == "dynamics" if passive_disturb or adv_disturb: @@ -688,16 +647,15 @@ def _advance_simulation(self, action): disturb_force, self) if adv_disturb and self.adv_action is not None: disturb_force = disturb_force + self.adv_action - # clear adversary action, wait for next one + # Clear the adversary action, wait for the next one. self.adv_action = None - - # construct full disturbance force (3D) + # Construct full (3D) disturbance force. if disturb_force: if self.QUAD_TYPE == QuadType.ONE_D: - # only disturb on z direction + # Only disturb on z direction. disturb_force = [0, 0, float(disturb_force)] elif self.QUAD_TYPE == QuadType.TWO_D: - # only disturb on x-z plane + # Only disturb on x-z plane. disturb_force = [ float(disturb_force[0]), 0, float(disturb_force[1]) @@ -713,8 +671,7 @@ def _get_observation(self): """Returns the current observation (state) of the environment. 
Returns: - ndarray: The state of the quadrotor, - of size 2 or 6 depending on QUAD_TYPE. + ndarray: The state of the quadrotor, of size 2 or 6 depending on QUAD_TYPE. """ full_state = self._get_drone_state_vector(0) @@ -727,16 +684,13 @@ def _get_observation(self): self.state = np.hstack( [pos[0], vel[0], pos[2], vel[2], rpy[1], ang_v[1]]).reshape( (6,)) - if not np.array_equal( - self.state, - np.clip(self.state, self.observation_space.low, - self.observation_space.high)): + if not np.array_equal(self.state, + np.clip(self.state, self.observation_space.low, self.observation_space.high)): if self.GUI and self.VERBOSE: print( "[WARNING]: observation was clipped in Quadrotor._get_observation()." ) - - # apply observation disturbance + # Apply observation disturbance. obs = deepcopy(self.state) if "observation" in self.disturbances: obs = self.disturbances["observation"].apply(obs, self) @@ -751,29 +705,23 @@ def _get_reward(self): """ if self.COST == Cost.RL_REWARD: full_state = self._get_drone_state_vector(0) - pos, _, rpy, vel, ang_v, _ = np.split(full_state, - [3, 7, 10, 13, 16]) + pos, _, rpy, vel, ang_v, _ = np.split(full_state, [3, 7, 10, 13, 16]) if self.QUAD_TYPE == QuadType.ONE_D: - dist = np.linalg.norm( - np.array([0, 0, self.TASK_INFO["stabilization_goal"][1]]) - - pos)**2 + dist = np.linalg.norm(np.array([0, 0, self.TASK_INFO["stabilization_goal"][1]]) - pos)**2 elif self.QUAD_TYPE == QuadType.TWO_D: dist = np.linalg.norm( - np.array([ - self.TASK_INFO["stabilization_goal"][0], 0, - self.TASK_INFO["stabilization_goal"][1] - ]) - pos)**2 + np.array([self.TASK_INFO["stabilization_goal"][0], 0, + self.TASK_INFO["stabilization_goal"][1]]) - pos)**2 return -1 * dist if self.COST == Cost.QUADRATIC: state = self._get_observation() if self.TASK == Task.STABILIZATION: - return float( - -1 * self.symbolic.loss(x=state, - Xr=self.X_GOAL, - u=self.current_preprocessed_action, - Ur=self.U_GOAL, - Q=self.Q, - R=self.R)["l"]) + return float(-1 * self.symbolic.loss(x=state, + Xr=self.X_GOAL, + u=self.current_preprocessed_action, + Ur=self.U_GOAL, + Q=self.Q, + R=self.R)["l"]) if self.TASK == Task.TRAJ_TRACKING: return -1 @@ -834,12 +782,9 @@ def _get_reset_info(self): info["x_reference"] = self.X_GOAL info["u_reference"] = self.U_GOAL if self.constraints is not None: - info[ - "symbolic_constraints"] = self.constraints.get_all_symbolic_models( - ) + info["symbolic_constraints"] = self.constraints.get_all_symbolic_models() #info["constraint_values"] = self.constraints.get_values(self) #info["constraint_violations"] = self.constraints.get_violations(self) - return info def _parse_urdf_parameters(self, file_name: str = "cf2x.urdf"): diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.yaml b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.yaml index 69b881a8c..36148eafc 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.yaml +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.yaml @@ -6,22 +6,22 @@ physics: pyb gui: False quad_type: 2 normalized_rl_action_space: False -# state initialization +# State initialization init_state: null randomized_init: True init_state_randomization_info: null -# randomization +# Randomization inertial_prop: null randomized_inertial_prop: False inertial_prop_randomization_info: null -# task +# Task task: stabilization task_info: null cost: rl_reward -# disturbance +# Disturbance disturbances: null -# constraints +# Constraints constraints: null done_on_violation: False -# misc +# Misc verbose: False diff --git 
a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_constraints.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_constraints.py index 00b81329c..a01393c32 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_constraints.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_constraints.py @@ -21,8 +21,8 @@ def __init__(self, Args: env (BenchmarkEnv): The environment to constrain. - low (list): to overwrite the environment minimums - high (list): To overwrite the environment maximums + low (list): to overwrite the environment minimums. + high (list): To overwrite the environment maximums. """ if env.QUAD_TYPE == QuadType.ONE_D: @@ -45,7 +45,6 @@ def __init__(self, np.finfo(np.float32).max, env.theta_threshold_radians * 2, np.finfo(np.float32).max]) - # Overwrite with provided high and low if high is not None: assert len(high) == env.observation_space.shape[0] @@ -53,7 +52,6 @@ def __init__(self, if low is not None: assert len(low) == env.observation_space.shape[0] self.low = low - super().__init__(env, self.low, self.high, ConstraintInputType.STATE, **kwargs) def get_value(self, env): @@ -66,11 +64,9 @@ def get_value(self, env): ndarray: The evaluation of the constraint. """ - return np.squeeze(self.sym_func(env.state)) - class QuadrotorDiagConstraint(Constraint): """Constrain the quadrotor's commanded input to the action space bounds. @@ -91,7 +87,6 @@ def __init__(self, env): self.b = 1.1 else: raise NotImplementedError - self.sym_func = lambda x: self.h @ x - self.b def get_symbolic_model(self, env): @@ -132,6 +127,7 @@ def is_violated(self, env): flag = np.any(np.greater(c_value, 0.)) return flag + class QuadrotorInputConstraint(BoundedConstraint): """Constrain the quadrotor's commanded input to the action space bounds. @@ -146,8 +142,8 @@ def __init__(self, Args: env (BenchmarkEnv): The environment to constrain. - low (list): to overwrite the environment minimums - high (list): To overwrite the environment maximums + low (list): to overwrite the environment minimums. + high (list): To overwrite the environment maximums. """ if high is None: @@ -155,14 +151,11 @@ def __init__(self, else: assert len(high) == env.action_space.shape[0] self.high = high - if low is None: self.low = np.zeros(int(env.QUAD_TYPE)) else: - assert len(low) == env.action_space.shape[0] self.low = low - super().__init__(env, self.low, self.high, ConstraintInputType.INPUT, **kwargs) def get_value(self, env): diff --git a/safe_control_gym/math_and_models/distributions.py b/safe_control_gym/math_and_models/distributions.py index b0057128b..bf2507451 100644 --- a/safe_control_gym/math_and_models/distributions.py +++ b/safe_control_gym/math_and_models/distributions.py @@ -1,15 +1,15 @@ """Probability distributions for PyTorch. -Adapted from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py +Based on https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py -Todo: - * missing SoftCategorical (differentiable Categorical). """ import torch class Normal(torch.distributions.Normal): - """Multivariate Gaussian distribution given mean and std tensors.""" + """Multivariate Gaussian distribution given mean and std tensors. + + """ def log_prob(self, actions): """Log probability of actions given current distribution. 
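The Normal wrapper in distributions.py above treats the last tensor dimension as independent action dimensions and reduces over it, so each batch element gets one joint log-probability and one entropy value. A short sketch with plain torch.distributions (the shapes are illustrative assumptions, not taken from this diff):

    import torch

    mean, std = torch.zeros(5, 3), torch.ones(5, 3)                 # Batch of 5, 3-dim diagonal Gaussian.
    dist = torch.distributions.Normal(mean, std)
    actions = dist.sample()                                         # Shape (5, 3).
    joint_log_prob = dist.log_prob(actions).sum(-1, keepdim=True)   # Shape (5, 1).
    entropy = dist.entropy().sum(-1)                                # Shape (5,), as in entropy() above.
    print(joint_log_prob.shape, entropy.shape)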
@@ -33,12 +33,16 @@ def entropy(self): return super().entropy().sum(-1) def mode(self): - """Mode (max probability point) of current distribution.""" + """Mode (max probability point) of current distribution. + + """ return self.mean class Categorical(torch.distributions.Categorical): - """Categorical distribution given class probabilities or logits, not differentiable.""" + """Categorical distribution given class probabilities or logits, not differentiable. + + """ def sample(self): """Sample from the current distribution. @@ -59,9 +63,10 @@ def log_prob(self, actions): (torch.FloatTensor): shape (batch, 1). """ - return (super().log_prob(actions.squeeze(-1)).view( - actions.size(0), -1).sum(-1).unsqueeze(-1)) + return (super().log_prob(actions.squeeze(-1)).view(actions.size(0), -1).sum(-1).unsqueeze(-1)) def mode(self): - """Mode (max probability point) of current distribution.""" + """Mode (max probability point) of current distribution. + + """ return self.probs.argmax(dim=-1, keepdim=True) diff --git a/safe_control_gym/math_and_models/neural_networks.py b/safe_control_gym/math_and_models/neural_networks.py index 4c28aaf9b..1b92c69cb 100644 --- a/safe_control_gym/math_and_models/neural_networks.py +++ b/safe_control_gym/math_and_models/neural_networks.py @@ -1,13 +1,5 @@ """Neural networks. -This module demonstrates documentation as specified by the `Google Python -Style Guide`_. Docstrings may extend over multiple lines. Sections are created -with a section header and a colon followed by a block of indented text. - -Todo: - * For module TODOs - * You have to also use ``sphinx.ext.todo`` extension - """ import numpy as np import torch @@ -16,37 +8,16 @@ def get_activation(name): - """Example function with types documented in the docstring. - - `PEP 484`_ type annotations are supported. If attribute, parameter, and - return types are annotated according to `PEP 484`_, they do not need to be - included in the docstring: - - Args: - name (int): The first parameter. - - Returns: - bool: The return value. True for success, False otherwise. + """ """ return getattr(F, name) if name else lambda x: x def init_(module): - """Example function with types documented in the docstring. - - `PEP 484`_ type annotations are supported. If attribute, parameter, and - return types are annotated according to `PEP 484`_, they do not need to be - included in the docstring: - - Args: - module (int): The first parameter. - - Returns: - bool: The return value. True for success, False otherwise. + """ """ - # could have different gains with different activations nn.init.orthogonal_(module.weight.data, gain=1) nn.init.constant_(module.bias.data, 0) return module @@ -55,18 +26,6 @@ class MLP(nn.Module): """MLP network (can be used as value or policy). - If the class has public attributes, they may be documented here - in an ``Attributes`` section and follow the same formatting as a - function's ``Args`` section. Alternatively, attributes may be documented - inline with the attribute's declaration (see __init__ method below). - - Properties created with the ``@property`` decorator should be documented - in the property's getter method. - - Attributes: - attr1 (str): Description of `attr1`. - attr2 (:obj:`int`, optional): Description of `attr2`. - """ def __init__(self, @@ -76,8 +35,9 @@ def __init__(self, act="relu", output_act=None, init_weights=False, - **kwargs): - """Mlti-layer perception / fully-connected network. + **kwargs + ): + """Multi-layer perceptron/fully-connected network.
Args: input_dim (int): input dimension. @@ -90,7 +50,6 @@ def __init__(self, super(MLP, self).__init__() dims = [input_dim] + hidden_dims + [output_dim] init_func = init_ if init_weights else lambda x: x - self.fcs = nn.ModuleList([ init_func(nn.Linear(dims[i], dims[i + 1])) for i in range(len(dims) - 1) @@ -99,17 +58,7 @@ def __init__(self, self.output_act = get_activation(output_act) def forward(self, x): - """Class methods are similar to regular functions. - - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1: The first parameter. - param2: The second parameter. - - Returns: - True if successful, False otherwise. + """ """ out = x @@ -122,18 +71,6 @@ def forward(self, x): class CNN(nn.Module): """CNN network for encoding images. - If the class has public attributes, they may be documented here - in an ``Attributes`` section and follow the same formatting as a - function's ``Args`` section. Alternatively, attributes may be documented - inline with the attribute's declaration (see __init__ method below). - - Properties created with the ``@property`` decorator should be documented - in the property's getter method. - - Attributes: - attr1 (str): Description of `attr1`. - attr2 (:obj:`int`, optional): Description of `attr2`. - """ def __init__(self, @@ -141,14 +78,9 @@ def __init__(self, output_dim, act="relu", output_act="relu", - **kwargs): - """Example of docstring on the __init__ method. - - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1 (str): Description of `param1`. + **kwargs + ): + """ """ super(CNN, self).__init__() @@ -157,24 +89,13 @@ def __init__(self, init_(nn.Conv2d(32, 64, 4, stride=2)), init_(nn.Conv2d(64, 32, 3, stride=1)) ]) - # too lazy for auto-calculate, do it with a test run instead conv_out_dim = 32 * 7 * 7 self.fc = init_(nn.Linear(conv_out_dim, output_dim)) self.act = get_activation(act) self.output_act = get_activation(output_act) def forward(self, x, normalize=False): - """Class methods are similar to regular functions. - - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1: The first parameter. - param2: The second parameter. - - Returns: - True if successful, False otherwise. + """ """ out = x / 255.0 if normalize else x @@ -188,31 +109,14 @@ def forward(self, x, normalize=False): class RNN(nn.Module): """RNN network (can be used as value or policy). - If the class has public attributes, they may be documented here - in an ``Attributes`` section and follow the same formatting as a - function's ``Args`` section. Alternatively, attributes may be documented - inline with the attribute's declaration (see __init__ method below). - - Properties created with the ``@property`` decorator should be documented - in the property's getter method. - - Attributes: - attr1 (str): Description of `attr1`. - attr2 (:obj:`int`, optional): Description of `attr2`. - """ def __init__(self, input_dim, output_dim, - **kwargs): - """Example of docstring on the __init__ method. - - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1 (str): Description of `param1`. + **kwargs + ): + """ """ super(RNN, self).__init__() @@ -224,20 +128,7 @@ def __init__(self, nn.init.orthogonal_(param) def forward(self, x, hxs, masks): - """Run the RNN taking account in masking. - - The first condition applies during exploraion or evaluation, the second - condition applies during training/action evaluation. 
- - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1: The first parameter. - param2: The second parameter. - - Returns: - True if successful, False otherwise. + """ """ if x.size(0) == hxs.size(0): @@ -268,7 +159,6 @@ def forward(self, x, hxs, masks): # We can now process steps that don't have any zeros in masks together! start_idx = has_zeros[i] end_idx = has_zeros[i + 1] - rnn_scores, hxs = self.gru( x[start_idx:end_idx], (hxs * masks[start_idx]).unsqueeze(0)) outputs.append(rnn_scores) diff --git a/safe_control_gym/math_and_models/normalization.py b/safe_control_gym/math_and_models/normalization.py index 3c30072d0..4893210d0 100644 --- a/safe_control_gym/math_and_models/normalization.py +++ b/safe_control_gym/math_and_models/normalization.py @@ -1,29 +1,22 @@ """Perform normalization on inputs or rewards. -References: - * github.com/ShangtongZhang/DeepRL/blob/932ea88082e0194126b87742bd4a28c4599aa1b8/deep_rl/utils/normalizer.py#L69 - * github.com/DLR-RM/stable-baselines3/blob/d7c6aff2523122aebf1e70210b2c56642e0b2735/stable_baselines3/common/vec_env/vec_normalize.py#L138 - * github.com/openai/large-scale-curiosity/blob/e0a698676d19307a095cd4ac1991c4e4e70e56fb/cppo_agent.py#L226 - -Todo: - * - """ import numpy as np import torch + from gym.spaces import Box def normalize_angle(x): - """Wraps input angle to [-pi, pi].""" + """Wraps input angle to [-pi, pi]. + + """ return ((x + np.pi) % (2 * np.pi)) - np.pi class RunningMeanStd(): """Calulates the running mean and std of a data stream. - Reference: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm - Attributes: mean (np.array): mean of data stream. var (np.array): variance of data stream. @@ -56,15 +49,15 @@ def update(self, arr): self.update_from_moments(batch_mean, batch_var, batch_count) def update_from_moments(self, batch_mean, batch_var, batch_count): - """Util function for `update` method.""" + """Util function for `update` method. + + """ delta = batch_mean - self.mean tot_count = self.count + batch_count - new_mean = self.mean + delta * batch_count / tot_count m_a = self.var * self.count m_b = batch_var * batch_count - m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / ( - self.count + batch_count) + m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) new_var = m_2 / (self.count + batch_count) new_count = batch_count + self.count self.mean = new_mean @@ -73,7 +66,7 @@ def update_from_moments(self, batch_mean, batch_var, batch_count): class BaseNormalizer(object): - """Template for normalizers, also default. + """Template/default normalizer. Attributes: read_only (bool): if to freeze the current stats being tracked. @@ -90,20 +83,28 @@ def unset_read_only(self): self.read_only = False def __call__(self, x, *args, **kwargs): - """Invokes normalization on the given input.""" + """Invokes normalization on the given input. + + """ return x def state_dict(self): - """Returns snapshot of current stats.""" + """Returns snapshot of current stats. + + """ return {} def load_state_dict(self, _): - """Restores the stats from a snapshot.""" + """Restores the stats from a snapshot. + + """ pass class MeanStdNormalizer(BaseNormalizer): - """Normalize by the running average.""" + """Normalize by the running average. + + """ def __init__(self, shape=(), read_only=False, clip=10.0, epsilon=1e-8): """Initializes the data stream tracker. 
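For context on RunningMeanStd.update_from_moments above: it merges per-batch moments with the parallel variance formula, so streaming updates match a direct computation over all samples seen so far. A small standalone check (population variance, ddof=0), not part of this patch:

    import numpy as np

    def merge_moments(mean_a, var_a, n_a, mean_b, var_b, n_b):
        """Combine the moments of two batches, mirroring update_from_moments."""
        delta = mean_b - mean_a
        n = n_a + n_b
        new_mean = mean_a + delta * n_b / n
        m_2 = var_a * n_a + var_b * n_b + np.square(delta) * n_a * n_b / n
        return new_mean, m_2 / n, n

    a, b = np.random.randn(100), np.random.randn(50) + 1.0
    mean, var, _ = merge_moments(a.mean(), a.var(), a.size, b.mean(), b.var(), b.size)
    full = np.concatenate([a, b])
    assert np.allclose([mean, var], [full.mean(), full.var()])   # Matches the direct computation.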
@@ -122,7 +123,9 @@ def __init__(self, shape=(), read_only=False, clip=10.0, epsilon=1e-8): self.epsilon = epsilon def __call__(self, x): - """Update tracker given data, optionally normalize the data.""" + """Update tracker given data, optionally normalize the data. + + """ x = np.asarray(x) if not self.read_only: self.rms.update(x) @@ -144,10 +147,9 @@ class RewardStdNormalizer(MeanStdNormalizer): Papers: * arxiv.org/pdf/1808.04355.pdf * arxiv.org/pdf/1810.12894.pdf - Implementations: - * github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/common/vec_env/vec_normalize.py - * github.com/openai/large-scale-curiosity/blob/e0a698676d19307a095cd4ac1991c4e4e70e56fb/cppo_agent.py#L226 - See github.com/openai/baselines/issues/538 for a dicussion on its necessity + + Also see: + * github.com/openai/baselines/issues/538 """ @@ -167,25 +169,25 @@ def __init__(self, gamma=0.99, read_only=False, clip=10.0, epsilon=1e-8): self.ret = None def __call__(self, x, dones): - """Update tracker given reward, optionally normalize the reward (only scaling).""" - x = np.asarray(x) + """Update tracker given reward, optionally normalize the reward (only scaling). + """ + x = np.asarray(x) if not self.read_only: # Track running average of forward discounted returns. if self.ret is None: self.ret = np.zeros(x.shape[0]) - self.ret = self.ret * self.gamma + x self.rms.update(self.ret) # Prevent information leak from previous episodes. self.ret[dones.astype(np.long)] = 0 - - return np.clip(x / np.sqrt(self.rms.var + self.epsilon), -self.clip, - self.clip) + return np.clip(x / np.sqrt(self.rms.var + self.epsilon), -self.clip, self.clip) class RescaleNormalizer(BaseNormalizer): - """Apply constant scaling.""" + """Apply constant scaling. + + """ def __init__(self, coef=1.0): """Initializes with fixed scaling constant. @@ -198,32 +200,41 @@ def __init__(self, coef=1.0): self.coef = coef def __call__(self, x): - """Scale the input.""" + """Scale the input. + + """ if not isinstance(x, torch.Tensor): x = np.asarray(x) return self.coef * x class ImageNormalizer(RescaleNormalizer): - """Scale image pixles from [0,255] to [0,1]""" + """Scale image pixles from [0,255] to [0,1]. + + """ def __init__(self): super().__init__(self, 1.0 / 255) class ActionUnnormalizer(BaseNormalizer): - """Assumes policy output action is in [-1,1], unnormalize it for gym env.""" + """Assumes policy output action is in [-1,1], unnormalize it for gym env. + + """ def __init__(self, action_space): - """Defines the mean and std for the bounded action space.""" + """Defines the mean and std for the bounded action space. + + """ super().__init__() - assert isinstance(action_space, - Box), "action space must be gym.spaces.Box" + assert isinstance(action_space, Box), "action space must be gym.spaces.Box" low, high = action_space.low, action_space.high self.mean = (low + high) / 2.0 self.std = (high - low) / 2.0 def __call__(self, action): - """Unnormalizes given input action.""" + """Unnormalizes given input action. + + """ x = np.asarray(action) return self.mean + x * self.std diff --git a/safe_control_gym/math_and_models/symbolic_systems.py b/safe_control_gym/math_and_models/symbolic_systems.py index 71d70cf3e..286cffd7e 100644 --- a/safe_control_gym/math_and_models/symbolic_systems.py +++ b/safe_control_gym/math_and_models/symbolic_systems.py @@ -1,12 +1,4 @@ -"""Symbolic Model . - -This module demonstrates documentation as specified by the `Google Python -Style Guide`_. Docstrings may extend over multiple lines. 
Sections are created -with a section header and a colon followed by a block of indented text. - -Todo: - * For module TODOs - * You have to also use ``sphinx.ext.todo`` extension +"""Symbolic Models. """ import numpy as np @@ -17,16 +9,12 @@ class SymbolicModel(): """Implements the dynamics model with symbolic variables. x_dot = f(x,u), y = g(x,u), with other pre-defined, symbolic functions - (e.g. cost, constraints), can serve as priors for the controller. + (e.g. cost, constraints), serve as priors for the controllers. - Attributes: - attr1 (str): Description of `attr1`. - attr2 (:obj:`int`, optional): Description of `attr2`. - Notes: * naming convention on symbolic variable and functions. - * for single-letter symbol, use {}_sym, otherwise use underscore for delimiter - * for symbolic functions to be exposed, use {}_func + * for single-letter symbol, use {}_sym, otherwise use underscore for delimiter. + * for symbolic functions to be exposed, use {}_func. """ @@ -36,25 +24,10 @@ def __init__(self, dt=1e-3, integration_algo='cvodes', funcs=None): - """Example of docstring on the __init__ method. - - The __init__ method may be documented in either the class level - docstring, or as a docstring on the __init__ method itself. - - Either form is acceptable, but the two should not be mixed. Choose one - convention to document the __init__ method and be consistent with it. - - Note: - Do not include the `self` parameter in the ``Args`` section. - - Args: - param1 (str): Description of `param1`. - param2 (:obj:`int`, optional): Description of `param2`. Multiple - lines are supported. - param3 (:obj:`list` of :obj:`str`): Description of `param3`. + """ """ - # setup for dynamics + # Setup for dynamics. self.x_sym = dynamics["vars"]["X"] self.u_sym = dynamics["vars"]["U"] self.x_dot = dynamics["dyn_eqn"] @@ -62,57 +35,49 @@ def __init__(self, self.y_sym = self.x_sym else: self.y_sym = dynamics["obs_eqn"] - - # sampling time + # Sampling time. self.dt = dt - - # integration algorithm + # Integration algorithm. self.integration_algo = integration_algo - - # other symbolic functions + # Other symbolic functions. if funcs is not None: for name, func in funcs.items(): assert name not in self.__dict__ self.__dict__[name] = func - - # variable dimensions + # Variable dimensions. self.nx = self.x_sym.shape[0] self.nu = self.u_sym.shape[0] self.ny = self.y_sym.shape[0] - - # setup cost function + # Setup cost function. self.cost_func = cost["cost_func"] - print(self.cost_func) self.Q = cost["vars"]["Q"] self.R = cost["vars"]["R"] self.Xr = cost["vars"]["Xr"] self.Ur = cost["vars"]["Ur"] - - # setup symbolic model + # Setup symbolic model. self.setup_model() - - # setup Jacobian and Hessian of the dynamics and cost functions + # Setup Jacobian and Hessian of the dynamics and cost functions. self.setup_linearization() def setup_model(self): - """Exposes functions to evaluate the model.""" - # Continuous time dynamics. - self.fc_func = cs.Function('fc', [self.x_sym, self.u_sym], [self.x_dot], - ['x', 'u'], ['f']) - # discrete time dynamics - self.fd_func = cs.integrator('fd', self.integration_algo, { - 'x': self.x_sym, - 'p': self.u_sym, - 'ode': self.x_dot - }, {'tf': self.dt}) + """Exposes functions to evaluate the model. + """ + # Continuous time dynamics. + self.fc_func = cs.Function('fc', [self.x_sym, self.u_sym], [self.x_dot], ['x', 'u'], ['f']) + # Discrete time dynamics. 
+ self.fd_func = cs.integrator('fd', self.integration_algo, {'x': self.x_sym, + 'p': self.u_sym, + 'ode': self.x_dot}, {'tf': self.dt} + ) # Observation model. - self.g_func = cs.Function('g', [self.x_sym, self.u_sym], [self.y_sym], - ['x', 'u'], ['g']) + self.g_func = cs.Function('g', [self.x_sym, self.u_sym], [self.y_sym], ['x', 'u'], ['g']) def setup_linearization(self): - """Exposes functions for the linearized model.""" + """Exposes functions for the linearized model. + + """ # Jacobians w.r.t state & input. self.dfdx = cs.jacobian(self.x_dot, self.x_sym) self.dfdu = cs.jacobian(self.x_dot, self.u_sym) @@ -124,11 +89,9 @@ def setup_linearization(self): self.dg_func = cs.Function('dg', [self.x_sym, self.u_sym], [self.dgdx, self.dgdu], ['x', 'u'], ['dgdx', 'dgdu']) - # Evaluation point for linearization. self.x_eval = cs.MX.sym('x_eval', self.nx, 1) self.u_eval = cs.MX.sym('u_eval', self.nu, 1) - # Linearized dynamics model. self.x_dot_linear = self.x_dot + self.dfdx @ ( self.x_eval - self.x_sym) + self.dfdu @ (self.u_eval - self.u_sym) @@ -141,26 +104,20 @@ def setup_linearization(self): 'p': cs.vertcat(self.u_eval, self.x_sym, self.u_sym), 'ode': self.x_dot_linear }, {'tf': self.dt}) - # Linearized observation model. self.y_linear = self.y_sym + self.dgdx @ ( self.x_eval - self.x_sym) + self.dgdu @ (self.u_eval - self.u_sym) self.g_linear_func = cs.Function( 'g_linear', [self.x_eval, self.u_eval, self.x_sym, self.u_sym], [self.y_linear], ['x_eval', 'u_eval', 'x', 'u'], ['g_linear']) - - # Jacobian and Hessian of cost function + # Jacobian and Hessian of cost function. self.l_x = cs.jacobian(self.cost_func, self.x_sym) self.l_xx = cs.jacobian(self.l_x, self.x_sym) self.l_u = cs.jacobian(self.cost_func, self.u_sym) self.l_uu = cs.jacobian(self.l_u, self.u_sym) self.l_xu = cs.jacobian(self.l_x, self.u_sym) - l_inputs = [self.x_sym, self.u_sym, self.Xr, self.Ur, self.Q, self.R] l_inputs_str = ['x', 'u', 'Xr', 'Ur', 'Q', 'R'] - l_outputs = [ - self.cost_func, self.l_x, self.l_xx, self.l_u, self.l_uu, self.l_xu - ] + l_outputs = [self.cost_func, self.l_x, self.l_xx, self.l_u, self.l_uu, self.l_xu] l_outputs_str = ['l', 'l_x', 'l_xx', 'l_u', 'l_uu', 'l_xu'] - self.loss = cs.Function('loss', l_inputs, l_outputs, l_inputs_str, - l_outputs_str) + self.loss = cs.Function('loss', l_inputs, l_outputs, l_inputs_str, l_outputs_str) diff --git a/safe_control_gym/utils/configuration.py b/safe_control_gym/utils/configuration.py index 8907cf5f1..efcba660b 100644 --- a/safe_control_gym/utils/configuration.py +++ b/safe_control_gym/utils/configuration.py @@ -1,3 +1,6 @@ +"""Configuration utilities. + +""" import argparse from dict_deep import deep_set import os @@ -9,7 +12,9 @@ class ConfigFactory: - """Manager class that's in charge of experiment configs.""" + """Manager class that's in charge of experiment configs. + + """ def __init__(self): self.parser = argparse.ArgumentParser(description="Benchmark") @@ -23,18 +28,21 @@ def __init__(self): ) def add_argument(self, *args, **kwargs): - """Extends to new arguments.""" + """Extends to new arguments. + + """ self.parser.add_argument(*args, **kwargs) def add_arguments(self): - """Registers base arguments (for experiment bookkeeping).""" + """Registers base arguments (for experiment bookkeeping). 
+ + """ self.add_argument("--tag", type=str, help='id of the experiment') self.add_argument("--seed", type=int, help="random seed, 0 is no seed") self.add_argument("--device", type=str, help="cpu or cuda(gpu)") self.add_argument("--output_dir", type=str, help="output saving folder") self.add_argument("--restore", type=str, help='folder to reload from') - - # need to explicit provide from cmd (if train for the 1st time) + # Need to explicitly provide from command line (if training for the 1st time). self.add_argument("--algo", type=str, help='algorithm/controller') self.add_argument("--task", type=str, help='task/environment') self.add_argument("--overrides", @@ -47,23 +55,22 @@ def add_arguments(self): help="override key-value pairs") def merge(self): - """Creates experiment config object from command line and config files.""" + """Creates experiment config object from command line and config files. + + """ config_dict = self.base_dict args, _ = self.parser.parse_known_args() - if args.restore: - # restore for continual training or evaluation + # Restore for continual training or evaluation. restore_path = os.path.join(args.restore, "config.yaml") config_dict.update(read_file(restore_path)) elif args.algo and args.task: - # start fresh training + # Start fresh training. config_dict["algo_config"] = get_config(args.algo) config_dict["task_config"] = get_config(args.task) else: warnings.warn("No agent/task config given.") - - # experiment-specific overrides - # e.g. specific training hyperparameters or testing conditions + # Experiment-specific overrides, e.g. training hyperparameters. if args.overrides: for f in args.overrides: merge_dict(config_dict, read_file(f)) @@ -71,18 +78,14 @@ def merge(self): kv_dict = {} for kv in args.kv_overrides: k, v = kv.split("=") - try: - # string as a python expression - v = eval(v) + try: + v = eval(v) # String as a python expression. except: - # normal python string - pass + pass # Normal python string. deep_set(kv_dict, k.strip(), v) merge_dict(config_dict, kv_dict) - - # commind line overrides (e.g. retains `restore` field) + # Command line overrides (e.g. retains `restore` field). cmdline_dict = {k: v for k, v in args.__dict__.items() if v is not None} config_dict.update(cmdline_dict) - - # allow attribute-style access. + # Allow attribute-style access. return munch.munchify(config_dict) diff --git a/safe_control_gym/utils/logging.py b/safe_control_gym/utils/logging.py index 58e21c038..be73ab426 100644 --- a/safe_control_gym/utils/logging.py +++ b/safe_control_gym/utils/logging.py @@ -1,78 +1,70 @@ """Logging utilities. -Todo: - * - """ from collections import defaultdict import logging import os import numpy as np - import imageio -from torch.utils.tensorboard import SummaryWriter -# ----------------------------------------------------------------------------------- -# Terminal Logger -# ----------------------------------------------------------------------------------- +from torch.utils.tensorboard import SummaryWriter class StdoutLogger: - """Channel print content to std out and log file.""" + """Channel print content to std out and log file. + + """ def __init__(self, logger_name, log_dir, level=logging.INFO): logger = logging.getLogger(logger_name) formatter = logging.Formatter('%(asctime)s : %(message)s') - # Log to file ('w' to overwrite, 'a' to keep appending). log_file = os.path.join(log_dir, "std_out.txt") file_handler = logging.FileHandler(log_file, mode='a') file_handler.setFormatter(formatter) - # Log to std out. 
stream_handler = logging.StreamHandler() stream_handler.setFormatter(formatter) logger.setLevel(level) logger.addHandler(file_handler) logger.addHandler(stream_handler) - self.logger = logger self.file_handler = file_handler def info(self, msg): - """Chain print message to logger.""" + """Chain print message to logger. + + """ self.logger.info(msg) def close(self): - """Free log file.""" - self.file_handler.close() + """Free log file. - -# ----------------------------------------------------------------------------------- -# File Logger -# ----------------------------------------------------------------------------------- + """ + self.file_handler.close() class FileLogger: """Logger for saving statistics and other outputs to text files. - References: - * Michael Zhang's logger https://github.com/michaelrzhang/logger + Based on https://github.com/michaelrzhang/logger Initializes the log directory and creates log files given by name in arguments. Can be used to append future log values to each file. + """ def __init__(self, log_dir): # Creates folder for logging stats self.log_dir = os.path.join(log_dir, "logs") os.makedirs(self.log_dir, exist_ok=True) - # name of stats being tracked self.log_names = [] def init_logfile(self, name, xlabel="step"): - """Makes text file for logging the stat.""" + """Makes text file for logging the stat. + + """ fname = self.get_log_fname(name) # Already exist due to restore. if os.path.exists(fname): @@ -83,34 +75,37 @@ def init_logfile(self, name, xlabel="step"): log_file.write("{},{}\n".format(xlabel, name)) def get_log_fname(self, name): - """Gets log file name for the stat.""" + """Gets log file name for the stat. + + """ return os.path.join(self.log_dir, '{}.log'.format(name)) def log(self, name, value, step): - """Logs the stat to its corresponding text file.""" + """Logs the stat to its corresponding text file. + + """ if name not in self.log_names: # Initialize only if not done so already. self.init_logfile(name) self.log_names.append(name) - fname = self.get_log_fname(name) with open(fname, 'a') as log_file: log_file.write("{},{}\n".format(step, value)) def restore(self, step): - """Resets all log files to ignore lines after `step`.""" - # find all stats log files + """Resets all log files to ignore lines after `step`. + + """ + # Find all stats log files. log_files = [] for res, _, files in os.walk(self.log_dir): for each_file in files: if ".log" in files: log_files.append(os.path.join(res, each_file)) - for fname in log_files: with open(fname, "r") as file: lines = file.readlines() - - # find which line to start purging + # Find which line to start purging. stop_idx = None for i, each_line in enumerate(lines): temp = each_line.strip().split(',') @@ -119,42 +114,27 @@ def restore(self, step): # Skip header. if i == 0: continue - # first invalid line + # First invalid line. if idx > step: break - - # overwrite log file with only valid lines + # Overwrite log file with only valid lines. 
lines = lines[:stop_idx] with open(fname, "w") as file: for each_line in lines: file.write(each_line) -# ----------------------------------------------------------------------------------- -# CSV Logger -# ----------------------------------------------------------------------------------- - - -class CSVLogger: - """Logger for saving periodic stats to as csv file.""" - - def __init__(self): - pass - - -# ----------------------------------------------------------------------------------- -# Main Logger -# ----------------------------------------------------------------------------------- - - class ExperimentLogger: - """A hybrid logger.""" + """A hybrid logger. + + """ def __init__(self, log_dir, log_std_out=True, log_file_out=False, - use_tensorboard=False): + use_tensorboard=False + ): """Initializes loggers. Args: @@ -162,29 +142,29 @@ def __init__(self, log_std_out (bool): if to save terminal logs. log_file_out (bool): if to write data logs to text files. use_tensorboard (bool): if to use tensorboard. + """ self.log_dir = log_dir os.makedirs(log_dir, exist_ok=True) - # container for a log period + # Container for a log period. self.stats_buffer = defaultdict(list) - # Terminal logging. self.log_std_out = log_std_out if log_std_out: self.std_out_logger = StdoutLogger("Benchmark", log_dir) - # Text file logging. self.log_file_out = log_file_out if log_file_out: self.file_logger = FileLogger(log_dir) - # Tensorboard logging. self.use_tensorboard = use_tensorboard if use_tensorboard: self.tb_logger = SummaryWriter(log_dir=log_dir) def load(self, step): - """Resume from experiment, but ignores any logs after `step`.""" + """Resume from experiment, but ignores any logs after `step`. + + """ if self.log_file_out: self.file_logger.restore(step) if self.use_tensorboard: @@ -192,14 +172,18 @@ def load(self, step): purge_step=step) def close(self): - """Cleans up logging resources.""" + """Cleans up logging resources. + + """ if self.log_std_out: self.std_out_logger.close() if self.use_tensorboard: self.tb_logger.close() def info(self, msg): - """Logs a message to std output.""" + """Logs a message to std output. + + """ if self.log_std_out: self.std_out_logger.info(msg) else: @@ -212,14 +196,16 @@ def add_scalar(self, store=True, write=True, write_tb=True): - """Logs a scalar data.""" - # add to buffer (to be logged to terminal) + """Logs a scalar data. + + """ + # Add to buffer (to be logged to terminal). if store: self.stats_buffer[name].append(val) - # log to text file + # Log to text file. if self.log_file_out and write: self.file_logger.log(name, val, step) - # log to tb + # Log to tensorboard. if self.use_tensorboard and write_tb: self.tb_logger.add_scalar(name, val, step) @@ -230,69 +216,60 @@ def add_scalars(self, store=True, write=True, write_tb=True): - """Logs a group of scalars.""" + """Logs a group of scalars. + + """ assert isinstance(data, dict) for name, val in data.items(): - # scalars under the same name group + # Scalars under the same name group. full_name = prefix + "/" + name if prefix else name self.add_scalar(full_name, val, step, store, write, write_tb) def dump_scalars(self): """Produce a summary of stats within the log period (from buffer). 
- References: - * https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/logger.py - * https://github.com/openai/spinningup/blob/master/spinup/utils/logx.py - Currently only dump to terminal as a table summary, can dump to a CSV file in the future, but feels repetitive & less flexible than `add_scalar(..., write=True)`. + """ keys, values = [], [] tag = None - - # sorted keys are important for consistency betwen log steps !!! + # Important: sorted keys are important for consistency betwen log steps. for key, val_list in sorted(self.stats_buffer.items()): if len(val_list) == 1: - # align left + # Left align. val_str = "{:<8.3g}".format(val_list[0]) else: val_np = np.asarray(val_list) - val_str = "{:.3f} +/- {:.3f}".format(val_np.mean(), - val_np.std()) - - # Find tag and add it to the dict + val_str = "{:.3f} +/- {:.3f}".format(val_np.mean(), val_np.std()) + # Find tag and add it to the dict. if key.find("/") > 0: tag = key[:key.find("/") + 1] trunc_tag = self._truncate(tag) if trunc_tag not in keys: keys.append(trunc_tag) values.append("") - # Remove tag from key + # Remove tag from key. if tag is not None and tag in key: key = str(" " + key[len(tag):]) - keys.append(self._truncate(key)) values.append(self._truncate(val_str)) - - # Find max widths + # Find max widths. if len(keys) == 0: print("Tried to write empty key-value dict") return else: key_width = max(map(len, keys)) val_width = max(map(len, values)) - - # Write out the data + # Write out the data. dashes = "-" * (key_width + val_width + 7) lines = [dashes] for key, value in zip(keys, values): key_space = " " * (key_width - len(key)) val_space = " " * (val_width - len(value)) - lines.append("| {}{} | {}{} |".format(key, key_space, value, - val_space)) + lines.append("| {}{} | {}{} |".format(key, key_space, value, val_space)) lines.append(dashes) - summary = "\n" + "\n".join(lines) + "\n" self.info(summary) self.stats_buffer.clear() @@ -305,7 +282,7 @@ def _truncate(self, string, max_length=23): def log_video(self, name, video, fps=20): """Saves a video for evaluation, video: list of np.arrays of shape (H,W,C). - Reference: https://imageio.readthedocs.io/en/stable/format_gif-pil.html + """ vid_kargs = {'fps': fps} vid_name = '{}/{}'.format(self.log_dir, name) diff --git a/safe_control_gym/utils/plotting.py b/safe_control_gym/utils/plotting.py index 4182b10d7..5895f4e41 100644 --- a/safe_control_gym/utils/plotting.py +++ b/safe_control_gym/utils/plotting.py @@ -1,8 +1,5 @@ """Plotting utilities. -Todo: - * - """ from collections import defaultdict import os @@ -13,12 +10,10 @@ from tensorboard.backend.event_processing.event_accumulator import EventAccumulator -# ----------------------------------------------------------------------------------- -# Utils -# ----------------------------------------------------------------------------------- DIV_LINE_WIDTH = 50 + COLORS = [ "blue", "green", @@ -46,14 +41,15 @@ "darkblue", ] -# reference: https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html + LINE_STYLES = [ - ('solid', 'solid'), # Same as (0, ()) or '-' - ('dotted', 'dotted'), # Same as (0, (1, 1)) or '.' 
- ('dashed', 'dashed'), # Same as '--' + ('solid', 'solid'), + ('dotted', 'dotted'), + ('dashed', 'dashed'), ('dashdot', 'dashdot'), ] + LINE_STYLES2 = [('loosely dotted', (0, (1, 10))), ('dotted', (0, (1, 1))), ('densely dotted', (0, (1, 1))), ('loosely dashed', (0, (5, 10))), ('dashed', (0, (5, 5))), @@ -63,25 +59,32 @@ ('densely dashdotted', (0, (3, 1, 1, 1))), ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))), ('loosely dashdotdotted', (0, (3, 10, 1, 10, 1, 10))), - ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1)))] + ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1))) +] def rolling_window(a, window): - """""" + """Window data. + + """ shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) strides = a.strides + (a.strides[-1],) return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) def window_func(x, y, window, func): - """""" + """Evaluate a function on windowed data. + + """ yw = rolling_window(y, window) yw_func = func(yw, axis=-1) return x[window - 1:], yw_func def filter_log_dirs(pattern, negative_pattern=" ", root="./log", **kwargs): - """Gets list of experiment folders as specified.""" + """Gets list of experiment folders as specified. + + """ dirs = [item[0] for item in os.walk(root)] leaf_dirs = [] for i in range(len(dirs)): @@ -100,10 +103,12 @@ def filter_log_dirs(pattern, negative_pattern=" ", root="./log", **kwargs): def align_runs(xy_list, x_num_max=None): - """Aligns the max of the x data across runs""" + """Aligns the max of the x data across runs. + + """ x_max = float("inf") for x, y in xy_list: - # align length of x data (get min across all runs) + # Align length of x data (get min across all runs). x_max = min(x_max, len(x)) if x_num_max: x_max = min(x_max, x_num_max) @@ -112,7 +117,9 @@ def align_runs(xy_list, x_num_max=None): def smooth_runs(xy_list, window=10): - """Smooth the data curves by mean filtering.""" + """Smooth the data curves by mean filtering. + + """ smoothed_list = [ window_func(np.asarray(x), np.asarray(y), window, np.mean) for x, y in xy_list @@ -121,7 +128,9 @@ def smooth_runs(xy_list, window=10): def select_runs(xy_list, criterion, top_k=0): - """Pickes the top k runs based on a criterion.""" + """Pickes the top k runs based on a criterion. + + """ perf = [criterion(y) for _, y in xy_list] top_k_runs = np.argsort(perf)[-top_k:] selected_list = [] @@ -132,32 +141,31 @@ def select_runs(xy_list, criterion, top_k=0): def interpolate_runs(xy_list, interp_interval=100): - """Uses the same x data by interpolation across runs.""" + """Uses the same x data by interpolation across runs. + + """ x_right = float("inf") for x, y in xy_list: x_right = min(x_right, x[-1]) - # shape (data_len,) + # Shape: (data_len,). x = np.arange(0, x_right, interp_interval) y = [] for x_, y_ in xy_list: y.append(np.interp(x, x_, y_)) - # shape (num_runs, data_len) + # Shape: (num_runs, data_len). y = np.asarray(y) return x, y -# ----------------------------------------------------------------------------------- -# Single Experiment Plotting -# ----------------------------------------------------------------------------------- - - def load_from_log_file(path): - """Return x, y sequence data from the stat csv.""" + """Return x, y sequence data from the stat csv. + + """ with open(path, "r") as f: lines = f.readlines() - # x, y labels + # Labels. xk, yk = [k.strip() for k in lines[0].strip().split(",")] - # x, y values + # Values. 
x, y = [], [] for l in lines[1:]: data = l.strip().split(",") @@ -169,14 +177,16 @@ def load_from_log_file(path): def load_from_logs(log_dir): - """Return dict of stats under log_dir folder (`exp_dir/logs/`)""" + """Return dict of stats under log_dir folder (`exp_dir/logs/`). + + """ log_files = [] - # fetch all log files + # Fetch all log files. for r, _, f in os.walk(log_dir): for file in f: if ".log" in file: log_files.append(os.path.join(r, file)) - # fetch all stats from log files + # Fetch all stats from log files. data = {} for path in log_files: name = path.split(log_dir)[-1].replace(".log", "") @@ -193,18 +203,17 @@ def plot_from_logs(src_dir, out_dir, window=None, keys=None): out_dir (str): folder to save figures. window (int): window size for smoothing. keys (list): specify name of stats to plot, None means plot all. + """ - # find all logs + # Find all logs. log_files = [] for r, _, f in os.walk(src_dir): for file in f: if ".log" in file: log_files.append(os.path.join(r, file)) - - # make a figure for each log file + # Make a figure for each log file. stats = {} for path in log_files: - # name = os.path.basename(path).split(".")[0] name = path.split(src_dir)[-1].replace(".log", "") if keys: if name not in keys: @@ -229,15 +238,11 @@ def plot_from_tensorboard_log(src_dir, xlabel="step"): """Generates a plot for each stat from tfb log file in source folder. - References: - * https://stackoverflow.com/questions/36700404/tensorflow-opening-log-data-written-by-summarywriter - * https://github.com/tensorflow/tensorboard/blob/master/tensorboard/backend/event_processing/event_accumulator.py """ event_acc = EventAccumulator(src_dir) event_acc.Reload() if not keys: keys = event_acc.Tags()["scalars"] - stats = {} for k in keys: _, x, y = zip(*event_acc.Scalars(k)) @@ -250,17 +255,12 @@ def plot_from_tensorboard_log(src_dir, plt.title(k) plt.xlabel(xlabel) plt.ylabel(k) - # use "-" instead of "/" to connect group and stat name + # Use "-" instead of "/" to connect group and stat name. out_path = os.path.join(out_dir, k.replace("/", "-") + ".jpg") plt.savefig(out_path) return stats -# ----------------------------------------------------------------------------------- -# Cross Experiment Plotting -# ----------------------------------------------------------------------------------- - - def plot_from_experiments(legend_dir_specs, out_path="temp.jpg", scalar_name=None, @@ -270,7 +270,8 @@ def plot_from_experiments(legend_dir_specs, window=None, x_num_max=None, num_std=1, - use_tb_log=True): + use_tb_log=True + ): """Generates plot among algos, each with several seed runs. Example: @@ -297,14 +298,14 @@ def plot_from_experiments(legend_dir_specs, ylabel="Reward", window=10 ) + """ assert scalar_name is not None, "Must provide a scalar name to plot" - - # get all stats + # Get all stats. stats = defaultdict(list) for l, dirs in legend_dir_specs.items(): for d in dirs: - # pick from either log source (tensorboard or log text files) + # Pick from either log source (tensorboard or log text files). if use_tb_log: event_acc = EventAccumulator(d) event_acc.Reload() @@ -313,33 +314,29 @@ def plot_from_experiments(legend_dir_specs, else: path = os.path.join(d, "logs", scalar_name + ".log") _, x, _, y = load_from_log_file(path) - - # smoothing + # Smoothing. x, y = np.asarray(x), np.asarray(y) if window: x, y = window_func(x, y, window, np.mean) stats[l].append([x, y]) - - # post-processing + # Post-processing. 
x_max = float("inf") for _, runs in stats.items(): for x, y in runs: - # align length of x data (get min across all runs & all algos) + # Align length of x data (get min across all runs & all algos). x_max = min(x_max, len(x)) if x_num_max: x_max = min(x_max, x_num_max) - processed_stats = {} for name, runs in stats.items(): - # use same x for all runs to an algo + # Use same x for all runs to an algorithm. x = np.array([x[:x_max] for x, _ in runs])[0] - # different y for different runs + # Different y for different runs. y = np.stack([y[:x_max] for _, y in runs]) y_mean = np.mean(y, axis=0) y_std = np.std(y, axis=0) processed_stats[name] = [x, y_mean, y_std] - - # actual plot + # Actual plot. plt.clf() for i, name in enumerate(processed_stats.keys()): color = COLORS[i] @@ -354,18 +351,17 @@ def plot_from_experiments(legend_dir_specs, plt.xlabel(xlabel) plt.ylabel(ylabel) plt.legend() - plt.tight_layout() plt.savefig(out_path) plt.show() return stats, processed_stats -def get_log_dirs(all_logdirs, select=None, exclude=None): +def get_log_dirs(all_logdirs, + select=None, + exclude=None + ): """Find all folders for plotting. - - Reference: - * https://github.com/openai/spinningup/blob/038665d62d569055401d91856abb287263096178/spinup/utils/plot.py All 3 arguments can be exposed as list args from command line. @@ -374,6 +370,7 @@ def get_log_dirs(all_logdirs, select=None, exclude=None): pull data from it; 2) if not, check to see if the entry is a prefix for a real directory, and pull data from that. + """ logdirs = [] for logdir in all_logdirs: @@ -385,22 +382,17 @@ def get_log_dirs(all_logdirs, select=None, exclude=None): prefix = logdir.split(os.sep)[-1] listdir = os.listdir(basedir) logdirs += sorted([fulldir(x) for x in listdir if prefix in x]) - """ - Enforce selection rules, which check logdirs for certain substrings. - Makes it easier to look at graphs from particular ablations, if you - launch many jobs at once with similar names. - """ + # Enforce selection rules, which check logdirs for certain substrings. Makes it easier to look + # at graphs from particular ablations, if you launch many jobs at once with similar names. if select is not None: logdirs = [log for log in logdirs if all(x in log for x in select)] if exclude is not None: logdirs = [ log for log in logdirs if all(not (x in log) for x in exclude) ] - - # Verify logdirs + # Verify logdirs. print('Plotting from...\n' + '=' * DIV_LINE_WIDTH + '\n') for logdir in logdirs: print(logdir) print('\n' + '=' * DIV_LINE_WIDTH) - return logdirs diff --git a/safe_control_gym/utils/registration.py b/safe_control_gym/utils/registration.py index 71da1e8c9..d63e9560b 100644 --- a/safe_control_gym/utils/registration.py +++ b/safe_control_gym/utils/registration.py @@ -1,6 +1,6 @@ -"""Example Google style docstrings. +"""Environments and controllers registration. -Reference to https://github.com/openai/gym/blob/master/gym/envs/registration.py +Based on https://github.com/openai/gym/blob/master/gym/envs/registration.py """ import copy @@ -13,7 +13,9 @@ def load(name): - """Loads a callable from a string with format `module_path:callable_name`.""" + """Loads a callable from a string with format `module_path:callable_name`. + + """ mod_name, attr_name = name.split(":") mod = importlib.import_module(mod_name) fn = getattr(mod, attr_name) @@ -21,7 +23,9 @@ def load(name): class Spec(): - """ A specification for a particular instance of the environment.""" + """ A specification for a particular instance of the environment. 
+ + """ def __init__(self, id, entry_point=None, config_entry_point=None): """Used in function "register". @@ -37,11 +41,15 @@ def __init__(self, id, entry_point=None, config_entry_point=None): self.config_entry_point = config_entry_point def __repr__(self): - """Defines a class instance's string representation.""" + """Defines a class instance's string representation. + + """ return "Spec({})".format(self.id) def get_config(self): - """Fetches config (as dict) for the registered class.""" + """Fetches config (as dict) for the registered class. + + """ if isinstance(self.config_entry_point, str): if self.config_entry_point.endswith(".yaml"): # Specified as file path. @@ -52,18 +60,18 @@ def get_config(self): # Specified as "module_path:config_dict_name". config = load(self.config_entry_point) elif self.config_entry_point is None: - # no default config (provided as overwrites) + # No default config. config = {} else: - raise Exception("Config type {} is not supported.".format( - self.config_entry_point)) + raise Exception("Config type {} is not supported.".format(self.config_entry_point)) return config def make(self, *args, **kwargs): - """Instantiates an instance of the registered class with appropriate kwargs.""" + """Instantiates an instance of the registered class with appropriate kwargs. + + """ if self.entry_point is None: - raise Exception('Attempting to make deprecated env {}.'.format( - self.id)) + raise Exception('Attempting to make deprecated env {}.'.format(self.id)) if callable(self.entry_point): obj = self.entry_point(*args, **kwargs) else: @@ -79,67 +87,81 @@ def make(self, *args, **kwargs): class Registry(): - """Register a callable by ID.""" + """Register a callable by ID. + + """ def __init__(self): self.specs = {} def make(self, path, *args, **kwargs): - """Create an instance of the registered callable by id `path`.""" + """Create an instance of the registered callable by id `path`. + + """ spec = self.specs[path] obj = spec.make(*args, **kwargs) return obj def all(self): - """Returns all registered callables.""" + """Returns all registered callables. + + """ return self.specs.values() def spec(self, path): - """Returns spec of the registered callable by id.""" + """Returns spec of the registered callable by id. + + """ if ':' in path: mod_name, id = path.split(':') try: importlib.import_module(mod_name) except: - raise Exception( - '''A module ({}) was specified for the environment but was not - found, make sure the package is installed with `pip install` - before calling `gym.make()`'''.format(mod_name)) + raise Exception('''A module ({}) was specified for the environment but was not found,make sure the + package is installed with `pip install` before calling `gym.make()`'''.format(mod_name)) else: id = path - try: return self.specs[id] except: raise Exception("Key not found in registry.") def register(self, id, **kwargs): - """Saves a reference to the callable with a id.""" + """Saves a reference to the callable with a id. + + """ if id in self.specs: raise Exception('Cannot re-register id: {}'.format(id)) self.specs[id] = Spec(id, **kwargs) def register(id, **kwargs): - """Saves the callable to the global registry.""" + """Saves the callable to the global registry. + + """ return registry.register(id, **kwargs) def make(id, *args, **kwargs): - """Creates an instance of the callable from global registry.""" + """Creates an instance of the callable from global registry. 
+ + """ return registry.make(id, *args, **kwargs) def spec(id): - """Gets the spec of the callable from global registry.""" + """Gets the spec of the callable from global registry. + + """ return registry.spec(id) def get_config(id): - """Gets the config of the callable from global registry.""" + """Gets the config of the callable from global registry. + + """ return registry.spec(id).get_config() -# Registry: global registry (must be a singleton). -# used by functions `register`, `make`, `spec` and `get_config`. +# Registry: global registry (singleton) used by functions `register`, `make`, `spec` and `get_config`. registry = Registry() diff --git a/safe_control_gym/utils/utils.py b/safe_control_gym/utils/utils.py index d10d41e58..15cacbb76 100644 --- a/safe_control_gym/utils/utils.py +++ b/safe_control_gym/utils/utils.py @@ -1,4 +1,6 @@ -"""Collection of utility functions.""" +"""Miscellaneous utility functions. + +""" import argparse import datetime import gym @@ -9,21 +11,24 @@ import sys import munch import yaml - import imageio import numpy as np import torch def mkdirs(*paths): - """Makes a list of directories.""" + """Makes a list of directories. + + """ for path in paths: if not os.path.exists(path): os.makedirs(path) def eval_token(token): - """Converts string token to int, float or str.""" + """Converts string token to int, float or str. + + """ if token.isnumeric(): return int(token) try: @@ -33,10 +38,11 @@ def eval_token(token): def read_file(file_path, sep=","): - """Loads content from a file (json, yaml, csv, txt) + """Loads content from a file (json, yaml, csv, txt). - For json & yaml files returns a dict, - for csv & txt returns list of lines. + For json & yaml files returns a dict. + Ror csv & txt returns list of lines. + """ if len(file_path) < 1 or not os.path.exists(file_path): return None @@ -61,7 +67,9 @@ def read_file(file_path, sep=","): def merge_dict(source_dict, update_dict): - """Merges updates into source recursively.""" + """Merges updates into source recursively. + + """ for k, v in update_dict.items(): if k in source_dict and isinstance(source_dict[k], dict) and isinstance( v, dict): @@ -71,14 +79,18 @@ def merge_dict(source_dict, update_dict): def get_time(): - """Gets current timestamp (as string).""" + """Gets current timestamp (as string). + + """ start_time = datetime.datetime.now() time = str(start_time.strftime("%Y_%m_%d-%X")) return time def get_random_state(): - """Snapshots the random state at any moment.""" + """Snapshots the random state at any moment. + + """ return { "random": random.getstate(), "numpy": np.random.get_state(), @@ -87,14 +99,18 @@ def get_random_state(): def set_random_state(state_dict): - """Resets the random state for experiment restore.""" + """Resets the random state for experiment restore. + + """ random.setstate(state_dict["random"]) np.random.set_state(state_dict["numpy"]) torch.torch.set_rng_state(state_dict["torch"]) def set_seed(seed, cuda=False): - """General seeding function for reproducibility.""" + """General seeding function for reproducibility. + + """ random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) @@ -106,29 +122,31 @@ def set_seed(seed, cuda=False): def set_dir_from_config(config): """Creates a output folder for experiment (and save config files). - naming format: - {root (e.g. results)}/{tag (exp id)}/{seed}_{timestamp}_{git commit id} + Naming format: {root (e.g. results)}/{tag (exp id)}/{seed}_{timestamp}_{git commit id} + """ # Make output folder. 
timestamp = datetime.datetime.now().strftime("%b-%d-%H-%M") commit_id = subprocess.check_output( ["git", "describe", "--tags", "--always"]).decode("utf-8").strip() - run_dir = "seed{}_{}_{}".format(str(config.seed), str(timestamp), - str(commit_id)) + run_dir = "seed{}_{}_{}".format(str(config.seed), + str(timestamp), + str(commit_id) + ) config.output_dir = os.path.join(config.output_dir, config.tag, run_dir) mkdirs(config.output_dir) - # Save config. with open(os.path.join(config.output_dir, 'config.yaml'), "w") as file: yaml.dump(munch.unmunchify(config), file, default_flow_style=False) - # Save command. with open(os.path.join(config.output_dir, 'cmd.txt'), 'a') as file: file.write(" ".join(sys.argv) + "\n") def set_seed_from_config(config): - """Sets seed, only set if nonzero, 0 seed default to no seeding.""" + """Sets seed, only set if nonzero, 0 seed default to no seeding. + + """ seed = config.seed use_cuda = True if "cuda" in config.device else False if seed > 0: @@ -136,7 +154,9 @@ def set_seed_from_config(config): def set_device_from_config(config): - """Sets device, using GPU is set to `cuda` for now, no specific GPU yet.""" + """Sets device, using GPU is set to `cuda` for now, no specific GPU yet. + + """ use_cuda = (config.device == "cuda") and torch.cuda.is_available() config.device = "cuda" if use_cuda else "cpu" @@ -148,6 +168,7 @@ def save_video(name, frames, fps=20): name (str): path name to save the video. frames (list): frames of the video as list of np.arrays. fps (int, optional): frames per second. + """ assert ".gif" in name or ".mp4" in name, "invalid video name" vid_kwargs = {'fps': fps} @@ -164,6 +185,7 @@ def str2bool(val): Returns: bool: Interpretation of `val` as True or False. + """ if isinstance(val, bool): return val @@ -172,15 +194,12 @@ def str2bool(val): elif val.lower() in ('no', 'false', 'f', 'n', '0'): return False else: - raise argparse.ArgumentTypeError( - "[ERROR] in str2bool(), a Boolean value is expected") + raise argparse.ArgumentTypeError("[ERROR] in str2bool(), a Boolean value is expected") def unwrap_wrapper(env, wrapper_class): """Retrieve a ``VecEnvWrapper`` object by recursively searching. - Reference: - * https://github.com/DLR-RM/stable-baselines3/blob/ddbe0e93f9fe55152f2354afd058b28e6ccc3345/stable_baselines3/common/env_util.py """ env_tmp = env while isinstance(env_tmp, gym.Wrapper): @@ -193,7 +212,5 @@ def unwrap_wrapper(env, wrapper_class): def is_wrapped(env, wrapper_class): """Check if a given environment has been wrapped with a given wrapper. - Reference: - * https://github.com/DLR-RM/stable-baselines3/blob/ddbe0e93f9fe55152f2354afd058b28e6ccc3345/stable_baselines3/common/env_util.py """ - return unwrap_wrapper(env, wrapper_class) is not None \ No newline at end of file + return unwrap_wrapper(env, wrapper_class) is not None
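One usage note on `str2bool` above: it exists because `argparse` with `type=bool` would turn any non-empty string, including "false", into True. A short sketch (the `--use_gpu` flag is only an example, not an argument defined in this repository):

```python
import argparse
from safe_control_gym.utils.utils import str2bool

parser = argparse.ArgumentParser()
# With plain type=bool, "--use_gpu false" would parse as True; str2bool avoids that.
parser.add_argument("--use_gpu", type=str2bool, default=False)

print(parser.parse_args(["--use_gpu", "false"]).use_gpu)  # False
print(parser.parse_args(["--use_gpu", "yes"]).use_gpu)    # True
print(parser.parse_args([]).use_gpu)                      # False (default)
```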