# NSGA2 Generator
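This notebook demonstrates the use of the generator `NSGA2Generator` which implements the NSGA-II algorithm from [1]. We show how to set up the optimizer object, use it to solve a test problem, and show off some of the generator's output features. Running this notebook will generate files in a temporary directory on your computer. These files will be cleaned up at the end.

[1] Deb, K., Pratap, A., Agarwal, S., & Meyarivan, T. (2002). A fast and elitist multiobjective genetic algorithm: NSGA-II. *IEEE Transactions on Evolutionary Computation*, 6(2), 182–197.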

In [None]:
import json
import logging
import matplotlib.pyplot as plt
import os
import pandas as pd
import shutil
import tempfile

from xopt.generators.ga.nsga2 import (
    NSGA2Generator,
    PolynomialMutation,
    SimulatedBinaryCrossover,
)
from xopt.resources.test_functions.zdt import construct_zdt
from xopt import Xopt, Evaluator, VOCS

In [None]:
# NSGA2Generator reports its progress through Python's `logging` module.
# Configure the root logger so that messages of level INFO and above are shown in the console.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",  # Layout of the timestamp in each record
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Layout of each log record
    level=logging.INFO,  # Minimum severity that is emitted
)

In [None]:
# Get the problem ZDT3 and create an Xopt evaluator
# `construct_zdt` returns three values; the third is not needed in this notebook and is discarded
prob_vocs, prob_fun, _ = construct_zdt(30, 3)
# The evaluator wraps the problem's function so that Xopt can call it on candidate solutions
ev = Evaluator(function=prob_fun)

### Generator Setup and Use
First we create the `NSGA2Generator` object, demonstrate some of its settings, and then use it to solve the ZDT3 test problem.

In [None]:
# Simplest construction: only the problem's details are supplied, every other setting keeps its default
generator = NSGA2Generator(vocs=prob_vocs)

# The generator's hyperparameters and settings can also be controlled explicitly.
# This second object replaces the default one created above.
generator = NSGA2Generator(
    # Must provide the problem's details
    vocs=prob_vocs,
    # How many individuals in a population
    population_size=50,
    # Crossover operator and its hyperparameters specified manually
    crossover_operator=SimulatedBinaryCrossover(delta_1=0.5, delta_2=0.5, eta_c=20),
    # Mutation operator and its hyperparameters specified manually
    mutation_operator=PolynomialMutation(pm=1 / 30, eta_m=20),
    # Deduplicate candidate individuals so that the generator's outputs are unique
    deduplicate_output=True,
)

In [None]:
# Run the optimizer for a few generations. Notice log output printed below this cell
# Let the evaluator use as many workers as there are individuals in a population
ev.max_workers = generator.population_size
# Bundle the generator, evaluator and problem definition into a single optimizer object
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)

# Take three optimizer steps
for _ in range(3):
    X.step()

### Exploring Optimizer Output
We now take a look at the metadata associated with the optimizer run as well as its output.

In [None]:
%%time
# Run for longer with log output turned off
# Detach every handler from the root logger. The loop iterates over a copy (`[:]`) because
# `removeHandler` modifies the underlying list while we walk it.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Continue the same optimization for 47 more steps (50 in total with the 3 steps taken earlier)
for _ in range(47):
    X.step()

In [None]:
# Inspect generator properties (one line of output per property)
print(
    f"Saw {generator.fevals} function evaluations",  # Number of function evaluations returned to generator
    f"Completed {generator.n_generations} generations",  # Number of generations finished
    f"Generated {generator.n_candidates} candidate solutions",  # Number of individuals generated
    sep="\n",
)

In [None]:
# All evaluations are stored in the following DataFrame. Specific to this generator are the `xopt_parent_generation` and
# `xopt_candidate_idx` columns, which indicate the generation the individual's parents belong to and provide a
# unique index for each individual, respectively.
#
# NOTE: The data DataFrame is not stored when serializing the generator. It must be saved on its own for later use.
generator.data.head()

In [None]:
# Every population the optimizer has seen is recorded as the unique indices of its individuals.
# Show the first few indices of the last generation
print(generator.history_idx[-1][:16])

# With the data DataFrame at hand, the variables, objectives and constraints of any population can be recovered.
# Select the rows (one per individual) whose candidate index belongs to the first population
generator.data.loc[
    generator.data["xopt_candidate_idx"].isin(generator.history_idx[0])
].head()

In [None]:
# Using the population records we can plot the final generation's objective functions
# Select the rows of the data DataFrame whose candidate index belongs to the last recorded population
final_pop = generator.data[
    generator.data["xopt_candidate_idx"].isin(generator.history_idx[-1])
]

# Draw on an explicit figure/axes pair instead of relying on pyplot's implicit "current axes" state
fig, ax = plt.subplots()
ax.scatter(final_pop["f1"], final_pop["f2"])
ax.set_xlabel("f1")
ax.set_ylabel("f2")
# The trailing semicolon keeps the `Text(...)` repr of the title out of the cell output
ax.set_title(f"ZDT3, Generation {generator.n_generations}");

# File Output from Generator
In this section, we will take a look at the files which can be automatically written by the optimizer. We will create a temporary directory and clean it up at the end of this notebook.

The output files are the following.
 - `data.csv`: All data evaluated during the optimization
 - `vocs.txt`: The VOCS object so that the objectives, constraints, decision variables are retained alongside the data
 - `populations.csv`: Each population is written here with a column `xopt_generation` to distinguish which generation the row belongs to
 - `checkpoints`: This generator periodically saves its full state to timestamped files in this directory
 - `log.txt`: Log output from the generator is recorded to this file

While running each of the blocks, go ahead and open the temporary directory printed from the next cell and take a look at the files for yourself.

In [None]:
# Setup our output directory
# `tempfile.mkdtemp` creates a new, uniquely named directory and returns its path.
# The directory is not removed automatically; this notebook deletes it in its final cell.
output_dir = tempfile.mkdtemp()
print(f'Will write data to "{output_dir}"')

In [None]:
# Set up a generator configured to output data
# NOTE: `generator` is rebound here; from now on it refers to this file-writing generator
generator = NSGA2Generator(
    vocs=prob_vocs,
    output_dir=output_dir,  # Where will we write data
    checkpoint_freq=8,  # Turn on checkpoints and set how often (in terms of generations) we save the optimizer state
    log_level=logging.DEBUG,  # Set the level of log messages output to our log file
)

# Run it for a couple of generations
# Let the evaluator use as many workers as there are individuals in a population
ev.max_workers = generator.population_size
X = Xopt(generator=generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()

In [None]:
# Check out the generated files
# `os.listdir` returns entries in arbitrary order, so sort them to make the printed listing reproducible
print(f'"{output_dir}": {sorted(os.listdir(output_dir))}')
# Path of the directory holding the generator's checkpoints; reused later when restoring from a checkpoint
checkpoint_dir = os.path.join(output_dir, "checkpoints")
print(f'"checkpoints": {sorted(os.listdir(checkpoint_dir))}')

In [None]:
# In the event data was already written to `output_dir` the generator will choose a new path with a numeric suffix
# to avoid overwriting anything.
# The generator is created inline, so the name `generator` still refers to the one from the previous run
X = Xopt(
    generator=NSGA2Generator(vocs=prob_vocs, output_dir=output_dir),
    evaluator=ev,
    vocs=prob_vocs,
)
for _ in range(32):
    X.step()

# Compare the requested path and where the data ended up
print(f'Requested path: "{output_dir}"')
print(f'Path used:      "{X.generator.output_dir}"')

# Clean up the directory
# Close the generator's log file before deleting the directory it wrote to. Only the directory used by this
# second run is removed; the original `output_dir` is still read by the following cells.
X.generator.close_log_file()
shutil.rmtree(X.generator.output_dir)

In [None]:
# Load all data back in
# `generator` is the first file-writing generator (not the one created inline for the second run)
df = pd.read_csv(os.path.join(generator.output_dir, "data.csv"))
df.head()

In [None]:
# Read the VOCS object back in. This can be used for data analysis / restarting optimizations
# The file contents are parsed as JSON and the resulting dict is handed to `VOCS.from_dict`
with open(os.path.join(generator.output_dir, "vocs.txt")) as f:
    vocs_from_file = VOCS.from_dict(json.load(f))

# Show the objectives
vocs_from_file.objectives

In [None]:
# Read every recorded population back from disk
df = pd.read_csv(os.path.join(generator.output_dir, "populations.csv"))
# Keep only the rows belonging to the highest generation number, i.e. the last population
last_pop = df.loc[df["xopt_generation"].eq(df["xopt_generation"].max())]
last_pop.head()

In [None]:
# Reload the optimizer from its last checkpoint
# NOTE: the vocs object is not retained with the checkpoint and should be added as in the following code
#
# `os.listdir` returns entries in arbitrary order, so sort the names before taking the last one.
# NOTE(review): this assumes the timestamped checkpoint file names sort chronologically -- confirm
last_checkpoint = os.path.join(checkpoint_dir, sorted(os.listdir(checkpoint_dir))[-1])
with open(last_checkpoint) as f:
    # Merge the problem definition into the saved state before rebuilding the generator
    restored_generator = NSGA2Generator.from_dict({"vocs": prob_vocs, **json.load(f)})
print(f"From file: {restored_generator}")

# Demonstrate using the generator after loading (starting optimization from its last saved point)
X = Xopt(generator=restored_generator, evaluator=ev, vocs=prob_vocs)
for _ in range(32):
    X.step()
print(f"Further optimization: {restored_generator}")

# Clean up the output
# Close the restored generator's log file before deleting the directory it wrote to
X.generator.close_log_file()
shutil.rmtree(X.generator.output_dir)

In [None]:
# Clean up the original output
# Close the generator's log file first, then delete the temporary directory created earlier with `tempfile.mkdtemp`
generator.close_log_file()
shutil.rmtree(output_dir)