# Data loading

Load the routing results from MSGPACK files and export them as Parquet (MSGPACK --> Parquet).

In [None]:
from pathlib import Path
from tqdm.auto import tqdm

import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns
import shapely
from matplotlib import pyplot as plt

from ship_routing.app.routing import RoutingResult, RoutingLog
import msgpack
from tqdm.auto import tqdm

import warnings
# NOTE(review): no category or module filter is given, so this hides every
# warning raised after this point — consider narrowing it.
warnings.filterwarnings("ignore")

In [None]:
# Collect the files in results/ matching "results_1*.msgpack", sorted by path.
data_files = sorted(Path("results/").glob("results_1*.msgpack"))
# Show how many files were found.
len(data_files)

In [None]:
def load_results(msgpack_file_list: list) -> dict[str, RoutingResult]:
    """Read every msgpack file in the list and deserialize its records.

    The unpacked mappings of all files are merged into a single dictionary
    before deserialization. If a key occurs in more than one file, the
    entry from the file that comes later in the list replaces the earlier
    one.

    Parameters
    ----------
    msgpack_file_list : list
        Paths of the msgpack files holding the serialized results.

    Returns
    -------
    dict[str, RoutingResult]
        One ``RoutingResult`` per key found in the files.
    """
    merged = {}
    for path in msgpack_file_list:
        with open(path, "rb") as handle:
            payload = msgpack.unpack(handle, raw=False)
        merged.update(payload)

    # Snapshot the items as a list before handing them to the progress bar.
    deserialized = {}
    for key, value in tqdm(list(merged.items()), desc="records"):
        deserialized[key] = RoutingResult.from_msgpack(value)
    return deserialized

In [None]:
# Deserialize all records from the collected files into one dictionary.
results = load_results(data_files)
# Show how many records were loaded.
len(results)

In [None]:
def get_journey_params_df(routing_results_dict: dict = None):
    """Build a table of journey parameters with one row per routing result.

    For every entry, the ``"journey"`` section of ``rr.logs.config`` becomes
    one row. The waypoint entries are converted to their string
    representation, all columns get the prefix ``journey_``, and the
    waypoint, name, start-time and speed columns are stored as categoricals.

    The configuration dictionaries of the routing results are left
    untouched: the string conversion is applied to a copy.

    Parameters
    ----------
    routing_results_dict : dict
        Mapping from a key (used as the row label) to an object exposing
        ``logs.config["journey"]``. Despite the ``None`` default, a mapping
        must be passed.

    Returns
    -------
    pandas.DataFrame
        Journey parameters, indexed by ``filename``.
    """
    categorical_columns = (
        "journey_lon_waypoints",
        "journey_lat_waypoints",
        "journey_name",
        "journey_time_start",
        "journey_speed_knots",
    )

    def _stringify_waypoints(journey_config):
        # Shallow copy: writing the strings into the original mapping would
        # silently change the caller's routing results.
        row = dict(journey_config)
        row["lon_waypoints"] = str(row["lon_waypoints"])
        row["lat_waypoints"] = str(row["lat_waypoints"])
        return row

    df = pd.concat(
        [
            pd.DataFrame(
                _stringify_waypoints(rr.logs.config["journey"]),
                index=[f],
            )
            for f, rr in routing_results_dict.items()
        ]
    ).add_prefix("journey_")
    df = df.assign(
        **{col: df[col].astype("category") for col in categorical_columns}
    )
    df.index = df.index.rename("filename")
    return df

In [None]:
# Journey parameters of all loaded results, one row per result key.
df_journey = get_journey_params_df(results)
df_journey

In [None]:
def get_hyper_params_df(routing_results_dict: dict = None):
    """Build a table of hyper parameters with one row per routing result.

    Parameters
    ----------
    routing_results_dict : dict
        Mapping from a key (used as the row label) to an object exposing
        ``logs.config["hyper"]``.

    Returns
    -------
    pandas.DataFrame
        The hyper parameters with all columns prefixed by ``hyper_``, the
        ``hyper_crossover_strategy`` column stored as categorical, and the
        index named ``filename``.
    """
    frames = []
    for filename, result in routing_results_dict.items():
        # One single-row frame per result, labelled with its key.
        frames.append(
            pd.DataFrame(result.logs.config["hyper"], index=[filename])
        )

    table = pd.concat(frames).add_prefix("hyper_")
    strategy = table["hyper_crossover_strategy"].astype("category")
    table = table.assign(hyper_crossover_strategy=strategy)
    return table.rename_axis("filename")

In [None]:
# Join hyper and journey parameters on their shared "filename" index.
df_params = get_hyper_params_df(results).merge(
    get_journey_params_df(results),
    left_index=True,
    right_index=True,
)
df_params

In [None]:
def get_runtime(routing_results_dict: dict = None):
    """Compute the logged runtime of every routing result.

    The runtime is the difference between the largest and the smallest
    value of the ``timestamp`` column of ``rr.logs.to_dataframe()``.

    Parameters
    ----------
    routing_results_dict : dict
        Mapping from a key (used as the row label) to an object exposing
        ``logs.to_dataframe()``. Despite the ``None`` default, a mapping
        must be passed.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``filename`` with the columns ``runtime`` (the
        difference itself) and ``runtime_seconds`` (the same span in
        seconds).
    """
    _records = []
    for f, rr in routing_results_dict.items():
        # Build the log table once per result and reuse it for both extremes.
        timestamps = rr.logs.to_dataframe().timestamp
        _records.append(
            {
                "filename": f,
                "runtime": timestamps.max() - timestamps.min(),
            }
        )
    df = pd.DataFrame.from_records(_records).set_index("filename")
    df = df.assign(runtime_seconds=df.runtime.dt.total_seconds())
    return df

In [None]:
# Logged runtime of every loaded result.
df_runtime = get_runtime(results)
df_runtime

In [None]:
def get_elite(routing_results_dict: dict = None):
    """Tabulate the elite population members of every routing result.

    Parameters
    ----------
    routing_results_dict : dict
        Mapping from a key (used as the row label) to an object exposing
        ``seed_member`` and ``elite_population.members``.

    Returns
    -------
    pandas.DataFrame
        One row per elite member, indexed by ``filename`` (repeated for
        every member of the same result). Columns: ``n_elite`` (position
        of the member in the elite population), ``elite_length_meters``,
        ``elite_length_relative`` (length divided by the seed member's
        route length), ``elite_cost_absolute``, ``elite_cost_relative``
        (cost divided by the seed member's cost) and ``geometry`` (the
        route's ``line_string``).
    """
    rows = []
    for filename, result in tqdm(routing_results_dict.items()):
        seed = result.seed_member
        for position, member in enumerate(result.elite_population.members):
            route = member.route
            rows.append(
                {
                    "filename": filename,
                    "n_elite": position,
                    "elite_length_meters": route.length_meters,
                    # Relative values use the seed member as the baseline.
                    "elite_length_relative": route.length_meters
                    / seed.route.length_meters,
                    "elite_cost_absolute": member.cost,
                    "elite_cost_relative": member.cost / seed.cost,
                    "geometry": route.line_string,
                }
            )
    return pd.DataFrame.from_records(rows).set_index("filename")

In [None]:
# Elite members of all loaded results, one row per member.
df_elite = get_elite(results)
df_elite

In [None]:
# Attach the elite members, then the runtimes, to the parameter table.
# Both joins are inner joins on the shared "filename" index.
df_merged = pd.merge(
    pd.merge(
        df_params,
        df_elite,
        left_index=True,
        right_index=True,
        how="inner",
    ),
    df_runtime,
    left_index=True,
    right_index=True,
    how="inner",
)
df_merged

In [None]:
# Display the merged table.
df_merged

In [None]:
# Wrap the merged table in a GeoDataFrame and write it to
# results/results_prelim.pq.
# NOTE(review): neither a geometry column nor a CRS is passed explicitly —
# presumably the "geometry" column is picked up by name; confirm whether a
# CRS should be set before export.
gpd.GeoDataFrame(df_merged).to_parquet("results/results_prelim.pq")