# **Integrated Supply Chain Optimization**

**Combined Sources:**
* **Model Logic (Algo):** From File 2 (ACO Best-so-far, Swarm Ring, GA, etc.).
* **Fixed Data & Costs:** From File 1 (Detailed Costs, Non-linear CO2, Depots).
* **Visualization (HTML):** From File 1 (Complete interactive map, Plotly Dashboard).

In [None]:
# ---------------- 1. Setup & Drive ----------------

from google.colab import drive
import os
import datetime
import json
import numpy as np
import math
import random
import requests
from functools import lru_cache
from tqdm import tqdm
import folium
from folium import FeatureGroup
from folium.plugins import BeautifyIcon
import plotly.graph_objects as go
import csv

# RL Dependencies
!pip install stable-baselines3 gymnasium --quiet
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback

# Drive Mount
# Google Drive is attached under this directory; save_on_drive() writes below it.
drive_path = '/content/drive'
# Ask the OS whether something is actually mounted here. A plain existence
# check also succeeds for an empty leftover directory, in which case later
# "Drive" writes would silently land on the local disk instead.
if not os.path.ismount(drive_path):
    print("Drive not mounted. Mounting in progress...")
    drive.mount(drive_path)
else:
    print("Drive ready.")

def save_on_drive(content, fname, output_on_drive="Colab_Output"):
    """Write text to ``<drive_path>/MyDrive/<output_on_drive>/<fname>`` (UTF-8).

    The save is best-effort: any failure (including failure to create the
    target folder) is reported with ``print`` and never raised.

    Args:
        content: Text to write.
        fname: File name inside the output folder.
        output_on_drive: Folder name below ``MyDrive``; created if missing.
    """
    base_path = f'{drive_path}/MyDrive/{output_on_drive}'
    full_path = f'{base_path}/{fname}'
    try:
        # Folder creation lives inside the try so that an unmounted or
        # read-only Drive is reported the same way as a failed write.
        os.makedirs(base_path, exist_ok=True)
        with open(full_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except Exception as e:
        print(f"Error saving to Google Drive: {e}")
    else:
        print(f"File saved on Drive: {full_path}")

In [None]:
# ---------------- 2. Configuration & Fixed Data ----------------

# Seed both random sources used in this notebook (stdlib `random` and NumPy's
# global generator) so the stochastic solvers draw repeatable numbers.
random.seed(42)
np.random.seed(42)

# Local folder that receives the generated HTML files; created up front.
OUTPUT_DIR = "output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Coordinates & Depot ---
# All coordinates in this section are (lat, lon) tuples in decimal degrees.
REGION_CENTER = (48.8566, 2.3522)  # Paris
ENTERPRISE_COORD = (48.8566, 2.3522)  # (lat, lon)

# DEPOT_COORDS as (lat, lon), keyed by depot name.
DEPOT_COORDS = {
    "Creteil":   (48.7771, 2.4457),
    "Versailles":(48.8049, 2.1301),
    "Cergy":     (49.0361, 2.0765),
    "Meaux":     (48.9600, 2.8800),
    "Nanterre":  (48.8924, 2.2073),
    "Evry":      (48.6239, 2.4456)
}
# Resource type ids each depot can supply; a depot is only considered for a
# demand line whose resource type appears in its list.
DEPOT_RESOURCES = {
    "Creteil":   [1, 2],
    "Versailles":[2, 3],
    "Cergy":     [1, 3],
    "Meaux":     [3, 4],
    "Nanterre":  [1, 4],
    "Evry":      [2, 4]
}

NUM_CLIENTS = 50  # NOTE(review): not referenced in the visible code; scenario files are named "_20" - confirm
TRUCK_CAPACITY_UNITS = 100  # Max units per chunk and per packed truck load
MAX_DELIVERY_TIME = 300  # NOTE(review): unit not shown here (presumably minutes) - confirm
MAX_NODES = 20  # For RL environment (depot + max 18 clients + depot)

# Scenario input files (client positions and client demands).
SCENARIOS_FILE_POS = "client_positions_20.txt"
SCENARIOS_FILE_DEMANDS = "client_demands_20.txt"
N_SCENARIOS_LIMIT = 1 # None to run all

# Output HTML files, written under OUTPUT_DIR.
OUTPUT_HTML_MAP = os.path.join(OUTPUT_DIR, "supply_chain_compare_model.html")
OUTPUT_HTML_FIT = os.path.join(OUTPUT_DIR, "fitness_curves.html")

# --- Costs details ---
# Inputs of per_truck_cost(): one fixed charge per truck, per-km charges,
# an hourly driver charge and a load-dependent per-km charge.
COSTS = {
    "truck_fixed": 150.0,
    "fuel_per_km": 0.85,
    "driver_per_hour": 25.0,
    "maintenance_per_km": 0.15,
    # Variable cost depending on load (€/km per unit)
    "load_cost_per_km_per_unit": 0.01,
}

# --- CO2 Non-Linear ---
# Inputs of co2_emissions(): the per-km rate equals "max_per_km" for very
# short distances and decays towards "base_per_km" as distance grows;
# "scale_km" sets how quickly the decay happens.
CO2_PARAMS = {
    "base_per_km": 0.10,
    "max_per_km": 0.60,
    "scale_km": 14.0,
}

# Base URL of the OSRM routing endpoint; "/lon,lat;lon,lat" is appended per query.
OSRM_URL = "https://router.project-osrm.org/route/v1/driving"

# --- Algorithms Parameters ---
# Keyword arguments read by AntColonyTSP.__init__.
ACO_PARAMS = {
    "n_ants": 40,            # tours built per iteration
    "n_iterations": 150,
    "alpha": 1.2,            # exponent applied to pheromone
    "beta": 3.0,             # exponent applied to the 1/time heuristic
    "evaporation": 0.3,      # fraction of pheromone removed per iteration
    "Q": 250,                # deposit numerator (deposit = Q / tour cost)
    "elitist_weight": 10.0,  # extra deposit multiplier for the best tour
    "two_opt_iters": 120,    # max 2-opt passes per constructed tour
    "candidate_k": 20,       # size of each node's nearest-neighbour candidate list
    "local_tau": 0.85,       # strength of the local pheromone decay on used edges
    "noise_sigma": 0.01      # std-dev of Gaussian noise added to the heuristic
}

# Keyword arguments of genetic_algorithm().
GA_PARAMS = {
    "pop_size": 60,
    "generations": 145,
    "mutation_rate": 0.12,
    "elite_ratio": 0.3
}

# Keyword arguments of SwarmTSP.__init__.
SWARM_PARAMS = {
    "n_agents": 20,
    "n_iterations": 80,
    "two_opt_iters": 50,
    "topology": "ring"
}

In [None]:
# ---------------- 3. Utilities: Cost, OSRM, CO2 ----------------

def haversine_km(latlon_a, latlon_b):
    """Return the great-circle distance in kilometres between two points.

    Args:
        latlon_a: ``(lat, lon)`` of the first point, in decimal degrees.
        latlon_b: ``(lat, lon)`` of the second point, in decimal degrees.

    Returns:
        Distance in km on a sphere of radius 6371 km (haversine formula).
    """
    lat1, lon1 = latlon_a
    lat2, lon2 = latlon_b
    R = 6371.0  # sphere radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    s = (math.sin(dlat / 2) ** 2 +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dlon / 2) ** 2)
    # Floating-point rounding can push s marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise ValueError.
    s = min(1.0, max(0.0, s))
    return 2 * R * math.asin(math.sqrt(s))

def safe_requests_get(url, params=None, timeout=10, retries=1):
    """GET ``url`` and return the response, or ``None`` if every attempt fails.

    Up to ``retries + 1`` attempts are made. An attempt fails when the request
    raises or when the HTTP status is an error (``raise_for_status``). No
    exception escapes this function.
    """
    attempt = 0
    while attempt <= retries:
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
        except Exception:
            # Give up only once the final allowed attempt has failed.
            if attempt == retries:
                return None
        else:
            return response
        attempt += 1
    return None

# --- CO2 Logic from File 1 (Non-linear) ---
def co2_emissions(distance_km: float) -> float:
    """Return the CO2 emitted over ``distance_km`` with a distance-dependent rate.

    The per-km rate equals ``CO2_PARAMS["max_per_km"]`` for very short
    distances and decays logarithmically towards ``CO2_PARAMS["base_per_km"]``
    as the distance grows (``CO2_PARAMS["scale_km"]`` sets the decay scale).
    Negative distances are treated as zero, and zero distance emits nothing.
    """
    km = max(distance_km, 0.0)
    if km == 0:
        return 0.0

    floor_rate = CO2_PARAMS["base_per_km"]
    ceiling_rate = CO2_PARAMS["max_per_km"]

    # Weight in (0, 1]: 1 at distance 0, shrinking as the distance increases.
    damping = 1.0 / (1.0 + math.log(1.0 + km / CO2_PARAMS["scale_km"]))
    per_km = floor_rate + (ceiling_rate - floor_rate) * damping
    return per_km * km

# --- Cost Logic ---
def per_truck_cost(distance_km, time_min, units_total):
    """Return the total cost of one truck trip.

    The total is the fixed truck charge, plus distance charges (fuel and
    maintenance), plus a load charge proportional to distance x units, plus
    driver time billed per hour. Negative inputs are clamped to zero.

    Args:
        distance_km: Distance driven, in km.
        time_min: Driving time, in minutes.
        units_total: Number of units carried.
    """
    km = max(distance_km, 0.0)
    minutes = max(time_min, 0.0)
    units = max(units_total, 0)

    per_km_rate = COSTS["fuel_per_km"] + COSTS["maintenance_per_km"]

    # Accumulate in the order fixed -> distance -> load -> time.
    total = COSTS["truck_fixed"]
    total = total + km * per_km_rate
    total = total + km * units * COSTS.get("load_cost_per_km_per_unit", 0.0)
    total = total + (minutes / 60.0) * COSTS["driver_per_hour"]
    return total

# --- OSRM ---
@lru_cache(maxsize=200000)
def _osrm_route_by_rounded_coords(lon1, lat1, lon2, lat2):
    """Memoised route lookup; the caller must pass already-rounded coordinates."""
    return _osrm_route_uncached(lon1, lat1, lon2, lat2)


def osrm_route_cached(lon1, lat1, lon2, lat2):
    """Return ``(duration_min, distance_km, coords)`` for a route, memoised.

    Coordinates are rounded to 5 decimals *before* the cache lookup, so
    queries that differ only by float noise share one cache entry. (Rounding
    inside the cached function would be too late: the cache key would still
    be the unrounded arguments.)

    The returned ``coords`` list is the cached object itself and is shared by
    every caller that hits the same entry - treat it as read-only.

    Args:
        lon1, lat1: Origin longitude/latitude in decimal degrees.
        lon2, lat2: Destination longitude/latitude in decimal degrees.
    """
    key = tuple(round(float(v), 5) for v in (lon1, lat1, lon2, lat2))
    return _osrm_route_by_rounded_coords(*key)


# Keep the lru_cache introspection helpers reachable through the public name.
osrm_route_cached.cache_info = _osrm_route_by_rounded_coords.cache_info
osrm_route_cached.cache_clear = _osrm_route_by_rounded_coords.cache_clear

def _osrm_route_uncached(lon1, lat1, lon2, lat2):
    """Query OSRM for one driving route, with a straight-line fallback.

    Returns:
        ``(duration_min, distance_km, coords)`` where ``coords`` is a list of
        ``(lat, lon)`` points. When the request fails, the payload has no
        route, or the payload cannot be read, the result is the haversine
        distance, a duration computed at 40 km/h and a two-point line.
    """
    response = safe_requests_get(
        f"{OSRM_URL}/{lon1},{lat1};{lon2},{lat2}",
        params={"overview": "full", "geometries": "geojson", "alternatives": "false"},
        timeout=8,
        retries=0,
    )
    if response is not None:
        try:
            found = response.json().get("routes")
            if found:
                first = found[0]
                minutes = first["duration"] / 60.0   # OSRM value divided by 60
                km = first["distance"] / 1000.0      # OSRM value divided by 1000
                # GeoJSON stores (lon, lat); the rest of the file uses (lat, lon).
                polyline = [(lat, lon) for lon, lat in first["geometry"]["coordinates"]]
                return minutes, km, polyline
        except Exception:
            # Any malformed payload falls through to the straight-line estimate.
            pass

    # fallback
    start, end = (lat1, lon1), (lat2, lon2)
    km = haversine_km(start, end)
    minutes = (km / 40.0) * 60.0
    return minutes, km, [start, end]

def osrm_route_from_latlon(a_latlon, b_latlon):
    """Route between two ``(lat, lon)`` points via the cached OSRM lookup.

    Only adapts the argument order: the cached lookup takes lon before lat.
    """
    (src_lat, src_lon), (dst_lat, dst_lon) = a_latlon, b_latlon
    return osrm_route_cached(src_lon, src_lat, dst_lon, dst_lat)

# ---------------- Geometry Helpers ----------------
def build_route_geometry_from_latlon_seq(seq_latlon):
    """Stitch the route geometries of consecutive waypoints into one polyline.

    Args:
        seq_latlon: Sequence of ``(lat, lon)`` waypoints, in visiting order.

    Returns:
        A list of ``(lat, lon)`` points. When a leg starts exactly where the
        polyline currently ends, that duplicated joint point is dropped.
        Fewer than two waypoints yield an empty list.
    """
    stitched = []
    for idx in range(len(seq_latlon) - 1):
        leg = osrm_route_from_latlon(seq_latlon[idx], seq_latlon[idx + 1])[2]
        if not leg:
            continue
        shares_joint = bool(stitched) and stitched[-1] == leg[0]
        stitched.extend(leg[1:] if shares_joint else leg)
    return stitched

In [None]:
# ---------------- 4. Models Logic ----------------
# Greedy + 2-opt + ACO + GA + Swarm + RL

# --- Shared Routing (Greedy + 2-opt) ---
def greedy_route(n_nodes, time_matrix):
    """Build a nearest-neighbour tour that starts and ends at node 0.

    From the current node, the unvisited node with the smallest
    ``time_matrix[current][j]`` is visited next until none are left.

    Returns:
        The tour as a list of node indices, e.g. ``[0, 2, 1, 3, 0]``.
    """
    pending = set(range(1, n_nodes))
    here = 0
    order = [here]
    while pending:
        row = time_matrix[here]
        here = min(pending, key=lambda node: row[node])
        pending.remove(here)
        order.append(here)
    order.append(0)
    return order

def two_opt(tour, time_matrix, max_iter=120):
    """Improve a closed tour with 2-opt segment reversals.

    Each pass tries every reversal of an interior segment (the first and last
    positions never move) and adopts any reversal that shortens the tour by
    more than 1e-9, continuing the same pass from the improved tour. The
    search stops after ``max_iter`` passes or after a pass with no gain.

    Every candidate is re-costed in full, so asymmetric matrices are handled.
    The input list is not modified.

    Returns:
        The improved tour as a new list.
    """
    def _cost(seq):
        return sum(time_matrix[a][b] for a, b in zip(seq, seq[1:]))

    incumbent = tour[:]
    incumbent_cost = _cost(incumbent)

    for _ in range(max_iter):
        found_better = False
        for lo in range(1, len(incumbent) - 2):
            for hi in range(lo + 1, len(incumbent) - 1):
                head = incumbent[:lo]
                flipped = incumbent[lo:hi + 1][::-1]
                tail = incumbent[hi + 1:]
                trial = head + flipped + tail
                trial_cost = _cost(trial)
                if trial_cost + 1e-9 < incumbent_cost:
                    incumbent, incumbent_cost = trial, trial_cost
                    found_better = True
        if not found_better:
            break
    return incumbent


# --- RL Environment (Gymnasium) ---
class FixedTruckRouteEnv(gym.Env):
    """
    RL Environment with fixed observation size based on MAX_NODES.
    - time_matrix/dist_matrix are of size MAX_NODES x MAX_NODES
    - nodes beyond n_nodes_real are considered "dummy" and never selected

    Observation: one-hot of the current node, visited mask, then three
    normalised scalars (total distance, clients served, units delivered).
    Action: index of the next node to drive to (0 .. MAX_NODES - 1).
    """
    metadata = {"render_modes": []}

    def __init__(self, time_matrix, dist_matrix, units_per_node, n_nodes_real):
        """Store zero-padded copies of the inputs and reset the episode.

        Args:
            time_matrix: Square travel-time matrix (only stored, not read by step()).
            dist_matrix: Square distance matrix; drives the rewards in step().
            units_per_node: Units delivered when each node is first visited.
            n_nodes_real: Number of real nodes; capped at MAX_NODES.
        """
        super().__init__()
        self.max_nodes = MAX_NODES
        self.nnodes_real = min(n_nodes_real, self.max_nodes)

        # Pad time/dist to max_nodes x max_nodes
        self.timematrix = np.zeros((self.max_nodes, self.max_nodes), dtype=np.float32)
        self.distmatrix = np.zeros((self.max_nodes, self.max_nodes), dtype=np.float32)

        T = np.array(time_matrix, dtype=np.float32)
        D = np.array(dist_matrix, dtype=np.float32)
        # Copy only the top-left block that fits; anything larger is cut off.
        n = min(T.shape[0], self.max_nodes)
        self.timematrix[:n, :n] = T[:n, :n]
        self.distmatrix[:n, :n] = D[:n, :n]

        # units_per_node padded
        upn = np.zeros(self.max_nodes, dtype=np.float32)
        upn[:min(len(units_per_node), self.max_nodes)] = units_per_node[:self.max_nodes]
        self.unitspernode = upn

        # State: one-hot current node + visited mask + (dist_norm, clients_norm, units_norm)
        obs_dim = self.max_nodes + self.max_nodes + 3
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(obs_dim,), dtype=np.float32)
        self.action_space = spaces.Discrete(self.max_nodes)

        # Initialise the episode state so the instance is usable right away.
        self.reset()

    def _build_obs(self):
        """Return the current observation vector (length 2 * max_nodes + 3)."""
        onehot = np.zeros(self.max_nodes, dtype=np.float32)
        onehot[self.currentnode] = 1.0
        visited = self.visited.astype(np.float32)
        # Each scalar is capped at 1.0 to stay inside the observation bounds.
        totaldist_norm = min(self.totaldistancekm / 100.0, 1.0)
        clients_norm = min(self.clientsserved / max(1, self.nnodes_real - 2), 1.0)
        units_norm = min(self.unitsaccum / 200.0, 1.0)
        extra = np.array([totaldist_norm, clients_norm, units_norm], dtype=np.float32)
        return np.concatenate([onehot, visited, extra])

    def reset(self, seed=None, options=None):
        """Start a new episode at node 0 and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.currentnode = 0
        self.visited = np.zeros(self.max_nodes, dtype=bool)
        # Node 0 counts as visited from the start.
        self.visited[0] = True
        self.totaldistancekm = 0.0
        self.clientsserved = 0
        self.unitsaccum = 0.0
        self.steps = 0
        obs = self._build_obs()
        return obs, {}

    def step(self, action):
        """Apply one move and return ``(obs, reward, done, truncated, info)``.

        Rewards:
        - action index >= number of real nodes: -5.0 and the episode is truncated;
        - staying on the current node: -1.0;
        - first visit to a node: ``30 * units - 1.5 * dist``;
        - moving to an already visited node (including node 0): -0.5.

        The episode ends once ``2 * max_nodes`` steps have been taken or
        ``nnodes_real - 2`` nodes have been served; a final bonus is then added.
        """
        self.steps += 1
        done = False
        truncated = False
        reward = 0.0

        # Invalid actions beyond nnodes_real
        # (the step counter has already advanced; no other state changes, info is empty)
        if action >= self.nnodes_real:
            return self._build_obs(), -5.0, False, True, {}

        nextnode = int(action)

        if nextnode == self.currentnode:
            reward = -1.0
        else:
            dist = float(self.distmatrix[self.currentnode, nextnode])
            # Non-finite distances are counted as zero.
            if not np.isfinite(dist):
                dist = 0.0

            self.totaldistancekm += dist
            self.currentnode = nextnode

            if not self.visited[nextnode]:
                self.visited[nextnode] = True

                # Node 0 is marked visited in reset(), so within this branch
                # nextnode is never 0 and the else below is not reached.
                if nextnode != 0:
                    self.clientsserved += 1
                    units = float(self.unitspernode[nextnode])
                    self.unitsaccum += units
                    reward = 30.0 * units - 1.5 * dist
                else:
                    reward = -0.5
            else:
                reward = -0.5

        # Termination: step budget exhausted or enough nodes served.
        if (
            self.steps >= self.max_nodes * 2
            or self.clientsserved >= (self.nnodes_real - 2)
        ):
            done = True
            finalbonus = (
                50.0 * self.clientsserved
                + 10.0 * self.unitsaccum
                - 2.0 * self.totaldistancekm
            )
            reward += finalbonus

        obs = self._build_obs()
        info = {
            "clients_served": int(self.clientsserved),
            "units_total": float(self.unitsaccum),
            "total_distance_km": float(self.totaldistancekm),
        }

        return obs, float(reward), done, truncated, info


# --- ACO Optimized  ---
class AntColonyTSP:
    """Ant-colony search for a closed tour over all nodes, starting at node 0.

    Tours are costed with the travel-time matrix. Each constructed tour is
    refined with 2-opt, and the search is seeded with a greedy + 2-opt tour.
    """

    def __init__(self, time_matrix, dist_matrix, **params):
        """Read parameters, initialise pheromone, heuristic and candidate lists.

        Args:
            time_matrix: Square travel-time matrix; defines the tour cost.
            dist_matrix: Square distance matrix (stored; not read by this class).
            **params: Optional overrides; see the ``params.get`` calls below
                for the recognised keys and their defaults.
        """
        self.time = np.array(time_matrix, dtype=float)
        self.dist = np.array(dist_matrix, dtype=float)
        self.n = len(self.time)

        self.alpha = params.get("alpha", 1.2)
        self.beta = params.get("beta", 3.0)
        self.evaporation = params.get("evaporation", 0.3)
        self.Q = params.get("Q", 250)
        self.n_ants = params.get("n_ants", 40)
        self.n_iterations = params.get("n_iterations", 150)
        self.elitist_weight = params.get("elitist_weight", 10.0)
        self.two_opt_iters = params.get("two_opt_iters", 120)
        # A node can have at most n - 1 candidates.
        self.k_candidates = min(params.get("candidate_k", 20), max(0, self.n-1))
        self.local_tau = params.get("local_tau", 0.85)
        self.noise_sigma = params.get("noise_sigma", 0.01)

        self.pheromone = np.ones((self.n, self.n), dtype=np.float32)

        # Heuristic desirability: 1 / time, or 0 for self-loops and unusable times.
        base_inv = np.zeros((self.n, self.n), dtype=np.float32)
        for i in range(self.n):
            for j in range(self.n):
                t = self.time[i][j]
                base_inv[i][j] = 0.0 if (i == j or not math.isfinite(t) or t <= 0) else (1.0 / t)

        # Add Gaussian noise to the heuristic (drawn from NumPy's global RNG),
        # then clip so no entry is negative.
        noise = np.random.normal(0.0, self.noise_sigma, size=base_inv.shape).astype(np.float32)
        self.inv_time = np.clip(base_inv + noise, 0.0, None)

        # Candidate list per node: its k nearest other nodes by travel time.
        self.candidates = []
        for i in range(self.n):
            order = np.argsort(self.time[i])
            cand = [int(j) for j in order if j != i][:self.k_candidates]
            self.candidates.append(cand)

        self.best_path = None
        self.best_distance = float("inf")
        self.fitness_curve_raw = []
        self.fitness_curve_best = []
        self.best_paths_per_iter = []

    def _select_next(self, current, unvisited):
        """Pick the next node at random, weighted by pheromone^alpha * heuristic^beta."""
        # Prefer the candidate list; fall back to every unvisited node.
        cand = [j for j in self.candidates[current] if j in unvisited]
        if not cand:
            cand = list(unvisited)

        tau = self.pheromone[current][cand]
        eta = self.inv_time[current][cand]
        weights = (tau ** self.alpha) * (eta ** self.beta)
        s = weights.sum()

        # Degenerate weights (all zero or non-finite): choose uniformly.
        if s <= 0 or not np.isfinite(s):
            probs = np.ones(len(cand)) / len(cand)
        else:
            probs = weights / s

        return int(np.random.choice(cand, p=probs))

    def _construct_solution(self):
        """Build one tour from node 0, applying local pheromone decay on the way."""
        path = [0]
        unvisited = set(range(1, self.n))
        while unvisited:
            current = path[-1]
            nxt = self._select_next(current, unvisited)
            # Local update: pull the used edge's pheromone towards 1e-6
            # (in both directions) so later ants are less drawn to it.
            self.pheromone[current][nxt] = (1 - self.local_tau) * self.pheromone[current][nxt] + self.local_tau * 1e-6
            self.pheromone[nxt][current] = self.pheromone[current][nxt]
            path.append(nxt)
            unvisited.remove(nxt)
        path.append(0)
        return path

    def _path_distance(self, path):
        """Return the tour cost; despite the name it sums travel *times*."""
        return float(sum(self.time[path[i]][path[i+1]] for i in range(len(path)-1)))

    def _update_pheromone_global(self, paths, dists):
        """Evaporate, then deposit pheromone for every tour plus an elitist bonus."""
        self.pheromone *= (1 - self.evaporation)
        for path, dist in zip(paths, dists):
            # Tours with unusable cost deposit nothing.
            if dist <= 0 or not np.isfinite(dist):
                continue
            deposit = self.Q / dist
            for i in range(len(path)-1):
                a, b = path[i], path[i+1]
                self.pheromone[a][b] += deposit
                self.pheromone[b][a] += deposit

        # Elitist reinforcement of the best tour known so far.
        if self.best_path is not None and self.best_distance > 0:
            elite = self.elitist_weight * (self.Q / self.best_distance)
            for i in range(len(self.best_path)-1):
                a, b = self.best_path[i], self.best_path[i+1]
                self.pheromone[a][b] += elite
                self.pheromone[b][a] += elite

    def solve(self):
        """Run the colony and return ``(best_path, best_cost)``.

        Also fills ``fitness_curve_raw``, ``fitness_curve_best`` and
        ``best_paths_per_iter`` with one entry per iteration.
        """
        # Seed the incumbent with a greedy tour improved by 2-opt.
        nn_path = greedy_route(self.n, self.time)
        nn_path = two_opt(nn_path, self.time, max_iter=self.two_opt_iters)
        best_path = nn_path
        best_dist = self._path_distance(best_path)

        self.fitness_curve_raw = []
        self.fitness_curve_best = []
        self.best_paths_per_iter = []
        best_so_far = best_dist

        for _ in range(self.n_iterations):
            paths, dists = [], []
            for _a in range(self.n_ants):
                path = self._construct_solution()
                path_opt = two_opt(path, self.time, max_iter=self.two_opt_iters)
                dist = self._path_distance(path_opt)
                paths.append(path_opt)
                dists.append(dist)
                if dist + 1e-9 < best_dist:
                    best_path, best_dist = path_opt, dist

            self.best_path, self.best_distance = best_path, best_dist
            # The "raw" curve records the best cost so far, not this
            # iteration's own best, so it matches fitness_curve_best.
            current_best = best_dist
            self.fitness_curve_raw.append(current_best)
            best_so_far = min(best_so_far, current_best)
            self.fitness_curve_best.append(best_so_far)
            self.best_paths_per_iter.append(list(best_path))

            self._update_pheromone_global(paths, dists)

        # NOTE(review): with n_iterations == 0 the attributes are never
        # assigned, so this returns (None, inf) despite the greedy seed.
        return self.best_path, self.best_distance


# --- GA  ---
def ga_evaluate_tour(tour, time_matrix):
    """Return the summed ``time_matrix`` cost of consecutive legs of ``tour``."""
    total = 0
    for origin, destination in zip(tour, tour[1:]):
        total = total + time_matrix[origin][destination]
    return total

def ga_initial_population(pop_size, n_nodes):
    """Return ``pop_size`` random tours over nodes ``1 .. n_nodes - 1``.

    Every tour starts and ends at node 0; the nodes in between are a random
    permutation drawn with the stdlib ``random`` module.
    """
    def _random_individual():
        inner = list(range(1, n_nodes))
        random.shuffle(inner)
        return [0] + inner + [0]

    return [_random_individual() for _ in range(pop_size)]

def ga_ordered_crossover(parent1, parent2):
    """Ordered crossover (OX) of two depot-anchored tours.

    A random interior slice of ``parent1`` is copied to the child at the same
    positions. The remaining interior positions are filled with the genes of
    ``parent2`` that are still missing, in ``parent2`` order, starting right
    after the slice and wrapping around to position 1.

    Args:
        parent1: Tour ``[0, ..., 0]``; donor of the preserved slice.
        parent2: Tour ``[0, ..., 0]`` over the same nodes; donor of the rest.

    Returns:
        A new tour of the same length that starts and ends with 0. Tours with
        fewer than two interior nodes cannot be recombined, so a copy of
        ``parent1`` is returned for them.
    """
    size = len(parent1)
    interior = range(1, size - 1)
    if len(interior) < 2:
        # random.sample needs two distinct cut points; nothing to recombine.
        return list(parent1)

    start, end = sorted(random.sample(interior, 2))
    child = [None] * size
    child[start:end] = parent1[start:end]

    # Track placed genes in a set: O(1) membership instead of scanning child.
    placed = set(parent1[start:end])
    pos = end
    for gene in parent2[1:-1]:
        if gene in placed:
            continue
        if pos == size - 1:
            # Ran past the last interior slot: wrap to the first one.
            pos = 1
        child[pos] = gene
        placed.add(gene)
        pos += 1
    child[0], child[-1] = 0, 0
    return child

def ga_mutate(tour, mutation_rate=0.12):
    """Mutate a depot-anchored tour in place and return it.

    Two operators are applied, neither of which touches the first or last
    position:
    - each interior position is swapped with a random interior position with
      probability ``mutation_rate``;
    - with probability 0.02 a random interior segment is shuffled.

    Args:
        tour: Tour ``[0, ..., 0]``; modified in place.
        mutation_rate: Per-position swap probability.

    Returns:
        The same list object as ``tour``.
    """
    last = len(tour) - 2  # index of the last interior position

    # Cover every interior index, including the last one, as swap origin
    # (range() excludes its upper bound, randint() includes it).
    for i in range(1, last + 1):
        if random.random() < mutation_rate:
            j = random.randint(1, last)
            tour[i], tour[j] = tour[j], tour[i]

    # The segment shuffle needs two distinct interior indices; skip it for
    # tiny tours instead of letting random.sample raise ValueError.
    if last >= 2 and random.random() < 0.02:
        i, j = sorted(random.sample(range(1, last + 1), 2))
        subseq = tour[i:j+1]
        random.shuffle(subseq)
        tour[i:j+1] = subseq
    return tour

def genetic_algorithm(time_matrix, dist_matrix,
                      pop_size=60, generations=145,
                      mutation_rate=0.12, elite_ratio=0.3):
    """Search for a short closed tour with an elitist genetic algorithm.

    Args:
        time_matrix: Square travel-time matrix; defines the tour cost.
        dist_matrix: Accepted for signature parity with the other solvers;
            not read here.
        pop_size: Number of tours per generation.
        generations: Number of generations to run.
        mutation_rate: Per-position swap probability passed to ``ga_mutate``.
        elite_ratio: Share of the population used as the parent pool
            (at least two tours).

    Returns:
        ``(best_tour, best_cost, raw_curve, best_curve, best_tours_per_gen)``:
        the raw curve holds each generation's best cost, the best curve its
        running minimum, and the last list each generation's best tour.
    """
    node_count = len(time_matrix)
    population = ga_initial_population(pop_size, node_count)
    n_elite = max(2, int(pop_size * elite_ratio))

    champion, champion_cost = None, float("inf")
    raw_curve, best_curve, gen_winners = [], [], []
    running_min = float("inf")

    for _ in range(generations):
        scores = [ga_evaluate_tour(individual, time_matrix) for individual in population]
        # Stable sort of indices by score: ties keep population order.
        order = sorted(range(len(population)), key=scores.__getitem__)
        gen_cost = scores[order[0]]
        gen_tour = population[order[0]]

        if gen_cost < champion_cost:
            champion_cost, champion = gen_cost, gen_tour

        raw_curve.append(gen_cost)
        running_min = min(running_min, gen_cost)
        best_curve.append(running_min)
        gen_winners.append(list(gen_tour))

        # The top tours are the parent pool; half of them survive unchanged.
        elites = [population[k] for k in order[:n_elite]]
        offspring = elites[:n_elite // 2]

        while len(offspring) < pop_size:
            mother, father = random.sample(elites, 2)
            newborn = ga_ordered_crossover(mother, father)
            offspring.append(ga_mutate(newborn, mutation_rate))

        population = offspring[:pop_size]

    return champion, champion_cost, raw_curve, best_curve, gen_winners


# --- Swarm  ---
def topology_ring(n_agents):
    """Return, for each agent, the indices of its two neighbours on a ring.

    Entry ``i`` is ``[left, right]`` with wrap-around at both ends.
    """
    ring = []
    for agent in range(n_agents):
        left = (agent - 1) % n_agents
        right = (agent + 1) % n_agents
        ring.append([left, right])
    return ring

class SwarmTSP:
    """Swarm-style tour search with ring neighbourhoods.

    Each agent holds a tour. On every iteration an agent pulls its tour
    towards the best personal-best tour among itself and its ring
    neighbours, then refines the result with 2-opt.
    """

    def __init__(self, time_matrix, n_agents=20, n_iterations=80,
                 topology="ring", two_opt_iters=50):
        """Create the agents with random tours and record the initial bests.

        Args:
            time_matrix: Square travel-time matrix; defines the tour cost.
            n_agents: Number of agents.
            n_iterations: Number of iterations run by ``solve``.
            topology: Neighbourhood layout; every value currently yields a ring.
            two_opt_iters: Max 2-opt passes per candidate tour.
        """

        self.time = np.array(time_matrix, dtype=float)
        self.n_nodes = self.time.shape[0]
        self.n_agents = n_agents
        self.n_iterations = n_iterations
        self.two_opt_iters = two_opt_iters

        # Current tour and cost of every agent.
        self.tours = []
        self.costs = []
        for _ in range(n_agents):
            tour = self._random_tour()
            cost = self._route_len(tour)
            self.tours.append(tour)
            self.costs.append(cost)

        # Personal bests start as copies of the initial tours.
        self.pbest_tours = [t[:] for t in self.tours]
        self.pbest_costs = self.costs[:]

        # Global best = best personal best.
        self.gbest_idx = int(np.argmin(self.pbest_costs))
        self.gbest_tour = self.pbest_tours[self.gbest_idx][:]
        self.gbest_cost = self.pbest_costs[self.gbest_idx]

        # Both branches build the same ring; no other topology is implemented.
        if topology == "ring":
            self.neighbors = topology_ring(n_agents)
        else:
            self.neighbors = topology_ring(n_agents)

        self.history_best_cost_raw = []
        self.history_best_cost_best = []
        self.best_tours_per_iter = []

    def _random_tour(self):
        """Return a random tour ``[0, <shuffled nodes 1..n-1>, 0]``."""
        nodes = list(range(1, self.n_nodes))
        random.shuffle(nodes)
        return [0] + nodes + [0]

    def _route_len(self, tour):
        """Return the summed travel time along ``tour``."""
        return float(sum(self.time[tour[i]][tour[i+1]] for i in range(len(tour)-1)))

    def _neighborhood_best(self, i):
        """Return the best personal-best ``(tour, cost)`` among agent ``i`` and its neighbours."""
        indices = [i] + self.neighbors[i]
        best_idx = min(indices, key=lambda k: self.pbest_costs[k])
        return self.pbest_tours[best_idx], self.pbest_costs[best_idx]

    def _combine_tours(self, current, guide, influence_prob=0.4):
        """Return a copy of ``current`` nudged towards the ordering of ``guide``.

        For each interior position, with probability ``influence_prob``, the
        node that follows the current node in ``guide`` is swapped into the
        next position of the new tour.
        """
        n = len(current)
        # Node 0 occurs twice in guide; the dict keeps its last index. Only
        # interior (non-zero) nodes are looked up below.
        pos_in_guide = {node: idx for idx, node in enumerate(guide)}
        new_tour =current[:]

        for i in range(1, n-1):
            if random.random() < influence_prob:
                node = new_tour[i]
                gi =pos_in_guide[node]
                if gi + 1 < n:
                    desired_succ = guide[gi+1]
                    # Skips the case where the successor in guide is node 0.
                    if desired_succ in new_tour[1:-1]:
                        j = new_tour.index(desired_succ)
                        # Never swap into the closing position.
                        if i+1 <n-1:
                            new_tour[i+1], new_tour[j] = new_tour[j], new_tour[i+1]

        # Safety net: the swaps above only touch interior positions, so the
        # endpoints should still be 0 when `current` was a valid tour.
        if new_tour[0] != 0:
            new_tour = [0] + [x for x in new_tour if x != 0] + [0]
        if new_tour[-1] != 0:
            new_tour[-1] = 0

        return new_tour

    def solve(self):
        """Run the swarm and return the best tour with its history.

        Returns:
            ``(gbest_tour, gbest_cost, history_raw, history_best,
            best_tours_per_iter)`` with one history entry per iteration.
        """
        best_so_far = self.gbest_cost
        self.history_best_cost_raw = []
        self.history_best_cost_best = []
        self.best_tours_per_iter = []

        for _it in range(self.n_iterations):
            for i in range(self.n_agents):
                nb_tour, _ = self._neighborhood_best(i)
                candidate = self._combine_tours(self.tours[i], nb_tour)
                candidate = two_opt(candidate, self.time, max_iter=self.two_opt_iters)
                candidate_cost = self._route_len(candidate)

                # The agent always moves to the candidate, even if it is worse.
                self.tours[i] = candidate
                self.costs[i] = candidate_cost

                if candidate_cost < self.pbest_costs[i]:
                    self.pbest_costs[i] = candidate_cost
                    # Same list object as tours[i]; later steps copy it
                    # before modifying.
                    self.pbest_tours[i]= candidate

            best_idx = int(np.argmin(self.pbest_costs))
            if self.pbest_costs[best_idx] < self.gbest_cost:
                self.gbest_cost = self.pbest_costs[best_idx]
                self.gbest_tour = self.pbest_tours[best_idx][:]

            # The "raw" history records the global best cost, so it matches
            # the best-so-far history.
            current_best =self.gbest_cost
            self.history_best_cost_raw.append(current_best)
            best_so_far = min(best_so_far, current_best)
            self.history_best_cost_best.append(best_so_far)
            self.best_tours_per_iter.append(self.gbest_tour[:])

        return (
            self.gbest_tour,
            self.gbest_cost,
            self.history_best_cost_raw,
            self.history_best_cost_best,
            self.best_tours_per_iter
        )


# --- RL Solver (requires trained model) ---
def rl_solve_route(time_matrix, dist_matrix, units_total, rl_model):
    """
    Use a trained PPO model to solve the routing problem.
    Returns a tour as a list of node indices.

    Args:
        time_matrix: Square travel-time matrix (row-sliceable).
        dist_matrix: Square distance matrix (row-sliceable).
        units_total: Accepted for signature parity; not read here (every
            interior node is given a demand of 1.0 for the rollout).
        rl_model: Object with ``predict(obs, deterministic=True)`` returning
            ``(action, state)``.

    Returns:
        The sequence of nodes the policy drove through, starting at node 0
        and always ending at node 0. Only actual moves are recorded.

    NOTE(review): the policy is not forced to visit every node, so the tour
    can be partial; only the first MAX_NODES nodes are considered at all.
    """
    n_nodes_real = len(time_matrix)
    n_nodes_real = min(n_nodes_real, MAX_NODES)

    # Limit to MAX_NODES
    T = [row[:n_nodes_real] for row in time_matrix[:n_nodes_real]]
    D = [row[:n_nodes_real] for row in dist_matrix[:n_nodes_real]]

    # First and last node carry no demand; every node in between one unit.
    units_per_node = [0.0] + [1.0] * (n_nodes_real - 2) + [0.0]

    env = FixedTruckRouteEnv(T, D, units_per_node, n_nodes_real)
    obs, info = env.reset()
    done = False
    truncated = False
    tour = [env.currentnode]

    while not (done or truncated):
        action, _ = rl_model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(int(action))
        # A rejected action or a "stay here" action leaves the truck where it
        # was. Recording the node again would add zero-length legs, and a
        # deterministic policy can repeat such an action until the step cap.
        if env.currentnode != tour[-1]:
            tour.append(env.currentnode)
        # Stop as soon as the truck is back at the depot after serving someone.
        if env.currentnode == 0 and env.clientsserved > 0:
            break

    if tour[-1] != 0:
        tour.append(0)

    return tour


In [None]:
# ---------------- 4b. RL Training (PPO) ----------------
# This cell trains the PPO model for the routing problem

print("Starting RL Training with PPO...")

# Create a sample training environment
sample_n_nodes = 10  # depot + 8 clients + depot
sample_time_matrix = [[0.0] * sample_n_nodes for _ in range(sample_n_nodes)]
sample_dist_matrix = [[0.0] * sample_n_nodes for _ in range(sample_n_nodes)]

# Fill with random values for training
# Each ordered pair (i, j) gets its own draw, so the matrices are not
# symmetric; the time is derived from the distance at 40 km/h.
# Note: i, j, dist and time become notebook-level names.
for i in range(sample_n_nodes):
    for j in range(sample_n_nodes):
        if i != j:
            dist = random.uniform(5.0, 50.0)
            time = (dist / 40.0) * 60.0
            sample_time_matrix[i][j] = time
            sample_dist_matrix[i][j] = dist

# First and last node carry no demand; the others a random 5-20 units.
sample_units_per_node = [0.0] + [random.uniform(5, 20) for _ in range(sample_n_nodes - 2)] + [0.0]

# Create training environment
# A single fixed synthetic instance: the model is trained on this one
# 10-node problem only.
train_env = FixedTruckRouteEnv(
    sample_time_matrix,
    sample_dist_matrix,
    sample_units_per_node,
    sample_n_nodes
)

print(f"   Training environment created: {sample_n_nodes} nodes")
print(f"   Observation space: {train_env.observation_space.shape}")
print(f"   Action space: {train_env.action_space.n}")

# Training callback to monitor progress
class TrainingMetricsCallback(BaseCallback):
    """Collect the reward and length of every finished training episode.

    Attributes are plain lists so they can be inspected after ``learn()``:
    ``episode_rewards`` and ``episode_lengths`` hold one entry per episode.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        """Record episodes that ended on this step; always continue training."""
        # The Monitor wrapper puts an "episode" dict ({'r': reward, 'l': length})
        # into the step info exactly once, on the step that ends an episode.
        # Reading it here counts each episode a single time; copying the whole
        # rolling ep_info_buffer on every step would re-append the same
        # episodes over and over.
        for info in self.locals.get("infos", ()):
            episode = info.get("episode")
            if not episode:
                continue
            if 'r' in episode:
                self.episode_rewards.append(episode['r'])
            if 'l' in episode:
                self.episode_lengths.append(episode['l'])
        return True

    def _on_rollout_end(self) -> None:
        """Print the mean reward over the most recent (up to 100) episodes."""
        if len(self.episode_rewards) > 0:
            mean_reward = np.mean(self.episode_rewards[-100:])
            print(f"   Mean reward (last 100 episodes): {mean_reward:.2f}")

callback = TrainingMetricsCallback(verbose=1)

# Create and train PPO model
print("\n Training PPO model...")
# MLP policy on CPU; the remaining PPO settings keep the library defaults.
rl_model = PPO(
    "MlpPolicy",
    train_env,
    verbose=1,
    device="cpu",
    learning_rate=3e-4,
    n_steps=1024,   # environment steps collected per rollout
    batch_size=64,
)

# Train the model
rl_model.learn(total_timesteps=50_000, callback=callback)

# Summary based on what the callback collected during training.
print("\n  RL Training completed!")
print(f"   Total episodes: {len(callback.episode_rewards)}")
if len(callback.episode_rewards) > 0:
    print(f"   Final mean reward: {np.mean(callback.episode_rewards[-100:]):.2f}")

# Store model globally for use in build_routes_for_model
# (alias only: the model stays in memory and is not written to disk here)
rlmodel = rl_model
print("\n  Model saved to variable 'rlmodel' - ready to use in routing!")


In [None]:
# ---------------- 5. Build Routes Pipeline ----------------


def parse_demands_line(line):
    """Parse one scenario line of client demands.

    Format: clients are separated by ``;``; each client is a comma-separated
    list of ``resource:quantity`` pairs, e.g. ``"1:5,2:3;4:7"``.

    Args:
        line: The text line to parse.

    Returns:
        A list with one ``{resource_type: quantity}`` dict per ``;``-separated
        segment. Blank segments yield an empty dict, so positions in the list
        keep matching the segments of the line. Items that are not exactly
        ``int:int`` are skipped.
    """
    clients_demands = []
    for client_str in line.split(";"):
        demand = {}
        if client_str.strip():
            for item in client_str.split(","):
                parts = item.split(":")
                if len(parts) != 2:
                    continue
                try:
                    res, qty = int(parts[0]), int(parts[1])
                except ValueError:
                    # Non-numeric resource id or quantity: ignore this item only.
                    continue
                demand[res] = qty
        clients_demands.append(demand)
    return clients_demands

def parse_positions_line_to_latlon(line):
    """Parse one scenario line of client positions into ``(lat, lon)`` tuples.

    The line holds ``;``-separated ``lon,lat`` pairs (longitude first); the
    result swaps them into the ``(lat, lon)`` order used elsewhere in the
    file. Blank segments and segments with fewer than two fields are skipped;
    fields after the second are ignored. Non-numeric fields raise ValueError.
    """
    latlon_pairs = []
    for token in line.split(";"):
        if token.strip() == "":
            continue
        fields = token.split(",")
        if len(fields) < 2:
            continue
        lon = float(fields[0])
        lat = float(fields[1])
        latlon_pairs.append((lat, lon))
    return latlon_pairs

def build_clients_from_positions_and_demands(positions_latlon, demands):
    """Pair positions with demands and return one client dict per pair.

    Each client has the keys ``coord`` (the ``(lat, lon)`` tuple), ``lat``,
    ``lon``, ``demand`` (the given demand dict, not copied) and an empty
    ``assigned_depots`` dict. Pairing stops at the shorter of the two inputs.
    """
    return [
        {
            "coord": (lat, lon),
            "lat": lat,
            "lon": lon,
            "demand": wanted,
            "assigned_depots": {},
        }
        for (lat, lon), wanted in zip(positions_latlon, demands)
    ]

def nearest_compatible_depot(res_type, client_coord_latlon):
    """Return the name of the quickest-to-reach depot that stocks ``res_type``.

    Travel time is measured from the depot to the client. Returns ``None``
    when no depot lists the resource type (or no finite time is available).
    Ties keep the depot that comes first in ``DEPOT_COORDS``.
    """
    winner, winner_minutes = None, float("inf")
    for depot_name, depot_latlon in DEPOT_COORDS.items():
        if res_type in DEPOT_RESOURCES.get(depot_name, []):
            minutes = osrm_route_from_latlon(depot_latlon, client_coord_latlon)[0]
            if np.isfinite(minutes) and minutes < winner_minutes:
                winner, winner_minutes = depot_name, minutes
    return winner

def assign_depots_for_client(c):
    """Assign a depot to every resource type the client demands.

    Writes two keys into the client dict: ``assigned_depots`` (resource type
    -> depot name or ``None``) and ``is_fully_assignable`` (``True`` when no
    resource was left without a depot; also ``True`` for an empty demand).

    Returns:
        The value stored under ``is_fully_assignable``.
    """
    depot_by_resource = {
        res_type: nearest_compatible_depot(res_type, c["coord"])
        for res_type in c["demand"]
    }
    c["assigned_depots"] = depot_by_resource
    fully = not any(depot is None for depot in depot_by_resource.values())
    c["is_fully_assignable"] = fully
    return c["is_fully_assignable"]

def make_chunks_for_client(c):
    """Split a client's demand into truck-sized chunks, one resource per chunk.

    Resources without an assigned depot are skipped. Every other demand line
    is cut into pieces of at most ``TRUCK_CAPACITY_UNITS`` units.

    Returns:
        A list of chunk dicts with the keys ``depot``, ``types`` (one-element
        list), ``units`` (``{resource_type: quantity}``) and ``client`` (the
        client dict itself). Chunks are grouped by depot in first-seen order.
    """
    by_depot = {}
    for res_type, qty in c["demand"].items():
        source = c["assigned_depots"].get(res_type)
        if source is not None:
            by_depot.setdefault(source, []).append((res_type, qty))

    chunks = []
    for source, wanted in by_depot.items():
        for res_type, qty in wanted:
            left = qty
            while left > 0:
                portion = min(TRUCK_CAPACITY_UNITS, left)
                left -= portion
                chunks.append({
                    "depot": source,
                    "types": [res_type],
                    "units": {res_type: portion},
                    "client": c
                })
    return chunks

def chunk_signature(ch):
    """Return the grouping key of a chunk: a one-element tuple with its depot."""
    depot_name = ch["depot"]
    return (depot_name,)

def pack_trucks_bestfit(chunks):
    """Pack chunks into trucks with best-fit decreasing.

    Chunks are taken from largest to smallest. Each goes into the open truck
    that would be left with the least free capacity after loading it (the
    first such truck on ties); a new truck is opened when none can take it.

    Returns:
        A list of truck loads, each a list of chunk dicts.
    """
    def _size(chunk):
        return sum(chunk["units"].values())

    trucks = []  # chunk lists, one per truck
    used = []    # units already loaded on each truck
    for chunk in sorted(chunks, key=_size, reverse=True):
        size = _size(chunk)
        target, tightest = None, None
        for idx, loaded in enumerate(used):
            after = loaded + size
            if after <= TRUCK_CAPACITY_UNITS:
                slack = TRUCK_CAPACITY_UNITS - after
                if tightest is None or slack < tightest:
                    target, tightest = idx, slack
        if target is None:
            trucks.append([chunk])
            used.append(size)
        else:
            trucks[target].append(chunk)
            used[target] += size
    return trucks

def format_res_summary(units_dict):
    """Format ``{resource_type: quantity}`` as ``"Res <types> | <quantities>"``.

    Types are listed in ascending order, quantities in the matching order,
    both comma-separated.
    """
    ordered_types = sorted(units_dict)
    type_part = ",".join(str(res_type) for res_type in ordered_types)
    qty_part = ",".join(str(units_dict[res_type]) for res_type in ordered_types)
    return f"Res {type_part} | {qty_part}"

# --- Core Route Builder (Integration Point) ---
def _leg_time_dist(a, b, fallback_speed_kmh=40.0):
    """Return ``(time, distance)`` for the leg from ``a`` to ``b``.

    Asks ``osrm_route_from_latlon`` first. When it yields a non-finite time
    or distance, the distance falls back to ``haversine_km`` and the time is
    derived from the constant ``fallback_speed_kmh`` (``* 60.0`` turns hours
    into minutes, matching the ``time_min`` / ``distance_km`` route fields).
    """
    t, d, _ = osrm_route_from_latlon(a, b)
    if not np.isfinite(t) or not np.isfinite(d):
        d = haversine_km(a, b)
        t = (d / fallback_speed_kmh) * 60.0
    return t, d


def _build_matrices(nodes):
    """Return ``(time_matrix, dist_matrix)`` for every ordered pair of nodes.

    Both are square lists of lists indexed like ``nodes``; the diagonal
    stays at 0.0.
    """
    n = len(nodes)
    time_matrix = [[0.0] * n for _ in range(n)]
    dist_matrix = [[0.0] * n for _ in range(n)]
    for i, a in enumerate(nodes):
        for j, b in enumerate(nodes):
            if i != j:
                time_matrix[i][j], dist_matrix[i][j] = _leg_time_dist(a, b)
    return time_matrix, dist_matrix


def _tour_time(tour, time_matrix):
    """Sum the ``time_matrix`` entries along consecutive indices of ``tour``."""
    return sum(time_matrix[a][b] for a, b in zip(tour[:-1], tour[1:]))


def _solve_tour(model, n_nodes, time_matrix, dist_matrix):
    """Run the solver selected by ``model``.

    Returns ``(tour, raw_curve, best_curve)`` where ``tour`` is a sequence of
    node indices and the two curves are the fitness values to record for this
    truck. Any ``model`` other than "ACO", "GA", "SWARM" or "RL" is handled
    by the greedy solver.
    """
    if model == "ACO":
        aco = AntColonyTSP(time_matrix, dist_matrix, **ACO_PARAMS)
        best_path, _ = aco.solve()
        if aco.fitness_curve_raw:
            return best_path, aco.fitness_curve_raw, aco.fitness_curve_best
        return best_path, [], []

    if model == "GA":
        best_tour, _, ga_raw, ga_best, _ = genetic_algorithm(
            time_matrix, dist_matrix,
            pop_size=GA_PARAMS["pop_size"],
            generations=GA_PARAMS["generations"],
            mutation_rate=GA_PARAMS["mutation_rate"],
            elite_ratio=GA_PARAMS["elite_ratio"]
        )
        return best_tour, ga_raw, ga_best

    if model == "SWARM":
        swarm = SwarmTSP(
            time_matrix,
            n_agents=SWARM_PARAMS["n_agents"],
            n_iterations=SWARM_PARAMS["n_iterations"],
            topology=SWARM_PARAMS["topology"],
            two_opt_iters=SWARM_PARAMS["two_opt_iters"]
        )
        best_tour, _, hist_raw, hist_best, _ = swarm.solve()
        return best_tour, hist_raw, hist_best

    # RL and GREEDY produce a single tour, so their "curve" is one point:
    # the tour's total time according to the matrix.
    if model == "RL":
        tour = rl_solve_route(time_matrix, dist_matrix)
    else:  # GREEDY
        tour = greedy_route(n_nodes, time_matrix)
    elapsed = _tour_time(tour, time_matrix)
    return tour, [elapsed], [elapsed]


def build_routes_for_model(clients, model="ACO"):
    """Build one route per truck for ``clients`` using the given solver.

    Steps:
      1. Assign depots to every client (``assign_depots_for_client``) and
         keep the clients flagged ``is_fully_assignable``.
      2. Split their demand into chunks, group the chunks by
         ``chunk_signature`` and pack each group into trucks.
      3. For each truck, order the stops with ``model`` and compute the
         route's time, distance, cost and CO2.

    Args:
        clients: list of client dicts; they are updated in place by
            ``assign_depots_for_client``.
        model: "ACO", "GA", "SWARM" or "RL"; anything else runs the greedy
            solver.

    Returns:
        ``(routes, reachable, fitness_raw, fitness_best)``: the list of route
        dicts, the fully assignable clients, and the fitness values collected
        from the solver across all trucks.
    """
    # 1. Assignment
    for c in clients:
        assign_depots_for_client(c)
    reachable = [c for c in clients if c.get("is_fully_assignable")]

    # 2. Chunking, 3. Grouping & Packing
    groups = {}
    for c in reachable:
        for ch in make_chunks_for_client(c):
            groups.setdefault(chunk_signature(ch), []).append(ch)
    group_loads = {sig: pack_trucks_bestfit(chs) for sig, chs in groups.items()}

    routes = []
    fitness_raw = []
    fitness_best = []

    for (depot,), loads in group_loads.items():
        depot_latlon = DEPOT_COORDS[depot]
        for load in loads:
            # Units carried by this truck, overall and per resource type.
            units_total = 0
            units_dict = {}
            for ch in load:
                for t, q in ch["units"].items():
                    units_dict[t] = units_dict.get(t, 0) + q
                    units_total += q

            # One stop per distinct client position (compared at 6 decimals).
            seen = set()
            stops = []
            for ch in load:
                lat, lon = ch["client"]["coord"]
                key = (round(lat, 6), round(lon, 6))
                if key not in seen:
                    seen.add(key)
                    stops.append((lat, lon))

            route_nodes = [depot_latlon] + stops + [depot_latlon]
            final_seq = route_nodes

            # A single stop leaves nothing to reorder, so the pairwise
            # matrices (and their routing queries) are only built when a
            # solver will actually use them.
            if len(route_nodes) > 3:
                time_matrix, dist_matrix = _build_matrices(route_nodes)
                tour, raw_curve, best_curve = _solve_tour(
                    model, len(route_nodes), time_matrix, dist_matrix
                )
                final_seq = [route_nodes[i] for i in tour]
                fitness_raw.extend(raw_curve)
                fitness_best.extend(best_curve)

            # Recalculate exact path metrics along the final order
            total_time = 0.0
            total_dist = 0.0
            for a, b in zip(final_seq[:-1], final_seq[1:]):
                t, d = _leg_time_dist(a, b)
                total_time += t
                total_dist += d

            # Cost & CO2
            final_cost = per_truck_cost(total_dist, total_time, units_total)
            final_co2 = co2_emissions(total_dist)

            # The drawn geometry additionally starts and ends at the
            # enterprise; the metrics above cover depot -> stops -> depot only.
            seq_nodes_visu = [ENTERPRISE_COORD] + final_seq + [ENTERPRISE_COORD]
            geom = build_route_geometry_from_latlon_seq(seq_nodes_visu)

            routes.append({
                "truck_id": len(routes),
                "depot": depot,
                "res_summary": format_res_summary(units_dict),
                "n_clients": len(stops),
                "units_total": units_total,
                "coords": geom,
                "distance_km": total_dist,
                "time_min": total_time,
                "cost": final_cost,
                "co2_kg": final_co2
            })

    return routes, reachable, fitness_raw, fitness_best

In [None]:
# ---------------- 6. Visualization Functions ----------------

def generate_map_html(summary):
    """Render the model-comparison map, save it and return its HTML source.

    Args:
        summary: dict with
            - ``"last_routes"``: ``{model_key: [route dict, ...]}``;
            - ``"last_clients"``: list of client dicts (``"lat"``, ``"lon"``
              and ``"demand"`` are read), or ``None``;
            - optionally one stats dict per model key with ``"cost_avg"``,
              ``"time_avg"``, ``"trucks_avg"`` and ``"co2_avg"``.

    Returns:
        The content of the HTML file written to ``OUTPUT_HTML_MAP``.
    """
    last_routes = summary["last_routes"]
    last_clients = summary["last_clients"]

    m = folium.Map(location=REGION_CENTER, zoom_start=9, tiles="cartodbpositron")

    # Enterprise
    # Non-ASCII symbols are written as escapes so the labels survive any
    # re-encoding of this source file (they were previously mis-encoded).
    folium.Marker(
        location=ENTERPRISE_COORD,
        popup="\U0001F3E2 Enterprise",
        icon=BeautifyIcon(icon="building", border_color="black", text_color="black")
    ).add_to(m)

    # Depots
    for d, latlon in DEPOT_COORDS.items():
        folium.Marker(
            location=latlon,
            popup=f"\U0001F3ED Depot {d}<br>Resources: {DEPOT_RESOURCES.get(d)}",
            icon=BeautifyIcon(icon="archive", border_color="red", text_color="red")
        ).add_to(m)

    # Clients
    def demand_str(d):
        return ", ".join([f"{k}:{v}" for k, v in sorted(d.items())])

    for i, c in enumerate(last_clients or []):
        popup_text = f"Client {i}<br>Demand: {demand_str(c['demand'])}"
        folium.CircleMarker(
            location=(c["lat"], c["lon"]),
            radius=5,
            color="green",
            fill=True,
            fill_opacity=0.8,
            popup=popup_text
        ).add_to(m)

    # Colors (cycled by truck id)
    colors = ["green","darkred","darkblue","black","orange","pink","gray","teal","brown","cyan"]

    # Layers: one toggleable layer per model, only ACO shown initially
    model_layers = {
        "ACO":    FeatureGroup(name="Model A: ACO", show=True),
        "GREEDY": FeatureGroup(name="Model B: Greedy", show=False),
        "GA":     FeatureGroup(name="Model C: GA", show=False),
        "SWARM":  FeatureGroup(name="Model D: Swarm", show=False),
        "RL":     FeatureGroup(name="Model E: RL", show=False)
    }

    # Draw routes
    for model_key, fg in model_layers.items():
        routes = last_routes.get(model_key) or []
        for r in routes:
            coords = r.get("coords") or []
            folium.PolyLine(
                coords,
                color=colors[r["truck_id"] % len(colors)],
                weight=5,
                opacity=0.8,
                tooltip=(
                    f"Truck {r['truck_id']} | Depot {r['depot']} | {r['res_summary']} | "
                    f"{r['n_clients']} clients | {r['units_total']} units | "
                    f"{r['cost']:.0f}\u20ac | {r['time_min']:.0f}min | CO2 {r['co2_kg']:.1f}kg"
                )
            ).add_to(fg)
        fg.add_to(m)

    folium.LayerControl(collapsed=False).add_to(m)

    # Stats box: one line per model that has aggregated stats in `summary`
    stats_html = ""
    for m_key in ["ACO", "GREEDY", "GA", "SWARM", "RL"]:
        if m_key in summary:
            s = summary[m_key]
            stats_html += (
                f"<b>{m_key}:</b> "
                f"Costs: {s['cost_avg']:,.0f}\u20ac | "
                f"Times: {s['time_avg']:.0f} min | "
                f"Camions: {s['trucks_avg']:.2f} | "
                f"CO2: {s['co2_avg']:.1f} kg<br>"
            )

    legend_html = f"""
    <div style="position: fixed; bottom: 30px; left: 30px; width: 900px;
    background-color: white; border:2px solid grey; z-index:9999;
    font-size:12px; padding:12px; border-radius:5px; box-shadow: 2px 2px 6px rgba(0,0,0,0.3);">
    <b style="font-size:14px;">Compare (mean for camion)</b><hr>
    {stats_html}<hr>
    activate/deactivate to see the routes.
    </div>
    """
    m.get_root().html.add_child(folium.Element(legend_html))

    m.save(OUTPUT_HTML_MAP)
    print(f"Map HTML saved as: {OUTPUT_HTML_MAP}")

    # Read the saved file back so the caller gets exactly what is on disk.
    with open(OUTPUT_HTML_MAP, 'r', encoding='utf-8') as f:
        return f.read()



#  NORMALIZED FITNESS + MONOTONE BEST
def generate_fitness_html(summary):
    """Plot the per-model convergence curves, save them and return the HTML.

    For every model in ``summary`` that has a ``"series"`` dict, the lists
    ``"distance_km_raw"`` and ``"distance_km_best"`` are drawn against a
    shared X axis normalised to [0, 1]:

    - GREEDY and RL are drawn as flat lines at their last known value;
    - ACO, GA and SWARM are padded to the longest raw series (repeating the
      last value) and their best curve is turned into a running minimum.

    Models with neither a raw nor a best series are not drawn.

    Returns:
        The content of the HTML file written to ``OUTPUT_HTML_FIT``, or the
        string ``"No data for fitness curves."`` when no model has raw data
        (nothing is written in that case).
    """
    models = ["ACO", "GA", "GREEDY", "SWARM", "RL"]

    traces = []
    max_len = 0

    # 1) Max length over the raw series
    for m in models:
        if m in summary and "series" in summary[m]:
            data = summary[m]["series"].get("distance_km_raw", [])
            max_len = max(max_len, len(data))

    if max_len == 0:
        return "No data for fitness curves."

    # 2) Normalised X axis
    x_norm = np.linspace(0, 1, max_len)

    # 3) Padding: truncate to L, or extend by repeating the last value
    def pad_to(arr, L):
        if not arr:
            return [np.nan] * L
        if len(arr) >= L:
            return arr[:L]
        res = list(arr)
        while len(res) < L:
            res.append(res[-1])
        return res

    # 4) Monotone best: running minimum. NaN entries never become the
    #    minimum, and positions before the first real value stay NaN
    #    (instead of +inf) so they are simply left blank on the plot.
    def make_monotone(arr):
        mono = []
        best = None
        for v in arr:
            if not math.isnan(v) and (best is None or v < best):
                best = v
            mono.append(float("nan") if best is None else best)
        return mono

    colors = {
        "ACO": "blue",
        "GA": "red",
        "GREEDY": "green",
        "SWARM": "purple",
        "RL": "orange"
    }

    for m in models:
        if m not in summary or "series" not in summary[m]:
            continue

        raw_full = summary[m]["series"].get("distance_km_raw", [])
        best_full = summary[m]["series"].get("distance_km_best", [])

        # Nothing recorded for this model: skip it for every model type.
        if not raw_full and not best_full:
            continue

        if m in ["GREEDY", "RL"]:
            # Take the last known value (raw, otherwise best) ...
            v = raw_full[-1] if raw_full else best_full[-1]

            # ... and draw it as a flat line
            raw = [v] * max_len
            best = [v] * max_len
        else:
            # ACO / GA / SWARM
            raw = pad_to(raw_full, max_len)
            best = make_monotone(pad_to(best_full, max_len))

        # Raw (dotted)
        traces.append(go.Scatter(
            x=x_norm, y=raw,
            mode="lines",
            name=f"{m} raw",
            line=dict(color=colors[m], dash="dot"),
            opacity=0.5
        ))

        # Best (solid)
        traces.append(go.Scatter(
            x=x_norm, y=best,
            mode="lines",
            name=f"{m} best",
            line=dict(color=colors[m], width=3)
        ))

    fig = go.Figure(data=traces)
    fig.update_layout(
        title="Convergence Distance (X normalized, best monotone)",
        # The arrow is written as an escape; it was previously mis-encoded.
        xaxis_title="Progression normalized (0 \u2192 1)",
        yaxis_title="Distance (km)",
        hovermode="x unified",
        width=1200,
        height=650,
        legend_title="Algorithms",
        margin=dict(t=100, b=50, l=60, r=60)
    )

    fig.write_html(OUTPUT_HTML_FIT, include_plotlyjs="cdn")
    print(f"Fitness HTML saved as: {OUTPUT_HTML_FIT}")

    # Read the saved file back so the caller gets exactly what is on disk.
    with open(OUTPUT_HTML_FIT, 'r', encoding='utf-8') as f:
        return f.read()


In [None]:
# ---------------- 7. Main Execution & Aggregation ----------------

def avg(lst):
    """Return the mean of ``lst`` as a float, or NaN when ``lst`` is empty."""
    if not lst:
        return float("nan")
    return float(np.mean(lst))

def main():
    """Run every model on every scenario, then aggregate, plot and save.

    Scenarios are read line by line from ``SCENARIOS_FILE_POS`` and
    ``SCENARIOS_FILE_DEMANDS`` (dummy files are written first when either one
    is missing) and truncated to ``N_SCENARIOS_LIMIT`` when that is set.
    Each model's routes are accumulated into per-scenario totals; the summary
    stores per-truck averages plus the routes and clients of the last
    scenario. The map, the fitness curves and a JSON dump of the summary are
    saved through ``save_on_drive``.

    Returns:
        The summary dict that was passed to the visualisation functions.
    """
    # 1. Load Scenarios
    if not os.path.exists(SCENARIOS_FILE_POS) or not os.path.exists(SCENARIOS_FILE_DEMANDS):
        print("Creating dummy scenario files for testing...")
        with open(SCENARIOS_FILE_POS, "w") as f:
            f.write("2.35,48.85;2.40,48.80\n" * 5)
        with open(SCENARIOS_FILE_DEMANDS, "w") as f:
            f.write("1:10,2:5;1:5\n" * 5)

    with open(SCENARIOS_FILE_POS, "r") as fpos:
        pos_lines = [line.strip() for line in fpos if line.strip()]
    with open(SCENARIOS_FILE_DEMANDS, "r") as fdem:
        dem_lines = [line.strip() for line in fdem if line.strip()]

    if N_SCENARIOS_LIMIT:
        pos_lines = pos_lines[:N_SCENARIOS_LIMIT]
        dem_lines = dem_lines[:N_SCENARIOS_LIMIT]

    # zip() stops at the shorter list; say so instead of dropping scenarios
    # silently when the two files are out of sync.
    if len(pos_lines) != len(dem_lines):
        print(
            f"Warning: {len(pos_lines)} position lines vs {len(dem_lines)} "
            f"demand lines; unmatched lines are ignored."
        )

    scenarios = list(zip(pos_lines, dem_lines))
    print(f"Loaded {len(scenarios)} scenarios.")

    # 2. Containers
    models_to_run = ["ACO", "GA", "GREEDY", "SWARM", "RL"]
    stats = {
        m: {
            "costs": [], "times": [], "trucks": [], "co2": [],
            "fitness_raw": [], "fitness_best": []
        }
        for m in models_to_run
    }

    last_routes_map = {}
    last_clients_data = None
    last_idx = len(scenarios) - 1

    # 3. Loop
    for idx, (pos_line, dem_line) in enumerate(tqdm(scenarios, desc="Running Scenarios")):
        positions = parse_positions_line_to_latlon(pos_line)
        demands = parse_demands_line(dem_line)
        clients = build_clients_from_positions_and_demands(positions, demands)

        for model_key in models_to_run:
            routes, _, f_raw, f_best = build_routes_for_model(clients, model=model_key)

            bucket = stats[model_key]
            bucket["costs"].append(sum(r["cost"] for r in routes))
            bucket["times"].append(sum(r["time_min"] for r in routes))
            bucket["trucks"].append(len(routes))
            bucket["co2"].append(sum(r["co2_kg"] for r in routes))

            bucket["fitness_raw"].extend(f_raw)
            bucket["fitness_best"].extend(f_best)

            # Only the final scenario's routes are kept for the map.
            if idx == last_idx:
                last_routes_map[model_key] = routes

        if idx == last_idx:
            last_clients_data = clients

    # 4. Summary
    final_summary = {
        "last_routes": last_routes_map,
        "last_clients": last_clients_data,
    }

    print("\n--- mean results ---")
    for m in models_to_run:
        s = stats[m]
        trucks_avg = avg(s["trucks"])

        # `> 0` is False for 0 and for NaN (avg of no scenarios); a plain
        # truthiness test would let NaN through and propagate it.
        has_trucks = trucks_avg > 0

        # Fallback: reuse the raw series when no best series was recorded
        series_best = s["fitness_best"] if s["fitness_best"] else s["fitness_raw"]

        final_summary[m] = {
            "cost_avg": avg(s["costs"]) / trucks_avg if has_trucks else 0,
            "time_avg": avg(s["times"]) / trucks_avg if has_trucks else 0,
            "trucks_avg": trucks_avg,
            "co2_avg": avg(s["co2"]) / trucks_avg if has_trucks else 0,
            "series": {
                "distance_km_raw": s["fitness_raw"],
                "distance_km_best": series_best
            }
        }

        print(
            f"{m}: Cost/truck={final_summary[m]['cost_avg']:.1f}, "
            f"Trucks={trucks_avg:.1f}, "
            f"CO2={final_summary[m]['co2_avg']:.1f}"
        )

    # 5. Visuals
    map_html = generate_map_html(final_summary)
    fit_html = generate_fitness_html(final_summary)

    # 6. Save (one timestamp shared by the three files of this run)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    save_on_drive(map_html, f"map_results_{timestamp}.html")
    save_on_drive(fit_html, f"fitness_curves_{timestamp}.html")

    json_summary = json.dumps(final_summary, indent=4)
    save_on_drive(json_summary, f"results_summary_{timestamp}.json")

    return final_summary


# Entry point: run the whole pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Loaded 1 scenarios.


Running Scenarios: 100%|██████████| 1/1 [02:39<00:00, 159.25s/it]



--- Résultats Moyens ---
ACO: Cost/truck=485.1, Trucks=9.0, CO2=37.5
GA: Cost/truck=500.0, Trucks=9.0, CO2=38.7
GREEDY: Cost/truck=533.9, Trucks=9.0, CO2=42.4
SWARM: Cost/truck=485.1, Trucks=9.0, CO2=37.5
RL: Cost/truck=492.7, Trucks=9.0, CO2=38.1
Map HTML enregistré: output/supply_chain_compare_model.html
Fitness HTML enregistré: output/fitness_curves.html
File salvato su Drive: /content/drive/MyDrive/Colab_Output/map_results_20251220_232257.html
File salvato su Drive: /content/drive/MyDrive/Colab_Output/fitness_curves_20251220_232257.html
File salvato su Drive: /content/drive/MyDrive/Colab_Output/results_summary_20251220_232257.json
