In [90]:
import torch
import os
import argparse
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_
from functions import load_simple_scenarios_with_flexible_context
from pald_implementation import (
    make_pald_base_layer,
    make_pald_flex_purchase_layer,
    make_pald_flex_delivery_layer,
    compute_segment_caps
)
from paad_implementation import get_alpha
from robust_projection import project_y_robust, project_y_flex_robust
from contextual_model import ThresholdPredictor
import paad_implementation as pi
import math
from paad_implementation import objective_function as np_objective_function
import cvxpy as cp
from cvxpylayers.torch import CvxpyLayer
import numpy as np
import pickle
import opt_sol
from tqdm import tqdm
from typing import Dict, List, Tuple


In [91]:
K = 10           # number of segments in piecewise linear approximation for psi
gamma = 1.0     # switching cost parameter for x
delta = 0.5     # switching cost parameter for z (used in analytical threshold)
S = 1.0          # maximum inventory capacity
c_delivery = 0.2
eps_delivery = 0.05
epochs = 10
T = 10

In [92]:
# define a toy instance of osdm where prices decrease from 10 to 1 and then jump up to 10, 
# single unit of base demand at T=10, no flex demand
p_min = 1.0
p_max = 10.0
base_demand_all = [1.0 if t == T-1 else 0.0 for t in range(T)]
flex_demand_all = [0.0 for t in range(T)]

prices = np.linspace(p_max, p_min, T-1).tolist()
prices.append(p_max)

In [93]:
solver_options = {
    # SCS parameters tend to be robust for differentiable layers
    "eps": 1e-5,
    "max_iters": 2000,
    "verbose": False,
}


def _safe_layer_call(layer, args, size=1.0):
    """
    Call a CvxpyLayer and catch SCS/diffcp failures. Returns x_total tensor.
    """
    x_total, x_parts = layer(*args, solver_args=solver_options)
    return x_total

In [132]:
def make_pald_base_layer(K, gamma, ridge=False):
    x_parts = cp.Variable(K, nonneg=True)
    x_total = cp.Variable(nonneg=True)

    x_prev = cp.Parameter(nonneg=True)
    w_prev = cp.Parameter(nonneg=True)
    p_t = cp.Parameter()
    y_vec = cp.Parameter((K,))
    caps = cp.Parameter((K,), nonneg=True)

    constraints = [
        x_parts >= 0,
        x_parts <= caps,
        x_total == cp.sum(x_parts),
        x_total <= 1 - w_prev,
    ]
    ridge = 0
    if ridge:
        # Increase ridge to encourage interior solutions and smoother differentiability
        ridge = 1e-3 * cp.sum_squares(x_parts) + 1e-3 * cp.sum_squares(x_total)
    hit_cost = p_t * x_total
    switch_cost = gamma * cp.abs(x_total - x_prev) + gamma * cp.abs(x_total)
    phi_cost = y_vec @ x_parts
    obj = cp.Minimize(hit_cost + switch_cost - phi_cost + ridge)
    prob = cp.Problem(obj, constraints)
    return CvxpyLayer(prob,
                      parameters=[x_prev, w_prev, p_t, y_vec, caps],
                      variables=[x_total, x_parts])

In [133]:
# helper to compute a (coarse approximation) of the integral over the (piecewise-affine) threshold function phi
def compute_segment_caps(w_prev: float, K: int):
    """Remaining capacity per segment given cumulative fraction w_prev."""
    # Clamp w into [0, 1]
    w = max(0.0, min(1.0, float(w_prev)))
    if 1.0 - w <= 1e-9:
        return [0.0] * K
    caps = []
    for i in range(K):
        left = i / K
        right = (i + 1) / K
        cap = max(0.0, right - max(left, w))
        caps.append(cap)
    return caps

In [134]:
# differentiable torch objective function
# NOTE: Keep everything as torch ops; avoid Python floats that can break the graph.
def torch_objective(p_seq, x_seq, z_seq, gamma, delta, c, eps):
    """Torch version of objective_function for differentiable PALD cost.
    Inputs are torch 1D tensors of length T (float32).
    Mirrors paad_implementation.objective_function.
    """
    Tn = p_seq.shape[0]
    # state of charge s[0..T]
    s = []
    s_prev = torch.zeros((), dtype=torch.float32)
    s.append(s_prev)
    for t in range(1, Tn + 1):
        s_t = torch.clamp(s_prev + x_seq[t - 1] - z_seq[t - 1], min=0.0)
        s.append(s_t)
        s_prev = s_t
    s_torch = torch.stack(s)

    # Costs
    cost_purchasing = (p_seq * x_seq).sum()
    switching_cost_x = gamma * (x_seq[1:] - x_seq[:-1]).abs().sum() if Tn > 1 else torch.tensor(0.0)
    switching_cost_z = delta * (z_seq[1:] - z_seq[:-1]).abs().sum() if Tn > 1 else torch.tensor(0.0)
    s_prev_seq = s_torch[:-1]
    discharge_cost = (p_seq * (c * z_seq + eps * z_seq - c * s_prev_seq * z_seq)).sum()
    return cost_purchasing + switching_cost_x + switching_cost_z + discharge_cost

In [135]:
# define the base demand cvxpylayer
pald_base_layer = make_pald_base_layer(K, gamma, ridge=False)

In [136]:
# y_vec is the values of the piecewise affine threshold function -- let's see what happens if we set it HIGH
# IMPORTANT: To get valid gradients w.r.t. y_vec, we must keep decisions as torch Tensors and avoid .item()/.round()
y_vec_large = torch.tensor([p_max for i in range(K)], dtype=torch.float32, requires_grad=True)


def simulate_one_driver(y_vec, prices, base_demand_all, flex_demand_all, ridge=False):
    """
    Simulate one base driver with given y_vec and return a differentiable loss.
    Decisions remain as torch.Tensors so autograd can backprop to y_vec through the CvxpyLayer.
    """
    # torch state (kept differentiable across time)
    x_prev = torch.tensor(0.0, dtype=torch.float32)  # previous decision (scalar)
    w_prev = torch.tensor(0.0, dtype=torch.float32)  # cumulative fraction delivered

    torch_decisions = []

    for t in range(T):
        p_t = prices[t]
        # Prepare parameters for the layer
        x_prev_t = x_prev.view(1)
        w_prev_t = w_prev.view(1)
        p_t_t = torch.tensor([p_t], dtype=torch.float32)

        # Segment caps are not differentiated; treat them as constants derived from current w_prev
        caps_list = compute_segment_caps(float(w_prev.detach().cpu().item()), K)
        caps_t = torch.tensor(caps_list, dtype=torch.float32)

        # Solve the per-step convex problem via CVXPYLayer (returns x_total, x_parts)
        x_total_t = _safe_layer_call(
            pald_base_layer, (x_prev_t, w_prev_t, p_t_t, y_vec, caps_t)
        )
        # Squeeze to scalar for arithmetic
        x_total_scalar = x_total_t.squeeze()

        # On the final step, force completion if any remaining fraction < 1.0
        if t == T - 1:
            # Add just enough slack to finish any remainder (keeps graph intact)
            remainder = torch.clamp(1.0 - (w_prev + x_total_scalar), min=0.0)
            x_total_scalar = x_total_scalar + remainder

        torch_decisions.append(x_total_scalar)

        # Update state for the next step (kept differentiable)
        w_prev = w_prev + x_total_scalar
        x_prev = x_total_scalar

    # Stack decisions and compute differentiable objective
    x_seq = torch.stack(torch_decisions)  # shape [T]
    z_seq = torch.tensor(base_demand_all, dtype=torch.float32)  # no flex here; constant

    loss = torch_objective(
        torch.tensor(prices, dtype=torch.float32),
        x_seq,
        z_seq,
        gamma,
        delta,
        c_delivery,
        eps_delivery,
    )

    # For visibility only (no graph break for the loss)
    with torch.no_grad():
        pretty = [round(float(v), 4) for v in x_seq]
        print("Decisions:", pretty)
        print("Cost of this solution:", float(loss))

    return loss


# Run and backprop to inspect gradients w.r.t y_vec
loss = simulate_one_driver(y_vec_large, prices, base_demand_all, flex_demand_all)
loss.backward()
print("Gradients of cost w.r.t. y_vec (large init):", y_vec_large.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_large - lr * y_vec_large.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [0.0, 0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0]
Cost of this solution: 10.75
Gradients of cost w.r.t. y_vec (large init): tensor([6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09,
        6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09])
New y_vec after one gradient step: tensor([9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936,
        9.9936], grad_fn=<SubBackward0>)


In [137]:
# what if we make y_vec small?
y_vec_small = torch.tensor([p_min for i in range(K)], dtype=torch.float32, requires_grad=True)

loss_small = simulate_one_driver(y_vec_small, prices, base_demand_all, flex_demand_all)
loss_small.backward()
print("Gradients of cost w.r.t. y_vec (small init):", y_vec_small.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_small - lr * y_vec_small.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [-0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 1.0]
Cost of this solution: 14.0
Gradients of cost w.r.t. y_vec (small init): tensor([3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10,
        3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10])
New y_vec after one gradient step: tensor([0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996,
        0.9996], grad_fn=<SubBackward0>)


In [138]:
# the above is a big problem because the threshold is now outside the range [p_min, p_max] !

In [139]:
# what is the optimal solution?
flex_demand_all = [0.0 for t in range(T)]
Deltas = [0.0 for t in range(T)]
status, results = opt_sol.optimal_solution(T, prices, gamma, delta, c_delivery, eps_delivery, S, base_demand_all, flex_demand_all, Deltas)
if status == "Optimal" and results is not None:
    decisions = results['x']
    opt_z = results['z']
    opt_s = results['s'][1:]
    # Use numpy objective for consistency
    opt_cost = np_objective_function(T, prices, gamma, delta, c_delivery, eps_delivery, decisions, opt_z)
decisions = [round(d, 4) for d in decisions]
print("Optimal decisions: ", decisions)
print("Optimal cost: ", np_objective_function(T, prices, gamma, delta, c_delivery, eps_delivery, decisions, base_demand_all))

Optimal decisions:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0]
Optimal cost:  3.5625


In [140]:
# what if we add a penalty to encourage the drivers to "purchase early" (instead of relying on compulsory trade)

def simulate_one_driver_penalty(y_vec, prices, base_demand_all, flex_demand_all):
    """
    Simulate one base driver with given y_vec and return a differentiable loss.
    Decisions remain as torch.Tensors so autograd can backprop to y_vec through the CvxpyLayer.
    """
    # torch state (kept differentiable across time)
    x_prev = torch.tensor(0.0, dtype=torch.float32)  # previous decision (scalar)
    w_prev = torch.tensor(0.0, dtype=torch.float32)  # cumulative fraction delivered
    x_topup = torch.tensor(0.0, dtype=torch.float32)

    torch_decisions = []

    for t in range(T):
        p_t = prices[t]
        # Prepare parameters for the layer
        x_prev_t = x_prev.view(1)
        w_prev_t = w_prev.view(1)
        p_t_t = torch.tensor([p_t], dtype=torch.float32)

        # Segment caps are not differentiated; treat them as constants derived from current w_prev
        caps_list = compute_segment_caps(float(w_prev.detach().cpu().item()), K)
        caps_t = torch.tensor(caps_list, dtype=torch.float32)

        # Solve the per-step convex problem via CVXPYLayer (returns x_total, x_parts)
        x_total_t = _safe_layer_call(
            pald_base_layer, (x_prev_t, w_prev_t, p_t_t, y_vec, caps_t)
        )
        # Squeeze to scalar for arithmetic
        x_total_scalar = x_total_t.squeeze()

        # On the final step, force completion if any remaining fraction < 1.0
        if t == T - 1:
            # Add just enough slack to finish any remainder (keeps graph intact)
            remainder = torch.clamp(1.0 - (w_prev + x_total_scalar), min=0.0)
            x_total_scalar = x_total_scalar + remainder
            x_topup = remainder

        torch_decisions.append(x_total_scalar)

        # Update state for the next step (kept differentiable)
        w_prev = w_prev + x_total_scalar
        x_prev = x_total_scalar

    # Stack decisions and compute differentiable objective
    x_seq = torch.stack(torch_decisions)  # shape [T]
    z_seq = torch.tensor(base_demand_all, dtype=torch.float32)

    loss = torch_objective(
        torch.tensor(prices, dtype=torch.float32),
        x_seq,
        z_seq,
        gamma,
        delta,
        c_delivery,
        eps_delivery,
    )
    # add a penalty to the loss proportional to x_topup
    lamda = torch.tensor(200.0, dtype=torch.float32)
    loss += lamda * remainder

    # For visibility only (no graph break for the loss)
    with torch.no_grad():
        pretty = [round(float(v), 4) for v in x_seq]
        print("Decisions:", pretty)
        print("Augmented cost of this solution:", float(loss))

    return loss

In [141]:
y_vec_large = torch.tensor([p_max for i in range(K)], dtype=torch.float32, requires_grad=True)
# Run and backprop to inspect gradients w.r.t y_vec
loss = simulate_one_driver_penalty(y_vec_large, prices, base_demand_all, flex_demand_all)
loss.backward()
print("Gradients of augmented cost w.r.t. y_vec (large init):", y_vec_large.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_large - lr * y_vec_large.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [0.0, 0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0]
Augmented cost of this solution: 10.75
Gradients of augmented cost w.r.t. y_vec (large init): tensor([6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09,
        6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09])
New y_vec after one gradient step: tensor([9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936,
        9.9936], grad_fn=<SubBackward0>)


In [142]:
# what if we make y_vec small?
y_vec_small = torch.tensor([p_min for i in range(K)], dtype=torch.float32, requires_grad=True)

loss_small = simulate_one_driver_penalty(y_vec_small, prices, base_demand_all, flex_demand_all)
loss_small.backward()
print("Gradients of cost w.r.t. y_vec (small init):", y_vec_small.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_small - lr * y_vec_small.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [-0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 1.0]
Augmented cost of this solution: 214.0
Gradients of cost w.r.t. y_vec (small init): tensor([6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08,
        6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08])
New y_vec after one gradient step: tensor([0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352,
        0.9352], grad_fn=<SubBackward0>)


In [143]:
# even if I add a large penalty to the compulsory trade (forced top-ups), the gradient is still pointing down?

# when I train using train_pald_contextual.py, this is one of the key issues I run into -- the NN learns that it should predict 

In [144]:
# retry with ridge enabled, but no penalty
# define the base demand cvxpylayer
pald_base_layer = make_pald_base_layer(K, gamma, ridge=True)

In [145]:
y_vec_large = torch.tensor([p_max for i in range(K)], dtype=torch.float32, requires_grad=True)
# Run and backprop to inspect gradients w.r.t y_vec
loss = simulate_one_driver(y_vec_large, prices, base_demand_all, flex_demand_all)
loss.backward()
print("Gradients of cost w.r.t. y_vec (large init):", y_vec_large.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_large - lr * y_vec_large.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [0.0, 0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0]
Cost of this solution: 10.75
Gradients of cost w.r.t. y_vec (large init): tensor([6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09,
        6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09])
New y_vec after one gradient step: tensor([9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936,
        9.9936], grad_fn=<SubBackward0>)


In [147]:
# what if we make y_vec small?
y_vec_small = torch.tensor([p_min for i in range(K)], dtype=torch.float32, requires_grad=True)

loss_small = simulate_one_driver(y_vec_small, prices, base_demand_all, flex_demand_all)
loss_small.backward()
print("Gradients of cost w.r.t. y_vec (small init):", y_vec_small.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_small - lr * y_vec_small.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [-0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 1.0]
Cost of this solution: 14.0
Gradients of cost w.r.t. y_vec (small init): tensor([3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10,
        3.5394e-10, 3.5394e-10, 3.5394e-10, 3.5394e-10])
New y_vec after one gradient step: tensor([0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996, 0.9996,
        0.9996], grad_fn=<SubBackward0>)


In [148]:
# with penalty and ridge
pald_base_layer = make_pald_base_layer(K, gamma, ridge=True)
y_vec_large = torch.tensor([p_max for i in range(K)], dtype=torch.float32, requires_grad=True)
# Run and backprop to inspect gradients w.r.t y_vec
loss = simulate_one_driver_penalty(y_vec_large, prices, base_demand_all, flex_demand_all)
loss.backward()
print("Gradients of augmentedcost w.r.t. y_vec (large init):", y_vec_large.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_large - lr * y_vec_large.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [0.0, 0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0]
Augmented cost of this solution: 10.75
Gradients of augmentedcost w.r.t. y_vec (large init): tensor([6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09,
        6.4438e-09, 6.4438e-09, 6.4438e-09, 6.4438e-09])
New y_vec after one gradient step: tensor([9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936, 9.9936,
        9.9936], grad_fn=<SubBackward0>)


  warn(


In [149]:
# what if we make y_vec small?
y_vec_small = torch.tensor([p_min for i in range(K)], dtype=torch.float32, requires_grad=True)

loss_small = simulate_one_driver_penalty(y_vec_small, prices, base_demand_all, flex_demand_all)
loss_small.backward()
print("Gradients of cost w.r.t. y_vec (small init):", y_vec_small.grad)
# take one big step in the direction of the gradient to see which direction it's going to take us
lr = 1_000_000
new_y_vec = y_vec_small - lr * y_vec_small.grad
print("New y_vec after one gradient step:", new_y_vec)

Decisions: [-0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 1.0]
Augmented cost of this solution: 214.0
Gradients of cost w.r.t. y_vec (small init): tensor([6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08,
        6.4807e-08, 6.4807e-08, 6.4807e-08, 6.4807e-08])
New y_vec after one gradient step: tensor([0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352, 0.9352,
        0.9352], grad_fn=<SubBackward0>)


  warn(
