In [0]:
%sql
-- Table function listing every machine whose most recent status report is 'down'.
CREATE OR REPLACE FUNCTION zg.production_scheduling_demo.down_machines_uc()
RETURNS TABLE (
  machine_id STRING,
  machine_name STRING,
  status STRING,
  reported_at TIMESTAMP
)
RETURN
SELECT
  newest.machine_id,
  newest.machine_name,
  newest.status,
  newest.reported_at
FROM (
  -- Keep exactly one row per machine: the report with the latest reported_at.
  SELECT
    ms.machine_id,
    ms.machine_name,
    ms.status,
    ms.reported_at
  FROM zg.production_scheduling_demo.machine_status AS ms
  QUALIFY ROW_NUMBER() OVER (
    PARTITION BY ms.machine_id
    ORDER BY ms.reported_at DESC
  ) = 1
) AS newest
-- The status filter runs after the per-machine pick, so a machine that was
-- down earlier but has a newer non-down report is not returned.
WHERE newest.status = 'down';


In [0]:
%sql
CREATE OR REPLACE FUNCTION zg.production_scheduling_demo.optimize_routes_and_calculate_kpis(
  down_machines ARRAY<STRING>,
  planning_days INT,
  commit BOOLEAN
)
RETURNS TABLE (
  order_id STRING,
  machine_id STRING,
  profit DOUBLE,
  p_best DOUBLE,
  processing_hours DOUBLE,
  profit_score DOUBLE,
  base_confidence DOUBLE,
  expected_profit DOUBLE,
  expected_ontime_deliveries DOUBLE,
  factory_capacity_utilization DOUBLE,
  expected_orders_completed INT
)
LANGUAGE PYTHON
COMMENT "Runs greedy assignment and KPI scoring in memory. Commits to UC only if commit=true."
HANDLER "OptimizerHandler"
AS $$
import pandas as pd
from pyspark.sql import SparkSession

class OptimizerHandler:
    """Greedy order-to-machine assignment plus scenario KPIs.

    Reads ``candidate_routes_scored`` and ``machines_catalog`` from the
    configured catalog/schema, assigns each order to at most one machine, and
    returns one row per assigned order with the scenario-wide KPI values
    repeated on every row. When ``commit`` is true the result is also written
    to the ``assigned_scenario`` table.

    NOTE(review): Unity Catalog Python UDTF handlers are normally driven
    through an ``eval`` method and may not be allowed to create a
    SparkSession - confirm that this ``__call__``-based handler actually runs
    in the target runtime.
    """

    # Per-assignment columns, in the order declared by RETURNS TABLE.
    _ROUTE_COLUMNS = [
        "order_id",
        "machine_id",
        "profit",
        "p_best",
        "processing_hours",
        "profit_score",
        "base_confidence",
    ]

    # Explicit schema for the empty-schedule case: Spark cannot infer column
    # types from a frame that has no rows.
    _EMPTY_SCHEMA = (
        "order_id string, machine_id string, profit double, p_best double, "
        "processing_hours double, profit_score double, base_confidence double, "
        "expected_profit double, expected_ontime_deliveries double, "
        "factory_capacity_utilization double, expected_orders_completed int"
    )

    def __init__(self):
        self.catalog = "zg"
        self.schema = "production_scheduling_demo"

    def __call__(self, down_machines, planning_days, commit):
        """Run the optimizer for one scenario.

        Args:
            down_machines: machine ids to exclude; ``None`` means none.
            planning_days: multiplier applied to each machine's
                ``daily_capacity_hours`` to get its capacity for the horizon.
            commit: when true, overwrite ``assigned_scenario`` with the result.

        Returns:
            A Spark DataFrame of assignments with KPI columns. If no order can
            be assigned, a zero-row frame with the full declared schema is
            returned and nothing is written.
        """
        spark = SparkSession.builder.getOrCreate()
        cand = spark.table(f"{self.catalog}.{self.schema}.candidate_routes_scored").toPandas()
        mach = spark.table(f"{self.catalog}.{self.schema}.machines_catalog").toPandas()
        unavailable = set(down_machines or [])

        assigned_df = self._assign_greedy(cand, mach, unavailable, planning_days)

        if assigned_df.empty:
            # Previously this path produced a frame without the route columns
            # and asked Spark to infer a schema from zero rows. Return a typed
            # empty result instead, and leave the last committed scenario
            # untouched because there is nothing to commit.
            return spark.createDataFrame([], schema=self._EMPTY_SCHEMA)

        # KPIs are scenario-wide scalars broadcast onto every assignment row.
        for kpi_name, kpi_value in self._compute_kpis(assigned_df, mach, planning_days).items():
            assigned_df[kpi_name] = kpi_value

        result = spark.createDataFrame(assigned_df)
        if commit:
            fq_output = f"{self.catalog}.{self.schema}.assigned_scenario"
            result.write.mode("overwrite").saveAsTable(fq_output)
        return result

    @classmethod
    def _assign_greedy(cls, cand, mach, down_machines, planning_days):
        """Assign each order to its best-scoring candidate machine that still has capacity."""
        remaining = {
            machine_id: float(daily_hours * planning_days)
            for machine_id, daily_hours in zip(mach["machine_id"], mach["daily_capacity_hours"])
        }
        for machine_id in down_machines:
            remaining[machine_id] = 0.0

        # Orders are served by best achievable profit score, then earliest
        # promised date, then shortest processing time.
        order_priority = (
            cand.groupby("order_id")
                .agg(max_profit_score=("profit_score", "max"),
                     min_due=("promised_date", "min"),
                     min_hours=("processing_hours", "min"))
                .sort_values(by=["max_profit_score", "min_due", "min_hours"],
                             ascending=[False, True, True])
                .reset_index()
        )

        # Within an order, try the highest score first and break ties on fewer hours.
        cand_by_order = {
            oid: group.sort_values(by=["profit_score", "processing_hours"],
                                   ascending=[False, True])
            for oid, group in cand.groupby("order_id", sort=False)
        }

        assigned = []
        for oid in order_priority["order_id"]:
            for route in cand_by_order[oid].to_dict("records"):
                mid = route["machine_id"]
                available = remaining.get(mid)
                # Skip down machines and machines missing from the catalog;
                # the latter used to raise KeyError when need <= 0.
                if available is None or mid in down_machines:
                    continue
                need = float(route["processing_hours"])
                if available >= need:
                    remaining[mid] = available - need
                    assigned.append({
                        "order_id": oid,
                        "machine_id": mid,
                        "profit": float(route["margin"]),
                        "p_best": float(route["p_best"]),
                        "processing_hours": need,
                        "profit_score": float(route["profit_score"]),
                        "base_confidence": float(route["base_confidence"]),
                    })
                    break  # one machine per order

        # Explicit columns keep the frame well-formed even with zero rows.
        return pd.DataFrame(assigned, columns=cls._ROUTE_COLUMNS)

    @staticmethod
    def _compute_kpis(assigned_df, mach, planning_days):
        """Aggregate scenario KPIs from a non-empty assignment frame."""
        # Capacity counts every catalogued machine, including down ones.
        total_capacity = float(mach["daily_capacity_hours"].sum() * planning_days)
        total_used = float(assigned_df["processing_hours"].sum())
        # Guard the division: zero capacity would otherwise give nan/inf.
        utilization = total_used / total_capacity if total_capacity > 0 else 0.0
        return {
            "expected_profit": float(assigned_df["profit"].sum()),
            "expected_ontime_deliveries": float(assigned_df["p_best"].sum()),
            "factory_capacity_utilization": float(utilization),
            "expected_orders_completed": int(assigned_df["order_id"].nunique()),
        }
$$;


In [0]:
%sql
CREATE OR REPLACE FUNCTION zg.production_scheduling_demo.optimize_kpi_summary(
  down_machines ARRAY<STRING>,
  planning_days INT
)
RETURNS TABLE (
  expected_profit DOUBLE,
  expected_ontime_deliveries DOUBLE,
  factory_capacity_utilization DOUBLE,
  expected_orders_completed INT
)
LANGUAGE PYTHON
COMMENT "Lightweight optimizer that runs greedy assignment in memory and returns only KPI summary for conversational previews."
HANDLER "KpiSummaryHandler"
AS $$
import pandas as pd
from pyspark.sql import SparkSession

class KpiSummaryHandler:
    """KPI-only preview of the greedy order-to-machine assignment.

    Reads ``candidate_routes_scored`` and ``machines_catalog`` from the
    configured catalog/schema, runs the greedy assignment in memory and
    returns a single row of scenario KPIs. Nothing is written.

    NOTE(review): Unity Catalog Python UDTF handlers are normally driven
    through an ``eval`` method and may not be allowed to create a
    SparkSession - confirm that this ``__call__``-based handler actually runs
    in the target runtime.
    """

    def __init__(self):
        self.catalog = "zg"
        self.schema = "production_scheduling_demo"

    def __call__(self, down_machines, planning_days):
        """Compute the KPI summary for one scenario.

        Args:
            down_machines: machine ids to exclude; ``None`` means none.
            planning_days: multiplier applied to each machine's
                ``daily_capacity_hours`` to get its capacity for the horizon.

        Returns:
            A one-row Spark DataFrame with ``expected_profit``,
            ``expected_ontime_deliveries``, ``factory_capacity_utilization``
            and ``expected_orders_completed``; all zero when nothing can be
            assigned.
        """
        spark = SparkSession.builder.getOrCreate()
        cand = spark.table(f"{self.catalog}.{self.schema}.candidate_routes_scored").toPandas()
        mach = spark.table(f"{self.catalog}.{self.schema}.machines_catalog").toPandas()
        unavailable = set(down_machines or [])

        assigned_df = pd.DataFrame(
            self._assign_greedy(cand, mach, unavailable, planning_days)
        )
        if assigned_df.empty:
            return spark.createDataFrame([{
                "expected_profit": 0.0,
                "expected_ontime_deliveries": 0.0,
                "factory_capacity_utilization": 0.0,
                "expected_orders_completed": 0
            }])

        # Capacity counts every catalogued machine, including down ones.
        total_capacity = float(mach["daily_capacity_hours"].sum() * planning_days)
        total_used = float(assigned_df["processing_hours"].sum())
        # Guard the division: zero capacity would otherwise give nan/inf.
        factory_util = total_used / total_capacity if total_capacity > 0 else 0.0

        return spark.createDataFrame([{
            "expected_profit": float(assigned_df["profit"].sum()),
            "expected_ontime_deliveries": float(assigned_df["p_best"].sum()),
            "factory_capacity_utilization": float(factory_util),
            "expected_orders_completed": int(assigned_df["order_id"].nunique())
        }])

    @staticmethod
    def _assign_greedy(cand, mach, down_machines, planning_days):
        """Assign each order to its best-scoring candidate machine that still has capacity."""
        remaining = {
            machine_id: float(daily_hours * planning_days)
            for machine_id, daily_hours in zip(mach["machine_id"], mach["daily_capacity_hours"])
        }
        for machine_id in down_machines:
            remaining[machine_id] = 0.0

        # Uses the same three sort keys as the optimize_routes_and_calculate_kpis
        # handler (best profit score, then earliest promised date, then shortest
        # processing time). Sorting on the score alone let tied orders be served
        # in a different sequence, so the preview could disagree with that run.
        order_priority = (
            cand.groupby("order_id")
                .agg(max_profit_score=("profit_score", "max"),
                     min_due=("promised_date", "min"),
                     min_hours=("processing_hours", "min"))
                .sort_values(by=["max_profit_score", "min_due", "min_hours"],
                             ascending=[False, True, True])
                .reset_index()
        )

        # Within an order, try the highest score first and break ties on fewer hours.
        cand_by_order = {
            oid: group.sort_values(by=["profit_score", "processing_hours"],
                                   ascending=[False, True])
            for oid, group in cand.groupby("order_id", sort=False)
        }

        assigned = []
        for oid in order_priority["order_id"]:
            for route in cand_by_order[oid].to_dict("records"):
                mid = route["machine_id"]
                available = remaining.get(mid)
                # Skip down machines and machines missing from the catalog;
                # the latter used to raise KeyError when need <= 0.
                if available is None or mid in down_machines:
                    continue
                need = float(route["processing_hours"])
                if available >= need:
                    remaining[mid] = available - need
                    assigned.append({
                        "profit": float(route["margin"]),
                        "p_best": float(route["p_best"]),
                        "processing_hours": need,
                        "order_id": oid
                    })
                    break  # one machine per order
        return assigned
$$;
