-- In [0]:
-- ===== LABELS (BINARY) =====
-- Builds one binary training label per claim from gold.fact_claim:
--   1 -> has_denial_any = 1
--   0 -> has_denial_any = 0 AND current_balance <= 0
-- Claims matching neither rule (including rows where the inputs are NULL)
-- get no label and are left out of the table.
--
-- The unlabeled rows are filtered inside the CTAS rather than removed by a
-- follow-up DELETE, so the table is published in a single atomic step and
-- never exposes NULL labels to a concurrent reader.
CREATE OR REPLACE TABLE gold.ml_labels AS
WITH labeled AS (
  SELECT
    fc.claim_id,
    CASE
      WHEN fc.has_denial_any = 1 THEN 1
      WHEN fc.has_denial_any = 0 AND fc.current_balance <= 0 THEN 0
      ELSE NULL  -- undecided claim: excluded by the filter below
    END AS label
  FROM gold.fact_claim AS fc
)
SELECT
  claim_id,
  label
FROM labeled
WHERE label IS NOT NULL;

-- In [0]:
-- ===== FEATURES (NO LEAKAGE, AT SUBMISSION) =====

-- Per-payer denial rate: the mean of has_denial_any over every row currently
-- in gold.fact_claim, grouped by payer_id. Rows with a NULL has_denial_any
-- are ignored by AVG.
-- NOTE(review): this rate is computed from the denial flag of all claims in
-- the sample, with no time cutoff. That appears to conflict with the
-- "no leakage" goal stated in the section header; confirm whether a
-- point-in-time (prior-claims-only) rate is required.
CREATE OR REPLACE TEMP VIEW _payer_hist AS
SELECT
  fc.payer_id,
  AVG(CAST(fc.has_denial_any AS DOUBLE)) AS payer_denial_rate_overall
FROM gold.fact_claim AS fc
GROUP BY fc.payer_id;

-- Feature table: one output row per row of silver_curated.claims_current,
-- enriched with the payer-level denial rate from _payer_hist.
-- Missing values are defaulted so the table has no NULLs in these columns:
--   ack_event_count           -> 0
--   last_277_status_code      -> '' (empty string)
--   payer_denial_rate_overall -> 0.0 (payer not found in _payer_hist)
-- NOTE(review): ack_event_count and last_277_status_code look like
-- acknowledgement/status data that may arrive after submission; confirm
-- they are really known at submission time.
CREATE OR REPLACE TABLE gold.ml_features AS
SELECT
  claims.claim_id,
  claims.payer_id,
  claims.payer_name,
  claims.payer_type,
  claims.submission_date,
  claims.weekday_submitted,
  claims.is_weekend_submission,
  claims.total_charges,
  claims.expected_amount,
  COALESCE(claims.ack_event_count, 0) AS ack_event_count,
  COALESCE(claims.last_277_status_code, '') AS last_277_status_code,
  COALESCE(payer_hist.payer_denial_rate_overall, 0.0) AS payer_denial_rate_overall
FROM silver_curated.claims_current AS claims
-- LEFT JOIN keeps claims whose payer has no history row.
LEFT JOIN _payer_hist AS payer_hist
  ON payer_hist.payer_id = claims.payer_id;

-- Training set: every feature column plus the binary label.
-- The inner join keeps only claims present in both gold.ml_features and
-- gold.ml_labels, so claims without a label never reach training.
CREATE OR REPLACE VIEW gold.ml_training AS
SELECT
  features.*,
  labels.label
FROM gold.ml_features AS features
INNER JOIN gold.ml_labels AS labels
  ON labels.claim_id = features.claim_id;

-- Out-of-band step (not performed by this script): train XGBoost/LightGBM,
-- log the run to MLflow, and batch-score into
-- gold.scored_claims (claim_id, score). The view below reads that table.

-- Current claims with the model score attached as denial_risk_score.
-- LEFT JOIN: claims that have not been scored are kept with a NULL score.
-- NOTE(review): assumes at most one row per claim_id in gold.scored_claims;
-- duplicate scores would multiply claim rows here. Confirm.
CREATE OR REPLACE VIEW gold.v_claims_current_scored AS
SELECT
  claims.*,
  scored.score AS denial_risk_score
FROM silver_curated.claims_current AS claims
LEFT JOIN gold.scored_claims AS scored
  ON scored.claim_id = claims.claim_id;