In [0]:
%run
./_common_helpers

---
### Purpose: ###  
Load raw EHR bronze data into the silver `claim_headers` and `claim_lines` tables.


---
- Normalize identifiers, parse timestamps, drop noise columns.
- Map EHR `workqueue_status` -> claim `current_status`.
- Treat each EHR row as a single claim line (`line_number = 1`), so the layout stays compatible if line-item detail is ingested later.
- Rows with a missing `claim_id`, or with an unparseable amount or date, are written to the `ehr_claims_rejects` table.
---

---
### Assumes: ###  
`as_of_date` is the claim submission snapshot date from the EHR. 

In [0]:
USE CATALOG claims360_dev;
USE SCHEMA silver;
-- ===== 1) COMMON FILTERED BASE =====
-- Every bronze EHR row is extended with parsed helper columns (prefixed "_"),
-- then only rows that have a claim id, both amounts, and a submission date
-- are kept. Rows failing any of these checks are excluded from this view.
-- NOTE(review): the silver.fn_to_*_safe helpers are defined outside this
-- file; presumably they return NULL for values that cannot be parsed.

CREATE OR REPLACE TEMP VIEW stage_ehr_filtered AS
SELECT
  parsed.*
FROM (
  -- All raw columns first, followed by the typed helper columns.
  SELECT
    raw.*,
    silver.fn_to_date_safe(as_of_date)  AS _submission_date,
    silver.fn_to_ts_safe(as_of_date)    AS _submission_ts,
    silver.fn_to_dec_safe(billed_amt)   AS _total_charges,
    silver.fn_to_dec_safe(expected_amt) AS _expected_amount,
    upper(trim(workqueue_status))       AS _wq_status
  FROM bronze.ehr_claims_raw AS raw
) AS parsed
-- Drop a row as soon as any one required value is missing.
WHERE NOT (
     parsed.claim_id         IS NULL
  OR parsed._total_charges   IS NULL
  OR parsed._expected_amount IS NULL
  OR parsed._submission_date IS NULL
);

In [0]:
-- ===== 2) STAGING VIEW: typed, filtered, deduped =====
-- One row per normalized claim_id, shaped like silver.claim_headers (plus the
-- helper column rn, which is always 1).
--
-- Reads from stage_ehr_filtered rather than re-parsing bronze, so the header
-- stage uses exactly the same acceptance rule as the rest of the notebook:
-- claim_id, billed amount, expected amount and submission date must all be
-- present. A row with an unparseable expected_amt is therefore never staged
-- as a header while also being recorded as a reject.

CREATE OR REPLACE TEMP VIEW stage_claim_headers AS
WITH base AS (
  SELECT
    upper(trim(claim_id))   AS claim_id,
    upper(trim(payer_id))   AS payer_id,
    upper(trim(patient_id)) AS patient_id,
    _submission_date        AS submission_date,
    _submission_ts          AS submission_ts,
    _total_charges          AS total_charges,
    _expected_amount        AS expected_amount,
    _wq_status              AS wq_status,
    _ingest_ts,
    _ingest_file
  FROM stage_ehr_filtered
)
SELECT
  claim_id, payer_id, patient_id,
  submission_date, submission_ts,
  total_charges, expected_amount,
  -- Map the EHR work-queue status to the claim status vocabulary.
  -- Any unrecognized (or NULL) status falls back to 'Pending'.
  CASE
    WHEN wq_status IN ('SUBMITTED','RESUBMITTED') THEN 'Submitted'
    WHEN wq_status IN ('IN WQ','PENDING REVIEW')  THEN 'Pending'
    WHEN wq_status = 'DENIED'                     THEN 'Denied'
    ELSE 'Pending'
  END AS current_status,
  -- The EHR feed carries no separate status timestamp, so the submission
  -- timestamp doubles as the status timestamp.
  submission_ts AS current_status_ts,
  -- Not available from the EHR feed; staged as NULL.
  CAST(NULL AS TIMESTAMP) AS last_277ca_event_ts,
  CAST(NULL AS TIMESTAMP) AS last_835_event_ts,
  -- Spark dayofweek(): 1 = Sunday ... 7 = Saturday.
  dayofweek(submission_date) AS weekday_submitted,
  dayofweek(submission_date) IN (1,7) AS is_weekend_submission,
  'EHR' AS source_system,
  _ingest_ts, _ingest_file,
  -- Latest snapshot wins; _ingest_file is a final tie-breaker so the chosen
  -- row is stable when submission and ingest timestamps are equal.
  ROW_NUMBER() OVER (
    PARTITION BY claim_id
    ORDER BY submission_ts DESC, _ingest_ts DESC, _ingest_file DESC
  ) AS rn
FROM base
QUALIFY rn = 1;

In [0]:
-- ===== 3) TARGET TABLE AND SCD1 UPSERT =====
-- Creates silver.claim_headers if needed, then upserts the staged headers
-- keyed on claim_id (SCD type 1: the row is overwritten in place).

CREATE TABLE IF NOT EXISTS silver.claim_headers (
  claim_id STRING NOT NULL,
  payer_id STRING,
  patient_id STRING,
  submission_date DATE,
  submission_ts TIMESTAMP,
  total_charges DECIMAL(18,2),
  expected_amount DECIMAL(18,2),
  current_status STRING,
  current_status_ts TIMESTAMP,
  last_277ca_event_ts TIMESTAMP,
  last_835_event_ts TIMESTAMP,
  weekday_submitted INT,
  is_weekend_submission BOOLEAN,
  source_system STRING,
  _ingest_ts TIMESTAMP,
  _ingest_file STRING
)
USING DELTA
CLUSTER BY (claim_id);

-- Column lists are spelled out instead of using UPDATE SET * / INSERT *:
--   * the staging view also exposes the dedupe helper column rn, which must
--     never reach the target (it would be added under schema evolution);
--   * last_277ca_event_ts / last_835_event_ts are always NULL in the EHR
--     stage, so they are written on INSERT only and left untouched on UPDATE
--     to avoid wiping values already stored on the row.
--     NOTE(review): presumably those two columns are maintained by the
--     277CA / 835 feeds - confirm.
MERGE INTO silver.claim_headers AS t
USING stage_claim_headers AS s
  ON t.claim_id = s.claim_id
-- Update only when the staged row is strictly newer: later status timestamp,
-- or same status timestamp with a later ingest.
-- NOTE(review): a target row whose current_status_ts is NULL never satisfies
-- this condition and so is never refreshed - confirm that is intended.
WHEN MATCHED AND (
      s.current_status_ts > t.current_status_ts
   OR (s.current_status_ts = t.current_status_ts AND s._ingest_ts > t._ingest_ts)
) THEN UPDATE SET
  payer_id              = s.payer_id,
  patient_id            = s.patient_id,
  submission_date       = s.submission_date,
  submission_ts         = s.submission_ts,
  total_charges         = s.total_charges,
  expected_amount       = s.expected_amount,
  current_status        = s.current_status,
  current_status_ts     = s.current_status_ts,
  weekday_submitted     = s.weekday_submitted,
  is_weekend_submission = s.is_weekend_submission,
  source_system         = s.source_system,
  _ingest_ts            = s._ingest_ts,
  _ingest_file          = s._ingest_file
WHEN NOT MATCHED THEN INSERT (
  claim_id, payer_id, patient_id,
  submission_date, submission_ts,
  total_charges, expected_amount,
  current_status, current_status_ts,
  last_277ca_event_ts, last_835_event_ts,
  weekday_submitted, is_weekend_submission,
  source_system, _ingest_ts, _ingest_file
) VALUES (
  s.claim_id, s.payer_id, s.patient_id,
  s.submission_date, s.submission_ts,
  s.total_charges, s.expected_amount,
  s.current_status, s.current_status_ts,
  s.last_277ca_event_ts, s.last_835_event_ts,
  s.weekday_submitted, s.is_weekend_submission,
  s.source_system, s._ingest_ts, s._ingest_file
);

In [0]:
-- ===== 4) REJECTS TABLE: create and insert-only merge =====
-- Records every bronze row that fails validation (missing claim_id, or an
-- amount/date that parses to NULL). The table is append-only, and the MERGE
-- has no WHEN MATCHED clause, so existing reject rows are never modified.

CREATE TABLE IF NOT EXISTS silver.ehr_claims_rejects (
  reject_key     STRING NOT NULL,   -- claim_id | ingest file | reason
  claim_id       STRING,
  reject_reason  STRING NOT NULL,
  _ingest_ts     TIMESTAMP,
  _ingest_file   STRING,
  _source_system STRING
)
USING DELTA
TBLPROPERTIES (delta.appendOnly = 'true');

MERGE INTO silver.ehr_claims_rejects AS t
USING (
  -- Parse the same three fields the filtered base validates.
  WITH typed AS (
    SELECT r.*,
           silver.fn_to_date_safe(as_of_date)  AS _d,
           silver.fn_to_dec_safe(billed_amt)   AS _b,
           silver.fn_to_dec_safe(expected_amt) AS _e
    FROM bronze.ehr_claims_raw r
  ),
  -- Keep only failing rows and label each with the FIRST failing check.
  failed AS (
    SELECT *,
      CASE
        WHEN claim_id IS NULL THEN 'MISSING_CLAIM_ID'
        WHEN _b IS NULL       THEN 'BAD_BILLED_AMT'
        WHEN _e IS NULL       THEN 'BAD_EXPECTED_AMT'
        WHEN _d IS NULL       THEN 'BAD_DATE'
        ELSE 'OTHER'  -- defensive only: the WHERE below admits no other case
      END AS reject_reason
    FROM typed
    WHERE claim_id IS NULL OR _b IS NULL OR _e IS NULL OR _d IS NULL
  ),
  keyed AS (
    SELECT
      -- idempotent key: re-running the notebook on the same files produces
      -- the same keys, so already-recorded rejects are skipped
      concat_ws('|',
        COALESCE(upper(trim(claim_id)),'<NULL>'),
        COALESCE(_ingest_file,''),
        COALESCE(reject_reason,'')
      ) AS reject_key,
      UPPER(TRIM(claim_id)) AS claim_id,
      reject_reason,
      _ingest_ts,
      _ingest_file,
      -- NOTE(review): read from the bronze row as-is; confirm bronze carries
      -- a _source_system column.
      _source_system
    FROM failed
  )
  SELECT
    reject_key,
    claim_id,
    reject_reason,
    _ingest_ts,
    _ingest_file,
    _source_system
  FROM keyed
  -- MERGE only de-duplicates source rows against the target, not against each
  -- other: without this, several bronze rows sharing a key (e.g. many rows
  -- with a NULL claim_id in one file) would all be inserted on first load.
  -- Keep the most recently ingested row per key.
  QUALIFY ROW_NUMBER() OVER (PARTITION BY reject_key ORDER BY _ingest_ts DESC) = 1
) AS s
ON  t.reject_key = s.reject_key
WHEN NOT MATCHED THEN INSERT (
  reject_key, claim_id, reject_reason, _ingest_ts, _ingest_file, _source_system
) VALUES (
  s.reject_key, s.claim_id, s.reject_reason, s._ingest_ts, s._ingest_file, s._source_system
);

In [0]:
-- ===== 5) CLAIM LINES MERGE: one line per claim, built from stage_ehr_filtered =====
-- Each EHR row is treated as a single claim line (line_number = 1), so the
-- merge key (claim_id, line_number) is effectively one row per claim.

MERGE INTO silver.claim_lines t
USING (
  -- stage_ehr_filtered can hold several rows for one claim (e.g. repeated
  -- snapshots). Because every row maps to line_number = 1, they must be
  -- collapsed first; otherwise new claims are inserted more than once and
  -- existing claims can fail the merge with a multiple-source-rows match.
  -- The latest row wins, using the same ordering keys as the header staging
  -- view (submission timestamp, then ingest timestamp), with _ingest_file as
  -- a final tie-breaker for a stable pick.
  WITH latest AS (
    SELECT r.*
    FROM stage_ehr_filtered r
    QUALIFY ROW_NUMBER() OVER (
      PARTITION BY upper(trim(r.claim_id))
      ORDER BY r._submission_ts DESC, r._ingest_ts DESC, r._ingest_file DESC
    ) = 1
  )
  SELECT
    upper(trim(r.claim_id))                           AS claim_id,
    1                                                 AS line_number,
    -- Fields the EHR feed does not supply are staged as NULL / empty.
    CAST(NULL AS STRING)                              AS rev_code,
    upper(trim(r.cpt))                                AS hcpcs_cpt,
    CAST(array() AS ARRAY<STRING>)                    AS modifier_codes,
    CAST(1 AS DECIMAL(18,2))                          AS units,
    -- The single line carries the whole claim's billed amount.
    CAST(r._total_charges AS DECIMAL(18,2))           AS line_charge,
    CAST(NULL AS STRING)                              AS rendering_npi,
    CAST(array(1) AS ARRAY<INT>)                      AS dx_pointer,
    -- High-cost flag: billed amount of 5000 or more.
    (r._total_charges >= 5000)                        AS is_high_cost,
    'EHR'                                             AS source_system,
    r._ingest_ts                                      AS _ingest_ts,
    r._ingest_file                                    AS _ingest_file
  FROM latest r
) s
ON  t.claim_id = s.claim_id AND t.line_number = s.line_number
-- Overwrite an existing line only when the staged row was ingested later.
WHEN MATCHED AND s._ingest_ts > t._ingest_ts THEN UPDATE SET
  rev_code       = s.rev_code,
  hcpcs_cpt      = s.hcpcs_cpt,
  modifier_codes = s.modifier_codes,
  units          = s.units,
  line_charge    = s.line_charge,
  rendering_npi  = s.rendering_npi,
  dx_pointer     = s.dx_pointer,
  is_high_cost   = s.is_high_cost,
  source_system  = s.source_system,
  _ingest_ts     = s._ingest_ts,
  _ingest_file   = s._ingest_file
WHEN NOT MATCHED THEN INSERT (
  claim_id, line_number, rev_code, hcpcs_cpt, modifier_codes, units, line_charge,
  rendering_npi, dx_pointer, is_high_cost, source_system, _ingest_ts, _ingest_file
) VALUES (
  s.claim_id, s.line_number, s.rev_code, s.hcpcs_cpt, s.modifier_codes, s.units, s.line_charge,
  s.rendering_npi, s.dx_pointer, s.is_high_cost, s.source_system, s._ingest_ts, s._ingest_file
);