In [0]:
%run
./_common_helpers

---
### Purpose: ###  
Load raw EHR bronze data into the silver `claim_headers` and `claim_lines` tables.


---
- Normalize types
- Map EHR `workqueue_status` -> claim `current_status`
- Treat each EHR row as one claim line, so the output stays compatible if line-item detail is ingested later.
- Rows with a missing `claim_id`, or with a bad money or date value, go to the `_rejects` tables.
---

---
### Assumes: ###  
`as_of_date` is the claim submission snapshot date from the EHR. 

In [0]:
-- EHR to silver.claim_headers

-- Quarantine pass (runs before staging).
-- Snapshot every bronze row that the header staging filter will refuse:
-- claim_id is NULL, or fn_to_dec_safe(billed_amt) / fn_to_date_safe(as_of_date)
-- yields NULL. The predicate is written as the negation of the staging filter
-- so the two sets stay complementary. The table is rebuilt on every run.
CREATE OR REPLACE TABLE silver.ehr_claim_headers_rejects AS
SELECT src.*
FROM bronze.ehr_claims_raw AS src
WHERE NOT (
      src.claim_id IS NOT NULL
  AND fn_to_dec_safe(src.billed_amt) IS NOT NULL
  AND fn_to_date_safe(src.as_of_date) IS NOT NULL
);

-- Create the target once with constraints
-- One row per claim; claim_id is the key the EHR merge below matches on.
-- IF NOT EXISTS makes this a no-op on every run after the first, so later
-- edits to this DDL do not alter an already-created table.
-- NOTE(review): confirm the target runtime accepts a CHECK constraint inline in
-- CREATE TABLE; some Delta versions only allow it via ALTER TABLE ... ADD CONSTRAINT.
CREATE TABLE IF NOT EXISTS silver.claim_headers (
  -- Identifiers; the EHR staging view upper-cases and trims all three.
  claim_id STRING NOT NULL,
  payer_id STRING,
  patient_id STRING,
  -- Both parsed from the EHR as_of_date (date and timestamp flavours).
  submission_date DATE,
  submission_ts TIMESTAMP,
  -- Parsed from EHR billed_amt and expected_amt respectively.
  total_charges DECIMAL(18,2),
  expected_amount DECIMAL(18,2),
  -- Normalized status. The EHR load only ever writes 'Submitted', 'Pending' or
  -- 'Denied'; 'Accepted' and 'Rejected' are presumably set by other feeds.
  current_status STRING,
  current_status_ts TIMESTAMP,
  -- Inserted as NULL by the EHR load and never updated by its merge.
  last_277ca_event_ts TIMESTAMP,
  last_835_event_ts TIMESTAMP,
  -- dayofweek(submission_date); values 1 and 7 are flagged as weekend.
  weekday_submitted INT,
  is_weekend_submission BOOLEAN,
  -- 'EHR' for rows written by this notebook.
  source_system STRING,
  -- Lineage columns carried through unchanged from the bronze table.
  _ingest_ts TIMESTAMP,
  _ingest_file STRING,
  CONSTRAINT check_status CHECK (current_status IN ('Submitted','Pending','Denied','Accepted','Rejected'))
)
USING DELTA
CLUSTER BY (claim_id);

-- Stage the latest row per claim for this load window.
--
-- Steps:
--   parsed : normalize identifiers and run the safe-parse helpers once per row,
--            keeping only rows that pass the quarantine rules (claim_id present,
--            billed_amt and as_of_date convertible by the helpers).
--   ranked : derive the header columns and rank rows within each claim_id,
--            newest as_of_date first, then newest _ingest_ts.
--   final  : keep rank 1 and expose exactly the silver.claim_headers columns,
--            in table order.
--
-- Changes from the previous version:
--   * The helper column `rn` is no longer exposed by the view, so it cannot
--     leak into the downstream MERGE's INSERT *.
--   * _ingest_file is a final tie-breaker, so the winning row is deterministic
--     when as_of_date and _ingest_ts are equal.
--
-- NOTE(review): a whitespace-only claim_id passes the IS NOT NULL filter and is
-- staged as '' after trim(); the rejects query has the same gap. Fix both
-- together if blank ids should be quarantined.
CREATE OR REPLACE TEMP VIEW stg_ehr_claim_headers AS
WITH parsed AS (
  SELECT
    upper(trim(r.claim_id))          AS claim_id,
    upper(trim(r.payer_id))          AS payer_id,
    upper(trim(r.patient_id))        AS patient_id,
    fn_to_date_safe(r.as_of_date)    AS submission_date,
    fn_to_ts_safe(r.as_of_date)      AS submission_ts,
    fn_to_dec_safe(r.billed_amt)     AS total_charges,
    fn_to_dec_safe(r.expected_amt)   AS expected_amount,
    upper(trim(r.workqueue_status))  AS wq_status,
    r._ingest_ts,
    r._ingest_file
  FROM bronze.ehr_claims_raw AS r
  WHERE r.claim_id IS NOT NULL
    AND fn_to_dec_safe(r.billed_amt) IS NOT NULL
    AND fn_to_date_safe(r.as_of_date) IS NOT NULL
),
ranked AS (
  SELECT
    p.claim_id,
    p.payer_id,
    p.patient_id,
    p.submission_date,
    p.submission_ts,
    p.total_charges,
    p.expected_amount,
    -- Map the EHR work-queue status to the claim status vocabulary.
    -- Unknown or NULL statuses fall back to 'Pending'.
    CASE p.wq_status
      WHEN 'SUBMITTED'      THEN 'Submitted'
      WHEN 'RESUBMITTED'    THEN 'Submitted'
      WHEN 'IN WQ'          THEN 'Pending'
      WHEN 'PENDING REVIEW' THEN 'Pending'
      WHEN 'DENIED'         THEN 'Denied'
      ELSE 'Pending'
    END                                   AS current_status,
    -- The EHR snapshot time doubles as the status timestamp.
    p.submission_ts                       AS current_status_ts,
    -- Not sourced from the EHR; staged as typed NULLs.
    CAST(NULL AS TIMESTAMP)               AS last_277ca_event_ts,
    CAST(NULL AS TIMESTAMP)               AS last_835_event_ts,
    -- Spark dayofweek(): 1 = Sunday ... 7 = Saturday.
    dayofweek(p.submission_date)          AS weekday_submitted,
    coalesce(dayofweek(p.submission_date) IN (1, 7), false) AS is_weekend_submission,
    'EHR'                                 AS source_system,
    p._ingest_ts,
    p._ingest_file,
    ROW_NUMBER() OVER (
      PARTITION BY p.claim_id
      ORDER BY p.submission_ts DESC, p._ingest_ts DESC, p._ingest_file DESC
    )                                     AS rn
  FROM parsed AS p
)
SELECT
  claim_id,
  payer_id,
  patient_id,
  submission_date,
  submission_ts,
  total_charges,
  expected_amount,
  current_status,
  current_status_ts,
  last_277ca_event_ts,
  last_835_event_ts,
  weekday_submitted,
  is_weekend_submission,
  source_system,
  _ingest_ts,
  _ingest_file
FROM ranked
WHERE rn = 1;

-- Type-1 (overwrite in place) merge of staged EHR headers into silver.claim_headers.
--
-- Match key: claim_id.
-- Update rule: the staged row wins only when it is strictly newer, i.e. it has
--   a later current_status_ts, or the same current_status_ts and a later
--   _ingest_ts. Equal or older rows leave the target untouched, so re-running
--   the same load is a no-op.
-- last_277ca_event_ts and last_835_event_ts are deliberately left out of the
--   UPDATE so the EHR feed never overwrites values already on the target row.
-- Insert rule: unmatched claims are inserted with an explicit column list
--   instead of INSERT *, so the statement does not depend on the exact column
--   set of the staging view (which may expose helper columns such as a row
--   number) or on schema-evolution settings.
--
-- NOTE(review): the comparisons are not NULL-safe. If current_status_ts (or, on
-- ties, _ingest_ts) is NULL on either side, the matched row is never updated.
-- Confirm that is the intended behaviour.
MERGE INTO silver.claim_headers AS t
USING stg_ehr_claim_headers AS s
  ON t.claim_id = s.claim_id
WHEN MATCHED AND (
       s.current_status_ts > t.current_status_ts
    OR (s.current_status_ts = t.current_status_ts AND s._ingest_ts > t._ingest_ts)
)
THEN UPDATE SET
  t.payer_id              = s.payer_id,
  t.patient_id            = s.patient_id,
  t.submission_date       = s.submission_date,
  t.submission_ts         = s.submission_ts,
  t.total_charges         = s.total_charges,
  t.expected_amount       = s.expected_amount,
  t.current_status        = s.current_status,
  t.current_status_ts     = s.current_status_ts,
  t.weekday_submitted     = s.weekday_submitted,
  t.is_weekend_submission = s.is_weekend_submission,
  t.source_system         = s.source_system,
  t._ingest_ts            = s._ingest_ts,
  t._ingest_file          = s._ingest_file
WHEN NOT MATCHED
THEN INSERT (
  claim_id,
  payer_id,
  patient_id,
  submission_date,
  submission_ts,
  total_charges,
  expected_amount,
  current_status,
  current_status_ts,
  last_277ca_event_ts,
  last_835_event_ts,
  weekday_submitted,
  is_weekend_submission,
  source_system,
  _ingest_ts,
  _ingest_file
)
VALUES (
  s.claim_id,
  s.payer_id,
  s.patient_id,
  s.submission_date,
  s.submission_ts,
  s.total_charges,
  s.expected_amount,
  s.current_status,
  s.current_status_ts,
  s.last_277ca_event_ts,
  s.last_835_event_ts,
  s.weekday_submitted,
  s.is_weekend_submission,
  s.source_system,
  s._ingest_ts,
  s._ingest_file
)
;

In [0]:
-- EHR to silver.claim_lines

-- Quarantine for the claim-lines load: every bronze row the line filter skips
-- (claim_id is NULL, or fn_to_dec_safe(billed_amt) / fn_to_date_safe(as_of_date)
-- yields NULL). Expressed as the negation of the load filter; rebuilt each run.
CREATE OR REPLACE TABLE silver.ehr_claim_lines_rejects AS
SELECT src.*
FROM bronze.ehr_claims_raw AS src
WHERE NOT (
      src.claim_id IS NOT NULL
  AND fn_to_dec_safe(src.billed_amt) IS NOT NULL
  AND fn_to_date_safe(src.as_of_date) IS NOT NULL
);

-- Each EHR row becomes one claim line (line_number is always 1, units always 1).
--
-- Placeholder columns the EHR feed does not supply are given explicit types:
--   rendering_npi  -> CAST(NULL AS STRING)            (was an untyped NULL)
--   modifier_codes -> CAST(array() AS ARRAY<STRING>)  (was an untyped empty array)
-- An untyped NULL or empty array gives the column a VOID element type, which
-- later typed writes cannot use cleanly. rev_code already used a typed NULL.
--
-- NOTE(review): CREATE OR REPLACE rebuilds the whole table on every run, so any
-- rows from another source_system would be dropped. Confirm EHR is the only writer.
-- NOTE(review): unlike the header staging view, rows are not de-duplicated per
-- claim_id, so a claim with several bronze rows yields several lines that all
-- have line_number = 1. Confirm that is intended.
CREATE OR REPLACE TABLE silver.claim_lines AS
SELECT
  upper(trim(r.claim_id))                                        AS claim_id,
  1                                                              AS line_number,
  CAST(NULL AS STRING)                                           AS rev_code,
  upper(trim(r.cpt))                                             AS hcpcs_cpt,
  CAST(array() AS ARRAY<STRING>)                                 AS modifier_codes,
  CAST(1 AS DECIMAL(18,2))                                       AS units,
  fn_to_dec_safe(r.billed_amt)                                   AS line_charge,
  CAST(NULL AS STRING)                                           AS rendering_npi,
  array(1)                                                       AS dx_pointer,
  -- Lines billed at 5000 or more are flagged as high cost.
  CASE WHEN fn_to_dec_safe(r.billed_amt) >= 5000 THEN true ELSE false END AS is_high_cost,
  'EHR'                                                          AS source_system,
  r._ingest_ts,
  r._ingest_file
FROM bronze.ehr_claims_raw AS r
WHERE r.claim_id IS NOT NULL
  AND fn_to_dec_safe(r.billed_amt) IS NOT NULL
  AND fn_to_date_safe(r.as_of_date) IS NOT NULL;