Skip to content

Pushdown operations on JSON_VALUE result #28225

@Vladilen

Description

@Vladilen

Problem

When CAST is performed on JSON_VALUE result, the cast itself is not pushdowned, the full JsonDocument is read

Test table

CREATE TABLE `bluesky_subcolumns` (
    id uint64 NOT NULL,
    data JSONDocument,
    PRIMARY KEY (id)
) WITH (STORE=Column, PARTITION_COUNT=64)

Query without cast

Query:

SELECT JSON_VALUE(data, "$.time_us") AS time_us
FROM bluesky_subcolumns
WHERE JSON_VALUE(data, "$.kind") = 'commit'
LIMIT 1;

AST:

(
(let $1 (DataType 'Utf8))
(let $2 (Uint64 '1))
(let $3 (StructType '('"time_us" (OptionalType $1))))
(let $4 '('('"_logical_id" '811) '('"_id" '"edf75cf3-646e0782-58c35990-752e0d4f") '('"_wide_channels" $3)))
(let $5 (DqPhyStage '() (lambda '() (block '(
  (let $9 (KqpTable '"/Root/testdb/bluesky_subcolumns" '"72075186224037897:40" '"" '3))
  (let $10 (KqpBlockReadOlapTableRanges $9 (Void) '('"data") '() '() (lambda '($13) (block '(
    (let $14 (KqpOlapJsonValue '"data" (Utf8 '"$.kind") $1))
    (let $15 (DataType 'Bool))
    (let $16 '('eq $14 (String '"commit") (OptionalType $15)))
    (let $17 '('?? $16 (Bool 'false) $15))
    (let $18 (KqpOlapJsonValue '"data" (Utf8 '"$.time_us") $1))
    (return (KqpOlapProjections (KqpOlapFilter $13 $17) '((KqpOlapProjection $18 '"data"))))
  )))))
  (let $11 (lambda '($19 $20) (BlockAsStruct '('"time_us" (BlockMember (BlockAsStruct '('"data" $19)) '"data"))) $20))
  (let $12 (lambda '($21 $22) (BlockMember $21 '"time_us") $22))
  (return (WideMap (WideTakeBlocks (WideMap (FromFlow $10) $11) $2) $12))
))) $4))
(let $6 (DqCnUnionAll (TDqOutput $5 '"0")))
(let $7 (DqPhyStage '($6) (lambda '($23) (block '(
  (let $24 (lambda '($25 $26) (BlockAsStruct '('"time_us" $25)) $26))
  (return (FromFlow (NarrowMap (ToFlow (WideFromBlocks (WideMap (WideTakeBlocks $23 $2) $24))) (lambda '($27) $27))))
))) '('('"_logical_id" '824) '('"_id" '"41160682-7207f409-187170ba-e713ee23"))))
(let $8 (DqCnResult (TDqOutput $7 '"0") '('"time_us")))
(return (KqpPhysicalQuery '((KqpPhysicalTx '($5 $7) '($8) '() '('('"type" '"generic")))) '((KqpTxResultBinding (ListType $3) '"0" '"0")) '('('"type" '"query"))))
)

SSA:

...
Command {
  Projection {
    Columns {
      Id: 2 -- data column
    }
  }
}

Query with cast

Query:

SELECT CAST(JSON_VALUE(data, "$.time_us") AS Double) AS time_us
FROM bluesky_subcolumns
WHERE JSON_VALUE(data, "$.kind") = 'commit'
LIMIT 1;

AST:

(
(let $1 (Uint64 '1))
(let $2 (OptionalType (DataType 'Double)))
(let $3 (StructType '('"time_us" $2)))
(let $4 '('('"_logical_id" '798) '('"_id" '"7bf5260f-df58e0ef-386b0a3f-c82750e4") '('"_wide_channels" $3)))
(let $5 (DqPhyStage '() (lambda '() (block '(
  (let $9 (KqpTable '"/Root/testdb/bluesky_subcolumns" '"72075186224037897:40" '"" '3))
  (let $10 (DataType 'Utf8))
  (let $11 (KqpBlockReadOlapTableRanges $9 (Void) '('"data") '() '() (lambda '($13) (block '(
    (let $14 (KqpOlapJsonValue '"data" (Utf8 '"$.kind") $10))
    (let $15 (DataType 'Bool))
    (let $16 '('eq $14 (String '"commit") (OptionalType $15)))
    (let $17 '('?? $16 (Bool 'false) $15))
    (return (KqpOlapFilter $13 $17))
  )))))
  (let $12 (lambda '($18 $19) (BlockMember (BlockAsStruct '('"data" $18)) '"data") $19))
  (return (FromFlow (WideMap (ToFlow (WideFromBlocks (WideTakeBlocks (WideMap (FromFlow $11) $12) $1))) (lambda '($20) (block '(
    (let $21 (OptionalType $10))
    (let $22 '((ResourceType '"JsonPath")))
    (let $23 (DictType $10 (ResourceType '"JsonNode")))
    (let $24 (CallableType '() '((VariantType (TupleType (TupleType (DataType 'Uint8) (DataType 'String)) $21))) '((OptionalType (DataType 'JsonDocument))) $22 '($23)))
    (let $25 (Udf '"Json2.JsonDocumentSqlValueConvertToUtf8" (Void) (VoidType) '"" $24 (VoidType) '"" '('('"strict"))))
    (let $26 (CallableType '() $22 '($10)))
    (let $27 (Udf '"Json2.CompilePath" (Void) (VoidType) '"" $26 (VoidType) '"" '()))
    (let $28 (Apply $25 $20 (Apply $27 (Utf8 '"$.time_us")) (Dict $23)))
    (let $29 (Visit $28 '"0" (lambda '($30) (Nothing $21)) '1 (lambda '($31) $31)))
    (return (SafeCast $29 $2))
  ))))))
))) $4))
(let $6 (DqCnUnionAll (TDqOutput $5 '"0")))
(let $7 (DqPhyStage '($6) (lambda '($32) (FromFlow (NarrowMap (Take (ToFlow $32) $1) (lambda '($33) (AsStruct '('"time_us" $33)))))) '('('"_logical_id" '811) '('"_id" '"5316536d-5dbf661e-6006c1c1-c50e2306"))))
(let $8 (DqCnResult (TDqOutput $7 '"0") '('"time_us")))
(return (KqpPhysicalQuery '((KqpPhysicalTx '($5 $7) '($8) '() '('('"type" '"generic")))) '((KqpTxResultBinding (ListType $3) '"0" '"0")) '('('"type" '"query"))))
)

SSA

Command {
  Projection {
    Columns {
      Id: 10 -- JSON_VALUE result
    }
  }
}

Metadata

Metadata

Assignees

Labels

bugSomething isn't working

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions