Skip to content

Commit 88ee1ab

Browse files
authored Mar 17, 2025
Merge pull request #22 from quackscience/fix/read_parquet_mergetree
Fix/read parquet mergetree
2 parents 4f927ab + ef1779b commit 88ee1ab

8 files changed

+341
-112
lines changed
 

‎.github/workflows/MainDistributionPipeline.yml

+11-10
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,20 @@ concurrency:
1515
cancel-in-progress: true
1616

1717
jobs:
18-
duckdb-next-build:
19-
name: Build extension binaries (next)
20-
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
21-
with:
22-
duckdb_version: main
23-
ci_tools_version: main
24-
extension_name: chsql
18+
# Temporarily disabled because main is broken
19+
# duckdb-next-build:
20+
# name: Build extension binaries (next)
21+
# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
22+
# with:
23+
# duckdb_version: 1.1.2
24+
# ci_tools_version: 1.1.2
25+
# extension_name: chsql
2526

2627
duckdb-stable-build:
2728
name: Build extension binaries
28-
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.3
29+
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1
2930
with:
30-
duckdb_version: v1.1.3
31-
ci_tools_version: v1.1.3
31+
duckdb_version: v1.2.1
32+
ci_tools_version: v1.2.1
3233
extension_name: chsql
3334

‎chsql/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ set(TARGET_NAME chsql)
77
find_package(OpenSSL REQUIRED)
88
set(EXTENSION_NAME ${TARGET_NAME}_extension)
99
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
10+
set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION})
1011
project(${TARGET_NAME})
1112

1213
include_directories(
@@ -21,7 +22,7 @@ include_directories(
2122
../duckdb/third_party/mbedtls
2223
../duckdb/third_party/mbedtls/include
2324
../duckdb/third_party/brotli/include)
24-
set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp)
25+
set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp)
2526
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
2627
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
2728
# Link OpenSSL in both the static library as the loadable extension

‎chsql/extension_config.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This file is included by DuckDB's build system. It specifies which extension to load
2-
2+
set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION})
33
include_directories(
44
./src/include
55
${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//
2+
// Created by hromozeka on 10.03.25.
3+
//
4+
5+
#ifndef PARQUET_TYPES_H
6+
#define PARQUET_TYPES_H
7+
8+
9+
#include <mutex>

#include "duckdb.hpp"
#include <parquet_types.h>
11+
12+
struct ParquetType {
13+
/*duckdb_parquet::ConvertedType::type -> replaced to int to support -1 nodata value*/
14+
int converted_type;
15+
/* duckdb_parquet::Type::type -> replaced to int to support -1 for no matter value */
16+
int parquet_type;
17+
const duckdb::LogicalType logical_type;
18+
ParquetType(int converted_type, int parquet_type, const duckdb::LogicalType &logical_type)
19+
: converted_type(converted_type), parquet_type(parquet_type), logical_type(logical_type) {}
20+
virtual bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx);
21+
virtual duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema);
22+
};
23+
24+
struct LogicalParquetType : public ParquetType {
25+
bool (*get_isset)(const duckdb_parquet::SchemaElement& el);
26+
27+
LogicalParquetType(bool (*get_isset) (const duckdb_parquet::SchemaElement& el),
28+
const duckdb::LogicalType& logical_type)
29+
: ParquetType(-1, duckdb_parquet::Type::type::INT32, logical_type), get_isset(get_isset) {}
30+
bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx) override;
31+
};
32+
33+
struct JSONParquetType : public ParquetType {
34+
JSONParquetType(): ParquetType(duckdb_parquet::ConvertedType::JSON, -1, duckdb::LogicalType::SQLNULL) {}
35+
duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema) override;
36+
};
37+
38+
struct DecimalParquetType : public ParquetType {
39+
DecimalParquetType(): ParquetType(-1, duckdb_parquet::Type::type::INT32, duckdb::LogicalType::SQLNULL) {}
40+
bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx) override;
41+
duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema) override;
42+
};
43+
44+
class ParquetTypesManager {
45+
protected:
46+
static ParquetTypesManager *instance;
47+
static std::mutex instance_mutex;
48+
ParquetTypesManager();
49+
static ParquetTypesManager* get_instance();
50+
duckdb::LogicalType derive_logical_type(const duckdb_parquet::SchemaElement &s_ele, bool binary_as_string);
51+
public:
52+
static duckdb::LogicalType get_logical_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx);
53+
};
54+
55+
#endif //PARQUET_TYPES_H

0 commit comments

Comments
 (0)
Failed to load comments.