Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f40c2c0
add clp module
wraymo Aug 16, 2024
d2c2011
add clp code to velox
wraymo Aug 18, 2024
01b7597
add more methods
wraymo Aug 20, 2024
883f545
add more methods
wraymo Aug 21, 2024
0fbab4d
modify CMakeLists.txt
wraymo Aug 21, 2024
6a75c5f
add clp data source support
wraymo Aug 24, 2024
58fad92
add polymorphic type support
wraymo Aug 26, 2024
a5caad3
remove duplicate values and add null support
wraymo Aug 28, 2024
ee9e419
fix array bugs and the pipe blocking bug
wraymo Aug 28, 2024
f081b21
add concurrent split support
wraymo Sep 30, 2024
72ab7e8
fixed a bug affecting select count(*) queries
wraymo Sep 30, 2024
a76bf19
apply changes from upstream
wraymo Oct 2, 2024
b6d000e
fix some issues
wraymo Oct 3, 2024
e58717e
fix some bugs for concurrent ClppSplits
wraymo Oct 4, 2024
2a7ce15
fix some issues
wraymo Oct 9, 2024
7b3382a
another impl with fork and pipe
wraymo Oct 31, 2024
d1f050e
add clp lib code for clp connector
wraymo Nov 4, 2024
e939616
solve many dependency issues
wraymo Nov 4, 2024
b1bdf5a
add antlr jar
wraymo Nov 5, 2024
1c861f5
add single file archive and network reader support
wraymo Nov 12, 2024
1ecd635
add msgpack dependency
wraymo Nov 13, 2024
6982ffb
add datasouce url support
wraymo Nov 13, 2024
7d52533
add s3 support
wraymo Jan 22, 2025
8397c53
fix several issues
wraymo Jan 24, 2025
ee71aab
not treat warnings as errors
wraymo Jan 28, 2025
a73ea15
change the order
wraymo Jan 28, 2025
52a55d3
refactor the code
wraymo Feb 5, 2025
52b6f18
update clp dep
wraymo Feb 23, 2025
235b4bb
update velox code
wraymo Mar 17, 2025
6a6a0bc
rebase to the latest master branch
wraymo Mar 17, 2025
4b74119
revert IP related changes
wraymo Mar 17, 2025
16932ec
update connector APIs and change the way how we use clpsplit
Mar 18, 2025
0d3b636
fix warnings
Mar 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ tmp/
# Scripts
#==============================================================================#
*.jar
!third-party/antlr/antlr-4.13.1-complete.jar
scripts/PelotonTest/out
scripts/PelotonTest/lib

Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "velox/connectors/clp/search_lib/clp_src"]
path = velox/connectors/clp/search_lib/clp_src
url = https://github.com/y-scope/clp.git
194 changes: 194 additions & 0 deletions CMake/ExternalAntlr4Cpp.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: ExternalAntlr4Cpp.cmake taken from
# https://github.com/antlr/antlr4/blob/4.13.1/runtime/Cpp/cmake/ExternalAntlr4Cpp.cmake

# Oldest CMake release this module is written against.
cmake_minimum_required(VERSION 3.7)

# Opt in to the NEW behaviour of CMP0114 on CMake releases that know the policy.
if(POLICY CMP0114)
  cmake_policy(SET CMP0114 NEW)
endif()

include(ExternalProject)

# Upstream repository the runtime is cloned from when a Git checkout is used.
set(ANTLR4_GIT_REPOSITORY "https://github.com/antlr/antlr4.git")

# Checkout location of the ANTLR4 sources inside the build tree, and the
# directory holding the C++ runtime's public headers.
set(ANTLR4_ROOT "${CMAKE_CURRENT_BINARY_DIR}/antlr4_runtime/src/antlr4_runtime")
set(ANTLR4_INCLUDE_DIRS "${ANTLR4_ROOT}/runtime/Cpp/runtime/src")
# Git ref of the ANTLR4 runtime to build. Callers may define ANTLR4_TAG before
# including this module to override it.
#
# The default is pinned to the 4.13.1 release instead of a moving branch so the
# build is reproducible and the runtime matches the bundled
# antlr-4.13.1-complete.jar code generator. A branch name would track upstream
# at the cost of rebuilding after 'clean'; a tag or commit hash keeps the build
# stable.
if(NOT DEFINED ANTLR4_TAG)
  set(ANTLR4_TAG 4.13.1)
endif()
Comment on lines +29 to +33
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use of master branch as default tag could lead to non-reproducible builds.

Using the master branch as the default tag for the ANTLR4 repository could lead to non-reproducible builds as the branch can change over time. Consider using a specific version tag or commit hash for better build reproducibility.

if(NOT DEFINED ANTLR4_TAG)
    # Set to branch name to keep library updated at the cost of needing to rebuild after 'clean'
    # Set to commit hash to keep the build stable and does not need to rebuild after 'clean'
-    set(ANTLR4_TAG master)
+    set(ANTLR4_TAG 4.13.1)
endif()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if(NOT DEFINED ANTLR4_TAG)
# Set to branch name to keep library updated at the cost of needing to rebuild after 'clean'
# Set to commit hash to keep the build stable and does not need to rebuild after 'clean'
set(ANTLR4_TAG master)
endif()
if(NOT DEFINED ANTLR4_TAG)
# Set to branch name to keep library updated at the cost of needing to rebuild after 'clean'
# Set to commit hash to keep the build stable and does not need to rebuild after 'clean'
set(ANTLR4_TAG 4.13.1)
endif()


# Create the header directory at configure time so that CMake does not reject
# it as a non-existent include directory.
file(MAKE_DIRECTORY "${ANTLR4_INCLUDE_DIRS}")

# Directory the runtime libraries are written to. For the Visual Studio and
# Xcode generators the generator's own configuration placeholder is appended
# literally.
if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
  set(ANTLR4_OUTPUT_DIR "${ANTLR4_ROOT}/runtime/Cpp/dist/$(Configuration)")
elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*")
  set(ANTLR4_OUTPUT_DIR "${ANTLR4_ROOT}/runtime/Cpp/dist/$(CONFIGURATION)")
else()
  set(ANTLR4_OUTPUT_DIR "${ANTLR4_ROOT}/runtime/Cpp/dist")
endif()

# Per-platform file names of the runtime artifacts:
#   ANTLR4_STATIC_LIBRARIES  - static archive
#   ANTLR4_SHARED_LIBRARIES  - import library (only set on Windows-like platforms)
#   ANTLR4_RUNTIME_LIBRARIES - shared library loaded at run time
if(MSVC)
  set(ANTLR4_STATIC_LIBRARIES "${ANTLR4_OUTPUT_DIR}/antlr4-runtime-static.lib")
  set(ANTLR4_SHARED_LIBRARIES "${ANTLR4_OUTPUT_DIR}/antlr4-runtime.lib")
  set(ANTLR4_RUNTIME_LIBRARIES "${ANTLR4_OUTPUT_DIR}/antlr4-runtime.dll")
elseif(MINGW)
  set(ANTLR4_STATIC_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a")
  set(ANTLR4_SHARED_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a")
  set(ANTLR4_RUNTIME_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll")
elseif(CYGWIN)
  set(ANTLR4_STATIC_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a")
  set(ANTLR4_SHARED_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a")
  # The Cygwin DLL name embeds the ANTLR4 tag.
  set(ANTLR4_RUNTIME_LIBRARIES
      "${ANTLR4_OUTPUT_DIR}/cygantlr4-runtime-${ANTLR4_TAG}.dll")
elseif(APPLE)
  set(ANTLR4_STATIC_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a")
  set(ANTLR4_RUNTIME_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dylib")
else()
  set(ANTLR4_STATIC_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a")
  set(ANTLR4_RUNTIME_LIBRARIES "${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.so")
endif()

# Command prefix used to build one target of the runtime; the target name is
# appended by the build_static / build_shared steps.
if(${CMAKE_GENERATOR} MATCHES ".* Makefiles")
  # Invoking $(MAKE) avoids
  # 'warning: jobserver unavailable: using -j1. Add '+' to parent make rule.'
  set(ANTLR4_BUILD_COMMAND $(MAKE))
elseif(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
  set(ANTLR4_BUILD_COMMAND
      ${CMAKE_COMMAND} --build . --config $(Configuration) --target)
elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*")
  set(ANTLR4_BUILD_COMMAND
      ${CMAKE_COMMAND} --build . --config $(CONFIGURATION) --target)
else()
  set(ANTLR4_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target)
endif()

# Link the runtime against the static CRT unless the caller decided otherwise.
if(NOT DEFINED ANTLR4_WITH_STATIC_CRT)
  set(ANTLR4_WITH_STATIC_CRT ON)
endif()

# The sources come either from an archive (ANTLR4_ZIP_REPOSITORY) or from a
# Git checkout of ANTLR4_TAG; every other ExternalProject setting is shared.
if(ANTLR4_ZIP_REPOSITORY)
  set(antlr4_runtime_download_args URL ${ANTLR4_ZIP_REPOSITORY})
else()
  set(antlr4_runtime_download_args
      GIT_REPOSITORY ${ANTLR4_GIT_REPOSITORY}
      GIT_TAG ${ANTLR4_TAG})
endif()

# Download and configure only: the build and install commands are empty, the
# libraries are built by the separate build_static / build_shared steps.
ExternalProject_Add(
  antlr4_runtime
  PREFIX antlr4_runtime
  ${antlr4_runtime_download_args}
  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
  BUILD_COMMAND ""
  BUILD_IN_SOURCE 1
  SOURCE_DIR ${ANTLR4_ROOT}
  SOURCE_SUBDIR runtime/Cpp
  CMAKE_CACHE_ARGS
    -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
    -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT}
    -DDISABLE_WARNINGS:BOOL=ON
    # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard
    # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project
  INSTALL_COMMAND ""
  EXCLUDE_FROM_ALL 1)

unset(antlr4_runtime_download_args)

# The libraries are built by separate steps, as people rarely want both the
# static and the shared one. Those steps run in ANTLR4_BUILD_DIR.
if(CMAKE_VERSION VERSION_LESS "3.14.0")
  set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT})
else()
  # CMake 3.14 and newer build in the SOURCE_SUBDIR of the external project
  # when BUILD_IN_SOURCE is true.
  set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT}/runtime/Cpp)
endif()

# Extra step of the antlr4_runtime external project that builds only the
# static runtime library.
ExternalProject_Add_Step(
  antlr4_runtime
  build_static
  COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_static
  # Depend on target instead of step (a custom command)
  # to avoid running dependent steps concurrently
  DEPENDS antlr4_runtime
  BYPRODUCTS ${ANTLR4_STATIC_LIBRARIES}
  EXCLUDE_FROM_MAIN 1
  WORKING_DIRECTORY ${ANTLR4_BUILD_DIR})
ExternalProject_Add_StepTargets(antlr4_runtime build_static)

# Imported target wrapping the static runtime; linking it triggers the
# build_static step through the dependency below.
add_library(antlr4_static STATIC IMPORTED)
add_dependencies(antlr4_static antlr4_runtime-build_static)
# The usage requirements are set as properties rather than through
# target_include_directories(), which only accepts IMPORTED targets from
# CMake 3.11 on while this module declares 3.7 as its minimum.
# ANTLR4CPP_STATIC is exported as well so that consumers of the static runtime
# get the define without relying on a directory-wide add_definitions().
set_target_properties(antlr4_static PROPERTIES
  IMPORTED_LOCATION "${ANTLR4_STATIC_LIBRARIES}"
  INTERFACE_INCLUDE_DIRECTORIES "${ANTLR4_INCLUDE_DIRS}"
  INTERFACE_COMPILE_DEFINITIONS "ANTLR4CPP_STATIC")

# Extra step of the antlr4_runtime external project that builds only the
# shared runtime library.
ExternalProject_Add_Step(
  antlr4_runtime
  build_shared
  COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_shared
  # Depend on target instead of step (a custom command)
  # to avoid running dependent steps concurrently
  DEPENDS antlr4_runtime
  BYPRODUCTS ${ANTLR4_SHARED_LIBRARIES} ${ANTLR4_RUNTIME_LIBRARIES}
  EXCLUDE_FROM_MAIN 1
  WORKING_DIRECTORY ${ANTLR4_BUILD_DIR})
ExternalProject_Add_StepTargets(antlr4_runtime build_shared)

# Imported target wrapping the shared runtime; linking it triggers the
# build_shared step through the dependency below.
add_library(antlr4_shared SHARED IMPORTED)
add_dependencies(antlr4_shared antlr4_runtime-build_shared)
# The include directory is set as a property rather than through
# target_include_directories(), which only accepts IMPORTED targets from
# CMake 3.11 on while this module declares 3.7 as its minimum.
set_target_properties(antlr4_shared PROPERTIES
  IMPORTED_LOCATION "${ANTLR4_RUNTIME_LIBRARIES}"
  INTERFACE_INCLUDE_DIRECTORIES "${ANTLR4_INCLUDE_DIRS}")

# An import library is only defined on the platforms that use one.
if(ANTLR4_SHARED_LIBRARIES)
  set_target_properties(antlr4_shared PROPERTIES
    IMPORTED_IMPLIB "${ANTLR4_SHARED_LIBRARIES}")
endif()
170 changes: 170 additions & 0 deletions CMake/FindANTLR.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: FindANTLR.cmake taken from
# https://github.com/antlr/antlr4/blob/4.13.1/runtime/Cpp/cmake/FindANTLR.cmake

# TODO: Clean up ANTLR cmake files
# On macOS, the way Java is installed with brew doesn't also make it the default version of Java on
# the system, so JAVA_HOME is pointed at the brew install location here.
# A JAVA_HOME already present in the environment is left alone, and only a
# directory that actually exists is used: /usr/local is the Homebrew prefix on
# Intel Macs, /opt/homebrew the one on Apple Silicon.
if(APPLE AND "$ENV{JAVA_HOME}" STREQUAL "")
  foreach(antlr_brew_jdk_dir
          "/usr/local/opt/openjdk@11/"
          "/opt/homebrew/opt/openjdk@11/")
    if(IS_DIRECTORY "${antlr_brew_jdk_dir}")
      set(ENV{JAVA_HOME} "${antlr_brew_jdk_dir}")
      break()
    endif()
  endforeach()
  unset(antlr_brew_jdk_dir)
endif()

# ANTLR release used for both the bundled code generator jar and the C++
# runtime built by ExternalAntlr4Cpp.
set(ANTLR4_TAG 4.13.1)
# NOTE(review): this define is directory-scoped and therefore reaches every
# target declared after this module is loaded, not only ANTLR consumers.
add_definitions(-DANTLR4CPP_STATIC)

# Locate the code generator jar. A value supplied by the caller wins, then the
# jar bundled under third-party/antlr; the find_program() search further down
# is the last resort. Setting the variable unconditionally would make that
# search unreachable and silently discard a caller-provided path.
# NOTE(review): a caller-provided jar should match ANTLR4_TAG, since the
# generated code is compiled against that runtime version.
if(NOT ANTLR_EXECUTABLE)
  set(antlr_bundled_jar
      "${PROJECT_SOURCE_DIR}/third-party/antlr/antlr-${ANTLR4_TAG}-complete.jar")
  if(EXISTS "${antlr_bundled_jar}")
    set(ANTLR_EXECUTABLE "${antlr_bundled_jar}")
  endif()
  unset(antlr_bundled_jar)
endif()

include(ExternalAntlr4Cpp)

find_package(Java 11 REQUIRED COMPONENTS Runtime)

if(NOT ANTLR_EXECUTABLE)
  find_program(ANTLR_EXECUTABLE
               NAMES antlr.jar antlr4.jar antlr-4.jar antlr-${ANTLR4_TAG}-complete.jar)
endif()

if(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE)
  # Run the tool without arguments and extract its version from the output.
  execute_process(
    COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}
    OUTPUT_VARIABLE ANTLR_COMMAND_OUTPUT
    ERROR_VARIABLE ANTLR_COMMAND_ERROR
    RESULT_VARIABLE ANTLR_COMMAND_RESULT
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  if(ANTLR_COMMAND_RESULT EQUAL 0)
    # The inputs are quoted so that empty output (or output without a version
    # banner) yields an empty ANTLR_VERSION instead of a string() usage error,
    # and so that semicolons in the output are not treated as list separators.
    string(REGEX MATCH "Version [0-9]+(\\.[0-9]+)*" ANTLR_VERSION "${ANTLR_COMMAND_OUTPUT}")
    string(REPLACE "Version " "" ANTLR_VERSION "${ANTLR_VERSION}")
  else()
    message(
      SEND_ERROR
      "Command '${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}' "
      "failed with the output '${ANTLR_COMMAND_ERROR}'")
  endif()

  # ANTLR_TARGET(<Name> <InputFile>
  #              [LEXER] [PARSER] [LISTENER] [VISITOR]
  #              [PACKAGE <namespace>]
  #              [OUTPUT_DIRECTORY <dir>]
  #              [DEPENDS_ANTLR <other ANTLR_TARGET name>]
  #              [COMPILE_FLAGS <flag>...]
  #              [DEPENDS <file>...])
  #
  # Adds a custom command that runs the ANTLR tool on <InputFile> with the C++
  # language target. Variables set in the caller's scope (this is a macro):
  #   ANTLR_<Name>_INPUT       - the grammar file as passed in
  #   ANTLR_<Name>_OUTPUT_DIR  - directory the generated files are expected in
  #   ANTLR_<Name>_CXX_OUTPUTS - generated .h/.cpp files
  #   ANTLR_<Name>_OUTPUTS     - all generated files (.interp/.tokens + C++)
  macro(ANTLR_TARGET Name InputFile)
    set(ANTLR_OPTIONS LEXER PARSER LISTENER VISITOR)
    set(ANTLR_ONE_VALUE_ARGS PACKAGE OUTPUT_DIRECTORY DEPENDS_ANTLR)
    set(ANTLR_MULTI_VALUE_ARGS COMPILE_FLAGS DEPENDS)
    cmake_parse_arguments(ANTLR_TARGET
                          "${ANTLR_OPTIONS}"
                          "${ANTLR_ONE_VALUE_ARGS}"
                          "${ANTLR_MULTI_VALUE_ARGS}"
                          ${ARGN})
    set(ANTLR_${Name}_INPUT ${InputFile})

    get_filename_component(ANTLR_INPUT ${InputFile} NAME_WE)
    get_filename_component(ANTLR_INPUT_PARENT_DIR "${InputFile}" DIRECTORY)

    if(ANTLR_TARGET_OUTPUT_DIRECTORY)
      set(ANTLR_${Name}_OUTPUT_DIR ${ANTLR_TARGET_OUTPUT_DIRECTORY})
    else()
      set(ANTLR_${Name}_OUTPUT_DIR
          ${CMAKE_CURRENT_BINARY_DIR}/antlr4cpp_generated_src/${ANTLR_INPUT})
    endif()

    # The directory handed to '-o'. For a grammar given by relative path the
    # generated files are expected in the matching sub-directory below it.
    # An absolute grammar path must not be appended: that would declare outputs
    # under "<out>/<absolute path>", which the tool never writes.
    set(ANTLR_${Name}_ORIGINAL_OUTPUT_DIR ${ANTLR_${Name}_OUTPUT_DIR})
    if(ANTLR_INPUT_PARENT_DIR AND NOT IS_ABSOLUTE "${ANTLR_INPUT_PARENT_DIR}")
      set(ANTLR_${Name}_OUTPUT_DIR "${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT_PARENT_DIR}")
    endif()

    # Macros share the caller's scope, so clear both result lists to keep
    # entries from an earlier call with the same <Name> from leaking in.
    unset(ANTLR_${Name}_CXX_OUTPUTS)
    unset(ANTLR_${Name}_OUTPUTS)

    if((ANTLR_TARGET_LEXER AND NOT ANTLR_TARGET_PARSER) OR
       (ANTLR_TARGET_PARSER AND NOT ANTLR_TARGET_LEXER))
      # Lexer-only or parser-only grammar: one class named after the grammar.
      list(APPEND ANTLR_${Name}_CXX_OUTPUTS
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.cpp)
      set(ANTLR_${Name}_OUTPUTS
          ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.interp
          ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.tokens)
    else()
      # Combined grammar: separate <Grammar>Lexer and <Grammar>Parser classes.
      list(APPEND ANTLR_${Name}_CXX_OUTPUTS
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.cpp
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.cpp)
      list(APPEND ANTLR_${Name}_OUTPUTS
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.interp
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.tokens)
    endif()

    if(ANTLR_TARGET_LISTENER)
      list(APPEND ANTLR_${Name}_CXX_OUTPUTS
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.cpp
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.cpp)
      list(APPEND ANTLR_TARGET_COMPILE_FLAGS -listener)
    endif()

    if(ANTLR_TARGET_VISITOR)
      list(APPEND ANTLR_${Name}_CXX_OUTPUTS
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.cpp
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.h
           ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.cpp)
      list(APPEND ANTLR_TARGET_COMPILE_FLAGS -visitor)
    endif()

    if(ANTLR_TARGET_PACKAGE)
      list(APPEND ANTLR_TARGET_COMPILE_FLAGS -package ${ANTLR_TARGET_PACKAGE})
    endif()

    list(APPEND ANTLR_${Name}_OUTPUTS ${ANTLR_${Name}_CXX_OUTPUTS})

    # DEPENDS_ANTLR names an earlier ANTLR_TARGET whose grammar and generated
    # files this target must be regenerated after.
    if(ANTLR_TARGET_DEPENDS_ANTLR)
      if(ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT)
        list(APPEND ANTLR_TARGET_DEPENDS
             ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT})
        list(APPEND ANTLR_TARGET_DEPENDS
             ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_OUTPUTS})
      else()
        message(SEND_ERROR
                "ANTLR target '${ANTLR_TARGET_DEPENDS_ANTLR}' not found")
      endif()
    endif()

    message(STATUS "====== ANTLR Debug Variables ======")
    message(STATUS "ANTLR Executable: ${ANTLR_EXECUTABLE}")
    message(STATUS "Java Executable: ${Java_JAVA_EXECUTABLE}")
    message(STATUS "ANTLR Version: ${ANTLR_VERSION}")
    message(STATUS "ANTLR Input File: ${ANTLR_${Name}_INPUT}")
    message(STATUS "ANTLR Output Directory: ${ANTLR_${Name}_OUTPUT_DIR}")
    message(STATUS "ANTLR Output Files: ${ANTLR_${Name}_OUTPUTS}")
    message(STATUS "ANTLR Target depend: ${InputFile}")
    message(STATUS "===================================")

    # '-no-listener' is always passed first; a '-listener' appended to the
    # compile flags above comes later on the command line.
    # NOTE(review): this relies on the tool honouring the last such flag - confirm.
    # VERBATIM keeps argument escaping identical across platforms.
    add_custom_command(
      OUTPUT ${ANTLR_${Name}_OUTPUTS}
      COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}
              ${InputFile}
              -o ${ANTLR_${Name}_ORIGINAL_OUTPUT_DIR}
              -no-listener
              -Dlanguage=Cpp
              ${ANTLR_TARGET_COMPILE_FLAGS}
      DEPENDS ${InputFile}
              ${ANTLR_TARGET_DEPENDS}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      COMMENT "Building ${Name} with ANTLR ${ANTLR_VERSION}"
      VERBATIM)
  endmacro()

endif()

# Report the find result: the package counts as found when both the ANTLR jar
# and a Java runtime were located; ANTLR_VERSION feeds the version check.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ANTLR
  REQUIRED_VARS
    ANTLR_EXECUTABLE
    Java_JAVA_EXECUTABLE
  VERSION_VAR
    ANTLR_VERSION)
Loading
Loading