From fc3980af7edb87e5298fc5b3c075db7f524aa6a9 Mon Sep 17 00:00:00 2001
From: shirly121
Date: Fri, 8 Sep 2023 18:00:37 +0800
Subject: [PATCH] [GIE Compiler] fix bugs of columnId in schema

refactor(flex): Replace the Adhoc csv reader with Arrow CSV reader (#3154)

1. Use the Arrow CSV reader to replace the current ad-hoc csv reader, in order to support more configurable options in `bulk_load.yaml`.
2. Introduce `CSVFragmentLoader` and `BasicFragmentLoader` for `MutablePropertyFragment`.

With this PR merged, `MutablePropertyFragment` will support loading a fragment from csv with the following options (a sketch of how they map onto the Arrow CSV API follows this list):
- delimiter: default '|'
- header_row: default true
- quoting: default false
- quoting_char: default '"'
- escaping: default false
- escaping_char: default '\\'
- batch_size: the batch size used when reading the file into memory, default 1MB.
- batch_reader: default false. If set to true, `arrow::csv::StreamingReader` is used to parse the input file; otherwise, `arrow::csv::TableReader` is used.
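For reference, here is a minimal, illustrative sketch of how these `bulk_load.yaml` options map onto Arrow's CSV reader options. It is not the PR's actual `CSVFragmentLoader` implementation, and the helper name `ReadCsvWithLoadOptions` is made up for the example:

```cpp
#include <arrow/csv/api.h>
#include <arrow/io/api.h>
#include <arrow/result.h>
#include <arrow/table.h>

// Hypothetical helper: reads one input file with the defaults listed above.
arrow::Result<std::shared_ptr<arrow::Table>> ReadCsvWithLoadOptions(
    const std::string& path) {
  ARROW_ASSIGN_OR_RAISE(auto input, arrow::io::ReadableFile::Open(path));

  auto read_options = arrow::csv::ReadOptions::Defaults();
  read_options.block_size = 1 << 20;  // batch_size: default 1MB
  // header_row: true is the default behavior (column names come from the
  // first row); with no header one would set
  // read_options.autogenerate_column_names = true instead.

  auto parse_options = arrow::csv::ParseOptions::Defaults();
  parse_options.delimiter = '|';     // delimiter: default '|'
  parse_options.quoting = false;     // quoting: default false
  parse_options.quote_char = '"';    // quoting_char: default '"'
  parse_options.escaping = false;    // escaping: default false
  parse_options.escape_char = '\\';  // escaping_char: default '\'

  auto convert_options = arrow::csv::ConvertOptions::Defaults();

  // batch_reader: false selects TableReader, which materializes the whole
  // file and reads multi-threaded by default; batch_reader: true would call
  // arrow::csv::StreamingReader::Make(...) with the same options to iterate
  // record batches one at a time instead.
  ARROW_ASSIGN_OR_RAISE(
      auto reader,
      arrow::csv::TableReader::Make(arrow::io::default_io_context(), input,
                                    read_options, parse_options,
                                    convert_options));
  return reader->Read();
}
```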
With this PR merged, the performance of graph loading is improved. "Adhoc Reader" denotes the previously implemented csv parser; 1, 2, 4 and 8 denote the parallelism of graph loading, i.e. how many vertex/edge labels are processed concurrently. The three tables below report the same measurement on three datasets of increasing size. Note that TableReader is around 10x faster than StreamingReader at reading the file; the likely reason is that TableReader reads with multiple threads. See [arrow-csv-doc](https://arrow.apache.org/docs/cpp/csv.html) for details.

| Reader | Phase | 1 | 2 | 4 | 8 |
| --------- | -------------- | ------ | ------ | ------ | ------ |
| Adhoc Reader | ReadFile+LoadGraph | 805s | 468s | 349s | 313s |
| Adhoc Reader | Serialization | 126s | 126s | 126s | 126s |
| Adhoc Reader | **Total** | 931s | 594s | 475s | 439s |
| Table Reader | ReadFile | 9s | 9s | 9s | 9s |
| Table Reader | LoadGraph | 455s | 280s | 211s | 182s |
| Table Reader | Serialization | 126s | 126s | 126s | 126s |
| Table Reader | **Total** | 600s | 415s | 346s | 317s |
| Streaming Reader | ReadFile | 91s | 91s | 91s | 91s |
| Streaming Reader | LoadGraph | 555s | 289s | 196s | 149s |
| Streaming Reader | Serialization | 126s | 126s | 126s | 126s |
| Streaming Reader | **Total** | 772s | 506s | 413s | 366s |

| Reader | Phase | 1 | 2 | 4 | 8 |
| --------- | -------------- | ------ | ------ | ------ | ------ |
| Adhoc Reader | ReadFile+LoadGraph | 2720s | 1548s | 1176s | 948s |
| Adhoc Reader | Serialization | 409s | 409s | 409s | 409s |
| Adhoc Reader | **Total** | 3129s | 1957s | 1585s | 1357s |
| Table Reader | ReadFile | 24s | 24s | 24s | 24s |
| Table Reader | LoadGraph | 1576s | 949s | 728s | 602s |
| Table Reader | Serialization | 409s | 409s | 409s | 409s |
| Table Reader | **Total** | 2009s | 1382s | 1161s | 1035s |
| Streaming Reader | ReadFile | 300s | 300s | 300s | 300s |
| Streaming Reader | LoadGraph | 1740s | 965s | 669s | 497s |
| Streaming Reader | Serialization | 409s | 409s | 409s | 409s |
| Streaming Reader | **Total** | 2539s | 1674s | 1378s | 1206s |

| Reader | Phase | 1 | 2 | 4 | 8 |
| --------- | -------------- | ------ | ------ | ------ | ------ |
| Adhoc Reader | ReadFile+LoadGraph | 8260s | 4900s | 3603s | 2999s |
| Adhoc Reader | Serialization | 1201s | 1201s | 1201s | 1201s |
| Adhoc Reader | **Total** | 9461s | 6101s | 4804s | 4200s |
| Table Reader | ReadFile | 73s | 73s | 96s | 96s |
| Table Reader | LoadGraph | 4650s | 2768s | 2155s | 1778s |
| Table Reader | Serialization | 1201s | 1201s | 1201s | 1201s |
| Table Reader | **Total** | 5924s | 4042s | 3452s | 3075s |
| Streaming Reader | ReadFile | 889s | 889s | 889s | 889s |
| Streaming Reader | LoadGraph | 5589s | 3005s | 2200s | 1712s |
| Streaming Reader | Serialization | 1201s | 1201s | 1201s | 1201s |
| Streaming Reader | **Total** | 7679s | 5095s | 4290s | 3802s |

Fix #3116.

Additional fixup commits squashed into this PR: move the modern graph example files, fix the grin test, make rules unique, Dockerfile updates, remove the plugin-dir option, fix bulk_load.yaml, bash/format cleanups, CI fixes (including disabling tmate), support a default src/dst primary-key mapping in bulk load, renames, Java-side fixes, and add get_person_name.cypher.
---
 .github/workflows/flex.yml | 4 +-
 .github/workflows/hqps-db-ci.yml | 30 +-
 .gitignore | 6 +-
 flex/bin/load_plan_and_gen.sh | 106 +-
 flex/bin/sync_server.cc | 66 +-
 flex/codegen/src/hqps/hqps_scan_builder.h | 51 +-
 .../graph_db/grin/src/topology/structure.cc | 30 +-
 flex/engines/graph_db/grin/test/test.c | 14 +-
 .../hqps_db/core/operator/edge_expand.h | 49 +-
 flex/engines/hqps_db/core/operator/limit.h | 1 +
 flex/engines/hqps_db/core/utils/keyed.h | 15 +
 flex/engines/hqps_db/database/adj_list.h | 22 +-
 .../hqps_db/database/mutable_csr_interface.h | 19 +-
 .../multi_edge_set/untyped_edge_set.h | 97 +-
 flex/engines/http_server/stored_procedure.cc | 36 +-
 flex/engines/http_server/stored_procedure.h | 16 +-
 flex/interactive/README.md | 58 +
 flex/interactive/bin/db_admin.sh | 654 ------
 flex/interactive/bin/gs_interactive | 1993 +++++++++++++++++
 flex/interactive/conf/engine_config.yaml | 13 +
 flex/interactive/conf/interactive.properties | 8 -
 flex/interactive/conf/interactive.yaml | 26 +-
 flex/interactive/data/ldbc/graph.json | 128 --
 flex/interactive/data/ldbc/graph.yaml | 70 -
 .../docker/interactive-runtime.Dockerfile | 3 +-
 flex/interactive/examples/modern_graph | 1 -
 .../examples}/modern_graph/bulk_load.yaml | 2 +-
 .../modern_graph/count_vertex_num.cypher | 1 +
 .../modern_graph/get_person_name.cypher | 1 +
 .../examples}/modern_graph/modern_graph.yaml | 4 -
 .../examples}/modern_graph/person.csv | 0
 .../modern_graph/person_created_software.csv | 0
 .../modern_graph/person_knows_person.csv | 0
 .../examples}/modern_graph/software.csv | 0
 flex/interactive/examples/movies/ACTED_IN.csv | 173 ++
 flex/interactive/examples/movies/DIRECTED.csv | 45 +
 flex/interactive/examples/movies/FOLLOWS.csv | 4 +
 flex/interactive/examples/movies/Movie.csv | 39 +
 flex/interactive/examples/movies/PRODUCED.csv | 16 +
 flex/interactive/examples/movies/Person.csv | 134 ++
 flex/interactive/examples/movies/REVIEWED.csv | 10 +
 flex/interactive/examples/movies/WROTE.csv | 11 +
 flex/interactive/examples/movies/graph.yaml | 67 +
 flex/interactive/examples/movies/import.yaml | 115 +
 .../rt_mutable_graph/loading_config.cc | 26 +-
 flex/storages/rt_mutable_graph/schema.cc | 52 +-
 flex/storages/rt_mutable_graph/schema.h | 5 +
 flex/storages/rt_mutable_graph/types.h | 2 +-
 flex/tests/hqps/hqps_cypher_test.sh | 43 +-
 flex/utils/property/types.h | 22 +
 flex/utils/yaml_utils.cc | 32 +
 flex/utils/yaml_utils.h | 5 +
 .../groot/common/util/IrSchemaParser.java | 4 +-
 .../meta/procedure/GraphStoredProcedures.java | 4 +
 .../ir/meta/reader/LocalMetaDataReader.java | 11 +-
 .../common/ir/meta/schema/IrGraphSchema.java | 7 +-
 .../common/ir/meta/schema/Utils.java | 26 +-
 57 files changed, 3274 insertions(+), 1103 deletions(-)
 delete mode 100755 flex/interactive/bin/db_admin.sh
 create mode 100755 flex/interactive/bin/gs_interactive
 create mode 100644 flex/interactive/conf/engine_config.yaml
 delete mode 100755 flex/interactive/conf/interactive.properties
 delete mode 100755
flex/interactive/data/ldbc/graph.json delete mode 100755 flex/interactive/data/ldbc/graph.yaml delete mode 120000 flex/interactive/examples/modern_graph rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/bulk_load.yaml (96%) create mode 100644 flex/interactive/examples/modern_graph/count_vertex_num.cypher create mode 100644 flex/interactive/examples/modern_graph/get_person_name.cypher rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/modern_graph.yaml (92%) rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/person.csv (100%) rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/person_created_software.csv (100%) rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/person_knows_person.csv (100%) rename flex/{storages/rt_mutable_graph => interactive/examples}/modern_graph/software.csv (100%) create mode 100644 flex/interactive/examples/movies/ACTED_IN.csv create mode 100644 flex/interactive/examples/movies/DIRECTED.csv create mode 100644 flex/interactive/examples/movies/FOLLOWS.csv create mode 100644 flex/interactive/examples/movies/Movie.csv create mode 100644 flex/interactive/examples/movies/PRODUCED.csv create mode 100644 flex/interactive/examples/movies/Person.csv create mode 100644 flex/interactive/examples/movies/REVIEWED.csv create mode 100644 flex/interactive/examples/movies/WROTE.csv create mode 100644 flex/interactive/examples/movies/graph.yaml create mode 100644 flex/interactive/examples/movies/import.yaml create mode 100644 flex/utils/yaml_utils.cc diff --git a/.github/workflows/flex.yml b/.github/workflows/flex.yml index 77267b8e6463..d454d2713416 100644 --- a/.github/workflows/flex.yml +++ b/.github/workflows/flex.yml @@ -54,8 +54,8 @@ jobs: cd flex/engines/graph_db/grin mkdir build && cd build cmake .. 
&& sudo make -j$(nproc) - export FLEX_DATA_DIR=../../../../storages/rt_mutable_graph/modern_graph/ - ./run_grin_test + export FLEX_DATA_DIR=../../../../interactive/examples/modern_graph/ + ./run_grin_test 'flex://schema_file=../../../../interactive/examples/modern_graph/modern_graph.yaml&bulk_load_file=../../../../interactive/examples/modern_graph/bulk_load.yaml' - name: Test Graph Loading on modern graph env: diff --git a/.github/workflows/hqps-db-ci.yml b/.github/workflows/hqps-db-ci.yml index 56f2a0a5fb92..87578a0d5200 100644 --- a/.github/workflows/hqps-db-ci.yml +++ b/.github/workflows/hqps-db-ci.yml @@ -81,12 +81,16 @@ jobs: cd ${GIE_HOME}/compiler make build - - name: Prepare dataset + - name: Prepare dataset and workspace env: GS_TEST_DIR: ${{ github.workspace }}/gstest + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | # download dataset - git clone -b master --single-branch --depth=1 https://github.com/GraphScope/gstest.git ${GS_TEST_DIR} + git clone -b master --single-branch --depth=1 https://github.com/zhanglei1949/gstest.git ${GS_TEST_DIR} + mkdir -p ${INTERACTIVE_WORKSPACE}/data/ldbc + GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml + cp ${GRAPH_SCHEMA_YAML} ${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml - name: Sample Query test env: @@ -102,25 +106,16 @@ jobs: env: GS_TEST_DIR: ${{ github.workspace }}/gstest HOME : /home/graphscope/ + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | GIE_HOME=${GITHUB_WORKSPACE}/interactive_engine - - # create tmp ir.compiler.properties - touch /tmp/ir.compiler.properties - echo "engine.type: hiactor" >> /tmp/ir.compiler.properties - echo "graph.schema: ${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" >> /tmp/ir.compiler.properties - echo "graph.store: exp" >> /tmp/ir.compiler.properties - echo "graph.planner.is.on: true" >> /tmp/ir.compiler.properties - echo "graph.planner.opt: RBO" >> /tmp/ir.compiler.properties - echo "graph.planner.rules: FilterMatchRule,NotMatchToAntiJoinRule" >> /tmp/ir.compiler.properties - cd ${GITHUB_WORKSPACE}/flex/bin for i in 1 2 3 4 5 6 7 8 9 10 11 12; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codgen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=/tmp/ir.compiler.properties " - cmd=${cmd}" --graph_schema_path=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" cmd=${cmd}" --gie_home=${GIE_HOME}" echo $cmd eval ${cmd} @@ -129,8 +124,8 @@ jobs: for i in 1 2 3 4 5 6 7 8 9; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codgen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=/tmp/ir.compiler.properties " - cmd=${cmd}" --graph_schema_path=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" cmd=${cmd}" --gie_home=${GIE_HOME}" echo $cmd eval ${cmd} @@ -140,9 +135,10 @@ jobs: env: GS_TEST_DIR: ${{ github.workspace }}/gstest HOME : /home/graphscope/ + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | cd ${GITHUB_WORKSPACE}/flex/tests/hqps/ export FLEX_DATA_DIR=${GS_TEST_DIR}/flex/ldbc-sf01-long-date export ENGINE_TYPE=hiactor - bash hqps_cypher_test.sh ${GS_TEST_DIR} + 
bash hqps_cypher_test.sh ${GS_TEST_DIR} ${INTERACTIVE_WORKSPACE} diff --git a/.gitignore b/.gitignore index 50f7e42c339a..477bf177b5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -90,8 +90,8 @@ core.* # Flex related flex/docs/ -flex/interactive/data/*/indices/ -flex/interactive/data/*/plugins/ +flex/interactive/data/* flex/interactive/logs/* flex/interactive/examples/sf0.1-raw/ -flex/interactive/.running \ No newline at end of file +flex/interactive/.running +flex/interactive/.env \ No newline at end of file diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh index 354112fe9499..21626f302538 100755 --- a/flex/bin/load_plan_and_gen.sh +++ b/flex/bin/load_plan_and_gen.sh @@ -68,9 +68,11 @@ fi #fi cypher_to_plan() { - if [ $# -ne 7 ]; then - echo "Usage: $0 " - echo " , but receive: "$# + if [ $# -ne 9 ]; then + echo "Usage: cypher_to_plan " + echo " " + echo " " + echo " but receive: "$# exit 1 fi query_name=$1 @@ -81,6 +83,10 @@ cypher_to_plan() { graph_schema_path=$6 GIE_HOME=$7 + # get procedure_name and procedure_description + procedure_name=$8 + procedure_description=$9 + # find java executable echo "IR compiler properties = ${ir_compiler_properties}" #check file exists @@ -122,8 +128,8 @@ cypher_to_plan() { exit 1 fi # add extrac_key_value_config - extra_config="name:${query_name}" - extra_config="${extra_config},description:Autogenerated stored procedure configuration yaml for ${query_name}" + extra_config="name:${procedure_name}" + extra_config="${extra_config},description:${procedure_description}" cmd="java -cp ${GIE_HOME}/compiler/target/libs/*:${compiler_jar}" cmd="${cmd} -Dgraph.schema=${graph_schema_path}" @@ -150,8 +156,10 @@ cypher_to_plan() { compile_hqps_so() { #check input params size eq 2 or 3 - if [ $# -ne 5 ] && [ $# -ne 6 ]; then - echo "Usage: $0 [output_dir]" + if [ $# -gt 8 ] || [ $# -lt 5 ]; then + echo "Usage: $0 " + echo " " + echo " [output_dir] [stored_procedure_name] [stored_procedure_description]" exit 1 fi input_path=$1 @@ -159,17 +167,32 @@ compile_hqps_so() { ir_compiler_properties=$3 graph_schema_path=$4 gie_home=$5 - if [ $# -eq 6 ]; then + if [ $# -ge 6 ]; then output_dir=$6 else output_dir=${work_dir} fi + + if [ $# -ge 7 ]; then + procedure_name=$7 + else + procedure_name="" + fi + + if [ $# -ge 8 ]; then + procedure_description=$8 + else + procedure_description="" + fi + echo "Input path = ${input_path}" echo "Work dir = ${work_dir}" echo "ir compiler properties = ${ir_compiler_properties}" echo "graph schema path = ${graph_schema_path}" echo "GIE_HOME = ${gie_home}" echo "Output dir = ${output_dir}" + echo "Procedure name = ${procedure_name}" + echo "Procedure description = ${procedure_description}" last_file_name=$(basename ${input_path}) @@ -188,15 +211,24 @@ compile_hqps_so() { echo "Expect a .pb or .cc file" exit 1 fi + # if procedure_name is not set, use query_name + if [ -z "${procedure_name}" ]; then + procedure_name=${query_name} + fi + # if procedure_description is not set, use query_name + if [ -z "${procedure_description}" ]; then + procedure_description="Stored procedure for ${procedure_name}" + fi cur_dir=${work_dir} mkdir -p ${cur_dir} - output_cc_path="${cur_dir}/${query_name}.cc" + output_cc_path="${cur_dir}/${procedure_name}.cc" + dst_yaml_path="${output_dir}/${procedure_name}.yaml" if [[ $(uname) == "Linux" ]]; then - output_so_path="${cur_dir}/lib${query_name}.so" - dst_so_path="${output_dir}/lib${query_name}.so" + output_so_path="${cur_dir}/lib${procedure_name}.so" + 
dst_so_path="${output_dir}/lib${procedure_name}.so" elif [[ $(uname) == "Darwin" ]]; then - output_so_path="${cur_dir}/lib${query_name}.dylib" - dst_so_path="${output_dir}/lib${query_name}.dylib" + output_so_path="${cur_dir}/lib${procedure_name}.dylib" + dst_so_path="${output_dir}/lib${procedure_name}.dylib" else echo "Not support OS." exit 1 @@ -209,11 +241,14 @@ compile_hqps_so() { eval ${cmd} echo "----------------------------" elif [[ $last_file_name == *.cypher ]]; then - echo "Generating code from cypher query" + echo "Generating code from cypher query, procedure name: ${procedure_name}, description: ${procedure_description}" # first do .cypher to .pb - output_pb_path="${cur_dir}/${query_name}.pb" - output_yaml_path="${cur_dir}/${query_name}.yaml" - cypher_to_plan ${query_name} ${input_path} ${output_pb_path} ${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} ${gie_home} + output_pb_path="${cur_dir}/${procedure_name}.pb" + output_yaml_path="${cur_dir}/${procedure_name}.yaml" + cypher_to_plan ${procedure_name} ${input_path} ${output_pb_path} \ + ${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} ${gie_home} \ + ${procedure_name} "${procedure_description}" + echo "----------------------------" echo "Codegen from cypher query done." echo "----------------------------" @@ -235,7 +270,7 @@ compile_hqps_so() { cp ${CMAKE_TEMPLATE_PATH} ${cur_dir}/CMakeLists.txt # run cmake and make in output path. pushd ${cur_dir} - cmd="cmake . -DQUERY_NAME=${query_name} -DFLEX_INCLUDE_PREFIX=${FLEX_INCLUDE} -DFLEX_LIB_DIR=${FLEX_LIB_DIR}" + cmd="cmake . -DQUERY_NAME=${procedure_name} -DFLEX_INCLUDE_PREFIX=${FLEX_INCLUDE} -DFLEX_LIB_DIR=${FLEX_LIB_DIR}" # if CMAKE_CXX_COMPILER is set, use it. if [ ! -z ${CMAKE_CXX_COMPILER} ]; then cmd="${cmd} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" @@ -284,7 +319,7 @@ compile_hqps_so() { fi # check output_dir doesn't contains output_so_name if [ -f ${dst_so_path} ]; then - echo "Output dir ${output_dir} already contains ${query_name}.so" + echo "Output dir ${output_dir} already contains ${procedure_name}.so" echo "Please remove it first." exit 1 fi @@ -294,6 +329,12 @@ compile_hqps_so() { echo "Copy failed, ${dst_so_path} not exists." exit 1 fi + # copy the generated yaml + cp ${output_yaml_path} ${output_dir} + if [ ! -f ${dst_yaml_path} ]; then + echo "Copy failed, ${dst_yaml_path} not exists." + exit 1 + fi echo "Finish copying, output to ${dst_so_path}" } @@ -461,6 +502,14 @@ run() { OUTPUT_DIR="${i#*=}" shift # past argument=value ;; + --procedure_name=*) + PROCEDURE_NAME="${i#*=}" + shift # past argument=value + ;; + --procedure_desc=*) + PROCEDURE_DESCRIPTION="${i#*=}" + shift # past argument=value + ;; -* | --*) echo "Unknown option $i" exit 1 @@ -477,6 +526,8 @@ run() { echo "graph_schema_path ="${GRAPH_SCHEMA_PATH} echo "GIE_HOME ="${GIE_HOME} echo "Output path ="${OUTPUT_DIR} + echo "Procedure name ="${PROCEDURE_NAME} + echo "Procedure description ="${PROCEDURE_DESCRIPTION} # check input exist if [ ! -f ${INPUT} ]; then @@ -484,10 +535,25 @@ run() { exit 1 fi + if [ -z "${OUTPUT_DIR}" ]; then + OUTPUT_DIR=${WORK_DIR} + fi + # if engine_type equals hqps if [ ${ENGINE_TYPE} == "hqps" ]; then echo "Engine type is hqps, generating dynamic library for hqps engine." 
- compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${GIE_HOME} ${OUTPUT_DIR} + # if PROCEDURE_DESCRIPTION is not set, use empty string + if [ -z ${PROCEDURE_DESCRIPTION} ]; then + PROCEDURE_DESCRIPTION="Automatic generated description for stored procedure ${PROCEDURE_NAME}." + fi + # if PROCEDURE_NAME is not set, use input file name + if [ -z "${PROCEDURE_NAME}" ]; then + #remove the suffix of input file, the suffix is .cc or .cypher + PROCEDURE_NAME=$(basename ${INPUT}) + PROCEDURE_NAME="${PROCEDURE_NAME%.cc}" + PROCEDURE_NAME="${PROCEDURE_NAME%.pb}" + fi + compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${GIE_HOME} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}" # else if engine_type equals pegasus elif [ ${ENGINE_TYPE} == "pegasus" ]; then diff --git a/flex/bin/sync_server.cc b/flex/bin/sync_server.cc index 67b3d5c59f3d..616a6ca5e0e5 100644 --- a/flex/bin/sync_server.cc +++ b/flex/bin/sync_server.cc @@ -120,11 +120,11 @@ std::tuple parse_from_server_config( << engine_type_str; } } - auto shard_num_node = engine_node["shared_num"]; + auto shard_num_node = engine_node["shard_num"]; if (shard_num_node) { shard_num = shard_num_node.as(); } else { - LOG(INFO) << "shared_num not found, use default value " + LOG(INFO) << "shard_num not found, use default value " << DEFAULT_SHARD_NUM; } auto host_node = engine_node["hosts"]; @@ -151,23 +151,9 @@ std::tuple parse_from_server_config( } } -void load_plugins(const bpo::variables_map& vm) { - if (vm.count("plugin-dir") == 0) { - LOG(INFO) << "plugin-dir is not specified"; - return; - } - std::string plugin_dir = vm["plugin-dir"].as(); - if (!std::filesystem::exists(plugin_dir)) { - LOG(FATAL) << "plugin dir not exists: " << plugin_dir; - } - LOG(INFO) << "plugin dir: " << plugin_dir; - if (!plugin_dir.empty()) { - LOG(INFO) << "Load plugins from dir: " << plugin_dir; - server::StoredProcedureManager::get().LoadFromPluginDir(plugin_dir); - } -} - -void init_codegen_proxy(const bpo::variables_map& vm) { +void init_codegen_proxy(const bpo::variables_map& vm, + const std::string& graph_schema_file, + const std::string& engine_config_file) { std::string codegen_dir = parse_codegen_dir(vm); std::string codegen_bin; std::string gie_home; @@ -181,25 +167,6 @@ void init_codegen_proxy(const bpo::variables_map& vm) { LOG(FATAL) << "codegen bin not exists: " << codegen_bin; } } - std::string ir_compiler_properties; - std::string compiler_graph_schema; - if (vm.count("ir-compiler-prop") == 0) { - LOG(FATAL) << "ir-compiler-prop is not specified"; - } else { - ir_compiler_properties = vm["ir-compiler-prop"].as(); - if (!std::filesystem::exists(ir_compiler_properties)) { - LOG(FATAL) << "ir-compiler-prop not exists: " << ir_compiler_properties; - } - } - if (vm.count("compiler-graph-schema") == 0) { - LOG(FATAL) << "compiler-graph-schema is not specified"; - } else { - compiler_graph_schema = vm["compiler-graph-schema"].as(); - if (!std::filesystem::exists(compiler_graph_schema)) { - LOG(FATAL) << "compiler-graph-schema not exists: " - << compiler_graph_schema; - } - } if (vm.count("gie-home") == 0) { LOG(FATAL) << "gie-home is not specified"; } else { @@ -208,9 +175,8 @@ void init_codegen_proxy(const bpo::variables_map& vm) { LOG(FATAL) << "gie-home not exists: " << gie_home; } } - server::CodegenProxy::get().Init(codegen_dir, codegen_bin, - ir_compiler_properties, - compiler_graph_schema, gie_home); + server::CodegenProxy::get().Init(codegen_dir, codegen_bin, engine_config_file, + graph_schema_file, gie_home); 
} } // namespace gs @@ -226,12 +192,7 @@ int main(int argc, char** argv) { "graph-config,g", bpo::value(), "graph schema config file")( "data-path,a", bpo::value(), "data directory path")( "bulk-load,l", bpo::value(), "bulk-load config file")( - "plugin-dir,p", bpo::value(), "plugin directory path")( - "gie-home,h", bpo::value(), "path to gie home")( - "ir-compiler-prop,i", bpo::value(), - "ir compiler property file")("compiler-graph-schema,z", - bpo::value(), - "compiler graph schema file"); + "gie-home,h", bpo::value(), "path to gie home"); setenv("TZ", "Asia/Shanghai", 1); tzset(); @@ -251,9 +212,10 @@ int main(int argc, char** argv) { std::string data_path; std::string bulk_load_config_path; std::string plugin_dir; + std::string server_config_path; if (vm.count("server-config") != 0) { - std::string server_config_path = vm["server-config"].as(); + server_config_path = vm["server-config"].as(); // check file exists if (!std::filesystem::exists(server_config_path)) { LOG(ERROR) << "server-config not exists: " << server_config_path; @@ -294,8 +256,12 @@ int main(int argc, char** argv) { LOG(INFO) << "Finished loading graph, elapsed " << t0 << " s"; // loading plugin - gs::load_plugins(vm); - gs::init_codegen_proxy(vm); + if (!schema.GetPluginDir().empty() && !schema.GetPluginsList().empty()) { + server::StoredProcedureManager::get().LoadFromPluginDir( + schema.GetPluginDir(), schema.GetPluginsList()); + } + + gs::init_codegen_proxy(vm, graph_schema_path, server_config_path); server::HQPSService::get().init(shard_num, http_port, false); server::HQPSService::get().run_and_wait_for_exit(); diff --git a/flex/codegen/src/hqps/hqps_scan_builder.h b/flex/codegen/src/hqps/hqps_scan_builder.h index 22264acfc0f4..c8144e4554ee 100644 --- a/flex/codegen/src/hqps/hqps_scan_builder.h +++ b/flex/codegen/src/hqps/hqps_scan_builder.h @@ -89,6 +89,53 @@ class ScanOpBuilder { return *this; } + ScanOpBuilder& idx_predicate(const algebra::IndexPredicate& predicate) { + // check query_params not has predicate. + if (query_params_.has_predicate()) { + VLOG(10) << "query params already has predicate"; + return *this; + } + // Currently we only support one predicate. 
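+ // What follows rewrites that single predicate into query_params_'
+ // generic predicate as three operators: var(key), the EQ logical op,
+ // and const(value) -- i.e. the expression "key == value" over an
+ // INT64-typed primary key.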
+ if (predicate.or_predicates_size() < 1) { + VLOG(10) << "No predicate in index predicate"; + return *this; + } + if (predicate.or_predicates_size() != 1) { + throw std::runtime_error( + std::string("Currently only support one predicate")); + } + auto or_predicate = predicate.or_predicates(0); + if (or_predicate.predicates_size() != 1) { + throw std::runtime_error( + std::string("Currently only support one and predicate")); + } + auto triplet = or_predicate.predicates(0); + // add index predicate to query params + auto* new_predicate = query_params_.mutable_predicate(); + { + auto first_op = new_predicate->add_operators(); + common::Variable variable; + auto& property = triplet.key(); + *(variable.mutable_property()) = property; + variable.mutable_node_type()->set_data_type(common::DataType::INT64); + *(first_op->mutable_var()) = variable; + } + { + auto second = new_predicate->add_operators(); + second->set_logical(common::Logical::EQ); + second->mutable_node_type()->set_data_type(common::DataType::BOOLEAN); + } + { + auto third = new_predicate->add_operators(); + auto& value = triplet.value(); + third->mutable_node_type()->set_data_type(common::DataType::INT64); + *(third->mutable_const_()) = value; + } + VLOG(10) << "Add index predicate to query params: " + << query_params_.DebugString(); + return *this; + } + std::string Build() const { std::string label_name; std::vector labels_ids; @@ -275,7 +322,9 @@ static std::string BuildScanOp( } else { builder.resAlias(-1); } - return builder.queryParams(scan_pb.params()).Build(); + return builder.queryParams(scan_pb.params()) + .idx_predicate(scan_pb.idx_predicate()) + .Build(); } } // namespace gs diff --git a/flex/engines/graph_db/grin/src/topology/structure.cc b/flex/engines/graph_db/grin/src/topology/structure.cc index 5698fdc5291a..809b9f932d8a 100644 --- a/flex/engines/graph_db/grin/src/topology/structure.cc +++ b/flex/engines/graph_db/grin/src/topology/structure.cc @@ -35,9 +35,33 @@ GRIN_GRAPH grin_get_graph_from_storage(const char* uri) { return GRIN_NULL_GRAPH; } _uri = _uri.substr(pos + 3); - std::string graph_schema_path = _uri + "/modern_graph.yaml"; - std::string data_path = uri; - std::string bulk_load_config_path = _uri + "/bulk_load.yaml"; + LOG(INFO) << "Params: " << _uri; + std::string graph_schema_path, bulk_load_config_path; + if (pos != std::string::npos) { + auto params = _uri; + std::vector param_list; + boost::split(param_list, params, boost::is_any_of("&")); + for (auto& param : param_list) { + std::vector kv; + boost::split(kv, param, boost::is_any_of("=")); + if (kv.size() != 2) { + return GRIN_NULL_GRAPH; + } + if (kv[0] == "schema_file") { + graph_schema_path = kv[1]; + } else if (kv[0] == "bulk_load_file") { + bulk_load_config_path = kv[1]; + } + } + } else { + return GRIN_NULL_GRAPH; + } + VLOG(10) << "Schema file: " << graph_schema_path; + VLOG(10) << "Bulk load file: " << bulk_load_config_path; + if (graph_schema_path.empty() || bulk_load_config_path.empty()) { + return GRIN_NULL_GRAPH; + } + // get schema_file from if (!std::filesystem::exists(graph_schema_path) || !(std::filesystem::exists(bulk_load_config_path))) { return GRIN_NULL_GRAPH; diff --git a/flex/engines/graph_db/grin/test/test.c b/flex/engines/graph_db/grin/test/test.c index 4612d041885b..d645945e99a1 100644 --- a/flex/engines/graph_db/grin/test/test.c +++ b/flex/engines/graph_db/grin/test/test.c @@ -1050,11 +1050,17 @@ void test_vertex_property_value(const char* uri_str) { } void test_perf(const char* uri_str) { 
test_vertex_property_value(uri_str); } - +// uri_str = +//"flex://" +// "../../../../storages/rt_mutable_graph/modern_graph/?schema_file={schema_file}&bulk_load_file={bulk_load_file}"; int main(int argc, char** argv) { - const char* uri_str = - "flex://" - "../../../../storages/rt_mutable_graph/modern_graph/"; + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + const char* uri_str = argv[1]; + //print uri + printf("uri: %s\n", uri_str); test_index(uri_str); test_property(uri_str); diff --git a/flex/engines/hqps_db/core/operator/edge_expand.h b/flex/engines/hqps_db/core/operator/edge_expand.h index 4d365ed38aa9..cfa6cf081b0d 100644 --- a/flex/engines/hqps_db/core/operator/edge_expand.h +++ b/flex/engines/hqps_db/core/operator/edge_expand.h @@ -731,20 +731,25 @@ class EdgeExpand { // Expand from multi label vertices and though multi edge labels. // result in general edge set. auto src_label = cur_vertex_set.GetLabel(); - LOG(INFO) << "[EdgeExpandEMultiTriplet] real labels: " - << gs::to_string(edge_labels); + LOG(INFO) << "[EdgeExpandEMultiTriplet] real labels: "; + for (auto i = 0; i < edge_labels.size(); ++i) { + LOG(INFO) << std::to_string(edge_labels[i][0]) << " " + << std::to_string(edge_labels[i][1]) << " " + << std::to_string(edge_labels[i][2]); + } // for each triplet, returns a vector of edge iters. auto& vertices = cur_vertex_set.GetVertices(); using sub_graph_t = typename GRAPH_INTERFACE::sub_graph_t; using edge_iter_t = typename sub_graph_t::iterator; std::vector sub_graphs; + auto prop_names_vec = prop_names_to_vec(prop_names); for (auto i = 0; i < edge_labels.size(); ++i) { // Check whether the edge triplet match input vertices. // return a hanlder to get edges - auto sub_graph_vec = - graph.GetSubGraph(edge_labels[i][0], edge_labels[i][1], - edge_labels[i][2], gs::to_string(direction)); + auto sub_graph_vec = graph.GetSubGraph( + edge_labels[i][0], edge_labels[i][1], edge_labels[i][2], + gs::to_string(direction), prop_names_vec[i]); for (auto sub_graph : sub_graph_vec) { sub_graphs.emplace_back(sub_graph); } @@ -811,7 +816,8 @@ class EdgeExpand { } auto set = UnTypedEdgeSet( - vertices, label_indices, label_vec, std::move(label_to_subgraphs)); + vertices, label_indices, label_vec, std::move(label_to_subgraphs), + direction); return std::make_pair(std::move(set), std::move(offsets)); } @@ -854,12 +860,13 @@ class EdgeExpand { using sub_graph_t = typename GRAPH_INTERFACE::sub_graph_t; using edge_iter_t = typename sub_graph_t::iterator; std::vector sub_graphs; + auto prop_names_vec = prop_names_to_vec(prop_names); for (auto i = 0; i < edge_labels.size(); ++i) { // Check whether the edge triplet match input vertices. 
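// The per-triplet property names are now forwarded to GetSubGraph and
// kept on each returned sub-graph (see SubGraph::GetPropNames in
// adj_list.h).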
// return a hanlder to get edges - auto sub_graph_vec = - graph.GetSubGraph(edge_labels[i][0], edge_labels[i][1], - edge_labels[i][2], gs::to_string(direction)); + auto sub_graph_vec = graph.GetSubGraph( + edge_labels[i][0], edge_labels[i][1], edge_labels[i][2], + gs::to_string(direction), prop_names_vec[i]); for (auto sub_graph : sub_graph_vec) { sub_graphs.emplace_back(sub_graph); } @@ -936,7 +943,8 @@ class EdgeExpand { } auto set = UnTypedEdgeSet( - vertices, label_indices, label_vec, std::move(label_to_subgraphs)); + vertices, label_indices, label_vec, std::move(label_to_subgraphs), + direction); return std::make_pair(std::move(set), std::move(offsets)); } @@ -1490,7 +1498,26 @@ class EdgeExpand { << gs::to_string(edge_label_id) << ", new vertices count: " << tmp_offset.back(); } -}; // namespace gs + + template + static void emplace_prop_names_to_vec( + std::vector>& vec_vec_prop_names, + std::tuple...>& prop_names, + std::index_sequence) { + (vec_vec_prop_names.emplace_back(array_to_vec(std::get(prop_names))), + ...); + } + template + static std::vector> prop_names_to_vec( + std::tuple...>& prop_names) { + std::vector> vec_vec_prop_names; + vec_vec_prop_names.reserve(sizeof...(PropTuple)); + emplace_prop_names_to_vec( + vec_vec_prop_names, prop_names, + std::make_index_sequence()); + return vec_vec_prop_names; + } +}; } // namespace gs diff --git a/flex/engines/hqps_db/core/operator/limit.h b/flex/engines/hqps_db/core/operator/limit.h index 1ffd3f4fac32..06dc7947d59e 100644 --- a/flex/engines/hqps_db/core/operator/limit.h +++ b/flex/engines/hqps_db/core/operator/limit.h @@ -35,6 +35,7 @@ class LimitOp { size_t cur_ind = 0; std::vector new_offsets; new_offsets.emplace_back(0); + upper_bound = std::min((size_t) upper_bound, cur_.Size()); for (auto iter : ctx) { if (cur_ind >= lower_bound && cur_ind < upper_bound) { cur_offset += 1; diff --git a/flex/engines/hqps_db/core/utils/keyed.h b/flex/engines/hqps_db/core/utils/keyed.h index 4b4602088e20..7827f45dc8d0 100644 --- a/flex/engines/hqps_db/core/utils/keyed.h +++ b/flex/engines/hqps_db/core/utils/keyed.h @@ -383,6 +383,21 @@ struct KeyedAggT, } }; +template +struct KeyedAggT, AggFunc::COUNT, + std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + using aggregate_res_builder_t = CountBuilder; + + static aggregate_res_builder_t create_agg_builder( + const FlatEdgeSet& set, const GI& graph, + std::tuple>& selectors) { + return CountBuilder(); + } +}; + template static inline auto insert_into_builder_v2_impl( diff --git a/flex/engines/hqps_db/database/adj_list.h b/flex/engines/hqps_db/database/adj_list.h index 2f28afcdde78..fb8f8e996a01 100644 --- a/flex/engines/hqps_db/database/adj_list.h +++ b/flex/engines/hqps_db/database/adj_list.h @@ -49,9 +49,14 @@ class EdgeIter { inline label_id_t GetSrcLabel() const { return label_triplet_[0]; } inline Any GetData() const { return ptr1_->get_data(); } - inline bool IsValid() const { return ptr1_->is_valid(); } + inline bool IsValid() const { return ptr1_ && ptr1_->is_valid(); } - size_t Size() const { return ptr1_->size(); } + size_t Size() const { + if (ptr1_) { + return ptr1_->size(); + } + return 0; + } private: std::shared_ptr ptr1_; @@ -66,21 +71,28 @@ class SubGraph { using iterator = EdgeIter; using label_id_t = LabelT; SubGraph(const MutableCsrBase* first, - const std::array& label_triplet) - : first_(first), label_triplet_(label_triplet) {} + const std::array& label_triplet, + const std::vector& prop_names) + : first_(first), label_triplet_(label_triplet), 
prop_names_(prop_names) {} inline iterator get_edges(VID_T vid) const { - return iterator(label_triplet_, first_->edge_iter(vid)); + if (first_) { + return iterator(label_triplet_, first_->edge_iter(vid)); + } + return iterator(label_triplet_, nullptr); } label_id_t GetSrcLabel() const { return label_triplet_[0]; } label_id_t GetEdgeLabel() const { return label_triplet_[2]; } label_id_t GetDstLabel() const { return label_triplet_[1]; } + const std::vector& GetPropNames() const { return prop_names_; } + private: const MutableCsrBase* first_; // We assume first is out edge, second is in edge. std::array label_triplet_; + std::vector prop_names_; }; template diff --git a/flex/engines/hqps_db/database/mutable_csr_interface.h b/flex/engines/hqps_db/database/mutable_csr_interface.h index 5531246ddbf2..0f1b9f3c89fa 100644 --- a/flex/engines/hqps_db/database/mutable_csr_interface.h +++ b/flex/engines/hqps_db/database/mutable_csr_interface.h @@ -498,21 +498,21 @@ class MutableCSRInterface { // get edges with input vids. return a edge list. std::vector> GetSubGraph(const label_id_t src_label_id, const label_id_t dst_label_id, - const label_id_t edge_label_id, - const std::string& direction_str) const { + const label_id_t edge_label_id, const std::string& direction_str, + const std::vector& prop_names) const { const MutableCsrBase *csr = nullptr, *other_csr = nullptr; if (direction_str == "out" || direction_str == "Out" || direction_str == "OUT") { csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, edge_label_id); - return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}}; + return std::vector{sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}}; } else if (direction_str == "in" || direction_str == "In" || direction_str == "IN") { csr = db_session_.graph().get_ie_csr(src_label_id, dst_label_id, edge_label_id); - return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}}; + return std::vector{sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}}; } else if (direction_str == "both" || direction_str == "Both" || direction_str == "BOTH") { csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, @@ -520,8 +520,11 @@ class MutableCSRInterface { other_csr = db_session_.graph().get_ie_csr(src_label_id, dst_label_id, edge_label_id); return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}, - sub_graph_t{other_csr, {dst_label_id, src_label_id, edge_label_id}}}; + sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}, + sub_graph_t{other_csr, + {dst_label_id, src_label_id, edge_label_id}, + prop_names}}; } else { throw std::runtime_error("Not implemented - " + direction_str); } diff --git a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h index 64ff2f1501c3..fcdc9d1a42a8 100644 --- a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h +++ b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h @@ -171,12 +171,14 @@ class UnTypedEdgeSet { const std::vector& src_v, const std::vector& label_indices, const std::vector& labels, - std::unordered_map>&& adj_lists) + std::unordered_map>&& adj_lists, + const Direction& direction) : src_vertices_(src_v), label_indices_(label_indices), src_labels_(labels), adj_lists_(std::move(adj_lists)), - size_(0) { + size_(0), + direction_(direction) { sanity_check(); } @@ -368,6 +370,67 @@ class 
UnTypedEdgeSet { LOG(FATAL) << "not implemented, and should not be called"; } + template ::type* = nullptr> + auto ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + using dst_ele_tuple_t = std::tuple; + CHECK(repeat_array.size() == Size()); + size_t real_size = 0; + for (auto v : repeat_array) { + real_size += v; + } + std::vector dst_eles; + dst_eles.reserve(real_size); + auto edge_label_triplets = get_edge_triplets(); + auto edge_iters = generate_iters(); + std::vector label_triplet_indices; + label_triplet_indices.reserve(real_size); + std::vector sizes; + sizes.emplace_back(0); + for (auto i = 0; i < edge_label_triplets.size(); ++i) { + sizes.emplace_back(sizes.back() + edge_label_triplets[i].size()); + } + + // 0,2,4 + size_t cur_ind = 0; + for (auto i = 0; i < src_vertices_.size(); ++i) { + auto src_vid = src_vertices_[i]; + auto& cur_edge_iters = edge_iters[i]; + auto src_label_ind = label_indices_[i]; + auto src_label = src_labels_[src_label_ind]; + auto cur_triplets_vec = edge_label_triplets[src_label_ind]; + CHECK(cur_triplets_vec.size() == cur_edge_iters.size()); + + for (auto j = 0; j < cur_edge_iters.size(); ++j) { + auto& cur_iter = cur_edge_iters[j]; + while (cur_iter.IsValid()) { + auto dst_vid = cur_iter.GetDstId(); + auto data = cur_iter.GetData(); + for (auto k = 0; k < repeat_array[cur_ind]; ++k) { + dst_eles.emplace_back(std::make_tuple(src_vid, dst_vid, data)); + label_triplet_indices.emplace_back(sizes[src_label_ind] + j); + } + cur_iter.Next(); + cur_ind += 1; + } + } + } + std::vector> res_label_triplets; + // put edge_label_triplets into res_label_triplets + for (auto i = 0; i < edge_label_triplets.size(); ++i) { + auto& cur_triplets_vec = edge_label_triplets[i]; + for (auto j = 0; j < cur_triplets_vec.size(); ++j) { + res_label_triplets.emplace_back(cur_triplets_vec[j]); + } + } + std::vector> prop_names = get_prop_namees(); + CHECK(prop_names.size() == res_label_triplets.size()); + return FlatEdgeSet( + std::move(dst_eles), std::move(res_label_triplets), prop_names, + std::move(label_triplet_indices), direction_); + } + private: std::pair, std::unordered_map> preprocess_getting_labels(const std::vector& req_labels, @@ -420,6 +483,35 @@ class UnTypedEdgeSet { << " vertices, with " << edge_iter_vecs.size() << " iters"; return edge_iter_vecs; } + + std::vector>> get_edge_triplets() const { + std::vector>> ret; + for (auto iter : adj_lists_) { + auto& sub_graphs = iter.second; + std::vector> tmp; + for (auto i = 0; i < sub_graphs.size(); ++i) { + auto& sub_graph = sub_graphs[i]; + tmp.emplace_back(std::array({sub_graph.GetSrcLabel(), + sub_graph.GetDstLabel(), + sub_graph.GetEdgeLabel()})); + } + ret.emplace_back(std::move(tmp)); + } + return ret; + } + + std::vector> get_prop_namees() const { + std::vector> ret; + for (auto iter : adj_lists_) { + auto& sub_graphs = iter.second; + for (auto i = 0; i < sub_graphs.size(); ++i) { + auto& sub_graph = sub_graphs[i]; + ret.push_back(sub_graph.GetPropNames()); + } + } + return ret; + } + void sanity_check() { CHECK(src_vertices_.size() == label_indices_.size()); for (auto v : label_indices_) { @@ -437,6 +529,7 @@ class UnTypedEdgeSet { std::unordered_map> adj_lists_; // match src_label to all triplet. 
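// direction_ (added below) records the expand direction of this edge
// set; ProjectWithRepeatArray forwards it when flattening into a
// FlatEdgeSet.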
mutable size_t size_; // computed lazily + Direction direction_; }; } // namespace gs diff --git a/flex/engines/http_server/stored_procedure.cc b/flex/engines/http_server/stored_procedure.cc index 8f8d37747376..a4601d5d55f0 100644 --- a/flex/engines/http_server/stored_procedure.cc +++ b/flex/engines/http_server/stored_procedure.cc @@ -83,22 +83,10 @@ void close_lib(void* handle, const char* lib_path) { } } -std::vector get_yaml_files(const std::string& plugin_dir) { - std::filesystem::path dir_path = plugin_dir; - std::string suffix = ".yaml"; - std::vector res_yaml_files; - - for (auto& entry : std::filesystem::directory_iterator(dir_path)) { - if (entry.is_regular_file() && entry.path().extension() == suffix) { - res_yaml_files.emplace_back(entry.path()); - } - } - return res_yaml_files; -} - std::vector parse_from_multiple_yamls( const std::string& plugin_dir, - const std::vector& stored_procedure_yamls) { + const std::vector& stored_procedure_yamls, + const std::vector& valid_procedure_names) { std::vector stored_procedures; for (auto cur_yaml : stored_procedure_yamls) { LOG(INFO) << "Loading for: " << cur_yaml; @@ -109,17 +97,21 @@ std::vector parse_from_multiple_yamls( LOG(ERROR) << "Expect path in pre_installed procedure"; } else { std::string name = root["name"].as(); - std::string path = root["library"].as(); - if (!std::filesystem::exists(path)) { - // in case the path is relative to plugin_dir, prepend plugin_dir - path = plugin_dir + "/" + path; + if (find(valid_procedure_names.begin(), valid_procedure_names.end(), + name) != valid_procedure_names.end()) { + VLOG(10) << "Find valid procedure: " << name; + std::string path = root["library"].as(); if (!std::filesystem::exists(path)) { - LOG(ERROR) << "plugin - " << path << " file not found..."; + // in case the path is relative to plugin_dir, prepend plugin_dir + path = plugin_dir + "/" + path; + if (!std::filesystem::exists(path)) { + LOG(ERROR) << "plugin - " << path << " file not found..."; + } else { + stored_procedures.push_back({name, path}); + } } else { stored_procedures.push_back({name, path}); } - } else { - stored_procedures.push_back({name, path}); } } } @@ -151,7 +143,7 @@ std::vector parse_stored_procedures( } } } else { - LOG(WARNING) << "Expect ntry : " << stored_procedure_yaml; + LOG(WARNING) << "Expect entry : " << stored_procedure_yaml; } return stored_procedures; } diff --git a/flex/engines/http_server/stored_procedure.h b/flex/engines/http_server/stored_procedure.h index 3f6fcf85fc0d..7aad2cce6eed 100644 --- a/flex/engines/http_server/stored_procedure.h +++ b/flex/engines/http_server/stored_procedure.h @@ -34,6 +34,7 @@ #include "flex/engines/hqps_db/app/hqps_app_base.h" #include "flex/engines/hqps_db/database/mutable_csr_interface.h" #include "flex/utils/app_utils.h" +#include "flex/utils/yaml_utils.h" #include @@ -83,7 +84,9 @@ struct StoredProcedureMeta { std::vector parse_stored_procedures( const std::string& stored_procedure_yaml); std::vector parse_from_multiple_yamls( - const std::string& plugin_dir, const std::vector& stored_procedure_yamls); + const std::string& plugin_dir, + const std::vector& stored_procedure_yamls, + const std::vector& valid_procedure_names); enum class StoredProcedureType { kCypher = 0, @@ -142,17 +145,18 @@ class CypherStoredProcedure; std::shared_ptr create_stored_procedure_impl( int32_t procedure_id, const std::string& procedure_path); -std::vector get_yaml_files(const std::string& plugin_dir); - class StoredProcedureManager { public: static StoredProcedureManager& get(); 
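// Procedures are now loaded from the plugin directory filtered by the
// schema's plugin list (valid_procedure_names), instead of loading every
// yaml file found in the directory.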
StoredProcedureManager() {} // expect multiple query.yaml under this directory. - void LoadFromPluginDir(const std::string& plugin_dir) { - auto yaml_files = get_yaml_files(plugin_dir); - auto stored_procedures = parse_from_multiple_yamls(plugin_dir, yaml_files); + void LoadFromPluginDir( + const std::string& plugin_dir, + const std::vector& valid_procedure_names) { + auto yaml_files = gs::get_yaml_files(plugin_dir); + auto stored_procedures = parse_from_multiple_yamls(plugin_dir, yaml_files, + valid_procedure_names); CreateStoredProcedures(stored_procedures); } diff --git a/flex/interactive/README.md b/flex/interactive/README.md index fd6ca0e3e239..7713349e6142 100755 --- a/flex/interactive/README.md +++ b/flex/interactive/README.md @@ -3,3 +3,61 @@ GraphScope Interactive is a specialized construction of [GraphScope Flex](https://github.com/alibaba/GraphScope/tree/main/flex), designed to handle concurrent graph queries at an impressive speed. Its primary goal is to process as many queries as possible within a given timeframe, emphasizing a high query throughput rate. For the full documentation of GraphScope Interactive, please refer to [GraphScope-Interactive](https://graphscope.io/docs/interactive_engine/graphscope_interactive). +## Minimal tutorial + +In this minimal tutorial, we will show you how to run graph service on builtin modern graph. + +### Preparation + +Set `location` to `/home/graphscope/default_graph`. + +### init database + +```bash +./bin/gs_interactive init -c ./conf/interactive.yaml +``` + +### Start service + +```bash +./bin/gs_interactive service start +``` + +### Stop service +```bash +./bin/gs_interactive service stop +``` + +### Restart service +```bash +./bin/gs_interactive service restart +``` + +### Get service status +```bash +./bin/gs_interactive service status +``` + +### Compile stored procedure +```bash +./bin/gs_interactive procedure compile -g modern -i ./examples/modern_graph/count_vertex_num.cypher +``` + +### Disable stored procedure +```bash +./bin/gs_interactive procedure disable -g modern -n count_vertex_num +``` + +### Enable stored procedure +```bash +./bin/gs_interactive procedure enable -g modern -n count_vertex_num +``` + +### Use user defined graph +```bash +./bin/gs_interactive service stop +./bin/gs_interactive database remove -g modern +./bin/gs_interactive database create -g test -c ./examples/modern_graph/modern_graph.yaml +./bin/gs_interactive database import -g test -c ./examples/modern_graph/bulk_load.yaml +./bin/gs_interactive service start -g test +``` \ No newline at end of file diff --git a/flex/interactive/bin/db_admin.sh b/flex/interactive/bin/db_admin.sh deleted file mode 100755 index 8f98befcc53d..000000000000 --- a/flex/interactive/bin/db_admin.sh +++ /dev/null @@ -1,654 +0,0 @@ -#!/bin/bash -# Copyright 2020 Alibaba Group Holding Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# The product name -DB_PROD_NAME="interactive" - -# colored error and info functions to wrap messages. 
-RED='\033[0;31m' -GREEN='\033[0;32m' -NC='\033[0m' # No Color -err() { - echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 -} - -info() { - echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" -} - -################## Some Util Functions ################## -function parse_yaml { - local prefix=$2 - local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$(echo @|tr @ '\034') - sed -ne "s|^\($s\):|\1|" \ - -e "s|^\($s\)\($w\)$s:$s[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \ - -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" $1 | - awk -F$fs '{ - indent = length($1)/2; - vname[indent] = $2; - for (i in vname) {if (i > indent) {delete vname[i]}} - if (length($3) > 0) { - vn=""; for (i=0; i/dev/null 2>&1 - pwd -P -)" -info "HOST_DB_HOME = ${HOST_DB_HOME}" - -#################### DEFINE CONSTANTS #################### -GRAPHSCOPE_GROUP_ID=1001 - -# the configuration directory -HOST_DB_CONF_DIR="${HOST_DB_HOME}/conf" -# the data directory -HOST_DB_DATA_DIR="${HOST_DB_HOME}/data" -# the log directory -HOST_DB_LOG_DIR="${HOST_DB_HOME}/logs" -HOST_DB_SERVER_OUTPUT_LOG="${HOST_DB_LOG_DIR}/server.log" -HOST_DB_COMPILER_OUTPUT_LOG="${HOST_DB_LOG_DIR}/compiler.log" -HOST_DB_INTERACTIVE_YAML="${HOST_DB_CONF_DIR}/interactive.yaml" -HOST_DB_EXAMPLE_DATASET_DIR=${HOST_DB_HOME}/"examples/sf0.1-raw/" -HOST_DB_RUNNING_FILE="${HOST_DB_HOME}/.running" -# will export DOCKER_DB_HOME, if not set, exist -get_docker_workspace_from_yaml "${HOST_DB_INTERACTIVE_YAML}" - -DOCKER_DB_GRAPHSCOPE_HOME="/home/graphscope/GraphScope" -DOCKER_DB_DATA_DIR="${DOCKER_DB_HOME}/data" -DOCKER_DB_LOG_DIR="${DOCKER_DB_HOME}/logs" -DOCKER_DB_CONF_DIR="${DOCKER_DB_HOME}/conf" -DOCKER_DB_IR_CONF_FILE="${DOCKER_DB_HOME}/conf/interactive.properties" -DOCKER_DB_GIE_HOME="${DOCKER_DB_GRAPHSCOPE_HOME}/interactive_engine/" -DOCKER_DB_INTERACTIVE_YAML="${DOCKER_DB_HOME}/conf/interactive.yaml" -DOCKER_DB_SERVER_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/bin/sync_server" -DOCKER_DB_COMPILER_BIN="com.alibaba.graphscope.GraphServer" -DOCKER_DB_GEN_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/bin/load_plan_and_gen.sh" -DOCKER_DB_SERVER_OUTPUT_LOG=${DOCKER_DB_LOG_DIR}/server.log -DOCKER_DB_COMPILER_OUTPUT_LOG=${DOCKER_DB_LOG_DIR}/compiler.log -export DOCKER_DB_CONNECTOR_PORT=7687 -DB_CONNECT_DEFAULT_PORT=7687 -# update the port by parsing the yaml file -DOCKER_DB_CONNECTOR_PORT=$(parse_yaml "${HOST_DB_INTERACTIVE_YAML}" | grep "compiler_endpoint_boltConnector_port" | awk -F "=" '{print $2}') -#remove "" and space -DOCKER_DB_CONNECTOR_PORT=$(echo "${DOCKER_DB_CONNECTOR_PORT}" | sed 's/^"//' | sed 's/"$//') - -EXAMPLE_DATA_SET_URL="https://github.com/GraphScope/gstest.git" - -################### IMAGE VERSION ################### -GIE_DB_IMAGE_VERSION="v0.0.1" -GIE_DB_IMAGE_NAME="registry.cn-hongkong.aliyuncs.com/graphscope/${DB_PROD_NAME}" -GIE_DB_CONTAINER_NAME="${DB_PROD_NAME}-server" - - -#################### DEFINE FUNCTIONS #################### -function check_running_containers_and_exit(){ - # check if there is any running containers - info "Check running containers and exit" - running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}") - if [ -n "${running_containers}" ]; then - err "There are running containers: ${running_containers}, please stop them first." 
- exit 1 - fi - info "finish check" -} - -function check_container_running(){ - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - # start the container - docker start "${GIE_DB_CONTAINER_NAME}" - fi -} - -function ensure_container_running(){ - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - # start the container - docker start "${GIE_DB_CONTAINER_NAME}" - fi -} - -function check_process_running_in_container(){ - local container_name=$1 - local process_name=$2 - local error_msg=$3 - local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}\') - if [ -z "${process_id}" ]; then - err "process ${process_name} is not running in container ${container_name}" - err "${error_msg}" - exit 1 - fi - info "process ${process_name} is running in container ${container_name}, process id is ${process_id}" -} - - -#################### DEFINE USAGE #################### -# parse the args and set the variables. -function usage() { - init_usage - start_usage - stop_usage - restart_usage - compile_usage - show_stored_procedure_usage - download_dataset_usage - destroy_usage -} - -function init_usage() { - cat << EOF - db_admin.sh init -p[---publish] - -v[--volume] - --version - Init the database, create the containers. --publish and --volume can be used multiple times. -EOF -} - -function start_usage() { - cat << EOF - db_admin.sh start -n [--name] -b [--bulk-load] -r[--root-data-dir] - Start the database with the given graph. graph schema file should be placed at ./data/{graph_name}/graph.yaml. - If mode is override, we need to clear the data directory first. -EOF -} - -function stop_usage() { - cat << EOF - db_admin.sh stop - Stop the database with the given graph. -EOF -} - -function restart_usage() { - cat << EOF - db_admin.sh restart - Restart the database with current running graph. 
-EOF -} - -function compile_usage(){ - cat << EOF - db_admin.sh compile -g[--graph] -i ${DOCKER_DB_COMPILER_OUTPUT_LOG} 2>&1 &" - cmd=${cmd}"\"" - info "Running cmd: ${cmd}" - eval ${cmd} - sleep 6 - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "check ${HOST_DB_COMPILER_OUTPUT_LOG} to see more details" - info "Successfuly start compiler" - info "DataBase service is running..., port is open on :${DOCKER_DB_CONNECTOR_PORT}" - - # if do_start success, we should write current args to ${HOST_DB_RUNNING_FILE} - echo "GRAPH_NAME=${GRAPH_NAME}" > ${HOST_DB_RUNNING_FILE} - echo "BULK_LOAD_FILE=${BULK_LOAD_FILE}" >> ${HOST_DB_RUNNING_FILE} - echo "ROOT_DATA_DIR=${root_data_dir}" >> ${HOST_DB_RUNNING_FILE} -# info "Successfuly write running args to ${HOST_DB_RUNNING_FILE}" -} - - -#################### Stop database #################### -function do_stop(){ - # stop the container - docker stop ${GIE_DB_CONTAINER_NAME} - info "Successfuly stop database" -} - - -#################### Get database status #################### -function do_status() { - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - info "Please start database first" - fi - # the container is running but the process is not running - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} "check ${HOST_DB_SERVER_OUTPUT_LOG} to see more details" - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "check ${HOST_DB_COMPILER_OUTPUT_LOG} to see more details" - info "Database service is running..., port is open on :${DOCKER_DB_CONNECTOR_PORT}" -} - - -#################### Download dataset #################### -function do_download_dataset(){ - git clone ${EXAMPLE_DATA_SET_URL} ${HOST_DB_EXAMPLE_DATASET_DIR} - info "Successfuly download dataset to: ${HOST_DB_EXAMPLE_DATASET_DIR}" -} - - -#################### Restart #################### -function do_restart() { - # if the container is not running, exit - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "false" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is not running" - info "Please start database first" - exit 1 - fi - info "Stopping database first..." - do_stop - info "Successfuly stop database" - # read args from cached file. 
- # get num lines in file ${HOST_DB_RUNNING_FILE} - num_lines=$(wc -l < ${HOST_DB_RUNNING_FILE}) - if [ ${num_lines} -ne 3 ]; then - err "Error: ${HOST_DB_RUNNING_FILE} should have 3 lines, but got ${num_lines}, something wrong with the file ${HOST_DB_RUNNING_FILE}" - exit 1 - fi - # read args from file - GRAPH_NAME=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - BULK_LOAD_FILE=$(sed -n '2p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - ROOT_DATA_DIR=$(sed -n '3p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - do_start -n ${GRAPH_NAME} -b ${BULK_LOAD_FILE} -r ${ROOT_DATA_DIR} - info "Finish restart database" -} - -# the compiled dynamic libs will be placed at data/${graph_name}/plugins/ -# after compilation, the user need to write the cooresponding yaml, telling the compiler about -# the input and output of the stored procedure -function do_compile() { - # check args num == 4 - # start container - ensure_container_running - if [ $# -ne 4 ]; then - err "stored_procedure command need 2 args, but got $#" - compile_usage - exit 1 - fi - graph_name="" - file_path="" # file path - output_dir="" - - while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -g | --graph) - graph_name="$2" - info "graph_name = ${graph_name}" - shift # past argument - shift - ;; - -i | --input) - file_path="$2" - shift # past argument - shift - ;; - *) - err "unknown option $1" - compile_usage - exit 1 - ;; - esac - done - - # check graph_name - if [ -z "${graph_name}" ]; then - err "graph_name is empty" - compile_usage - exit 1 - fi - - # check file_path - if [ -z "${file_path}" ]; then - err "file_path is empty" - compile_usage - exit 1 - fi - - # get real file_path - file_name=$(basename "${file_path}") - real_file_path=$(realpath "${file_path}") - # check exists - if [ ! -f "${real_file_path}" ]; then - err "file ${real_file_path} not exist" - exit 1 - fi - # check graph dir exists - graph_dir="${HOST_DB_HOME}/data/${graph_name}" - if [ ! -d "${graph_dir}" ]; then - err "graph ${graph_name} not exist" - exit 1 - fi - mkdir -p "${graph_dir}/plugins" - - DOCKER_OUTPUT_DIR="${DOCKER_DB_HOME}/data/${graph_name}/plugins" - HOST_OUTPUT_DIR="${HOST_DB_HOME}/data/${graph_name}/plugins" - DOCKER_DB_GRAPH_SCHEMA="${DOCKER_DB_HOME}/data/${graph_name}/graph.json" - DOCKER_REAL_FILE_PATH="/tmp/${file_name}" - # docker cp file to container - cmd="docker cp ${real_file_path} ${GIE_DB_CONTAINER_NAME}:${DOCKER_REAL_FILE_PATH}" - eval ${cmd} || exit 1 - - cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \"" - cmd=${cmd}" ${DOCKER_DB_GEN_BIN}" - cmd=${cmd}" --engine_type=hqps" - cmd=${cmd}" --input=${DOCKER_REAL_FILE_PATH}" - cmd=${cmd}" --work_dir=/tmp/codegen/" - cmd=${cmd}" --ir_conf=${DOCKER_DB_IR_CONF_FILE}" - cmd=${cmd}" --graph_schema_path=${DOCKER_DB_GRAPH_SCHEMA}" - cmd=${cmd}" --gie_home=${DOCKER_DB_GIE_HOME}" - cmd=${cmd}" --output_dir=${DOCKER_OUTPUT_DIR}" - cmd=${cmd}" \"" - - echo "Running cmd: ${cmd}" - eval ${cmd} || exit 1 - # check output exists - # get the file_name of file_path - file_name="${file_name%.*}" - output_file="${HOST_OUTPUT_DIR}/lib${file_name}.so" - - if [ ! -f "${output_file}" ]; then - err "output file ${output_file} not exist, compilation failed" - exit 1 - fi - info "success generate dynamic lib ${output_file}, please create the cooresponding yaml file ${HOST_OUTPUT_DIR}/${file_name}.yaml." 
-} - -#################### Entry #################### -if [ $# -eq 0 ]; then - usage - exit 1 -fi - -while [[ $# -gt 0 ]]; do - key="$1" - - case $key in - -h | --help) - usage - exit - ;; - init) - shift - info "Start initiating database..." - do_init "$@" - exit 0 - ;; - start) - shift - info "Start database service..." - do_start "$@" - exit 0 - ;; - status) - shift - do_status "$@" - exit 0 - ;; - stop) - shift - do_stop "$@" - exit 0 - ;; - restart) - shift - do_restart # restart current graph - exit 0 - ;; - compile) - shift - do_compile "$@" - exit 0 - ;; - show_stored_procedure) - shift - do_show_stored_procedure "$@" - exit 0 - ;; - destroy) - shift - do_destroy "$@" - exit 0 - ;; - download_dataset) - shift - do_download_dataset - exit 0 - ;; - *) # unknown option - err "unknown option $1" - usage - exit 1 - ;; - esac -done - - - - diff --git a/flex/interactive/bin/gs_interactive b/flex/interactive/bin/gs_interactive new file mode 100755 index 000000000000..d80c06b5d42c --- /dev/null +++ b/flex/interactive/bin/gs_interactive @@ -0,0 +1,1993 @@ +#!/bin/bash +# Copyright 2020 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# The product name +DB_PROD_NAME="interactive" + +# colored error and info functions to wrap messages. 
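+# (editor's note) A sketch of the output the helpers below produce; the
+# timestamp shown is illustrative:
+#   info "service started"  =>  [2023-09-08 18:00:00] -INFO- service started   (green, to stdout)
+#   err "bad config"        =>  [2023-09-08 18:00:00] -ERROR- bad config       (red, to stderr)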
+RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color +err() { + echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 +} + +info() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + +emph(){ + echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + +################## Some Util Functions ################## + +# source: https://github.com/mrbaseman/parse_yaml.git +function parse_yaml { + local prefix=$2 + local separator=${3:-_} + + local indexfix + # Detect awk flavor + if awk --version 2>&1 | grep -q "GNU Awk" ; then + # GNU Awk detected + indexfix=-1 + elif awk -Wv 2>&1 | grep -q "mawk" ; then + # mawk detected + indexfix=0 + fi + + local s='[[:space:]]*' sm='[ \t]*' w='[a-zA-Z0-9_]*' fs=${fs:-$(echo @|tr @ '\034')} i=${i:- } + cat $1 | \ + awk -F$fs "{multi=0; + if(match(\$0,/$sm\|$sm$/)){multi=1; sub(/$sm\|$sm$/,\"\");} + if(match(\$0,/$sm>$sm$/)){multi=2; sub(/$sm>$sm$/,\"\");} + while(multi>0){ + str=\$0; gsub(/^$sm/,\"\", str); + indent=index(\$0,str); + indentstr=substr(\$0, 0, indent+$indexfix) \"$i\"; + obuf=\$0; + getline; + while(index(\$0,indentstr)){ + obuf=obuf substr(\$0, length(indentstr)+1); + if (multi==1){obuf=obuf \"\\\\n\";} + if (multi==2){ + if(match(\$0,/^$sm$/)) + obuf=obuf \"\\\\n\"; + else obuf=obuf \" \"; + } + getline; + } + sub(/$sm$/,\"\",obuf); + print obuf; + multi=0; + if(match(\$0,/$sm\|$sm$/)){multi=1; sub(/$sm\|$sm$/,\"\");} + if(match(\$0,/$sm>$sm$/)){multi=2; sub(/$sm>$sm$/,\"\");} + } + print}" | \ + sed -e "s|^\($s\)?|\1-|" \ + -ne "s|^$s#.*||;s|$s#[^\"']*$||;s|^\([^\"'#]*\)#.*|\1|;t1;t;:1;s|^$s\$||;t2;p;:2;d" | \ + sed -ne "s|,$s\]$s\$|]|" \ + -e ":1;s|^\($s\)\($w\)$s:$s\(&$w\)\?$s\[$s\(.*\)$s,$s\(.*\)$s\]|\1\2: \3[\4]\n\1$i- \5|;t1" \ + -e "s|^\($s\)\($w\)$s:$s\(&$w\)\?$s\[$s\(.*\)$s\]|\1\2: \3\n\1$i- \4|;" \ + -e ":2;s|^\($s\)-$s\[$s\(.*\)$s,$s\(.*\)$s\]|\1- [\2]\n\1$i- \3|;t2" \ + -e "s|^\($s\)-$s\[$s\(.*\)$s\]|\1-\n\1$i- \2|;p" | \ + sed -ne "s|,$s}$s\$|}|" \ + -e ":1;s|^\($s\)-$s{$s\(.*\)$s,$s\($w\)$s:$s\(.*\)$s}|\1- {\2}\n\1$i\3: \4|;t1" \ + -e "s|^\($s\)-$s{$s\(.*\)$s}|\1-\n\1$i\2|;" \ + -e ":2;s|^\($s\)\($w\)$s:$s\(&$w\)\?$s{$s\(.*\)$s,$s\($w\)$s:$s\(.*\)$s}|\1\2: \3 {\4}\n\1$i\5: \6|;t2" \ + -e "s|^\($s\)\($w\)$s:$s\(&$w\)\?$s{$s\(.*\)$s}|\1\2: \3\n\1$i\4|;p" | \ + sed -e "s|^\($s\)\($w\)$s:$s\(&$w\)\(.*\)|\1\2:\4\n\3|" \ + -e "s|^\($s\)-$s\(&$w\)\(.*\)|\1- \3\n\2|" | \ + sed -ne "s|^\($s\):|\1|" \ + -e "s|^\($s\)\(---\)\($s\)||" \ + -e "s|^\($s\)\(\.\.\.\)\($s\)||" \ + -e "s|^\($s\)-${s}[\"']\(.*\)[\"']$s\$|\1$fs$fs\2|p;t" \ + -e "s|^\($s\)\($w\)$s:${s}[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p;t" \ + -e "s|^\($s\)-$s\(.*\)$s\$|\1$fs$fs\2|" \ + -e "s|^\($s\)\($w\)$s:${s}[\"']\?\(.*\)$s\$|\1$fs\2$fs\3|" \ + -e "s|^\($s\)[\"']\?\([^&][^$fs]\+\)[\"']$s\$|\1$fs$fs$fs\2|" \ + -e "s|^\($s\)[\"']\?\([^&][^$fs]\+\)$s\$|\1$fs$fs$fs\2|" \ + -e "s|$s\$||p" | \ + awk -F$fs "{ + gsub(/\t/,\" \",\$1); + if(NF>3){if(value!=\"\"){value = value \" \";}value = value \$4;} + else { + if(match(\$1,/^&/)){anchor[substr(\$1,2)]=full_vn;getline}; + indent = length(\$1)/length(\"$i\"); + vname[indent] = \$2; + value= \$3; + for (i in vname) {if (i > indent) {delete vname[i]; idx[i]=0}} + if(length(\$2)== 0){ vname[indent]= ++idx[indent] }; + vn=\"\"; for (i=0; i0)&&index(val, ref)==1){ + tmpval=assignment[val]; + sub(ref,full_vn,val); + if(match(val,\"$separator\$\")){ + gsub(ref,full_vn,tmpval); + } else if (length(tmpval) > 0) { + printf(\"%s=\\\"%s\\\"\n\", val, tmpval); + } + 
assignment[val]=tmpval;
+          }
+        }
+      }
+    } else if (length(value) > 0) {
+      printf(\"%s=\\\"%s\\\"\n\", full_vn, value);
+    }
+  }END{
+    for(val in assignment){
+      if(match(val,\"$separator\$\"))
+        printf(\"%s=\\\"%s\\\"\n\", val, assignment[val]);
+    }
+  }"
+}
+
+# check that the given file exists; if not, exit.
+function check_file_exists(){
+    if [ ! -f "$1" ]; then
+        err "file $1 does not exist"
+        exit 1
+    fi
+}
+function check_directory_exists(){
+    if [ ! -d "$1" ]; then
+        err "directory $1 does not exist"
+        exit 1
+    fi
+}
+
+HOST_DB_HOME="$(
+    cd "$(dirname "$0")/../" >/dev/null 2>&1
+    pwd -P
+)"
+info "HOST_DB_HOME = ${HOST_DB_HOME}"
+mkdir -p "${HOST_DB_HOME}/data"
+
+################### GET USER INFO ###################
+# get user id
+uid=$(id -u)
+# get group id
+gid=$(id -g)
+
+
+#################### DEFINE CONSTANTS ####################
+
+# the running-state and env files
+# HOST_DB_INTERACTIVE_YAML="${HOST_DB_CONF_DIR}/interactive.yaml"
+HOST_DB_RUNNING_FILE="${HOST_DB_HOME}/.running"
+HOST_DB_ENV_FILE="${HOST_DB_HOME}/.env"
+
+DOCKER_DB_GRAPHSCOPE_HOME="/home/graphscope/GraphScope"
+DOCKER_DB_GIE_HOME="${DOCKER_DB_GRAPHSCOPE_HOME}/interactive_engine/"
+DOCKER_DB_SERVER_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/bin/sync_server"
+DOCKER_DB_GRAPH_IMPORT_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/tests/rt_mutable_graph/test_graph_loading"
+DOCKER_DB_COMPILER_BIN="com.alibaba.graphscope.GraphServer"
+DOCKER_DB_GEN_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/bin/load_plan_and_gen.sh"
+HOST_DB_TMP_DIR="/tmp"
+
+#################### DEFINE DEFAULT CONSTANTS ####################
+DATABASE_VERSION="v0.0.2"
+DATABASE_DEFAULT_GRAPH_NAME="gs_interactive_default_graph"
+DATABASE_CURRENT_GRAPH_NAME=${DATABASE_DEFAULT_GRAPH_NAME}
+DATABASE_DEFAULT_GRAPH_DOCKER_PATH="/home/graphscope/${DATABASE_DEFAULT_GRAPH_NAME}"
+DATABASE_DEFAULT_GRAPH_MOUNT_CMD="${HOST_DB_HOME}/examples/modern_graph/:${DATABASE_DEFAULT_GRAPH_DOCKER_PATH}"
+DATABASE_VOLUMES="${DATABASE_DEFAULT_GRAPH_MOUNT_CMD}"
+DATABASE_LOG_LEVEL="INFO"
+DATABASE_PORTS=""
+
+## compiler related default configuration
+DATABASE_COMPILER_PLANNER_IS_ON="true"
+DATABASE_COMPILER_PLANNER_OPT="RBO"
+DATABASE_COMPILER_PLANNER_RULES="FilterMatchRule,FilterIntoJoinRule,NotExistToAntiJoinRule"
+DATABASE_COMPILER_ENDPOINT_ADDRESS="localhost"
+DATABASE_COMPILER_BOLT_PORT="7687"
+DATABASE_COMPILER_QUERY_TIMEOUT="20000"
+
+## hiactor related default configuration
+DATABASE_COMPUTE_ENGINE_PORT="10000"
+DATABASE_COMPUTE_ENGINE_SHARD_NUM=1
+
+## directories
+DATABASE_WORKSPACE="/home/graphscope/workspace/"
+DATABASE_DATA_DIR_NAME="data"
+DATABASE_LOG_DIR_NAME="logs"
+DATABASE_CONF_DIR_NAME="conf"
+
+
+################### IMAGE VERSION ###################
+GIE_DB_IMAGE_VERSION="v0.0.2"
+GIE_DB_IMAGE_NAME="registry.cn-hongkong.aliyuncs.com/graphscope/${DB_PROD_NAME}"
+GIE_DB_CONTAINER_NAME="${DB_PROD_NAME}-server"
+
+
+#################### Prepare uncreated directories ####################
+
+info "Finish creating data dir"
+
+#################### DEFINE FUNCTIONS ####################
+
+function check_graph_name_valid() {
+    if [ $# -ne 1 ]; then
+        err "Expect a graph name given."
+        exit 1
+    fi
+    local graph_name=$1
+    # check graph_name can be a valid directory name
+    if [[ ! "${graph_name}" =~ ^[a-zA-Z0-9_]+$ ]]; then
+        err "graph name [${graph_name}] is not valid, only [a-zA-Z0-9_] are allowed."
+        exit 1
+    fi
+}
+function check_running_containers_and_exit(){
+    # check if there is any existing container
+    info "Check running containers and exit"
+    running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}")
+    if [[ -n "${running_containers}" ]]; then
+        err "There are existing containers: ${running_containers}, please stop them first."
+        exit 1
+    fi
+    info "Finish check"
+}
+
+function check_container_running(){
+    if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then
+        info "container ${GIE_DB_CONTAINER_NAME} is running"
+    else
+        info "container ${GIE_DB_CONTAINER_NAME} is not running"
+        # start the container
+        docker start "${GIE_DB_CONTAINER_NAME}"
+    fi
+}
+
+function ensure_container_running(){
+    if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then
+        info "container ${GIE_DB_CONTAINER_NAME} is running"
+    else
+        info "container ${GIE_DB_CONTAINER_NAME} is not running"
+        # start the container
+        docker start "${GIE_DB_CONTAINER_NAME}"
+    fi
+}
+
+function check_process_running_in_container(){
+    local container_name=$1
+    local process_name=$2
+    local error_msg=$3
+    local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}')
+    if [ -z "${process_id}" ]; then
+        err "process ${process_name} is not running in container ${container_name}"
+        err "${error_msg}"
+        exit 1
+    fi
+    info "process ${process_name} is running in container ${container_name}, process id is ${process_id}"
+}
+
+function check_process_not_running_in_container(){
+    local container_name=$1
+    local process_name=$2
+    local error_msg=$3
+    local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}')
+    if [ -z "${process_id}" ]; then
+        info "process ${process_name} is not running in container ${container_name}"
+    else
+        err "process ${process_name} is running in container ${container_name}, process id is ${process_id}"
+        err "${error_msg}"
+        exit 1
+    fi
+}
+
+# check whether the given graph is locked.
+function check_graph_not_running(){
+    info "Check whether the graph is not running"
+    if [ $# -ne 1 ]; then
+        err "Expect a graph name given."
+        exit 1
+    fi
+    local graph_name=$1
+    # check whether .lock is present in the container's data/${graph_name}/ directory
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    local lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock"
+    info "Check lock file ${lock_file}"
+    # if the lock file does not exist in the container, return 0; otherwise return 1
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ !
-f ${lock_file} ]" +} + +function update_init_config_from_yaml(){ + if [ $# -ne 1 ]; then + err "Expect configuration file given" + exit 1 + fi + config_file=$1 + eval $(parse_yaml "${config_file}") + # update workspace if exists + if [[ -n "${workspace}" ]]; then + DATABASE_WORKSPACE="${workspace}" + fi + # update database version if exists + if [[ -n "${version}" ]]; then + DATABASE_VERSION="${version}" + fi + # append the found volumes to DATABASE_VOLUMES + # map the HOST_DB_HOME/data/ to ${DATABASE_WORKSPACE}/data + DATABASE_VOLUMES="${DATABASE_VOLUMES},${HOST_DB_HOME}/data:${DATABASE_WORKSPACE}/data" + + x=1 + while true; do + volume_x_key="volume_${x}" + volume_x=$(eval echo "\$${volume_x_key}") + if [ -z "${volume_x}" ]; then + break + fi + DATABASE_VOLUMES="${DATABASE_VOLUMES},${volume_x}" + x=$((x + 1)) + done + # append compiler port and engine port to DATABASE_PORTS + DATABASE_PORTS="${DATABASE_COMPILER_BOLT_PORT}:${DATABASE_COMPILER_BOLT_PORT}" + DATABASE_PORTS="${DATABASE_PORTS},${DATABASE_COMPUTE_ENGINE_PORT}:${DATABASE_COMPUTE_ENGINE_PORT}" +} + +function update_engine_config_from_yaml(){ + if [ $# -ne 1 ]; then + err "Expect configuration file given" + exit 1 + fi + config_file=$1 + eval $(parse_yaml "${config_file}") + if [[ -n "${log_level}" ]]; then + DATABASE_LOG_LEVEL="${log_level}" + fi + # default_graph + if [[ -n "${default_graph}" ]]; then + DATABASE_CURRENT_GRAPH_NAME="${default_graph}" + fi + # compiler + if [[ -n ${compiler_planner_is_on} ]]; then + DATABASE_COMPILER_PLANNER_IS_ON="${compiler_planner_is_on}" + fi + info "Found compiler planner opt: ${compiler_planner_is_on}, ${DATABASE_COMPILER_PLANNER_IS_ON}" + if [[ -n ${compiler_planner_opt} ]]; then + DATABASE_COMPILER_PLANNER_OPT="${compiler_planner_opt}" + fi + # append the founded compiler planner rules to DATABASE_COMPILER_PLANNER_RULES + x=1 + while true; do + compiler_planner_rules_x_key="compiler_planner_rules_${x}" + compiler_planner_rules_x=$(eval echo "\$${compiler_planner_rules_x_key}") + if [ -z "${compiler_planner_rules_x}" ]; then + break + fi + # check compiler_planner_rules_x present in DATABASE_COMPILER_PLANNER_RULES, if not, append + if [[ ! 
"${DATABASE_COMPILER_PLANNER_RULES}" =~ "${compiler_planner_rules_x}" ]]; then + DATABASE_COMPILER_PLANNER_RULES="${DATABASE_COMPILER_PLANNER_RULES},${compiler_planner_rules_x}" + fi + x=$((x + 1)) + done + if [[ -n "${compiler_endpoint_address}" ]]; then + DATABASE_COMPILER_ENDPOINT_ADDRESS="${compiler_endpoint_address}" + fi + if [[ -n "${compiler_endpoint_bolt_connector_port}" ]]; then + DATABASE_COMPILER_BOLT_PORT="${compiler_endpoint_bolt_connector_port}" + fi + if [[ -n "${compiler_query_timeout}" ]]; then + DATABASE_COMPILER_QUERY_TIMEOUT="${compiler_query_timeout}" + fi +} + +function amplify_graph_schema(){ + # two args, input schema_file and output schema_file + if [ $# -ne 2 ]; then + err "Expect two args, but got $#" + exit 1 + fi + input_schema_file=$1 + output_schema_file=$2 + # check input_schema_file exists + check_file_exists "${input_schema_file}" + # rm output_schema_file if exists + if [ -f "${output_schema_file}" ]; then + rm "${output_schema_file}" + fi + # read input_schema_file line by line, and append to output_schema_file + local cur_vertex_label_ind=0 + local cur_edge_label_ind=0 + local cur_prop_ind=0 + # the type id may not be specified in input_schema_file + local type_id_specified=false + local prop_id_specified=false + # if stored_type is not specified in input_schema_file's content, append it to output_schema_file + stored_type_specified=false + label_type="vertex" + IFS='' + while read -r line; do + if [[ "${line}" =~ ^[[:space:]]*# ]]; then + echo "found comment line ${line}" + else + # if line contains 'type_name', then prepend - type_id: 0 before it + if [[ "${line}" =~ vertex_types ]]; then + label_type="vertex" + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ edge_types ]]; then + label_type="edge" + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ type_id ]]; then + type_id_specified=true + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ property_id ]]; then + prop_id_specified=true + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ type_name ]] && [ "${type_id_specified}" == "false" ]; then + if [ "${label_type}" == "edge" ]; then + echo " - type_id: ${cur_edge_label_ind}" >> "${output_schema_file}" + cur_edge_label_ind=$((cur_edge_label_ind + 1)) + else + echo " - type_id: ${cur_vertex_label_ind}" >> "${output_schema_file}" + cur_vertex_label_ind=$((cur_vertex_label_ind + 1)) + fi + cur_prop_ind=0 + type_id_specified=false + echo "${line}" | sed 's/- / /g' >> "${output_schema_file}" + elif [[ "${line}" =~ property_name ]] && [ "${prop_id_specified}" == "false" ]; then + echo " - property_id: ${cur_prop_ind}" >> "${output_schema_file}" + cur_prop_ind=$((cur_prop_ind + 1)) + prop_id_specified=false + # replace the '- ' in ${line} and append to output_schema_file + echo "${line}" | sed 's/- / /g' >> "${output_schema_file}" + elif [[ "${line}" =~ store_type ]]; then + stored_type_specified=true + emph "stored type : ${stored_type_specified}" + echo "${line}" >> "${output_schema_file}" + else + echo "${line}" >> "${output_schema_file}" + fi + fi + done <<< $(cat "${input_schema_file}") + if [ "${stored_type_specified}" == "false" ]; then + emph "stored type : ${stored_type_specified}" + echo "store_type: mutable_csr" >> "${output_schema_file}" + fi + info "Finish amplify schema file ${input_schema_file} to ${output_schema_file}" +} + + +#################### DEFINE USAGE #################### + +function init_usage() { + cat << EOF + gs_interactive init -c [--config] + Init the database, create 
the containers. Specify the database version and volume mounting in the config yaml.
+EOF
+}
+
+function destroy_usage() {
+    cat << EOF
+        gs_interactive destroy
+            Destroy the current database, remove the container.
+EOF
+}
+
+function create_usage() {
+    cat << EOF
+        gs_interactive database create -g [--graph] graph_name -c [--config] schema_file
+            Create a graph in the database with the provided schema file.
+            The user should then import data to the created graph.
+EOF
+}
+
+function remove_usage() {
+    cat << EOF
+        gs_interactive database remove -g [--graph] graph_name
+            Remove the specified graph from the database.
+EOF
+}
+
+function import_usage() {
+    cat << EOF
+        gs_interactive database import -g [--graph] graph_name -c [--config] bulk_load_file
+            Load the raw data specified in the bulk load file into the specified graph.
+EOF
+}
+
+function list_usage() {
+    cat << EOF
+        gs_interactive database list
+            List all graphs in the database.
+EOF
+}
+
+function database_usage(){
+    create_usage
+    remove_usage
+    import_usage
+    list_usage
+}
+
+
+function start_usage() {
+    cat << EOF
+        gs_interactive service start -g [--graph] graph_name -c [--config] engine_config_file
+            Start the graph service on the specified graph, with the provided engine config file.
+EOF
+}
+
+function stop_usage() {
+    cat << EOF
+        gs_interactive service stop
+            Stop the currently running database service.
+EOF
+}
+
+function restart_usage() {
+    cat << EOF
+        gs_interactive service restart -c [--config] engine_config_file
+            Restart the database on the currently running graph. A new engine config file can be supplied to update the configuration.
+EOF
+}
+
+function get_log_usage() {
+    cat << EOF
+        gs_interactive service get_log -o [--output] output_directory
+            Copy the logs of the server and compiler to the given output directory.
+EOF
+}
+
+function services_usage(){
+    start_usage
+    stop_usage
+    restart_usage
+    get_log_usage
+}
+
+function compile_usage(){
+    cat << EOF
+        gs_interactive procedure compile -g [--graph] graph_name -i [--input] input_file -d [--description] description
+                                         --compile_only
+            Compile a cypher/.cc file to a dynamic library, according to the schema of the graph. The output library will be placed at ./data/{graph_name}/plugins.
+            If --compile_only is specified, the library will not be loaded to the graph.
+EOF
+}
+
+## The .enable and .disable files control stored procedure enabling/disabling.
+
+function enable_proc_usage(){
+    cat << EOF
+        gs_interactive procedure enable -g [--graph] graph_name -n [--name] procedure_names
+                                        -c [--config] stored_procedures.yaml
+            Enable the stored procedures in the given graph, with the provided library.
+            stored_procedures.yaml contains the stored procedure names, one per line.
+EOF
+}
+
+function disable_proc_usage(){
+    cat << EOF
+        gs_interactive procedure disable -g [--graph] graph_name -n [--name] procedure_names
+                                         -c [--config] stored_procedures.yaml
+            Disable the stored procedures in the given graph, with the provided library.
+            stored_procedures.yaml contains the stored procedure names, one per line.
+EOF
+}
+
+function show_stored_procedure_usage(){
+    cat << EOF
+        gs_interactive procedure show -g [--graph] graph_name
+            Show all stored procedures for the given graph.
+EOF
+}
+
+
+function procedure_usage(){
+    compile_usage
+    enable_proc_usage
+    disable_proc_usage
+    show_stored_procedure_usage
+}
+
+# parse the args and set the variables.
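+# (editor's note) A hypothetical end-to-end session assembled from the usages
+# above; the graph name and file paths are illustrative, not shipped defaults:
+#   gs_interactive init -c ./conf/interactive.yaml
+#   gs_interactive database create -g my_graph -c ./my_graph_schema.yaml
+#   gs_interactive database import -g my_graph -c ./bulk_load.yaml
+#   gs_interactive service start -g my_graph -c ./conf/engine_config.yaml
+#   gs_interactive procedure compile -g my_graph -i ./my_query.cypher
+#   gs_interactive service restart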
+function usage() { + init_usage + destroy_usage + database_usage + services_usage + procedure_usage +} + +################### Generate config file ################### +function do_gen_conf(){ + # receive only one args, the config file + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -o | --output) + output_config_file="$2" + shift + shift + ;; + *) + err "unknown option $1" + exit 1 + ;; + esac + done + + #if output_config_file exists, remove + if [ -f "${output_config_file}" ]; then + rm "${output_config_file}" + fi + + # echo directories + echo "directories:" >> ${output_config_file} + echo " workspace: ${DATABASE_WORKSPACE}" >> ${output_config_file} + echo " subdirs:" >> ${output_config_file} + echo " data: ${DATABASE_DATA_DIR_NAME}" >> ${output_config_file} + echo " logs: ${DATABASE_LOG_DIR_NAME}" >> ${output_config_file} + echo " conf: ${DATABASE_CONF_DIR_NAME}" >> ${output_config_file} + + # log level + echo "log_level: ${DATABASE_LOG_LEVEL}" >> ${output_config_file} + + # current graph + echo "default_graph: ${DATABASE_CURRENT_GRAPH_NAME}" >> ${output_config_file} + + + #compute_engine + echo "compute_engine:" >> ${output_config_file} + echo " type: hiactor" >> ${output_config_file} + echo " hosts:" >> ${output_config_file} + echo " - localhost:${DATABASE_COMPUTE_ENGINE_PORT}" >> ${output_config_file} + echo " shard_num: ${DATABASE_COMPUTE_ENGINE_SHARD_NUM}" >> ${output_config_file} + + + #compiler + echo "compiler:" >> ${output_config_file} + echo " planner:" >> ${output_config_file} + echo " is_on: ${DATABASE_COMPILER_PLANNER_IS_ON}" >> ${output_config_file} + echo " opt: ${DATABASE_COMPILER_PLANNER_OPT}" >> ${output_config_file} + # split compiler planner rules and put as sequences in yaml + echo " rules:" >> ${output_config_file} + IFS=',' read -ra RULES_ARRAY <<<"${DATABASE_COMPILER_PLANNER_RULES}" + for rule in "${RULES_ARRAY[@]}"; do + echo " - ${rule}" >> ${output_config_file} + done + echo " endpoint:" >> ${output_config_file} + echo " default_listen_address: ${DATABASE_COMPILER_ENDPOINT_ADDRESS}" >> ${output_config_file} + echo " bolt_connector:" >> ${output_config_file} + echo " port: ${DATABASE_COMPILER_BOLT_PORT}" >> ${output_config_file} + echo " gremlin_connector:" >> ${output_config_file} + echo " disabled: true" >> ${output_config_file} + echo " port: 8182" >> ${output_config_file} + echo " query_timeout: ${DATABASE_COMPILER_QUERY_TIMEOUT}" >> ${output_config_file} + info "Finish generate config file ${output_config_file}" +} + +function generate_real_engine_conf(){ + # expect less than three args + if [ $# -gt 2 ] || [ $# -eq 0 ]; then + err "Expect one or two args, but got $#" + exit 1 + fi + if [ $# -eq 1 ]; then + real_engine_config_file=$1 + info "engine config file is not specified, using default engine config" + do_gen_conf -o ${real_engine_config_file} + else + engine_config_file=$1 + real_engine_config_file=$2 + check_file_exists "${engine_config_file}" + update_engine_config_from_yaml "${engine_config_file}" + do_gen_conf -o ${real_engine_config_file} + fi +} + +function update_graph_yaml_with_procedure_enabling(){ + # expect one args, graph name + if [ $# -ne 1 ]; then + err "Expect one args, but got $#" + exit 1 + fi + graph_name=$1 + + # gather .enable and append to graph yaml + + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE}
+    fi
+    # copy graph_name's graph0.yaml to ${HOST_DB_TMP_DIR}
+    docker cp "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/graph0.yaml" "${HOST_DB_TMP_DIR}/graph0.yaml"
+    echo "" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+    echo "stored_procedures:" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+    echo "  enable_lists:" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+    # copy graph_name's .enable file to ${HOST_DB_TMP_DIR}
+    docker cp "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/plugins/.enable" "${HOST_DB_TMP_DIR}/.enable" || true
+    # get the .enable file
+    local enable_file="${HOST_DB_TMP_DIR}/.enable"
+    any_stored_procedures=false
+    # check whether the enable file exists
+    if [ ! -f "${enable_file}" ]; then
+        info "enable file ${enable_file} does not exist, no stored procedures are enabled"
+    else
+        # read the enable file line by line
+        while IFS= read -r line; do
+            echo "Found enabled procedure: $line"
+            echo "    - ${line}" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+            any_stored_procedures=true
+        done < ${enable_file}
+    fi
+    if [ "${any_stored_procedures}" = false ]; then
+        echo "  directory: not-a-directory" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+    else
+        echo "  directory: plugins" >> ${HOST_DB_TMP_DIR}/graph0.yaml
+    fi
+
+    # copy graph0.yaml to the container
+    docker cp "${HOST_DB_TMP_DIR}/graph0.yaml" "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml"
+    info "Finish updating graph yaml with procedure enabling."
+}
+
+
+function check_database_initialized(){
+    # check if the container exists
+    running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}")
+    if [[ -z "${running_containers}" ]]; then
+        err "Database is not initialized, please run the init command first."
+        exit 1
+    fi
+}
+
+#################### Init database ####################
+# Init the current database.
+# create a user with the same user id in the container
+function do_init(){
+    # check running containers and exit
+    check_running_containers_and_exit
+    info "Ok, no running instance found, start initializing database..."
+    # if no containers are running, proceed to init
+
+    # check args num is 1, and take the first arg as CONFIG_FILE
+    if [ $# -eq 0 ]; then
+        err "init command needs 1 arg, but got $#"
+        init_usage
+        exit 1
+    fi
+
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -c | --config)
+            config_file="$2"
+            shift # past argument
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            init_usage
+            exit 1
+            ;;
+        esac
+    done
+
+    check_file_exists "${config_file}"
+
+    # parse yaml config
+    # eval $(parse_yaml "${config_file}")
+
+    # Parse the configuration presented in yaml, and override the default values.
+    update_init_config_from_yaml "${config_file}"
+
+    #1. Found workspace
+    info "Found docker db home: ${DATABASE_WORKSPACE}"
+    # put docker_workspace into env
+    echo "export DATABASE_WORKSPACE=${DATABASE_WORKSPACE}" >> ${HOST_DB_ENV_FILE}
+    echo "export DATABASE_DATA_DIR_NAME=${DATABASE_DATA_DIR_NAME}" >> ${HOST_DB_ENV_FILE}
+    info "Found database version: ${DATABASE_VERSION}"
+
+    #2. Found mounting volumes from the yaml file
+    mount_cmd=""
+    # split DATABASE_VOLUMES and append to mount_cmd
+    IFS=',' read -ra VOLUME_ARRAY <<<"${DATABASE_VOLUMES}"
+    for volume in "${VOLUME_ARRAY[@]}"; do
+        # split with : and check the host path exists
+        volume_value_array=(${volume//:/ })
+        # if volume_value_array length is not 2, error
+        if [ ${#volume_value_array[@]} -ne 2 ]; then
+            err "volume ${volume} is not valid, should be host_path:docker_path"
+            exit 1
+        fi
+        # get host_path
+        host_path=${volume_value_array[0]}
+        docker_path=${volume_value_array[1]}
+        # check host_path exists
+        info "Found host path: ${host_path}"
+        check_directory_exists "${host_path}"
+        mount_cmd="${mount_cmd} -v ${volume}"
+    done
+    # mount_cmd="${mount_cmd} -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro"
+
+    info "Found docker volumes: ${mount_cmd}"
+
+    #3. get mapped ports
+    port_cmd=""
+    # split the DATABASE_PORTS and append to port_cmd
+    IFS=',' read -ra DATABASE_PORTS_ARRAY <<<"${DATABASE_PORTS}"
+    for ports in "${DATABASE_PORTS_ARRAY[@]}"; do
+        port_x_value_array=(${ports//:/ })
+        # if port_x_value_array length is not 2, error
+        if [ ${#port_x_value_array[@]} -ne 2 ]; then
+            err "port ${ports} is not valid, should be host_port:docker_port"
+            exit 1
+        fi
+        # get host_port
+        host_port=${port_x_value_array[0]}
+        docker_port=${port_x_value_array[1]}
+        # check ports are integers
+        if ! [[ "${host_port}" =~ ^[0-9]+$ ]]; then
+            err "host port ${host_port} is not valid"
+            exit 1
+        fi
+        if ! [[ "${docker_port}" =~ ^[0-9]+$ ]]; then
+            err "docker port ${docker_port} is not valid"
+            exit 1
+        fi
+        port_cmd="${port_cmd} -p ${host_port}:${docker_port}"
+    done
+    info "Found docker ports: ${port_cmd}"
+
+    # get uid
+    local uid=$(id -u)
+    local gid=$(id -g)
+    # get group name
+    local group_name=$(id -gn)
+    # get username
+    local username=$(id -un)
+
+    GIE_DB_IMAGE_NAME_TAG="${GIE_DB_IMAGE_NAME}:${DATABASE_VERSION}"
+    cmd="docker run -it -d --privileged --name ${GIE_DB_CONTAINER_NAME}"
+    # create user in container
+    cmd="${cmd} ${port_cmd} ${mount_cmd} ${GIE_DB_IMAGE_NAME_TAG} bash"
+
+    info "Running cmd: ${cmd}"
+    eval ${cmd} || { docker rm "${GIE_DB_CONTAINER_NAME}"; exit 1; }
+
+    info "Finish init database"
+
+    # create the workspace directory in the container
+    docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}" || exit 1
+    docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "sudo chown -R graphscope:graphscope ${DATABASE_WORKSPACE}" || exit 1
+    docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}/logs" || exit 1
+    docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}/conf" || exit 1
+
+    # create default_graph
+    do_create -g ${DATABASE_DEFAULT_GRAPH_NAME} -c ${HOST_DB_HOME}/examples/modern_graph/modern_graph.yaml
+    # check whether do_import succeeds
+    if ! do_import -g ${DATABASE_DEFAULT_GRAPH_NAME} -c ${HOST_DB_HOME}/examples/modern_graph/bulk_load.yaml; then
+        err "Failed to import default graph"
+        exit 1
+    fi
+    info "Successfully created and imported the default graph: [${DATABASE_DEFAULT_GRAPH_NAME}]"
+}
+
+
+#################### Create graph ####################
+function do_create(){
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            shift # past argument
+            shift
+            ;;
+        -c | --config)
+            schema_file="$2"
+            shift
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            create_usage
+            exit 1
+            ;;
+        esac
+    done
+    # check graph_name is set
+    if [ -z "${graph_name}" ]; then
+        err "graph name is not specified"
+        create_usage
+        exit 1
+    fi
+
+    if [ -z "${schema_file}" ]; then
+        err "graph schema is not specified; a graph_schema.yaml must be given via the -c option"
+        create_usage
+        exit 1
+    fi
+
+    check_graph_name_valid "${graph_name}"
+    if [ -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] has already been created."
+        exit 1
+    fi
+    check_file_exists "${schema_file}"
+    amplify_schema_file="${HOST_DB_TMP_DIR}/graph0.yaml"
+    # add some default settings and non-user-facing settings to the schema file.
+    amplify_graph_schema ${schema_file} ${amplify_schema_file}
+    # check the graph is not running inside docker
+    check_graph_not_running ${graph_name} || { err "Can not create graph ${graph_name}, since a graph with the same name is running."; exit 1; }
+    # create the graph directory in the docker's workspace
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+    docker_graph_schema_file="${docker_graph_dir}/graph.yaml"
+    docker_graph_schema_file_back="${docker_graph_dir}/graph0.yaml" # used for later adding/removing stored procedures
+    # check whether docker_graph_schema_file exists in the container; if it exists, tell the user to remove it first
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_schema_file} ] && echo -e \"${RED} Graph ${graph_name} already exists, please remove it first. ${NC}\" && exit 1 || exit 0" || exit 1
+    # create the graph directory in the docker's workspace
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${docker_graph_dir}" || exit 1
+    # create plugins dir
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${docker_graph_dir}/plugins" || exit 1
+    # copy the schema file to the docker's workspace
+    docker cp "${amplify_schema_file}" "${GIE_DB_CONTAINER_NAME}:${docker_graph_schema_file}" || exit 1
+    docker cp "${amplify_schema_file}" "${GIE_DB_CONTAINER_NAME}:${docker_graph_schema_file_back}" || exit 1
+    info "Successfully created graph [${graph_name}]"
+    #TODO: support creating an empty graph
+}
+
+################### Remove graph ####################
+function do_remove(){
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            shift # past argument
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            remove_usage
+            exit 1
+            ;;
+        esac
+    done
+    # check graph_name is set
+    if [ -z "${graph_name}" ]; then
+        err "graph name is not specified"
+        remove_usage
+        exit 1
+    fi
+    check_graph_name_valid "${graph_name}"
+    # check if the graph is created
+    if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] can not be removed, since it is not created."
+        exit 1
+    fi
+    # check the graph is not running inside docker
+    if ! check_graph_not_running ${graph_name}; then
+        err "Can not remove graph ${graph_name}, since a graph with the same name is running!"
+        exit 1;
+    fi
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+    ### let the user confirm removing the graph
+    read -p "Are you sure to remove graph [${graph_name}]? [y/n]" -n 1 -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        info "Abort removing graph [${graph_name}]"
+        exit 1
+    fi
+
+    # rm -rf the graph directory in the docker's workspace
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "rm -rf ${docker_graph_dir}" || { echo "Failed to delete graph ${graph_name}"; exit 1; }
+    info "Successfully removed graph [${graph_name}]"
+    # if graph_name is the current graph, reset the current graph to the default
+    if [ "${DATABASE_CURRENT_GRAPH_NAME}" = "${graph_name}" ]; then
+        info "Removed current graph [${graph_name}], set current graph to default_graph: ${DATABASE_DEFAULT_GRAPH_NAME}"
+        echo "export DATABASE_CURRENT_GRAPH_NAME=${DATABASE_DEFAULT_GRAPH_NAME}" >> ${HOST_DB_ENV_FILE}
+    fi
+    # if the removed graph is recorded in .running, remove the record
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        local running_graph=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+        if [ "${running_graph}" = "${graph_name}" ]; then
+            rm ${HOST_DB_RUNNING_FILE}
+        fi
+    fi
+}
+
+#################### Import ####################
+function do_import(){
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            shift # past argument
+            shift
+            ;;
+        -c | --config)
+            bulk_load_file="$2"
+            shift
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            import_usage
+            exit 1
+            ;;
+        esac
+    done
+    # check graph_name is set
+    if [ -z "${graph_name}" ]; then
+        err "graph name is not specified"
+        import_usage
+        exit 1
+    fi
+    check_graph_name_valid "${graph_name}"
+    if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] can not be found, please create it first"
+        exit 1
+    fi
+    info "Import data to graph [${graph_name}] from ${bulk_load_file}"
+    # check if the container is running
+    check_container_running
+    # check if the bulk_load_file exists
+    check_file_exists "${bulk_load_file}"
+    info "bulk_load_file ${bulk_load_file} exists"
+
+    check_graph_not_running ${graph_name} || { err "Can not import data to graph [${graph_name}], since it is already running."; exit 1; }
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    # check graph_schema_file exists in the container; if not, let the user create the graph first
+    docker_graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_schema_file} ] || (echo -e \"${RED}graph [${graph_name}] does not exist, please create it first.${NC}\" && exit 1)"
+    info "Graph Schema exists"
+    # copy the bulk_load_file to the container
+    bulk_load_file_name=$(basename "${bulk_load_file}")
+    docker_bulk_load_file="/tmp/${bulk_load_file_name}"
+    docker cp "${bulk_load_file}" "${GIE_DB_CONTAINER_NAME}:${docker_bulk_load_file}"
+
+    docker_graph_data_dir="${DATABASE_WORKSPACE}/data/${graph_name}/indices"
+    # currently we can only overwrite the indices, so if the directory exists, remove it first
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_data_dir} ] && rm -rf ${docker_graph_data_dir} || exit 0" || exit 1
+
+    cmd="${DOCKER_DB_GRAPH_IMPORT_BIN} ${docker_graph_schema_file} ${docker_bulk_load_file} ${docker_graph_data_dir}"
+    info "Running cmd: ${cmd}"
+    # docker exec the cmd, and fail if the cmd fails
docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "${cmd} ||(echo -e \"${RED} Fail to import graph [${graph_name}]${NC}\" && exit 1 )"; then + err "Fail to import graph [${graph_name}]" + exit 1 + fi + info "Successfuly import data to graph [${graph_name}]" +} + +#################### List Graph #################### +function do_list() { + # check if the container is running + check_container_running + # get all graph names + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + host_data_dir="${HOST_DB_HOME}/${DATABASE_DATA_DIR_NAME}" + # get all graph names into a array + graph_names=($(ls ${host_data_dir})) + # if graph_names is empty, no graph exists + if [ ${#graph_names[@]} -eq 0 ]; then + info "No graph has been created." + exit 0 + fi + # print all graph names + for graph_name in "${graph_names[@]}"; do + echo "Found graph: [${graph_name}]" + done + +} + +#################### Destroy #################### +function do_destroy() { + + # if container not exists, exit + if ! docker ps -a --format '{{.Names}}' | grep -Eq "^${GIE_DB_CONTAINER_NAME}$"; then + info "Database has not been created, exit" + exit 0 + fi + + # let user confirm to destroy the database + read -p "Are you sure to destroy the database? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort destroying database" + exit 1 + fi + + info "Destroying database..." + docker stop "${GIE_DB_CONTAINER_NAME}" + docker rm "${GIE_DB_CONTAINER_NAME}" + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + # rm host data/* + sudo rm -rf ${HOST_DB_HOME}/data/* + + #rm .running + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + rm ${HOST_DB_RUNNING_FILE} + fi + if [ -f "${HOST_DB_ENV_FILE}" ]; then + rm ${HOST_DB_ENV_FILE} + fi + + info "Finish destroy database" +} + +#################### Start database #################### +function do_start(){ + check_database_initialized + if [ -f "${HOST_DB_ENV_FILE}" ];then + . ${HOST_DB_ENV_FILE} + fi + info "Starting database..." 
+
+    # check whether the server/compiler processes are already running
+    local compiler_process_id=$(docker top "${GIE_DB_CONTAINER_NAME}" | grep "${DOCKER_DB_COMPILER_BIN}" | awk '{print $2}')
+    local server_process_id=$(docker top "${GIE_DB_CONTAINER_NAME}" | grep "${DOCKER_DB_SERVER_BIN}" | awk '{print $2}')
+    # if both processes are running, exit
+    # if only one process is running, kill it
+    if [ -n "${compiler_process_id}" ] && [ -n "${server_process_id}" ]; then
+        local old_graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+        err "Database is already running on [${old_graph_name}], please stop it first"
+        exit 1
+    fi
+    if [ -n "${compiler_process_id}" ]; then
+        docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "kill -9 ${compiler_process_id}"
+    fi
+    if [ -n "${server_process_id}" ]; then
+        docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "kill -9 ${server_process_id}"
+    fi
+
+
+    # set trap to do_stop
+    trap do_stop SIGINT SIGTERM
+
+    graph_name=""
+    engine_config_file=""
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            shift # past argument
+            shift
+            ;;
+        -c | --config)
+            engine_config_file="$2"
+            shift
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            start_usage
+            exit 1
+            ;;
+        esac
+    done
+    # try to parse default_graph from engine_config_file
+    # generate the real engine config file, placed at /tmp/real_engine_config.yaml
+    if [ -z "${graph_name}" ]; then
+        graph_name=${DATABASE_CURRENT_GRAPH_NAME}
+    else
+        DATABASE_CURRENT_GRAPH_NAME=${graph_name}
+        info "Using user specified graph [${graph_name}]"
+    fi
+    check_graph_name_valid "${graph_name}"
+    if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] can not be found, please create it and import data first."
+        exit 1
+    fi
+
+    real_engine_config_file="/tmp/real_engine_config.yaml"
+    if [ -z "${engine_config_file}" ]; then
+        generate_real_engine_conf "${real_engine_config_file}"
+    else
+        generate_real_engine_conf "${engine_config_file}" "${real_engine_config_file}"
+    fi
+
+    # copy the engine config file to the container
+    dst_engine_config_file="${DATABASE_WORKSPACE}/conf/engine_config.yaml"
+    docker cp "${real_engine_config_file}" "${GIE_DB_CONTAINER_NAME}:${dst_engine_config_file}" || { echo "failed to copy ${engine_config_file} to container"; exit 1; }
+
+
+    # check whether the graph exists in the container, and whether its data is loaded
+    docker_graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml"
+    wal_file="${DATABASE_WORKSPACE}/data/${graph_name}/indices/init_snapshot.bin"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "( [ -f ${docker_graph_schema_file} ] && echo \"true\" ) || echo \"false\"" > /tmp/graph_exists
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "( [ -f ${wal_file} ] && echo \"true\" ) || echo \"false\"" > /tmp/graph_loaded
+    graph_exists=$(cat /tmp/graph_exists)
+    graph_loaded=$(cat /tmp/graph_loaded)
+    if [ "${graph_exists}" = "false" ]; then
+        # if graph_name is the default graph, we should create it first;
+        # otherwise, tell the user to create it first
+        if [ "${graph_name}" != "${DATABASE_DEFAULT_GRAPH_NAME}" ]; then
+            err "graph [${graph_name}] does not exist, please create it first"
+            exit 1
+        fi
+    fi
+
+    if [ "${graph_loaded}" = "false" ]; then
+        err "graph [${graph_name}] is empty, please import data first"
+        exit 1
+    fi
+
+    info "graph [${graph_name}] exists, start it"
+
+    do_stop
+    ensure_container_running
+    # regenerate graph.yaml from graph0.yaml, overriding it with stored procedure enabling/disabling
+    update_graph_yaml_with_procedure_enabling ${graph_name}
+
+    # the bulk_load_file should be placed inside ${DATABASE_WORKSPACE}, and should use a relative path
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    info "In start database, received graph_name = ${graph_name}, engine_config_file = ${engine_config_file}"
+    docker_server_log_path="${DATABASE_WORKSPACE}/logs/server.log"
+    graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml"
+    csr_data_dir="${DATABASE_WORKSPACE}/data/${graph_name}/indices"
+    cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \""
+    cmd="${cmd} ${DOCKER_DB_SERVER_BIN} -c ${dst_engine_config_file}"
+    cmd="${cmd} -g ${graph_schema_file} --data-path ${csr_data_dir}"
+    cmd="${cmd} --gie-home ${DOCKER_DB_GIE_HOME}"
+    cmd="${cmd} > ${docker_server_log_path} 2>&1 & \""
+    echo "Running cmd: ${cmd}"
+    # eval the command; exit if it fails
+    eval ${cmd} || { echo "Failed to launch hqps server"; exit 1; }
+    sleep 4
+    # check whether the process is running
+    check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} ", use gs_interactive service get_log -o [dir] to see the logs"
+    info "Successfully started server"
+
+    # start compiler
+    docker_compiler_log_path="${DATABASE_WORKSPACE}/logs/compiler.log"
+    cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \""
+    cmd=${cmd}"java -cp \"${DOCKER_DB_GIE_HOME}/compiler/target/libs/*:${DOCKER_DB_GIE_HOME}/compiler/target/compiler-0.0.1-SNAPSHOT.jar\" "
+    cmd=${cmd}" -Djna.library.path=${DOCKER_DB_GIE_HOME}/executor/ir/target/release"
+    cmd=${cmd}" -Dgraph.schema=${graph_schema_file}"
+    # should error be reported?
+    # cmd=${cmd}" -Dgraph.stored.procedures.uri=file:${docker_graph_plugin_dir}"
+    cmd=${cmd}" ${DOCKER_DB_COMPILER_BIN} ${dst_engine_config_file} > ${docker_compiler_log_path} 2>&1 &"
+    cmd=${cmd}"\""
+    info "Running cmd: ${cmd}"
+    eval ${cmd}
+    sleep 6
+    check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} ", use gs_interactive service get_log -o [dir] to see more details"
+    info "Successfully started compiler"
+    # get the cypher port from the engine config file
+    # bolt_connector_port=$(parse_yaml "${engine_config_file}" | grep "compiler_endpoint_bolt_connector_port" | awk -F "=" '{print $2}')
+    emph "Database service is running, port is open on :${DATABASE_COMPILER_BOLT_PORT}"
+
+    # if do_start succeeds, write the current args to ${HOST_DB_RUNNING_FILE}
+    echo "GRAPH_NAME=${graph_name}" > ${HOST_DB_RUNNING_FILE}
+    echo "ENGINE_CONFIG_FILE=${engine_config_file}" >> ${HOST_DB_RUNNING_FILE}
+    # create .lock file
+    docker_graph_lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "touch ${docker_graph_lock_file}" || exit 1
+}
+
+
+#################### Stop database ####################
+function do_stop(){
+    # if no database is recorded as running, there is nothing to clean up
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    else
+        info "No running database found, do nothing"
+    fi
+    # get graph_name from ${HOST_DB_RUNNING_FILE}
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        local graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+        docker_graph_lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock"
+        docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "rm -f ${docker_graph_lock_file}" || exit 1
+        info "Successfully removed lock file ${docker_graph_lock_file}"
+    else
+        info "No graph is running"
+    fi
+
+    # stop the SERVER_BIN process and the compiler process
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "pkill -f ${DOCKER_DB_SERVER_BIN}"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "pkill -f ${DOCKER_DB_COMPILER_BIN}"
+    sleep 6
+    info "Successfully stopped database"
+}
+
+
+#################### Get database status ####################
+function do_status() {
+    if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then
+        info "container ${GIE_DB_CONTAINER_NAME} is running"
+    else
+        info "container ${GIE_DB_CONTAINER_NAME} is not running"
+        info "Please start database first"
+    fi
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    # the container may be running while the processes are not
+    check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} "The service is stopped or down. Use gs_interactive service get_log -o [dir] to see more details"
+    check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "The service is stopped or down. Use gs_interactive service get_log -o [dir] to see more details"
+    # get the cypher port from the engine config file in the container
+
+    docker_engine_config_file="${DATABASE_WORKSPACE}/conf/engine_config.yaml"
+    # copy the engine config file to the host's tmp directory
+    docker cp "${GIE_DB_CONTAINER_NAME}:${docker_engine_config_file}" "${HOST_DB_TMP_DIR}/engine_config.yaml" || exit 1
+    eval $(parse_yaml "${HOST_DB_TMP_DIR}/engine_config.yaml")
+    emph "Database service is running, port is open on :${compiler_endpoint_bolt_connector_port}"
+    # print the currently running graph name
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        local graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+        emph "Current running graph is [${graph_name}]"
+    else
+        err "No graph is running"
+    fi
+}
+
+
+
+#################### Restart ####################
+function do_restart() {
+    # read args from the cached file
+    # get the number of lines in ${HOST_DB_RUNNING_FILE}
+    num_lines=$(wc -l < ${HOST_DB_RUNNING_FILE})
+    if [ ${num_lines} -ne 2 ]; then
+        err "Error: ${HOST_DB_RUNNING_FILE} should have 2 lines, but got ${num_lines}; something is wrong with the file ${HOST_DB_RUNNING_FILE}"
+        exit 1
+    fi
+    # read args from the file
+    GRAPH_NAME=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+    ENGINE_CONFIG_FILE=$(sed -n '2p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+    # parse the current args, overriding the args from the file
+    info "Restarting database..."
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            GRAPH_NAME="$2"
+            shift # past argument
+            shift
+            ;;
+        -c | --config)
+            ENGINE_CONFIG_FILE="$2"
+            shift
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            restart_usage
+            exit 1
+            ;;
+        esac
+    done
+    do_stop
+    info "Successfully stopped database"
+    do_start -g ${GRAPH_NAME} -c ${ENGINE_CONFIG_FILE}
+    info "Finish restarting database..."
+}
+
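+# (editor's note) Example: restart on another graph with an updated engine
+# config; the names below are illustrative:
+#   gs_interactive service restart -g my_graph -c ./conf/engine_config.yaml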
+#################### Get log ####################
+function do_log(){
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -o | --output)
+            directory="$2"
+            shift # past argument
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            get_log_usage
+            exit 1
+            ;;
+        esac
+    done
+    # check directory is set
+    if [ -z "${directory}" ]; then
+        err "output directory is not specified"
+        get_log_usage
+        exit 1
+    fi
+    # get the log directory in the container
+    docker_log_dir="${DATABASE_WORKSPACE}/logs"
+    # copy ${docker_log_dir}/compiler.log and ${docker_log_dir}/server.log to ${directory}
+    docker_compiler_log="${docker_log_dir}/compiler.log"
+    docker_server_log="${docker_log_dir}/server.log"
+    # docker cp
+    docker cp "${GIE_DB_CONTAINER_NAME}:${docker_compiler_log}" "${directory}/compiler.log" || exit 1
+    docker cp "${GIE_DB_CONTAINER_NAME}:${docker_server_log}" "${directory}/server.log" || exit 1
+    info "Successfully copied logs to ${directory}, please check compiler.log and server.log"
+}
+
+# the compiled dynamic libs will be placed at data/${graph_name}/plugins/
+# after compilation, the user needs to write the corresponding yaml, telling the compiler about
+# the input and output of the stored procedure
+function do_compile() {
+    ensure_container_running
+    if [ $# -lt 4 ]; then
+        err "the compile command needs at least 4 args, but got $#"
+        compile_usage
+        exit 1
+    fi
+    compile_only=false
+
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            info "graph_name = ${graph_name}"
+            shift # past argument
+            shift
+            ;;
+        -i | --input)
+            file_path="$2"
+            shift # past argument
+            shift
+            ;;
+        -n | --name)
+            stored_procedure_name="$2"
+            shift
+            shift
+            ;;
+        -d | --description)
+            stored_procedure_desc="$2"
+            shift
+            shift
+            ;;
+        --compile_only)
+            compile_only=true
+            shift # a flag takes no value, so shift only once
+            ;;
+        *)
+            err "unknown option $1"
+            compile_usage
+            exit 1
+            ;;
+        esac
+    done
+
+    # check graph_name
+    if [ -z "${graph_name}" ]; then
+        # let the user confirm that no graph_name is specified; the default graph will be used
+        read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r
+        echo
+        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+            info "Abort compiling stored procedure"
+            compile_usage
+            exit 1
+        fi
+        info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]"
+        graph_name=${DATABASE_DEFAULT_GRAPH_NAME}
+    fi
+    check_graph_name_valid "${graph_name}"
+    # check the graph exists
+    if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] can not be found, please create it first"
+        exit 1
+    fi
+
+    # check file_path
+    check_file_exists "${file_path}"
+    # get the real file_path
+    file_name=$(basename "${file_path}")
+    # use file_name (without suffix) as stored_procedure_name if the latter is not set
+    if [ -z "${stored_procedure_name}" ]; then
+        stored_procedure_name="${file_name%.*}"
+    fi
+    real_file_path=$(realpath "${file_path}")
+    # check it exists
+    if [ ! -f "${real_file_path}" ]; then
+        err "file ${real_file_path} does not exist"
+        exit 1
+    fi
+
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+
+    real_engine_config_file="/tmp/real_engine_config.yaml"
+    # update the default graph name
+    DATABASE_CURRENT_GRAPH_NAME=${graph_name}
+    generate_real_engine_conf "${real_engine_config_file}"
+    # copy to the container
+    docker_engine_config="${DATABASE_WORKSPACE}/conf/engine_config.yaml"
+    docker cp "${real_engine_config_file}" "${GIE_DB_CONTAINER_NAME}:${docker_engine_config}" || exit 1
+
+    docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+    docker_graph_schema="${docker_graph_dir}/graph.yaml"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED} Graph [${graph_name}] does not exist, please create it first.${NC}\" && exit 1)"
+
+    container_output_dir="${DATABASE_WORKSPACE}/data/${graph_name}/plugins"
+    container_input_path="/tmp/${file_name}"
+    # docker cp the file to the container
+    cmd="docker cp ${real_file_path} ${GIE_DB_CONTAINER_NAME}:${container_input_path}"
+    eval ${cmd} || exit 1
+
+    cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c '"
+    cmd=${cmd}" ${DOCKER_DB_GEN_BIN}"
+    cmd=${cmd}" --engine_type=hqps"
+    cmd=${cmd}" --input=${container_input_path}"
+    cmd=${cmd}" --work_dir=/tmp/codegen/"
+    cmd=${cmd}" --ir_conf=${docker_engine_config}"
+    cmd=${cmd}" --graph_schema_path=${docker_graph_schema}"
+    cmd=${cmd}" --gie_home=${DOCKER_DB_GIE_HOME}"
+    cmd=${cmd}" --output_dir=${container_output_dir}"
+    cmd=${cmd}" --procedure_name=${stored_procedure_name}"
+    if [ ! -z "${stored_procedure_desc}" ]; then
+        cmd=${cmd}" --procedure_desc=\"${stored_procedure_desc}\""
+    fi
+    cmd=${cmd}" '"
+
+    echo "Running cmd: ${cmd}"
+    eval ${cmd} || exit 1
+    # check the output exists
+    # the suffix of file_name was already removed above
+    output_file="${HOST_DB_HOME}/data/${graph_name}/plugins/lib${stored_procedure_name}.so"
+
+    if [ ! -f "${output_file}" ]; then
+        err "output file ${output_file} does not exist, compilation failed"
+        exit 1
+    fi
+    info "Successfully generated dynamic lib ${output_file}."
+
+    # unless compile_only is set, add the stored_procedure_name to .enable
+    docker_graph_enable_file="${docker_graph_dir}/plugins/.enable"
+    # copy from the container to the host
+    rm -f /tmp/.enable
+    # if docker_graph_enable_file exists, copy it to the host
+    docker exec "${GIE_DB_CONTAINER_NAME}" test -e "${docker_graph_enable_file}" && (docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable")
+
+    if [ ! -f "/tmp/.enable" ]; then
+        touch "/tmp/.enable"
+    fi
+    # if compile_only equals false, enable the compiled procedure
+    if [ "${compile_only}" = false ]; then
+        echo "${stored_procedure_name}" >> /tmp/.enable
+    fi
+    # copy back
+    docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1
+}
+
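+# (editor's note) Example (illustrative paths): compile a cypher file into a
+# stored procedure without enabling it right away:
+#   gs_interactive procedure compile -g my_graph -i ./get_person_name.cypher --compile_only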
+function do_enable(){
+    while [[ $# -gt 0 ]]; do
+        key="$1"
+        case $key in
+        -g | --graph)
+            graph_name="$2"
+            shift # past argument
+            shift
+            ;;
+        -n | --name)
+            stored_procedure_names="$2"
+            shift
+            shift
+            ;;
+        -c | --config)
+            stored_procedure_names_yaml="$2"
+            shift
+            shift
+            ;;
+        *)
+            err "unknown option $1"
+            enable_proc_usage
+            exit 1
+            ;;
+        esac
+    done
+    # check graph_name is not empty
+    if [ -z "${graph_name}" ]; then
+        # let the user confirm that no graph_name is specified; the default graph will be used
+        read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r
+        echo
+        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+            info "Abort enabling stored procedures"
+            enable_proc_usage
+            exit 1
+        fi
+        info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]"
+        graph_name=${DATABASE_DEFAULT_GRAPH_NAME}
+    fi
+    check_graph_name_valid "${graph_name}"
+    if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+        err "graph [${graph_name}] can not be found, please create it first"
+        exit 1
+    fi
+
+    # --name and --config can not be set at the same time
+    if [ ! -z "${stored_procedure_names}" ] && [ ! -z "${stored_procedure_names_yaml}" ]; then
+        err "--name and --config can not be set at the same time"
+        enable_proc_usage
+        exit 1
+    fi
+    # use stored_procedure_names_yaml if it is set
+    if [ ! -z "${stored_procedure_names_yaml}" ]; then
+        check_file_exists "${stored_procedure_names_yaml}"
+        # read the file and take each line as a stored_procedure_name, joined with ','
+        stored_procedure_names=$(< ${stored_procedure_names_yaml} tr '\n' ',' | sed 's/,$//')
+    fi
+    info "stored_procedure_names = ${stored_procedure_names}"
+    # add the names to the .enable file for graph_name
+    if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+        . ${HOST_DB_ENV_FILE}
+    fi
+    docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+    docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED} Graph ${graph_name} does not exist, please create it first.${NC}\" && exit 1)"
+    docker_graph_plugin_dir="${docker_graph_dir}/plugins"
+    docker_graph_enable_file="${docker_graph_plugin_dir}/.enable"
+    rm -f /tmp/.enable
+    # copy the .enable file to the host and append the stored_procedure_names to it;
+    # if a stored_procedure_name already exists there, do nothing
+    docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || true
+    if [ ! -f "/tmp/.enable" ]; then
+        touch "/tmp/.enable"
+    fi
+    old_line_num=$(wc -l < /tmp/.enable)
+    # split the stored_procedure_names by ',' and append them to the .enable file
+    IFS=',' read -ra stored_procedure_names_array <<< "${stored_procedure_names}"
+    for stored_procedure_name in "${stored_procedure_names_array[@]}"; do
+        # check if the stored_procedure_name already exists in the .enable file
+        if grep -q "${stored_procedure_name}" "/tmp/.enable"; then
+            info "stored_procedure_name ${stored_procedure_name} already exists in the .enable file, skip"
+        else
+            echo "${stored_procedure_name}" >> /tmp/.enable
+        fi
+    done
+    # copy the .enable file back to the container
+    docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1
+    new_line_num=$(wc -l < /tmp/.enable)
+    info "Successfully enabled stored procedures ${stored_procedure_names} for graph [${graph_name}], ${old_line_num} -> ${new_line_num}"
+}
+
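+# (editor's note) Example (illustrative names): enable two procedures, then
+# disable one of them; -n accepts a comma-separated list:
+#   gs_interactive procedure enable -g my_graph -n proc_a,proc_b
+#   gs_interactive procedure disable -g my_graph -n proc_b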
${HOST_DB_ENV_FILE} + fi + # check graph_name not empty + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort disabling stored_procedure" + disable_proc_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] cannot be found, please create it first" + exit 1 + fi + + info "graph_name = ${graph_name}" + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo \"graph ${graph_name} does not exist, please create it first\" && exit 1)" + docker_graph_plugin_dir="${docker_graph_dir}/plugins" + docker_graph_enable_file="${docker_graph_plugin_dir}/.enable" + echo "disable_all = ${disable_all}" + if [ "${disable_all}" = true ]; then + # clear the .enable file + info "disable all stored_procedures for graph [${graph_name}]" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c ": > ${docker_graph_enable_file}" || exit 1 + info "Successfully disabled all stored_procedures for graph [${graph_name}]" + exit 0 + fi + + # use stored_procedure_names_yaml if it is set + if [ ! -z "${stored_procedure_names_yaml}" ]; then + check_file_exists "${stored_procedure_names_yaml}" + # cat the file and get each line as a stored_procedure_name, join them with ',' + stored_procedure_names=$(< ${stored_procedure_names_yaml} tr '\n' ',' | sed 's/,$//') + fi + info "stored_procedure_names = ${stored_procedure_names}" + # remove the names from the .enable file for graph_name + + # copy the .enable file to host, and remove the stored_procedure_names from it + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || exit 1 + old_line_num=$(wc -l < /tmp/.enable) + # split the stored_procedure_names by ',' and remove them from .enable file + IFS=',' read -ra stored_procedure_names_array <<< "${stored_procedure_names}" + for stored_procedure_name in "${stored_procedure_names_array[@]}"; do + sed -i "/^${stored_procedure_name}$/d" /tmp/.enable + done + # copy the .enable file back to container + docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1 + new_line_num=$(wc -l < /tmp/.enable) + info "Successfully disabled stored_procedures ${stored_procedure_names} for graph [${graph_name}], ${old_line_num} -> ${new_line_num}" +} + +function do_show(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + shift + shift + ;; + *) + err "unknown option $1" + show_stored_procedure_usage + exit 1 + ;; + esac + done + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + # check graph_name + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort showing stored_procedures" + show_stored_procedure_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + if [ !
-d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it first" + exit 1 + fi + info "graph_name = ${graph_name}" + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED}Graph [${graph_name}] not exists, please create it first. ${NC}\" && exit 1)" || exit 1 + docker_graph_plugin_dir="${docker_graph_dir}/plugins" + docker_graph_enable_file="${docker_graph_plugin_dir}/.enable" + # check if docker_graph_enable_file exists, if not ,exit + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_enable_file} ] || (echo -e \"${RED}Graph [${graph_name}] has no procedures registered. ${NC}\" && exit 1)" || exit 1 + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || exit 1 + info "Enabled stored_procedures for graph: [${graph_name}]" + # iterate the .enable file and print the stored_procedure_name + while read -r line; do + emph "Procedure: ${line}" + done < /tmp/.enable +} + +function do_database(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + create) + shift + do_create "$@" + exit 0 + ;; + remove) + shift + do_remove "$@" + exit 0 + ;; + import) + shift + do_import "$@" + exit 0 + ;; + list) + shift + do_list "$@" + exit 0 + ;; + *) + err "unknown option $1" + database_usage + exit 1 + ;; + esac + done +} + +function do_service(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + start) + shift + do_start "$@" + exit 0 + ;; + stop) + shift + do_stop "$@" + exit 0 + ;; + restart) + shift + do_restart "$@" + exit 0 + ;; + status) + shift + do_status "$@" + exit 0 + ;; + get_log) + shift + do_log "$@" + exit 0 + ;; + *) + err "unknown option $1" + services_usage + exit 1 + ;; + esac + done +} + +function do_procedure(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + compile) + shift + do_compile "$@" + exit 0 + ;; + enable) + shift + do_enable "$@" + exit 0 + ;; + disable) + shift + do_disable "$@" + exit 0 + ;; + show) + shift + do_show "$@" + exit 1 + ;; + *) + err "unknown option $1" + procedure_usage + exit 1 + ;; + esac + done + procedure_usage +} + +#################### Entry #################### +if [ $# -eq 0 ]; then + usage + exit 1 +fi + +while [[ $# -gt 0 ]]; do + key="$1" + + case $key in + -h | --help) + usage + exit + ;; + init) + shift + info "Start initiating database..." 
+ do_init "$@" + exit 0 + ;; + database) + shift + check_database_initialized + do_database "$@" + exit 0 + ;; + service) + shift + check_database_initialized + do_service "$@" + exit 0 + ;; + procedure) + shift + check_database_initialized + do_procedure "$@" + exit 0 + ;; + destroy) + shift + do_destroy "$@" + exit 0 + ;; + *) # unknown option + err "unknown option $1" + usage + exit 1 + ;; + esac +done + + + + diff --git a/flex/interactive/conf/engine_config.yaml b/flex/interactive/conf/engine_config.yaml new file mode 100644 index 000000000000..0bb4487bda46 --- /dev/null +++ b/flex/interactive/conf/engine_config.yaml @@ -0,0 +1,13 @@ +log_level: INFO # default INFO +default_graph: modern # configure the graph to be loaded while starting the service, if graph name not specified +compute_engine: + shard_num: 1 # the number of shared workers, default 1 +compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterMatchRule + - FilterIntoJoinRule + - NotExistToAntiJoinRule + query_timeout: 20000 # query timeout in milliseconds, default 2000 \ No newline at end of file diff --git a/flex/interactive/conf/interactive.properties b/flex/interactive/conf/interactive.properties deleted file mode 100755 index 9dbf43af1ece..000000000000 --- a/flex/interactive/conf/interactive.properties +++ /dev/null @@ -1,8 +0,0 @@ -engine.type: hiactor -hiactor.hosts: localhost:10000 -graph.store: exp -graph.schema: file:../data/ldbc/graph.json -graph.stored.procedures.uri: file:/tmp -graph.planner: {"isOn":true,"opt":"RBO","rules":["FilterIntoJoinRule", "FilterMatchRule", "NotMatchToAntiJoinRule"]} -gremlin.server.disabled: true -neo4j.bolt.server.port: 7687 \ No newline at end of file diff --git a/flex/interactive/conf/interactive.yaml b/flex/interactive/conf/interactive.yaml index 969b6ca80401..815c4752be24 100755 --- a/flex/interactive/conf/interactive.yaml +++ b/flex/interactive/conf/interactive.yaml @@ -1,23 +1,3 @@ ---- -version: 0.0.1 -directories: - workspace: /home/graphscope/interactive/ - subdirs: - data: data # by default data, relative to ${workspace} - conf: conf # by default conf, relative to ${workspace} - logs: logs # by default logs, relative to ${workspace} -logLevel: INFO # default INFO -default_graph: modern # configure the graph to be loaded while starting the service, if graph name not specified - # may include other configuration items of other engines -compute_engine: - type: hiactor - hosts: - - localhost:10000 # currently only one host can be specified - shared_num: 1 # the number of shared workers, default 1 -compiler: - planner: {"isOn":true,"opt":"RBO","rules":["FilterMatchRule"]} # Confirm这个配置 - endpoint: - default_listen_address: localhost # default localhost - bolt_connector: # for cypher, there may be other connectors, such as bolt_connector, https_connector - enabled: true # default false - port: 7687 +version: v0.0.2 +#volume: +# - {HOST_PATH}:${CONTAINER_PATH} \ No newline at end of file diff --git a/flex/interactive/data/ldbc/graph.json b/flex/interactive/data/ldbc/graph.json deleted file mode 100755 index f16dd1710336..000000000000 --- a/flex/interactive/data/ldbc/graph.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "entities": [ - { - "label": { - "id": 1, - "name": "software" - }, - "columns": [ - { - "key": { - "id": 4, - "name": "id" - }, - "data_type": 1, - "is_primary_key": false - }, - { - "key": { - "id": 0, - "name": "name" - }, - "data_type": 4, - "is_primary_key": false - }, - { - "key": { - "id": 2, - "name": "lang" - }, - "data_type": 4, - "is_primary_key": 
false - } - ] - }, - { - "label": { - "id": 0, - "name": "person" - }, - "columns": [ - { - "key": { - "id": 4, - "name": "id" - }, - "data_type": 1, - "is_primary_key": false - }, - { - "key": { - "id": 0, - "name": "name" - }, - "data_type": 4, - "is_primary_key": false - }, - { - "key": { - "id": 1, - "name": "age" - }, - "data_type": 1, - "is_primary_key": false - } - ] - } - ], - "relations": [ - { - "label": { - "id": 0, - "name": "knows" - }, - "entity_pairs": [ - { - "src": { - "id": 0, - "name": "person" - }, - "dst": { - "id": 0, - "name": "person" - } - } - ], - "columns": [ - { - "key": { - "id": 3, - "name": "weight" - }, - "data_type": 3, - "is_primary_key": false - } - ] - }, - { - "label": { - "id": 1, - "name": "created" - }, - "entity_pairs": [ - { - "src": { - "id": 0, - "name": "person" - }, - "dst": { - "id": 1, - "name": "software" - } - } - ], - "columns": [ - { - "key": { - "id": 3, - "name": "weight" - }, - "data_type": 3, - "is_primary_key": false - } - ] - } - ], - "is_table_id": true, - "is_column_id": false -} \ No newline at end of file diff --git a/flex/interactive/data/ldbc/graph.yaml b/flex/interactive/data/ldbc/graph.yaml deleted file mode 100755 index c37d4731b071..000000000000 --- a/flex/interactive/data/ldbc/graph.yaml +++ /dev/null @@ -1,70 +0,0 @@ -name: modern # then must have a modern dir under ${data} directory -store_type: mutable_csr # v6d, groot, gart -stored_procedures: - directory: plugins # default plugins, relative to ${workspace}/${name} -schema: - vertex_types: - - type_name: person - x_csr_params: - max_vertex_num: 100 - properties: - - property_id: 0 - property_name: id - property_type: - primitive_type: DT_SIGNED_INT64 - - property_id: 1 - property_name: name - property_type: - primitive_type: DT_STRING - - property_id: 2 - property_name: age - property_type: - primitive_type: DT_SIGNED_INT32 - primary_keys: - - id - - type_name: software - x_csr_params: - max_vertex_num: 100 - properties: - - property_id: 0 - property_name: id - property_type: - primitive_type: DT_SIGNED_INT64 - x_csr_params: - - property_id: 1 - property_name: name - property_type: - primitive_type: DT_STRING - - property_id: 2 - property_name: lang - property_type: - primitive_type: DT_STRING - primary_keys: - - id - edge_types: - - type_name: knows - x_csr_params: - incoming_edge_strategy: None - outgoing_edge_strategy: Multiple - vertex_type_pair_relations: - source_vertex: person - destination_vertex: person - relation: MANY_TO_MANY - properties: - - property_id: 0 - property_name: weight - property_type: - primitive_type: DT_DOUBLE - - type_name: created - x_csr_params: - incoming_edge_strategy: None - outgoing_edge_strategy: Single - vertex_type_pair_relations: - source_vertex: person - destination_vertex: software - relation: ONE_TO_MANY - properties: - - property_id: 0 - property_name: weight - property_type: - primitive_type: DT_DOUBLE diff --git a/flex/interactive/docker/interactive-runtime.Dockerfile b/flex/interactive/docker/interactive-runtime.Dockerfile index 1b8ee02de93e..399b67f91d4a 100755 --- a/flex/interactive/docker/interactive-runtime.Dockerfile +++ b/flex/interactive/docker/interactive-runtime.Dockerfile @@ -9,4 +9,5 @@ RUN cd /home/graphscope/ && git clone -b main --single-branch https://github.com cd GraphScope/flex && mkdir build && cd build && cmake .. -DBUILD_DOC=OFF && sudo make -j install # install graphscope GIE -RUN . /home/graphscope/.cargo/env && cd /home/graphscope/GraphScope/interactive_engine/compiler && make build +RUN . 
/home/graphscope/.cargo/env && cd /home/graphscope/GraphScope/interactive_engine && \ + mvn clean install -DskipTests -Drevision=0.0.1-SNAPSHOT -Pexperimental diff --git a/flex/interactive/examples/modern_graph b/flex/interactive/examples/modern_graph deleted file mode 120000 index 8ed59122aab3..000000000000 --- a/flex/interactive/examples/modern_graph +++ /dev/null @@ -1 +0,0 @@ -../../storages/rt_mutable_graph/modern_graph/ \ No newline at end of file diff --git a/flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml b/flex/interactive/examples/modern_graph/bulk_load.yaml similarity index 96% rename from flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml rename to flex/interactive/examples/modern_graph/bulk_load.yaml index 8d9085d81aca..739302554d6e 100644 --- a/flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml +++ b/flex/interactive/examples/modern_graph/bulk_load.yaml @@ -2,7 +2,7 @@ graph: modern loading_config: data_source: scheme: file # file, oss, s3, hdfs; only file is supported now - # location: # specify it or use FLEX_DATA_DIR env. + #location: /home/graphscope/gs_interactive_default_graph/ import_option: init # append, overwrite, only init is supported now format: type: csv diff --git a/flex/interactive/examples/modern_graph/count_vertex_num.cypher b/flex/interactive/examples/modern_graph/count_vertex_num.cypher new file mode 100644 index 000000000000..cca16c40269d --- /dev/null +++ b/flex/interactive/examples/modern_graph/count_vertex_num.cypher @@ -0,0 +1 @@ +MATCH(v:person { id: $personId}) RETURN COUNT(v); \ No newline at end of file diff --git a/flex/interactive/examples/modern_graph/get_person_name.cypher b/flex/interactive/examples/modern_graph/get_person_name.cypher new file mode 100644 index 000000000000..ce4ee61d64ea --- /dev/null +++ b/flex/interactive/examples/modern_graph/get_person_name.cypher @@ -0,0 +1 @@ +MATCH(p : person {id: $personId}) RETURN p.firstName, p.lastName; \ No newline at end of file diff --git a/flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml b/flex/interactive/examples/modern_graph/modern_graph.yaml similarity index 92% rename from flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml rename to flex/interactive/examples/modern_graph/modern_graph.yaml index 7823b3fd7561..7d6308bb96b0 100644 --- a/flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml +++ b/flex/interactive/examples/modern_graph/modern_graph.yaml @@ -1,9 +1,5 @@ name: modern # then must have a modern dir under ${data} directory store_type: mutable_csr # v6d, groot, gart -stored_procedures: - directory: plugins # default plugins, relative to ${workspace}/${name} - enable_lists: - - ldbc_ic1 schema: vertex_types: - type_id: 0 diff --git a/flex/storages/rt_mutable_graph/modern_graph/person.csv b/flex/interactive/examples/modern_graph/person.csv similarity index 100% rename from flex/storages/rt_mutable_graph/modern_graph/person.csv rename to flex/interactive/examples/modern_graph/person.csv diff --git a/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv b/flex/interactive/examples/modern_graph/person_created_software.csv similarity index 100% rename from flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv rename to flex/interactive/examples/modern_graph/person_created_software.csv diff --git a/flex/storages/rt_mutable_graph/modern_graph/person_knows_person.csv b/flex/interactive/examples/modern_graph/person_knows_person.csv similarity index 100% rename from 
flex/storages/rt_mutable_graph/modern_graph/person_knows_person.csv rename to flex/interactive/examples/modern_graph/person_knows_person.csv diff --git a/flex/storages/rt_mutable_graph/modern_graph/software.csv b/flex/interactive/examples/modern_graph/software.csv similarity index 100% rename from flex/storages/rt_mutable_graph/modern_graph/software.csv rename to flex/interactive/examples/modern_graph/software.csv diff --git a/flex/interactive/examples/movies/ACTED_IN.csv b/flex/interactive/examples/movies/ACTED_IN.csv new file mode 100644 index 000000000000..e9494b38ca6d --- /dev/null +++ b/flex/interactive/examples/movies/ACTED_IN.csv @@ -0,0 +1,173 @@ +start|end +1|0 +2|0 +3|0 +4|0 +8|0 +1|9 +2|9 +3|9 +4|9 +1|10 +2|10 +3|10 +4|10 +1|11 +12|11 +13|11 +16|15 +17|15 +18|15 +19|15 +20|15 +21|15 +22|15 +23|15 +24|15 +25|15 +26|15 +28|15 +16|29 +30|29 +31|29 +32|29 +33|29 +34|29 +16|37 +22|37 +38|37 +39|37 +40|37 +41|37 +42|37 +43|37 +44|37 +49|46 +47|46 +40|46 +48|46 +50|46 +20|46 +51|46 +17|52 +53|52 +54|52 +22|52 +60|56 +22|56 +57|56 +58|56 +59|56 +63|62 +64|62 +58|62 +65|62 +71|67 +34|67 +54|67 +68|67 +69|67 +70|67 +71|73 +34|73 +74|73 +75|73 +76|73 +77|73 +71|78 +34|78 +80|78 +82|81 +34|81 +83|81 +84|81 +71|85 +86|85 +12|85 +1|87 +88|87 +89|87 +90|87 +51|92 +93|92 +94|92 +70|92 +60|95 +80|95 +89|95 +98|97 +99|97 +89|97 +1|100 +101|100 +102|100 +103|100 +71|105 +4|105 +106|105 +107|105 +71|111 +112|111 +113|111 +114|111 +4|116 +117|116 +118|116 +119|116 +120|116 +122|121 +123|121 +124|121 +125|121 +126|121 +127|121 +120|121 +127|128 +129|128 +64|128 +120|128 +71|130 +131|130 +132|130 +42|130 +65|130 +133|130 +134|130 +135|130 +138|137 +139|137 +19|137 +140|137 +133|137 +17|141 +142|141 +24|141 +143|141 +71|144 +19|144 +145|144 +146|144 +134|144 +146|147 +53|147 +94|147 +148|147 +71|150 +53|150 +17|152 +142|152 +17|154 +155|154 +1|154 +60|157 +140|157 +71|159 +160|159 +148|159 +71|161 +71|162 +164|162 +165|162 +77|162 +163|162 +146|162 diff --git a/flex/interactive/examples/movies/DIRECTED.csv b/flex/interactive/examples/movies/DIRECTED.csv new file mode 100644 index 000000000000..8063205f7546 --- /dev/null +++ b/flex/interactive/examples/movies/DIRECTED.csv @@ -0,0 +1,45 @@ +start|end +5|0 +6|0 +5|9 +6|9 +5|10 +6|10 +14|11 +27|15 +35|29 +45|37 +27|46 +55|52 +61|56 +66|62 +72|67 +72|73 +79|78 +27|81 +71|85 +91|87 +59|92 +96|95 +99|97 +104|100 +108|105 +5|105 +6|105 +115|111 +25|116 +5|121 +6|121 +25|128 +136|130 +115|137 +142|141 +115|144 +149|147 +151|150 +153|152 +156|154 +158|157 +96|159 +151|161 +166|162 diff --git a/flex/interactive/examples/movies/FOLLOWS.csv b/flex/interactive/examples/movies/FOLLOWS.csv new file mode 100644 index 000000000000..f8bb4bce99a4 --- /dev/null +++ b/flex/interactive/examples/movies/FOLLOWS.csv @@ -0,0 +1,4 @@ +start|end +170|169 +168|169 +167|168 diff --git a/flex/interactive/examples/movies/Movie.csv b/flex/interactive/examples/movies/Movie.csv new file mode 100644 index 000000000000..500cfd6e0aac --- /dev/null +++ b/flex/interactive/examples/movies/Movie.csv @@ -0,0 +1,39 @@ +id|released|tagline|title +0|1999|Welcome to the Real World|The Matrix +9|2003|Free your mind|The Matrix Reloaded +10|2003|Everything that has a beginning has an end|The Matrix Revolutions +11|1997|Evil has its winning ways|The Devil's Advocate +15|1992|In the heart of the nation's capital, in a courthouse of the U.S. 
government, one man will stop at nothing to keep his honor, and one will stop at nothing to find the truth.|A Few Good Men +29|1986|I feel the need, the need for speed.|Top Gun +37|2000|The rest of his life begins now.|Jerry Maguire +46|1986|For some, it's the last real taste of innocence, and the first real taste of life. But for everyone, it's the time that memories are made of.|Stand By Me +52|1997|A comedy from the heart that goes for the throat.|As Good as It Gets +56|1998|After life there is more. The end is just the beginning.|What Dreams May Come +62|1999|First loves last. Forever.|Snow Falling on Cedars +67|1998|At odds in life... in love on-line.|You've Got Mail +73|1993|What if someone you never met, someone you never saw, someone you never knew was the only someone for you?|Sleepless in Seattle +78|1990|A story of love, lava and burning desire.|Joe Versus the Volcano +81|1998|Can two friends sleep together and still love each other in the morning?|When Harry Met Sally +85|1996|In every life there comes a time when that thing you dream becomes that thing you do|That Thing You Do +87|2000|Pain heals, Chicks dig scars... Glory lasts forever|The Replacements +92|2006|Based on the extraordinary true story of one man's fight for freedom|RescueDawn +95|1996|Come as you are|The Birdcage +97|1992|It's a hell of a thing, killing a man|Unforgiven +100|1995|The hottest data on earth. In the coolest head in town|Johnny Mnemonic +105|2012|Everything is connected|Cloud Atlas +111|2006|Break The Codes|The Da Vinci Code +116|2006|Freedom! Forever!|V for Vendetta +121|2008|Speed has no limits|Speed Racer +128|2009|Prepare to enter a secret world of assassins|Ninja Assassin +130|1999|Walk a mile you'll never forget.|The Green Mile +137|2008|400 million people were waiting for the truth.|Frost/Nixon +141|1992|He didn't want law. He wanted justice.|Hoffa +144|1995|Houston, we have a problem.|Apollo 13 +147|1996|Don't Breathe. Don't Look Back.|Twister +150|2000|At the edge of the world, his journey begins.|Cast Away +152|1975|If he's crazy, what does that make you?|One Flew Over the Cuckoo's Nest +154|2003|nan|Something's Gotta Give +157|1999|One robot's 200 year journey to become an ordinary man.|Bicentennial Man +159|2007|A stiff drink. A little mascara. A lot of nerve. Who said they couldn't bring down the Soviet empire.|Charlie Wilson's War +161|2004|This Holiday Season... Believe|The Polar Express +162|1992|Once in a lifetime you get a chance to do something different.|A League of Their Own diff --git a/flex/interactive/examples/movies/PRODUCED.csv b/flex/interactive/examples/movies/PRODUCED.csv new file mode 100644 index 000000000000..5a58e7fd9f51 --- /dev/null +++ b/flex/interactive/examples/movies/PRODUCED.csv @@ -0,0 +1,16 @@ +start|end +7|0 +7|9 +7|10 +45|37 +27|81 +72|81 +110|105 +5|116 +6|116 +7|116 +7|121 +5|128 +6|128 +7|128 +156|154 diff --git a/flex/interactive/examples/movies/Person.csv b/flex/interactive/examples/movies/Person.csv new file mode 100644 index 000000000000..ccec81a744cf --- /dev/null +++ b/flex/interactive/examples/movies/Person.csv @@ -0,0 +1,134 @@ +id|born|name +1|1964|Keanu Reeves +2|1967|Carrie-Anne Moss +3|1961|Laurence Fishburne +4|1960|Hugo Weaving +5|1967|Lilly Wachowski +6|1965|Lana Wachowski +7|1952|Joel Silver +8|1978|Emil Eifrem +12|1975|Charlize Theron +13|1940|Al Pacino +14|1944|Taylor Hackford +16|1962|Tom Cruise +17|1937|Jack Nicholson +18|1962|Demi Moore +19|1958|Kevin Bacon +20|1966|Kiefer Sutherland +21|1971|Noah Wyle +22|1968|Cuba Gooding Jr. 
+23|1957|Kevin Pollak +24|1943|J.T. Walsh +25|1967|James Marshall +26|1948|Christopher Guest +27|1947|Rob Reiner +28|1961|Aaron Sorkin +30|1957|Kelly McGillis +31|1959|Val Kilmer +32|1962|Anthony Edwards +33|1933|Tom Skerritt +34|1961|Meg Ryan +35|1944|Tony Scott +36|1941|Jim Cash +38|1969|Renee Zellweger +39|1962|Kelly Preston +40|1974|Jerry O'Connell +41|1970|Jay Mohr +42|1961|Bonnie Hunt +43|1971|Regina King +44|1996|Jonathan Lipnicki +45|1957|Cameron Crowe +47|1970|River Phoenix +48|1971|Corey Feldman +49|1972|Wil Wheaton +50|1966|John Cusack +51|1942|Marshall Bell +53|1963|Helen Hunt +54|1963|Greg Kinnear +55|1940|James L. Brooks +57|1960|Annabella Sciorra +58|1929|Max von Sydow +59|1942|Werner Herzog +60|1951|Robin Williams +61|1956|Vincent Ward +63|1970|Ethan Hawke +64|1971|Rick Yune +65|1940|James Cromwell +66|1953|Scott Hicks +68|1968|Parker Posey +69|1973|Dave Chappelle +70|1967|Steve Zahn +71|1956|Tom Hanks +72|1941|Nora Ephron +74|1956|Rita Wilson +75|1953|Bill Pullman +76|1949|Victor Garber +77|1962|Rosie O'Donnell +79|1950|John Patrick Stanley +80|1956|Nathan Lane +82|1948|Billy Crystal +83|1956|Carrie Fisher +84|1949|Bruno Kirby +86|1977|Liv Tyler +88|1970|Brooke Langton +89|1930|Gene Hackman +90|1968|Orlando Jones +91|1950|Howard Deutch +93|1974|Christian Bale +94|1954|Zach Grenier +96|1931|Mike Nichols +98|1930|Richard Harris +99|1930|Clint Eastwood +101|1947|Takeshi Kitano +102|1968|Dina Meyer +103|1958|Ice-T +104|1953|Robert Longo +106|1966|Halle Berry +107|1949|Jim Broadbent +108|1965|Tom Tykwer +109|1969|David Mitchell +110|1961|Stefan Arndt +112|1939|Ian McKellen +113|1976|Audrey Tautou +114|1971|Paul Bettany +115|1954|Ron Howard +117|1981|Natalie Portman +118|1946|Stephen Rea +119|1940|John Hurt +120|1967|Ben Miles +122|1985|Emile Hirsch +123|1960|John Goodman +124|1946|Susan Sarandon +125|1966|Matthew Fox +126|1980|Christina Ricci +127|1982|Rain +129|2003|Naomie Harris +131|1957|Michael Clarke Duncan +132|1953|David Morse +133|1968|Sam Rockwell +134|1955|Gary Sinise +135|1959|Patricia Clarkson +136|1959|Frank Darabont +138|1938|Frank Langella +139|1969|Michael Sheen +140|1960|Oliver Platt +142|1944|Danny DeVito +143|1965|John C. 
Reilly +145|1950|Ed Harris +146|1955|Bill Paxton +148|1967|Philip Seymour Hoffman +149|1943|Jan de Bont +151|1951|Robert Zemeckis +153|1932|Milos Forman +155|1946|Diane Keaton +156|1949|Nancy Meyers +158|1958|Chris Columbus +160|1967|Julia Roberts +163|1954|Madonna +164|1956|Geena Davis +165|1963|Lori Petty +166|1943|Penny Marshall +167|2003|Paul Blythe +168|2003|Angela Scope +169|2003|Jessica Thompson +170|2003|James Thompson diff --git a/flex/interactive/examples/movies/REVIEWED.csv b/flex/interactive/examples/movies/REVIEWED.csv new file mode 100644 index 000000000000..ab81554f3d8e --- /dev/null +++ b/flex/interactive/examples/movies/REVIEWED.csv @@ -0,0 +1,10 @@ +start|end|review|rating +169|105|yes|95 +169|87|bravo|65 +170|87|good|100 +168|87|bad|62 +169|97|cool|85 +169|95|poor|45 +169|111|bad|68 +170|111|bad|65 +169|37|bravo|92 diff --git a/flex/interactive/examples/movies/WROTE.csv b/flex/interactive/examples/movies/WROTE.csv new file mode 100644 index 000000000000..794dde4cb6e2 --- /dev/null +++ b/flex/interactive/examples/movies/WROTE.csv @@ -0,0 +1,11 @@ +start|end +28|15 +36|29 +45|37 +72|81 +109|105 +5|116 +6|116 +5|121 +6|121 +156|154 diff --git a/flex/interactive/examples/movies/graph.yaml b/flex/interactive/examples/movies/graph.yaml new file mode 100644 index 000000000000..c90fa0d7fc5a --- /dev/null +++ b/flex/interactive/examples/movies/graph.yaml @@ -0,0 +1,67 @@ +name: movies +schema: + vertex_types: + - type_name: Movie + properties: + - property_name: id + property_type: + primitive_type: DT_SIGNED_INT64 + - property_name: release + property_type: + primitive_type: DT_SIGNED_INT32 + - property_name: tagline + property_type: + primitive_type: DT_STRING + - property_name: title + property_type: + primitive_type: DT_STRING + primary_keys: + - id + - type_name: Person + properties: + - property_name: id + property_type: + primitive_type: DT_SIGNED_INT64 + - property_name: born + property_type: + primitive_type: DT_SIGNED_INT32 + - property_name: name + property_type: + primitive_type: DT_STRING + primary_keys: + - id + edge_types: + - type_name: ACTED_IN + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Movie + relation: MANY_TO_MANY + - type_name: DIRECTED + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Movie + relation: MANY_TO_MANY + - type_name: REVIEW + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Movie + relation: MANY_TO_MANY + properties: + - property_name: rating + property_type: + primitive_type: DT_SIGNED_INT32 + - type_name: FOLLOWS + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Person + relation: MANY_TO_MANY + - type_name: WROTE + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Movie + relation: MANY_TO_MANY + - type_name: PRODUCED + vertex_type_pair_relations: + - source_vertex: Person + destination_vertex: Movie + relation: MANY_TO_MANY diff --git a/flex/interactive/examples/movies/import.yaml b/flex/interactive/examples/movies/import.yaml new file mode 100644 index 000000000000..778a1d9c1e1e --- /dev/null +++ b/flex/interactive/examples/movies/import.yaml @@ -0,0 +1,115 @@ +graph: movies +loading_config: + data_source: + scheme: file # file, oss, s3, hdfs; only file is supported now + location: /home/graphscope/movies/ + import_option: init # append, overwrite, only init is supported now + format: + type: csv + metadata: + delimiter: "|" # other loading configuration places here + header_row: true # whether to 
use the first row as the header + quoting: false + quote_char: '"' + double_quote: true + escape_char: '\' + escaping: false + block_size: 4MB + batch_reader: true +vertex_mappings: + - type_name: Person # must align with the schema + inputs: + - Person.csv + - type_name: Movie + inputs: + - Movie.csv +edge_mappings: + - type_triplet: + edge: ACTED_IN + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - ACTED_IN.csv + - type_triplet: + edge: DIRECTED + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - DIRECTED.csv + - type_triplet: + edge: FOLLOWS + source_vertex: Person + destination_vertex: Person + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - FOLLOWS.csv + - type_triplet: + edge: PRODUCED + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - PRODUCED.csv + - type_triplet: + edge: REVIEW + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + column_mappings: + - column: + index: 3 + name: rating + property: rating + inputs: + - REVIEWED.csv + - type_triplet: + edge: WROTE + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - WROTE.csv diff --git a/flex/storages/rt_mutable_graph/loading_config.cc b/flex/storages/rt_mutable_graph/loading_config.cc index 39e99ecb13e0..cfd8336a4dd5 100644 --- a/flex/storages/rt_mutable_graph/loading_config.cc +++ b/flex/storages/rt_mutable_graph/loading_config.cc @@ -62,11 +62,11 @@ static bool fetch_src_dst_column_mapping(const Schema& schema, YAML::Node node, return false; } } - + return true; } else { - LOG(ERROR) << "No primary key column mapping for [" << key << "]"; + LOG(WARNING) << "No primary key column mapping for [" << key << "]"; + return false; } - return true; } // Function to parse memory size represented as a string @@ -124,7 +124,10 @@ static bool parse_column_mappings( std::string property_name; // property name is optional. 
if (!get_scalar(node[i], "property", property_name)) { - LOG(ERROR) << "Expect property name for column mapping"; + LOG(ERROR) << "Expect property name when parsing column " + "mapping for label: " + << label_name << ", column_id: " << column_id + << ", column_name: " << column_name; return false; } if (!condition(label_name, property_name)) { @@ -301,17 +304,18 @@ static bool parse_edge_files( if (!fetch_src_dst_column_mapping(schema, node, src_label_id, "source_vertex_mappings", src_columns)) { - LOG(ERROR) << "Field [source_vertex_mappings] is not set for edge [" - << src_label << "->[" << edge_label << "]->" << dst_label - << "]"; - return false; + LOG(WARNING) << "Field [source_vertex_mappings] is not set for edge [" + << src_label << "->[" << edge_label << "]->" << dst_label + << "], using default choice: column_id 0"; + src_columns.push_back(0); } if (!fetch_src_dst_column_mapping(schema, node, dst_label_id, "destination_vertex_mappings", dst_columns)) { - LOG(ERROR) << "Field [destination_vertex_mappings] is not set for edge[" - << src_label << "->[" << edge_label << "]->" << dst_label; - return false; + LOG(WARNING) << "Field [destination_vertex_mappings] is not set for edge [" + << src_label << "->[" << edge_label << "]->" << dst_label + << "], using default choice: column_id 1"; + dst_columns.push_back(1); } VLOG(10) << "src: " << src_label << ", dst: " << dst_label diff --git a/flex/storages/rt_mutable_graph/schema.cc b/flex/storages/rt_mutable_graph/schema.cc index 95e5e94fabd7..a552020e815a 100644 --- a/flex/storages/rt_mutable_graph/schema.cc +++ b/flex/storages/rt_mutable_graph/schema.cc @@ -705,7 +705,8 @@ static bool parse_schema_config_file(const std::string& path, Schema& schema) { return false; } if (!expect_config(graph_node, "store_type", std::string("mutable_csr"))) { - return false; + LOG(WARNING) << "store_type is not set properly, use default value: " + << "mutable_csr"; } auto schema_node = graph_node["schema"]; @@ -723,21 +724,60 @@ static bool parse_schema_config_file(const std::string& path, Schema& schema) { return false; } } + // get the directory of path + auto parent_dir = std::filesystem::path(path).parent_path().string(); if (graph_node["stored_procedures"]) { auto stored_procedure_node = graph_node["stored_procedures"]; auto directory = stored_procedure_node["directory"].as<std::string>(); // check is directory if (!std::filesystem::exists(directory)) { - LOG(WARNING) << "plugin directory - " << directory << " not found..."; + LOG(ERROR) << "plugin directory - " << directory + << " not found, try with parent dir:" << parent_dir; + directory = parent_dir + "/" + directory; + if (!std::filesystem::exists(directory)) { + LOG(ERROR) << "plugin directory - " << directory << " not found..."; + return true; + } } + schema.SetPluginDir(directory); std::vector<std::string> files_got; if (!get_sequence(stored_procedure_node, "enable_lists", files_got)) { LOG(ERROR) << "stored_procedures is not set properly"; + return true; } + std::vector<std::string> all_procedure_yamls = get_yaml_files(directory); + std::vector<std::string> all_procedure_names; + { + // get all procedure names + for (auto& f : all_procedure_yamls) { + YAML::Node procedure_node = YAML::LoadFile(f); + if (!procedure_node || !procedure_node.IsMap()) { + LOG(ERROR) << "procedure is not set properly"; + return false; + } + std::string procedure_name; + if (!get_scalar(procedure_node, "name", procedure_name)) { + LOG(ERROR) << "name is not set properly for " << f; + return false; + } + all_procedure_names.push_back(procedure_name);
+ } + } + for (auto& f : files_got) { - if (!std::filesystem::exists(f)) { - LOG(ERROR) << "plugin - " << f << " file not found..."; + auto real_file = directory + "/" + f; + if (!std::filesystem::exists(real_file)) { + LOG(ERROR) << "plugin - " << real_file << " file not found..."; + // it seems that f is not the filename, but the plugin name, try to find + // the plugin in the directory + if (std::find(all_procedure_names.begin(), all_procedure_names.end(), + f) == all_procedure_names.end()) { + LOG(ERROR) << "plugin - " << f << " not found..."; + } else { + VLOG(1) << "plugin - " << f << " found..."; + schema.EmplacePlugin(f); + } } else { schema.EmplacePlugin(std::filesystem::canonical(f)); } @@ -757,6 +797,10 @@ void Schema::EmplacePlugin(const std::string& plugin) { plugin_list_.emplace_back(plugin); } +void Schema::SetPluginDir(const std::string& dir) { plugin_dir_ = dir; } + +std::string Schema::GetPluginDir() const { return plugin_dir_; } + // check whether prop in vprop_names, or is the primary key bool Schema::vertex_has_property(const std::string& label, const std::string& prop) const { diff --git a/flex/storages/rt_mutable_graph/schema.h b/flex/storages/rt_mutable_graph/schema.h index c0a017088d79..81e6bff50ce7 100644 --- a/flex/storages/rt_mutable_graph/schema.h +++ b/flex/storages/rt_mutable_graph/schema.h @@ -148,6 +148,10 @@ class Schema { void EmplacePlugin(const std::string& plugin_name); + void SetPluginDir(const std::string& plugin_dir); + + std::string GetPluginDir() const; + private: label_t vertex_label_to_index(const std::string& label); @@ -169,6 +173,7 @@ std::map ie_strategy_; std::vector max_vnum_; std::vector<std::string> plugin_list_; + std::string plugin_dir_; }; } // namespace gs diff --git a/flex/storages/rt_mutable_graph/types.h b/flex/storages/rt_mutable_graph/types.h index 40c68b7d74a2..2b73eeaf83f4 100644 --- a/flex/storages/rt_mutable_graph/types.h +++ b/flex/storages/rt_mutable_graph/types.h @@ -35,7 +35,7 @@ static constexpr const char* DT_SIGNED_INT32 = "DT_SIGNED_INT32"; static constexpr const char* DT_SIGNED_INT64 = "DT_SIGNED_INT64"; static constexpr const char* DT_DOUBLE = "DT_DOUBLE"; static constexpr const char* DT_STRING = "DT_STRING"; -static constexpr const char* DT_DATE = "DT_DATE"; +static constexpr const char* DT_DATE = "DT_DATE32"; } // namespace gs diff --git a/flex/tests/hqps/hqps_cypher_test.sh b/flex/tests/hqps/hqps_cypher_test.sh index da5ce441b545..1cb4837dfccf 100644 --- a/flex/tests/hqps/hqps_cypher_test.sh +++ b/flex/tests/hqps/hqps_cypher_test.sh @@ -19,24 +19,29 @@ SERVER_BIN=${FLEX_HOME}/build/bin/sync_server GIE_HOME=${FLEX_HOME}/../interactive_engine/ # -if [ $# -lt 1 ]; then - echo "only receives: $# args, need 1" - echo "Usage: $0 <GS_TEST_DIR>" +if [ $# -lt 2 ]; then + echo "only receives: $# args, need 2" + echo "Usage: $0 <GS_TEST_DIR> <INTERACTIVE_WORKSPACE>" exit 1 fi GS_TEST_DIR=$1 +INTERACTIVE_WORKSPACE=$2 if [ ! -d ${GS_TEST_DIR} ]; then echo "GS_TEST_DIR: ${GS_TEST_DIR} not exists" exit 1 fi +if [ !
-d ${INTERACTIVE_WORKSPACE} ]; then + echo "INTERACTIVE_WORKSPACE: ${INTERACTIVE_WORKSPACE} does not exist" + exit 1 +fi -GRAPH_CONFIG_PATH=${FLEX_HOME}/interactive/conf/interactive.yaml -GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml +ENGINE_CONFIG_PATH=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml +ORI_GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml +GRAPH_SCHEMA_YAML=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml GRAPH_BULK_LOAD_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_bulk_load.yaml -COMPILER_GRAPH_SCHEMA=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json +COMPILER_GRAPH_SCHEMA=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml GRAPH_CSR_DATA_DIR=${HOME}/csr-data-dir/ -HQPS_IR_CONF=/tmp/hqps.ir.properties # check if GRAPH_SCHEMA_YAML exists if [ ! -f ${GRAPH_SCHEMA_YAML} ]; then echo "GRAPH_SCHEMA_YAML: ${GRAPH_SCHEMA_YAML} not found" @@ -79,21 +84,6 @@ kill_service(){ # kill service when exit trap kill_service EXIT -create_ir_conf(){ - rm ${HQPS_IR_CONF} || true - echo "engine.type: hiactor" >> ${HQPS_IR_CONF} - echo "hiactor.hosts: localhost:10000" >> ${HQPS_IR_CONF} - echo "graph.store: exp" >> ${HQPS_IR_CONF} - echo "graph.schema: ${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" >> ${HQPS_IR_CONF} - echo "graph.planner.is.on: true" >> ${HQPS_IR_CONF} - echo "graph.planner.opt: RBO" >> ${HQPS_IR_CONF} - echo "graph.planner.rules: FilterMatchRule,NotMatchToAntiJoinRule" >> ${HQPS_IR_CONF} - echo "gremlin.server.disabled: true" >> ${HQPS_IR_CONF} - echo "neo4j.bolt.server.port: 7687" >> ${HQPS_IR_CONF} - - echo "Finish generate HQPS_IR_CONF" - cat ${HQPS_IR_CONF} -} # start engine service and load ldbc graph start_engine_service(){ @@ -105,9 +95,9 @@ start_engine_service(){ # export FLEX_DATA_DIR export FLEX_DATA_DIR=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ - cmd="${SERVER_BIN} -c ${GRAPH_CONFIG_PATH} -g ${GRAPH_SCHEMA_YAML} " + cmd="${SERVER_BIN} -c ${ENGINE_CONFIG_PATH} -g ${GRAPH_SCHEMA_YAML} " cmd="${cmd} --data-path ${GRAPH_CSR_DATA_DIR} -l ${GRAPH_BULK_LOAD_YAML} " - cmd="${cmd} -i ${HQPS_IR_CONF} -z ${COMPILER_GRAPH_SCHEMA} --gie-home ${GIE_HOME}" + cmd="${cmd} --gie-home ${GIE_HOME}" echo "Start engine service with command: ${cmd}" ${cmd} & @@ -122,10 +112,12 @@ start_compiler_service(){ echo "try to start compiler service" pushd ${GIE_HOME}/compiler - cmd="make run graph.schema:=${COMPILER_GRAPH_SCHEMA} config.path=${HQPS_IR_CONF}" + cmd="make run graph.schema=${COMPILER_GRAPH_SCHEMA} config.path=${ENGINE_CONFIG_PATH}" echo "Start compiler service with command: ${cmd}" ${cmd} & sleep 5 + # check if Graph Server is running; if not, exit + ps -ef | grep "com.alibaba.graphscope.GraphServer" | grep -v grep || exit 1 info "Start compiler service success" popd } @@ -151,7 +143,6 @@ run_simple_test(){ } kill_service -create_ir_conf start_engine_service start_compiler_service run_ldbc_test diff --git a/flex/utils/property/types.h b/flex/utils/property/types.h index 629e2a79a0f0..56b178297756 100644 --- a/flex/utils/property/types.h +++ b/flex/utils/property/types.h @@ -153,6 +153,28 @@ struct Any { return AnyConverter<T>::to_any(value); } + bool operator==(const Any& other) const { + if (type == other.type) { + if (type == PropertyType::kInt32) { + return value.i == other.value.i; + } else if (type == PropertyType::kInt64) { + return value.l == other.value.l; + } else if (type == PropertyType::kDate) { + return value.d.milli_second ==
other.value.d.milli_second; + } else if (type == PropertyType::kString) { + return value.s == other.value.s; + } else if (type == PropertyType::kEmpty) { + return true; + } else if (type == PropertyType::kDouble) { + return value.db == other.value.db; + } else { + return false; + } + } else { + return false; + } + } + PropertyType type; AnyValue value; }; diff --git a/flex/utils/yaml_utils.cc b/flex/utils/yaml_utils.cc new file mode 100644 index 000000000000..2413e3b07fab --- /dev/null +++ b/flex/utils/yaml_utils.cc @@ -0,0 +1,32 @@ + +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "flex/utils/yaml_utils.h" +namespace gs { +std::vector<std::string> get_yaml_files(const std::string& plugin_dir) { + std::filesystem::path dir_path = plugin_dir; + std::vector<std::string> res_yaml_files; + + for (auto& entry : std::filesystem::directory_iterator(dir_path)) { + if (entry.is_regular_file() && ((entry.path().extension() == ".yaml") || + (entry.path().extension() == ".yml"))) { + res_yaml_files.emplace_back(entry.path()); + } + } + return res_yaml_files; +} + +} // namespace gs diff --git a/flex/utils/yaml_utils.h b/flex/utils/yaml_utils.h index ea0f37d8b7aa..fc4572fa1c40 100644 --- a/flex/utils/yaml_utils.h +++ b/flex/utils/yaml_utils.h @@ -21,7 +21,12 @@ #include #include +#include "glog/logging.h" + namespace gs { + +std::vector<std::string> get_yaml_files(const std::string& plugin_dir); + namespace config_parsing { template <typename T> bool get_scalar(YAML::Node node, const std::string& key, T& value) { diff --git a/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java b/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java index b9994a984a92..bdcedca48c1d 100644 --- a/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java +++ b/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java @@ -38,7 +38,7 @@ public static IrSchemaParser getInstance() { private IrSchemaParser() {} - public String parse(GraphSchema graphSchema) { + public String parse(GraphSchema graphSchema, boolean isColumnId) { List<GraphVertex> vertices = graphSchema.getVertexList(); List<GraphEdge> edges = graphSchema.getEdgeList(); List entities = new ArrayList(); @@ -49,7 +49,7 @@ public String parse(GraphSchema graphSchema) { schemaMap.put("entities", entities); schemaMap.put("relations", relations); schemaMap.put("is_table_id", true); - schemaMap.put("is_column_id", true); + schemaMap.put("is_column_id", isColumnId); return JSON.toJson(schemaMap); } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java index 267113925e02..badc9894fca2 100644 ---
a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java @@ -20,11 +20,14 @@ import com.google.common.collect.Maps; import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.InputStream; import java.util.Map; public class GraphStoredProcedures implements StoredProcedures { + private static Logger logger = LoggerFactory.getLogger(GraphStoredProcedures.class); private final Map<String, StoredProcedureMeta> storedProcedureMetaMap; public GraphStoredProcedures(MetaDataReader reader) throws Exception { @@ -32,6 +35,7 @@ public GraphStoredProcedures(MetaDataReader reader) throws Exception { for (InputStream inputStream : reader.getStoredProcedures()) { StoredProcedureMeta createdMeta = StoredProcedureMeta.Deserializer.perform(inputStream); this.storedProcedureMetaMap.put(createdMeta.getName(), createdMeta); + logger.debug("Got stored procedure: {} from reader", createdMeta.getName()); inputStream.close(); } } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java index 09ffb4b5eede..bde5b87175f5 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java @@ -57,8 +57,13 @@ public List getStoredProcedures() throws IOException { GraphConfig.GRAPH_STORED_PROCEDURES_ENABLE_LISTS.get(configs)); List<InputStream> procedureInputs = Lists.newArrayList(); if (enableProcedureList.isEmpty()) { + logger.info("Load all procedures in {}", procedurePath); for (File file : procedureDir.listFiles()) { - procedureInputs.add(new FileInputStream(file)); + // if file is .yaml or .yml file + logger.info("Found procedure config {}", file.getName()); + if (file.getName().endsWith(".yaml") || file.getName().endsWith(".yml")) { + procedureInputs.add(new FileInputStream(file)); + } } } else { Map<String, InputStream> procedureInputMap = @@ -81,8 +86,12 @@ private Map getProcedureNameWithInputStream(File procedureD throws IOException { Map<String, InputStream> procedureInputMap = Maps.newHashMap(); for (File file : procedureDir.listFiles()) { + if (!file.getName().endsWith(".yaml") && !file.getName().endsWith(".yml")) { + continue; + } String procedureName = getProcedureName(file); procedureInputMap.put(procedureName, new FileInputStream(file)); + logger.debug("load procedure {}", procedureName); } return procedureInputMap; } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java index 9e5229b7a44c..bdec124ad9fe 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java @@ -36,6 +36,7 @@ public class IrGraphSchema implements GraphSchema { private final boolean isColumnId; public IrGraphSchema(MetaDataReader dataReader) throws Exception { + this.isColumnId = false; SchemaInputStream schemaInputStream = dataReader.getGraphSchema(); String content = new
String( @@ -44,19 +45,19 @@ public IrGraphSchema(MetaDataReader dataReader) throws Exception { switch (schemaInputStream.getFormatType()) { case YAML: this.graphSchema = Utils.buildSchemaFromYaml(content); - this.schemeJson = IrSchemaParser.getInstance().parse(this.graphSchema); + this.schemeJson = + IrSchemaParser.getInstance().parse(this.graphSchema, this.isColumnId); break; case JSON: default: this.graphSchema = Utils.buildSchemaFromJson(content); this.schemeJson = content; } - this.isColumnId = false; } public IrGraphSchema(GraphSchema graphSchema, boolean isColumnId) { this.graphSchema = graphSchema; - this.schemeJson = IrSchemaParser.getInstance().parse(graphSchema); + this.schemeJson = IrSchemaParser.getInstance().parse(graphSchema, isColumnId); this.isColumnId = isColumnId; } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java index 58990d564689..264d66742a03 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java @@ -86,17 +86,19 @@ public static final void builderGraphElementFromYaml( "type_id not exist in yaml config"); List<GraphProperty> propertyList = Lists.newArrayList(); List propertyNodes = (List) elementMap.get("properties"); - for (Object property : propertyNodes) { - if (property instanceof Map) { - Map propertyMap = (Map) property; - String propertyName = (String) propertyMap.get("property_name"); - int propertyId = (int) propertyMap.get("property_id"); - propNameToIdMap.put(propertyName, propertyId); - propertyList.add( - new DefaultGraphProperty( - propertyId, - propertyName, - toDataType(propertyMap.get("property_type")))); + if (propertyNodes != null) { + for (Object property : propertyNodes) { + if (property instanceof Map) { + Map propertyMap = (Map) property; + String propertyName = (String) propertyMap.get("property_name"); + int propertyId = (int) propertyMap.get("property_id"); + propNameToIdMap.put(propertyName, propertyId); + propertyList.add( + new DefaultGraphProperty( + propertyId, + propertyName, + toDataType(propertyMap.get("property_type")))); + } } } List primaryKeyNodes = (List) elementMap.get("primary_keys"); @@ -149,6 +151,8 @@ public static DataType toDataType(Object type) { return DataType.DOUBLE; case "DT_STRING": return DataType.STRING; + case "DT_DATE32": + return DataType.DATE; default: throw new UnsupportedOperationException( "unsupported primitive type: " + value);
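
---

Usage note (not part of the patch): a minimal sketch of how the new gs_interactive CLI composes end to end, pieced together from the subcommand dispatchers and option parsers in the script above. The graph and procedure names are placeholders: `modern` is the default graph configured in engine_config.yaml, and `count_vertex_num` matches the example cypher file added under flex/interactive/examples/modern_graph.

    # start the service, then manage stored procedures for the default graph
    bin/gs_interactive service start
    bin/gs_interactive procedure enable -g modern -n count_vertex_num
    bin/gs_interactive procedure show -g modern
    bin/gs_interactive procedure disable -g modern -n count_vertex_num
    bin/gs_interactive service stop

Each `procedure` subcommand round-trips the graph's plugins/.enable file between the host and the container, as the do_enable/do_disable/do_show functions above show; `-c/--config` may be passed instead of `-n/--name` to read procedure names line by line from a YAML file.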