diff --git a/.github/workflows/flex.yml b/.github/workflows/flex.yml index 77267b8e6463..d454d2713416 100644 --- a/.github/workflows/flex.yml +++ b/.github/workflows/flex.yml @@ -54,8 +54,8 @@ jobs: cd flex/engines/graph_db/grin mkdir build && cd build cmake .. && sudo make -j$(nproc) - export FLEX_DATA_DIR=../../../../storages/rt_mutable_graph/modern_graph/ - ./run_grin_test + export FLEX_DATA_DIR=../../../../interactive/examples/modern_graph/ + ./run_grin_test 'flex://schema_file=../../../../interactive/examples/modern_graph/modern_graph.yaml&bulk_load_file=../../../../interactive/examples/modern_graph/bulk_load.yaml' - name: Test Graph Loading on modern graph env: diff --git a/.github/workflows/hqps-db-ci.yml b/.github/workflows/hqps-db-ci.yml index 56f2a0a5fb92..87578a0d5200 100644 --- a/.github/workflows/hqps-db-ci.yml +++ b/.github/workflows/hqps-db-ci.yml @@ -81,12 +81,16 @@ jobs: cd ${GIE_HOME}/compiler make build - - name: Prepare dataset + - name: Prepare dataset and workspace env: GS_TEST_DIR: ${{ github.workspace }}/gstest + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | # download dataset - git clone -b master --single-branch --depth=1 https://github.com/GraphScope/gstest.git ${GS_TEST_DIR} + git clone -b master --single-branch --depth=1 https://github.com/zhanglei1949/gstest.git ${GS_TEST_DIR} + mkdir -p ${INTERACTIVE_WORKSPACE}/data/ldbc + GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml + cp ${GRAPH_SCHEMA_YAML} ${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml - name: Sample Query test env: @@ -102,25 +106,16 @@ jobs: env: GS_TEST_DIR: ${{ github.workspace }}/gstest HOME : /home/graphscope/ + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | GIE_HOME=${GITHUB_WORKSPACE}/interactive_engine - - # create tmp ir.compiler.properties - touch /tmp/ir.compiler.properties - echo "engine.type: hiactor" >> /tmp/ir.compiler.properties - echo "graph.schema: ${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" >> /tmp/ir.compiler.properties - echo "graph.store: exp" >> /tmp/ir.compiler.properties - echo "graph.planner.is.on: true" >> /tmp/ir.compiler.properties - echo "graph.planner.opt: RBO" >> /tmp/ir.compiler.properties - echo "graph.planner.rules: FilterMatchRule,NotMatchToAntiJoinRule" >> /tmp/ir.compiler.properties - cd ${GITHUB_WORKSPACE}/flex/bin for i in 1 2 3 4 5 6 7 8 9 10 11 12; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/ic${i}_adhoc.cypher -w=/tmp/codgen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=/tmp/ir.compiler.properties " - cmd=${cmd}" --graph_schema_path=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" cmd=${cmd}" --gie_home=${GIE_HOME}" echo $cmd eval ${cmd} @@ -129,8 +124,8 @@ jobs: for i in 1 2 3 4 5 6 7 8 9; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codgen/" - cmd=${cmd}" -o=/tmp/plugin --ir_conf=/tmp/ir.compiler.properties " - cmd=${cmd}" --graph_schema_path=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" + cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " + cmd=${cmd}" --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml" cmd=${cmd}" --gie_home=${GIE_HOME}" echo $cmd eval ${cmd} @@ -140,9 +135,10 @@ jobs: env: GS_TEST_DIR: ${{ github.workspace 
}}/gstest HOME : /home/graphscope/ + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | cd ${GITHUB_WORKSPACE}/flex/tests/hqps/ export FLEX_DATA_DIR=${GS_TEST_DIR}/flex/ldbc-sf01-long-date export ENGINE_TYPE=hiactor - bash hqps_cypher_test.sh ${GS_TEST_DIR} + bash hqps_cypher_test.sh ${GS_TEST_DIR} ${INTERACTIVE_WORKSPACE} diff --git a/.gitignore b/.gitignore index 50f7e42c339a..477bf177b5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -90,8 +90,8 @@ core.* # Flex related flex/docs/ -flex/interactive/data/*/indices/ -flex/interactive/data/*/plugins/ +flex/interactive/data/* flex/interactive/logs/* flex/interactive/examples/sf0.1-raw/ -flex/interactive/.running \ No newline at end of file +flex/interactive/.running +flex/interactive/.env \ No newline at end of file diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh index 354112fe9499..21626f302538 100755 --- a/flex/bin/load_plan_and_gen.sh +++ b/flex/bin/load_plan_and_gen.sh @@ -68,9 +68,11 @@ fi #fi cypher_to_plan() { - if [ $# -ne 7 ]; then - echo "Usage: $0 " - echo " , but receive: "$# + if [ $# -ne 9 ]; then + echo "Usage: cypher_to_plan " + echo " " + echo " " + echo " but receive: "$# exit 1 fi query_name=$1 @@ -81,6 +83,10 @@ cypher_to_plan() { graph_schema_path=$6 GIE_HOME=$7 + # get procedure_name and procedure_description + procedure_name=$8 + procedure_description=$9 + # find java executable echo "IR compiler properties = ${ir_compiler_properties}" #check file exists @@ -122,8 +128,8 @@ cypher_to_plan() { exit 1 fi # add extrac_key_value_config - extra_config="name:${query_name}" - extra_config="${extra_config},description:Autogenerated stored procedure configuration yaml for ${query_name}" + extra_config="name:${procedure_name}" + extra_config="${extra_config},description:${procedure_description}" cmd="java -cp ${GIE_HOME}/compiler/target/libs/*:${compiler_jar}" cmd="${cmd} -Dgraph.schema=${graph_schema_path}" @@ -150,8 +156,10 @@ cypher_to_plan() { compile_hqps_so() { #check input params size eq 2 or 3 - if [ $# -ne 5 ] && [ $# -ne 6 ]; then - echo "Usage: $0 [output_dir]" + if [ $# -gt 8 ] || [ $# -lt 5 ]; then + echo "Usage: $0 " + echo " " + echo " [output_dir] [stored_procedure_name] [stored_procedure_description]" exit 1 fi input_path=$1 @@ -159,17 +167,32 @@ compile_hqps_so() { ir_compiler_properties=$3 graph_schema_path=$4 gie_home=$5 - if [ $# -eq 6 ]; then + if [ $# -ge 6 ]; then output_dir=$6 else output_dir=${work_dir} fi + + if [ $# -ge 7 ]; then + procedure_name=$7 + else + procedure_name="" + fi + + if [ $# -ge 8 ]; then + procedure_description=$8 + else + procedure_description="" + fi + echo "Input path = ${input_path}" echo "Work dir = ${work_dir}" echo "ir compiler properties = ${ir_compiler_properties}" echo "graph schema path = ${graph_schema_path}" echo "GIE_HOME = ${gie_home}" echo "Output dir = ${output_dir}" + echo "Procedure name = ${procedure_name}" + echo "Procedure description = ${procedure_description}" last_file_name=$(basename ${input_path}) @@ -188,15 +211,24 @@ compile_hqps_so() { echo "Expect a .pb or .cc file" exit 1 fi + # if procedure_name is not set, use query_name + if [ -z "${procedure_name}" ]; then + procedure_name=${query_name} + fi + # if procedure_description is not set, use query_name + if [ -z "${procedure_description}" ]; then + procedure_description="Stored procedure for ${procedure_name}" + fi cur_dir=${work_dir} mkdir -p ${cur_dir} - output_cc_path="${cur_dir}/${query_name}.cc" + output_cc_path="${cur_dir}/${procedure_name}.cc" + 
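To make the fallback logic above concrete, here is a minimal sketch of a call that omits the two optional trailing arguments (file and directory names are hypothetical):

```bash
# With only six arguments, procedure_name falls back to the query name
# derived from the input file, and procedure_description falls back to an
# auto-generated sentence.
compile_hqps_so ic1.cypher /tmp/work engine_config.yaml graph.yaml ${GIE_HOME} /tmp/plugin
# => procedure_name="ic1"
# => procedure_description="Stored procedure for ic1"
```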
dst_yaml_path="${output_dir}/${procedure_name}.yaml" if [[ $(uname) == "Linux" ]]; then - output_so_path="${cur_dir}/lib${query_name}.so" - dst_so_path="${output_dir}/lib${query_name}.so" + output_so_path="${cur_dir}/lib${procedure_name}.so" + dst_so_path="${output_dir}/lib${procedure_name}.so" elif [[ $(uname) == "Darwin" ]]; then - output_so_path="${cur_dir}/lib${query_name}.dylib" - dst_so_path="${output_dir}/lib${query_name}.dylib" + output_so_path="${cur_dir}/lib${procedure_name}.dylib" + dst_so_path="${output_dir}/lib${procedure_name}.dylib" else echo "Not support OS." exit 1 @@ -209,11 +241,14 @@ compile_hqps_so() { eval ${cmd} echo "----------------------------" elif [[ $last_file_name == *.cypher ]]; then - echo "Generating code from cypher query" + echo "Generating code from cypher query, procedure name: ${procedure_name}, description: ${procedure_description}" # first do .cypher to .pb - output_pb_path="${cur_dir}/${query_name}.pb" - output_yaml_path="${cur_dir}/${query_name}.yaml" - cypher_to_plan ${query_name} ${input_path} ${output_pb_path} ${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} ${gie_home} + output_pb_path="${cur_dir}/${procedure_name}.pb" + output_yaml_path="${cur_dir}/${procedure_name}.yaml" + cypher_to_plan ${procedure_name} ${input_path} ${output_pb_path} \ + ${output_yaml_path} ${ir_compiler_properties} ${graph_schema_path} ${gie_home} \ + ${procedure_name} "${procedure_description}" + echo "----------------------------" echo "Codegen from cypher query done." echo "----------------------------" @@ -235,7 +270,7 @@ compile_hqps_so() { cp ${CMAKE_TEMPLATE_PATH} ${cur_dir}/CMakeLists.txt # run cmake and make in output path. pushd ${cur_dir} - cmd="cmake . -DQUERY_NAME=${query_name} -DFLEX_INCLUDE_PREFIX=${FLEX_INCLUDE} -DFLEX_LIB_DIR=${FLEX_LIB_DIR}" + cmd="cmake . -DQUERY_NAME=${procedure_name} -DFLEX_INCLUDE_PREFIX=${FLEX_INCLUDE} -DFLEX_LIB_DIR=${FLEX_LIB_DIR}" # if CMAKE_CXX_COMPILER is set, use it. if [ ! -z ${CMAKE_CXX_COMPILER} ]; then cmd="${cmd} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" @@ -284,7 +319,7 @@ compile_hqps_so() { fi # check output_dir doesn't contains output_so_name if [ -f ${dst_so_path} ]; then - echo "Output dir ${output_dir} already contains ${query_name}.so" + echo "Output dir ${output_dir} already contains ${procedure_name}.so" echo "Please remove it first." exit 1 fi @@ -294,6 +329,12 @@ compile_hqps_so() { echo "Copy failed, ${dst_so_path} not exists." exit 1 fi + # copy the generated yaml + cp ${output_yaml_path} ${output_dir} + if [ ! -f ${dst_yaml_path} ]; then + echo "Copy failed, ${dst_yaml_path} not exists." + exit 1 + fi echo "Finish copying, output to ${dst_so_path}" } @@ -461,6 +502,14 @@ run() { OUTPUT_DIR="${i#*=}" shift # past argument=value ;; + --procedure_name=*) + PROCEDURE_NAME="${i#*=}" + shift # past argument=value + ;; + --procedure_desc=*) + PROCEDURE_DESCRIPTION="${i#*=}" + shift # past argument=value + ;; -* | --*) echo "Unknown option $i" exit 1 @@ -477,6 +526,8 @@ run() { echo "graph_schema_path ="${GRAPH_SCHEMA_PATH} echo "GIE_HOME ="${GIE_HOME} echo "Output path ="${OUTPUT_DIR} + echo "Procedure name ="${PROCEDURE_NAME} + echo "Procedure description ="${PROCEDURE_DESCRIPTION} # check input exist if [ ! -f ${INPUT} ]; then @@ -484,10 +535,25 @@ run() { exit 1 fi + if [ -z "${OUTPUT_DIR}" ]; then + OUTPUT_DIR=${WORK_DIR} + fi + # if engine_type equals hqps if [ ${ENGINE_TYPE} == "hqps" ]; then echo "Engine type is hqps, generating dynamic library for hqps engine." 
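For end users of the wrapper, the new options compose with the existing ones as follows; a hypothetical invocation with placeholder paths and description, modeled on the CI usage above:

```bash
./load_plan_and_gen.sh -e=hqps \
  -i=../resources/queries/ic/adhoc/ic1_adhoc.cypher \
  -w=/tmp/codegen/ -o=/tmp/plugin \
  --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml \
  --graph_schema_path=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml \
  --gie_home=${GIE_HOME} \
  --procedure_name=ic1 \
  --procedure_desc="Interactive complex query 1"
```

Besides libic1.so (on Linux), the run now also leaves ic1.yaml in the output directory (see the dst_yaml_path copy step above), so the generated descriptor travels with the library. If the two new flags are omitted, the script derives a name from the input file and generates a description automatically.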
- compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${GIE_HOME} ${OUTPUT_DIR} + # if PROCEDURE_DESCRIPTION is not set, use empty string + if [ -z ${PROCEDURE_DESCRIPTION} ]; then + PROCEDURE_DESCRIPTION="Automatic generated description for stored procedure ${PROCEDURE_NAME}." + fi + # if PROCEDURE_NAME is not set, use input file name + if [ -z "${PROCEDURE_NAME}" ]; then + #remove the suffix of input file, the suffix is .cc or .cypher + PROCEDURE_NAME=$(basename ${INPUT}) + PROCEDURE_NAME="${PROCEDURE_NAME%.cc}" + PROCEDURE_NAME="${PROCEDURE_NAME%.pb}" + fi + compile_hqps_so ${INPUT} ${WORK_DIR} ${IR_CONF} ${GRAPH_SCHEMA_PATH} ${GIE_HOME} ${OUTPUT_DIR} ${PROCEDURE_NAME} "${PROCEDURE_DESCRIPTION}" # else if engine_type equals pegasus elif [ ${ENGINE_TYPE} == "pegasus" ]; then diff --git a/flex/bin/sync_server.cc b/flex/bin/sync_server.cc index 67b3d5c59f3d..616a6ca5e0e5 100644 --- a/flex/bin/sync_server.cc +++ b/flex/bin/sync_server.cc @@ -120,11 +120,11 @@ std::tuple parse_from_server_config( << engine_type_str; } } - auto shard_num_node = engine_node["shared_num"]; + auto shard_num_node = engine_node["shard_num"]; if (shard_num_node) { shard_num = shard_num_node.as(); } else { - LOG(INFO) << "shared_num not found, use default value " + LOG(INFO) << "shard_num not found, use default value " << DEFAULT_SHARD_NUM; } auto host_node = engine_node["hosts"]; @@ -151,23 +151,9 @@ std::tuple parse_from_server_config( } } -void load_plugins(const bpo::variables_map& vm) { - if (vm.count("plugin-dir") == 0) { - LOG(INFO) << "plugin-dir is not specified"; - return; - } - std::string plugin_dir = vm["plugin-dir"].as(); - if (!std::filesystem::exists(plugin_dir)) { - LOG(FATAL) << "plugin dir not exists: " << plugin_dir; - } - LOG(INFO) << "plugin dir: " << plugin_dir; - if (!plugin_dir.empty()) { - LOG(INFO) << "Load plugins from dir: " << plugin_dir; - server::StoredProcedureManager::get().LoadFromPluginDir(plugin_dir); - } -} - -void init_codegen_proxy(const bpo::variables_map& vm) { +void init_codegen_proxy(const bpo::variables_map& vm, + const std::string& graph_schema_file, + const std::string& engine_config_file) { std::string codegen_dir = parse_codegen_dir(vm); std::string codegen_bin; std::string gie_home; @@ -181,25 +167,6 @@ void init_codegen_proxy(const bpo::variables_map& vm) { LOG(FATAL) << "codegen bin not exists: " << codegen_bin; } } - std::string ir_compiler_properties; - std::string compiler_graph_schema; - if (vm.count("ir-compiler-prop") == 0) { - LOG(FATAL) << "ir-compiler-prop is not specified"; - } else { - ir_compiler_properties = vm["ir-compiler-prop"].as(); - if (!std::filesystem::exists(ir_compiler_properties)) { - LOG(FATAL) << "ir-compiler-prop not exists: " << ir_compiler_properties; - } - } - if (vm.count("compiler-graph-schema") == 0) { - LOG(FATAL) << "compiler-graph-schema is not specified"; - } else { - compiler_graph_schema = vm["compiler-graph-schema"].as(); - if (!std::filesystem::exists(compiler_graph_schema)) { - LOG(FATAL) << "compiler-graph-schema not exists: " - << compiler_graph_schema; - } - } if (vm.count("gie-home") == 0) { LOG(FATAL) << "gie-home is not specified"; } else { @@ -208,9 +175,8 @@ void init_codegen_proxy(const bpo::variables_map& vm) { LOG(FATAL) << "gie-home not exists: " << gie_home; } } - server::CodegenProxy::get().Init(codegen_dir, codegen_bin, - ir_compiler_properties, - compiler_graph_schema, gie_home); + server::CodegenProxy::get().Init(codegen_dir, codegen_bin, engine_config_file, + graph_schema_file, gie_home); 
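In terms of the server command line (see the trimmed option list in main() below), the compiler properties and the compiler graph schema no longer need dedicated flags; they now ride along with the server config and graph schema files. A hypothetical launch with placeholder paths, using only option names visible in this patch:

```bash
./sync_server --server-config ./conf/engine_config.yaml \
  --graph-config ./data/ldbc/graph.yaml \
  --data-path ./data/ldbc/indices/ \
  --bulk-load ./data/ldbc/bulk_load.yaml \
  --gie-home ${GIE_HOME}
```

Stored procedures are likewise no longer pointed at via --plugin-dir; they are discovered through the schema's plugin directory and plugin list (GetPluginDir / GetPluginsList).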
} } // namespace gs @@ -226,12 +192,7 @@ int main(int argc, char** argv) { "graph-config,g", bpo::value(), "graph schema config file")( "data-path,a", bpo::value(), "data directory path")( "bulk-load,l", bpo::value(), "bulk-load config file")( - "plugin-dir,p", bpo::value(), "plugin directory path")( - "gie-home,h", bpo::value(), "path to gie home")( - "ir-compiler-prop,i", bpo::value(), - "ir compiler property file")("compiler-graph-schema,z", - bpo::value(), - "compiler graph schema file"); + "gie-home,h", bpo::value(), "path to gie home"); setenv("TZ", "Asia/Shanghai", 1); tzset(); @@ -251,9 +212,10 @@ int main(int argc, char** argv) { std::string data_path; std::string bulk_load_config_path; std::string plugin_dir; + std::string server_config_path; if (vm.count("server-config") != 0) { - std::string server_config_path = vm["server-config"].as(); + server_config_path = vm["server-config"].as(); // check file exists if (!std::filesystem::exists(server_config_path)) { LOG(ERROR) << "server-config not exists: " << server_config_path; @@ -294,8 +256,12 @@ int main(int argc, char** argv) { LOG(INFO) << "Finished loading graph, elapsed " << t0 << " s"; // loading plugin - gs::load_plugins(vm); - gs::init_codegen_proxy(vm); + if (!schema.GetPluginDir().empty() && !schema.GetPluginsList().empty()) { + server::StoredProcedureManager::get().LoadFromPluginDir( + schema.GetPluginDir(), schema.GetPluginsList()); + } + + gs::init_codegen_proxy(vm, graph_schema_path, server_config_path); server::HQPSService::get().init(shard_num, http_port, false); server::HQPSService::get().run_and_wait_for_exit(); diff --git a/flex/codegen/src/hqps/hqps_scan_builder.h b/flex/codegen/src/hqps/hqps_scan_builder.h index 22264acfc0f4..c8144e4554ee 100644 --- a/flex/codegen/src/hqps/hqps_scan_builder.h +++ b/flex/codegen/src/hqps/hqps_scan_builder.h @@ -89,6 +89,53 @@ class ScanOpBuilder { return *this; } + ScanOpBuilder& idx_predicate(const algebra::IndexPredicate& predicate) { + // check query_params not has predicate. + if (query_params_.has_predicate()) { + VLOG(10) << "query params already has predicate"; + return *this; + } + // Currently we only support one predicate. 
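+    // The rewrite below turns the single supported index predicate (e.g. an
+    // id equality such as `v.id = 42`) into an ordinary expression on
+    // query_params_: three operators in infix order, var(key), EQ,
+    // const(value), with INT64 payloads on the variable and the constant.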
+ if (predicate.or_predicates_size() < 1) { + VLOG(10) << "No predicate in index predicate"; + return *this; + } + if (predicate.or_predicates_size() != 1) { + throw std::runtime_error( + std::string("Currently only support one predicate")); + } + auto or_predicate = predicate.or_predicates(0); + if (or_predicate.predicates_size() != 1) { + throw std::runtime_error( + std::string("Currently only support one and predicate")); + } + auto triplet = or_predicate.predicates(0); + // add index predicate to query params + auto* new_predicate = query_params_.mutable_predicate(); + { + auto first_op = new_predicate->add_operators(); + common::Variable variable; + auto& property = triplet.key(); + *(variable.mutable_property()) = property; + variable.mutable_node_type()->set_data_type(common::DataType::INT64); + *(first_op->mutable_var()) = variable; + } + { + auto second = new_predicate->add_operators(); + second->set_logical(common::Logical::EQ); + second->mutable_node_type()->set_data_type(common::DataType::BOOLEAN); + } + { + auto third = new_predicate->add_operators(); + auto& value = triplet.value(); + third->mutable_node_type()->set_data_type(common::DataType::INT64); + *(third->mutable_const_()) = value; + } + VLOG(10) << "Add index predicate to query params: " + << query_params_.DebugString(); + return *this; + } + std::string Build() const { std::string label_name; std::vector labels_ids; @@ -275,7 +322,9 @@ static std::string BuildScanOp( } else { builder.resAlias(-1); } - return builder.queryParams(scan_pb.params()).Build(); + return builder.queryParams(scan_pb.params()) + .idx_predicate(scan_pb.idx_predicate()) + .Build(); } } // namespace gs diff --git a/flex/engines/graph_db/grin/src/topology/structure.cc b/flex/engines/graph_db/grin/src/topology/structure.cc index 5698fdc5291a..809b9f932d8a 100644 --- a/flex/engines/graph_db/grin/src/topology/structure.cc +++ b/flex/engines/graph_db/grin/src/topology/structure.cc @@ -35,9 +35,33 @@ GRIN_GRAPH grin_get_graph_from_storage(const char* uri) { return GRIN_NULL_GRAPH; } _uri = _uri.substr(pos + 3); - std::string graph_schema_path = _uri + "/modern_graph.yaml"; - std::string data_path = uri; - std::string bulk_load_config_path = _uri + "/bulk_load.yaml"; + LOG(INFO) << "Params: " << _uri; + std::string graph_schema_path, bulk_load_config_path; + if (pos != std::string::npos) { + auto params = _uri; + std::vector param_list; + boost::split(param_list, params, boost::is_any_of("&")); + for (auto& param : param_list) { + std::vector kv; + boost::split(kv, param, boost::is_any_of("=")); + if (kv.size() != 2) { + return GRIN_NULL_GRAPH; + } + if (kv[0] == "schema_file") { + graph_schema_path = kv[1]; + } else if (kv[0] == "bulk_load_file") { + bulk_load_config_path = kv[1]; + } + } + } else { + return GRIN_NULL_GRAPH; + } + VLOG(10) << "Schema file: " << graph_schema_path; + VLOG(10) << "Bulk load file: " << bulk_load_config_path; + if (graph_schema_path.empty() || bulk_load_config_path.empty()) { + return GRIN_NULL_GRAPH; + } + // get schema_file from if (!std::filesystem::exists(graph_schema_path) || !(std::filesystem::exists(bulk_load_config_path))) { return GRIN_NULL_GRAPH; diff --git a/flex/engines/graph_db/grin/test/test.c b/flex/engines/graph_db/grin/test/test.c index 4612d041885b..d645945e99a1 100644 --- a/flex/engines/graph_db/grin/test/test.c +++ b/flex/engines/graph_db/grin/test/test.c @@ -1050,11 +1050,17 @@ void test_vertex_property_value(const char* uri_str) { } void test_perf(const char* uri_str) { 
test_vertex_property_value(uri_str); } - +// uri_str = +//"flex://" +// "../../../../storages/rt_mutable_graph/modern_graph/?schema_file={schema_file}&bulk_load_file={bulk_load_file}"; int main(int argc, char** argv) { - const char* uri_str = - "flex://" - "../../../../storages/rt_mutable_graph/modern_graph/"; + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + const char* uri_str = argv[1]; + //print uri + printf("uri: %s\n", uri_str); test_index(uri_str); test_property(uri_str); diff --git a/flex/engines/hqps_db/core/operator/edge_expand.h b/flex/engines/hqps_db/core/operator/edge_expand.h index 4d365ed38aa9..cfa6cf081b0d 100644 --- a/flex/engines/hqps_db/core/operator/edge_expand.h +++ b/flex/engines/hqps_db/core/operator/edge_expand.h @@ -731,20 +731,25 @@ class EdgeExpand { // Expand from multi label vertices and though multi edge labels. // result in general edge set. auto src_label = cur_vertex_set.GetLabel(); - LOG(INFO) << "[EdgeExpandEMultiTriplet] real labels: " - << gs::to_string(edge_labels); + LOG(INFO) << "[EdgeExpandEMultiTriplet] real labels: "; + for (auto i = 0; i < edge_labels.size(); ++i) { + LOG(INFO) << std::to_string(edge_labels[i][0]) << " " + << std::to_string(edge_labels[i][1]) << " " + << std::to_string(edge_labels[i][2]); + } // for each triplet, returns a vector of edge iters. auto& vertices = cur_vertex_set.GetVertices(); using sub_graph_t = typename GRAPH_INTERFACE::sub_graph_t; using edge_iter_t = typename sub_graph_t::iterator; std::vector sub_graphs; + auto prop_names_vec = prop_names_to_vec(prop_names); for (auto i = 0; i < edge_labels.size(); ++i) { // Check whether the edge triplet match input vertices. // return a hanlder to get edges - auto sub_graph_vec = - graph.GetSubGraph(edge_labels[i][0], edge_labels[i][1], - edge_labels[i][2], gs::to_string(direction)); + auto sub_graph_vec = graph.GetSubGraph( + edge_labels[i][0], edge_labels[i][1], edge_labels[i][2], + gs::to_string(direction), prop_names_vec[i]); for (auto sub_graph : sub_graph_vec) { sub_graphs.emplace_back(sub_graph); } @@ -811,7 +816,8 @@ class EdgeExpand { } auto set = UnTypedEdgeSet( - vertices, label_indices, label_vec, std::move(label_to_subgraphs)); + vertices, label_indices, label_vec, std::move(label_to_subgraphs), + direction); return std::make_pair(std::move(set), std::move(offsets)); } @@ -854,12 +860,13 @@ class EdgeExpand { using sub_graph_t = typename GRAPH_INTERFACE::sub_graph_t; using edge_iter_t = typename sub_graph_t::iterator; std::vector sub_graphs; + auto prop_names_vec = prop_names_to_vec(prop_names); for (auto i = 0; i < edge_labels.size(); ++i) { // Check whether the edge triplet match input vertices. 
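// (prop_names_vec[i] holds the property names selected for the i-th edge
//  triplet; GetSubGraph forwards them so each sub graph can report them
//  via GetPropNames().)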
// return a hanlder to get edges - auto sub_graph_vec = - graph.GetSubGraph(edge_labels[i][0], edge_labels[i][1], - edge_labels[i][2], gs::to_string(direction)); + auto sub_graph_vec = graph.GetSubGraph( + edge_labels[i][0], edge_labels[i][1], edge_labels[i][2], + gs::to_string(direction), prop_names_vec[i]); for (auto sub_graph : sub_graph_vec) { sub_graphs.emplace_back(sub_graph); } @@ -936,7 +943,8 @@ class EdgeExpand { } auto set = UnTypedEdgeSet( - vertices, label_indices, label_vec, std::move(label_to_subgraphs)); + vertices, label_indices, label_vec, std::move(label_to_subgraphs), + direction); return std::make_pair(std::move(set), std::move(offsets)); } @@ -1490,7 +1498,26 @@ class EdgeExpand { << gs::to_string(edge_label_id) << ", new vertices count: " << tmp_offset.back(); } -}; // namespace gs + + template + static void emplace_prop_names_to_vec( + std::vector>& vec_vec_prop_names, + std::tuple...>& prop_names, + std::index_sequence) { + (vec_vec_prop_names.emplace_back(array_to_vec(std::get(prop_names))), + ...); + } + template + static std::vector> prop_names_to_vec( + std::tuple...>& prop_names) { + std::vector> vec_vec_prop_names; + vec_vec_prop_names.reserve(sizeof...(PropTuple)); + emplace_prop_names_to_vec( + vec_vec_prop_names, prop_names, + std::make_index_sequence()); + return vec_vec_prop_names; + } +}; } // namespace gs diff --git a/flex/engines/hqps_db/core/operator/limit.h b/flex/engines/hqps_db/core/operator/limit.h index 1ffd3f4fac32..06dc7947d59e 100644 --- a/flex/engines/hqps_db/core/operator/limit.h +++ b/flex/engines/hqps_db/core/operator/limit.h @@ -35,6 +35,7 @@ class LimitOp { size_t cur_ind = 0; std::vector new_offsets; new_offsets.emplace_back(0); + upper_bound = std::min((size_t) upper_bound, cur_.Size()); for (auto iter : ctx) { if (cur_ind >= lower_bound && cur_ind < upper_bound) { cur_offset += 1; diff --git a/flex/engines/hqps_db/core/utils/keyed.h b/flex/engines/hqps_db/core/utils/keyed.h index 4b4602088e20..7827f45dc8d0 100644 --- a/flex/engines/hqps_db/core/utils/keyed.h +++ b/flex/engines/hqps_db/core/utils/keyed.h @@ -383,6 +383,21 @@ struct KeyedAggT, } }; +template +struct KeyedAggT, AggFunc::COUNT, + std::tuple, + std::integer_sequence> { + using agg_res_t = Collection; + using aggregate_res_builder_t = CountBuilder; + + static aggregate_res_builder_t create_agg_builder( + const FlatEdgeSet& set, const GI& graph, + std::tuple>& selectors) { + return CountBuilder(); + } +}; + template static inline auto insert_into_builder_v2_impl( diff --git a/flex/engines/hqps_db/database/adj_list.h b/flex/engines/hqps_db/database/adj_list.h index 2f28afcdde78..fb8f8e996a01 100644 --- a/flex/engines/hqps_db/database/adj_list.h +++ b/flex/engines/hqps_db/database/adj_list.h @@ -49,9 +49,14 @@ class EdgeIter { inline label_id_t GetSrcLabel() const { return label_triplet_[0]; } inline Any GetData() const { return ptr1_->get_data(); } - inline bool IsValid() const { return ptr1_->is_valid(); } + inline bool IsValid() const { return ptr1_ && ptr1_->is_valid(); } - size_t Size() const { return ptr1_->size(); } + size_t Size() const { + if (ptr1_) { + return ptr1_->size(); + } + return 0; + } private: std::shared_ptr ptr1_; @@ -66,21 +71,28 @@ class SubGraph { using iterator = EdgeIter; using label_id_t = LabelT; SubGraph(const MutableCsrBase* first, - const std::array& label_triplet) - : first_(first), label_triplet_(label_triplet) {} + const std::array& label_triplet, + const std::vector& prop_names) + : first_(first), label_triplet_(label_triplet), 
prop_names_(prop_names) {} inline iterator get_edges(VID_T vid) const { - return iterator(label_triplet_, first_->edge_iter(vid)); + if (first_) { + return iterator(label_triplet_, first_->edge_iter(vid)); + } + return iterator(label_triplet_, nullptr); } label_id_t GetSrcLabel() const { return label_triplet_[0]; } label_id_t GetEdgeLabel() const { return label_triplet_[2]; } label_id_t GetDstLabel() const { return label_triplet_[1]; } + const std::vector& GetPropNames() const { return prop_names_; } + private: const MutableCsrBase* first_; // We assume first is out edge, second is in edge. std::array label_triplet_; + std::vector prop_names_; }; template diff --git a/flex/engines/hqps_db/database/mutable_csr_interface.h b/flex/engines/hqps_db/database/mutable_csr_interface.h index 5531246ddbf2..0f1b9f3c89fa 100644 --- a/flex/engines/hqps_db/database/mutable_csr_interface.h +++ b/flex/engines/hqps_db/database/mutable_csr_interface.h @@ -498,21 +498,21 @@ class MutableCSRInterface { // get edges with input vids. return a edge list. std::vector> GetSubGraph(const label_id_t src_label_id, const label_id_t dst_label_id, - const label_id_t edge_label_id, - const std::string& direction_str) const { + const label_id_t edge_label_id, const std::string& direction_str, + const std::vector& prop_names) const { const MutableCsrBase *csr = nullptr, *other_csr = nullptr; if (direction_str == "out" || direction_str == "Out" || direction_str == "OUT") { csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, edge_label_id); - return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}}; + return std::vector{sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}}; } else if (direction_str == "in" || direction_str == "In" || direction_str == "IN") { csr = db_session_.graph().get_ie_csr(src_label_id, dst_label_id, edge_label_id); - return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}}; + return std::vector{sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}}; } else if (direction_str == "both" || direction_str == "Both" || direction_str == "BOTH") { csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, @@ -520,8 +520,11 @@ class MutableCSRInterface { other_csr = db_session_.graph().get_ie_csr(src_label_id, dst_label_id, edge_label_id); return std::vector{ - sub_graph_t{csr, {src_label_id, dst_label_id, edge_label_id}}, - sub_graph_t{other_csr, {dst_label_id, src_label_id, edge_label_id}}}; + sub_graph_t{ + csr, {src_label_id, dst_label_id, edge_label_id}, prop_names}, + sub_graph_t{other_csr, + {dst_label_id, src_label_id, edge_label_id}, + prop_names}}; } else { throw std::runtime_error("Not implemented - " + direction_str); } diff --git a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h index 64ff2f1501c3..fcdc9d1a42a8 100644 --- a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h +++ b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h @@ -171,12 +171,14 @@ class UnTypedEdgeSet { const std::vector& src_v, const std::vector& label_indices, const std::vector& labels, - std::unordered_map>&& adj_lists) + std::unordered_map>&& adj_lists, + const Direction& direction) : src_vertices_(src_v), label_indices_(label_indices), src_labels_(labels), adj_lists_(std::move(adj_lists)), - size_(0) { + size_(0), + direction_(direction) { sanity_check(); } @@ -368,6 +370,67 @@ class 
UnTypedEdgeSet { LOG(FATAL) << "not implemented, and should not be called"; } + template ::type* = nullptr> + auto ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + using dst_ele_tuple_t = std::tuple; + CHECK(repeat_array.size() == Size()); + size_t real_size = 0; + for (auto v : repeat_array) { + real_size += v; + } + std::vector dst_eles; + dst_eles.reserve(real_size); + auto edge_label_triplets = get_edge_triplets(); + auto edge_iters = generate_iters(); + std::vector label_triplet_indices; + label_triplet_indices.reserve(real_size); + std::vector sizes; + sizes.emplace_back(0); + for (auto i = 0; i < edge_label_triplets.size(); ++i) { + sizes.emplace_back(sizes.back() + edge_label_triplets[i].size()); + } + + // 0,2,4 + size_t cur_ind = 0; + for (auto i = 0; i < src_vertices_.size(); ++i) { + auto src_vid = src_vertices_[i]; + auto& cur_edge_iters = edge_iters[i]; + auto src_label_ind = label_indices_[i]; + auto src_label = src_labels_[src_label_ind]; + auto cur_triplets_vec = edge_label_triplets[src_label_ind]; + CHECK(cur_triplets_vec.size() == cur_edge_iters.size()); + + for (auto j = 0; j < cur_edge_iters.size(); ++j) { + auto& cur_iter = cur_edge_iters[j]; + while (cur_iter.IsValid()) { + auto dst_vid = cur_iter.GetDstId(); + auto data = cur_iter.GetData(); + for (auto k = 0; k < repeat_array[cur_ind]; ++k) { + dst_eles.emplace_back(std::make_tuple(src_vid, dst_vid, data)); + label_triplet_indices.emplace_back(sizes[src_label_ind] + j); + } + cur_iter.Next(); + cur_ind += 1; + } + } + } + std::vector> res_label_triplets; + // put edge_label_triplets into res_label_triplets + for (auto i = 0; i < edge_label_triplets.size(); ++i) { + auto& cur_triplets_vec = edge_label_triplets[i]; + for (auto j = 0; j < cur_triplets_vec.size(); ++j) { + res_label_triplets.emplace_back(cur_triplets_vec[j]); + } + } + std::vector> prop_names = get_prop_namees(); + CHECK(prop_names.size() == res_label_triplets.size()); + return FlatEdgeSet( + std::move(dst_eles), std::move(res_label_triplets), prop_names, + std::move(label_triplet_indices), direction_); + } + private: std::pair, std::unordered_map> preprocess_getting_labels(const std::vector& req_labels, @@ -420,6 +483,35 @@ class UnTypedEdgeSet { << " vertices, with " << edge_iter_vecs.size() << " iters"; return edge_iter_vecs; } + + std::vector>> get_edge_triplets() const { + std::vector>> ret; + for (auto iter : adj_lists_) { + auto& sub_graphs = iter.second; + std::vector> tmp; + for (auto i = 0; i < sub_graphs.size(); ++i) { + auto& sub_graph = sub_graphs[i]; + tmp.emplace_back(std::array({sub_graph.GetSrcLabel(), + sub_graph.GetDstLabel(), + sub_graph.GetEdgeLabel()})); + } + ret.emplace_back(std::move(tmp)); + } + return ret; + } + + std::vector> get_prop_namees() const { + std::vector> ret; + for (auto iter : adj_lists_) { + auto& sub_graphs = iter.second; + for (auto i = 0; i < sub_graphs.size(); ++i) { + auto& sub_graph = sub_graphs[i]; + ret.push_back(sub_graph.GetPropNames()); + } + } + return ret; + } + void sanity_check() { CHECK(src_vertices_.size() == label_indices_.size()); for (auto v : label_indices_) { @@ -437,6 +529,7 @@ class UnTypedEdgeSet { std::unordered_map> adj_lists_; // match src_label to all triplet. 
mutable size_t size_; // computed lazily + Direction direction_; }; } // namespace gs diff --git a/flex/engines/http_server/stored_procedure.cc b/flex/engines/http_server/stored_procedure.cc index 8f8d37747376..a4601d5d55f0 100644 --- a/flex/engines/http_server/stored_procedure.cc +++ b/flex/engines/http_server/stored_procedure.cc @@ -83,22 +83,10 @@ void close_lib(void* handle, const char* lib_path) { } } -std::vector get_yaml_files(const std::string& plugin_dir) { - std::filesystem::path dir_path = plugin_dir; - std::string suffix = ".yaml"; - std::vector res_yaml_files; - - for (auto& entry : std::filesystem::directory_iterator(dir_path)) { - if (entry.is_regular_file() && entry.path().extension() == suffix) { - res_yaml_files.emplace_back(entry.path()); - } - } - return res_yaml_files; -} - std::vector parse_from_multiple_yamls( const std::string& plugin_dir, - const std::vector& stored_procedure_yamls) { + const std::vector& stored_procedure_yamls, + const std::vector& valid_procedure_names) { std::vector stored_procedures; for (auto cur_yaml : stored_procedure_yamls) { LOG(INFO) << "Loading for: " << cur_yaml; @@ -109,17 +97,21 @@ std::vector parse_from_multiple_yamls( LOG(ERROR) << "Expect path in pre_installed procedure"; } else { std::string name = root["name"].as(); - std::string path = root["library"].as(); - if (!std::filesystem::exists(path)) { - // in case the path is relative to plugin_dir, prepend plugin_dir - path = plugin_dir + "/" + path; + if (find(valid_procedure_names.begin(), valid_procedure_names.end(), + name) != valid_procedure_names.end()) { + VLOG(10) << "Find valid procedure: " << name; + std::string path = root["library"].as(); if (!std::filesystem::exists(path)) { - LOG(ERROR) << "plugin - " << path << " file not found..."; + // in case the path is relative to plugin_dir, prepend plugin_dir + path = plugin_dir + "/" + path; + if (!std::filesystem::exists(path)) { + LOG(ERROR) << "plugin - " << path << " file not found..."; + } else { + stored_procedures.push_back({name, path}); + } } else { stored_procedures.push_back({name, path}); } - } else { - stored_procedures.push_back({name, path}); } } } @@ -151,7 +143,7 @@ std::vector parse_stored_procedures( } } } else { - LOG(WARNING) << "Expect ntry : " << stored_procedure_yaml; + LOG(WARNING) << "Expect entry : " << stored_procedure_yaml; } return stored_procedures; } diff --git a/flex/engines/http_server/stored_procedure.h b/flex/engines/http_server/stored_procedure.h index 3f6fcf85fc0d..7aad2cce6eed 100644 --- a/flex/engines/http_server/stored_procedure.h +++ b/flex/engines/http_server/stored_procedure.h @@ -34,6 +34,7 @@ #include "flex/engines/hqps_db/app/hqps_app_base.h" #include "flex/engines/hqps_db/database/mutable_csr_interface.h" #include "flex/utils/app_utils.h" +#include "flex/utils/yaml_utils.h" #include @@ -83,7 +84,9 @@ struct StoredProcedureMeta { std::vector parse_stored_procedures( const std::string& stored_procedure_yaml); std::vector parse_from_multiple_yamls( - const std::string& plugin_dir, const std::vector& stored_procedure_yamls); + const std::string& plugin_dir, + const std::vector& stored_procedure_yamls, + const std::vector& valid_procedure_names); enum class StoredProcedureType { kCypher = 0, @@ -142,17 +145,18 @@ class CypherStoredProcedure; std::shared_ptr create_stored_procedure_impl( int32_t procedure_id, const std::string& procedure_path); -std::vector get_yaml_files(const std::string& plugin_dir); - class StoredProcedureManager { public: static StoredProcedureManager& get(); 
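For context, each plugin .yaml consumed by parse_from_multiple_yamls (and hence by LoadFromPluginDir just below) needs at least a name and a library entry; the name must also appear in the schema's plugin list (the valid_procedure_names argument) to be loaded, and a relative library path is resolved against the plugin directory. A minimal, hypothetical descriptor written from the shell (${PLUGIN_DIR} is a placeholder):

```bash
cat > ${PLUGIN_DIR}/count_vertex_num.yaml <<'EOF'
name: count_vertex_num
# relative paths are resolved against the plugin directory
library: libcount_vertex_num.so
EOF
```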
StoredProcedureManager() {} // expect multiple query.yaml under this directory. - void LoadFromPluginDir(const std::string& plugin_dir) { - auto yaml_files = get_yaml_files(plugin_dir); - auto stored_procedures = parse_from_multiple_yamls(plugin_dir, yaml_files); + void LoadFromPluginDir( + const std::string& plugin_dir, + const std::vector& valid_procedure_names) { + auto yaml_files = gs::get_yaml_files(plugin_dir); + auto stored_procedures = parse_from_multiple_yamls(plugin_dir, yaml_files, + valid_procedure_names); CreateStoredProcedures(stored_procedures); } diff --git a/flex/interactive/README.md b/flex/interactive/README.md index fd6ca0e3e239..7713349e6142 100755 --- a/flex/interactive/README.md +++ b/flex/interactive/README.md @@ -3,3 +3,61 @@ GraphScope Interactive is a specialized construction of [GraphScope Flex](https://github.com/alibaba/GraphScope/tree/main/flex), designed to handle concurrent graph queries at an impressive speed. Its primary goal is to process as many queries as possible within a given timeframe, emphasizing a high query throughput rate. For the full documentation of GraphScope Interactive, please refer to [GraphScope-Interactive](https://graphscope.io/docs/interactive_engine/graphscope_interactive). +## Minimal tutorial + +In this minimal tutorial, we will show you how to run graph service on builtin modern graph. + +### Preparation + +Set `location` to `/home/graphscope/default_graph`. + +### init database + +```bash +./bin/gs_interactive init -c ./conf/interactive.yaml +``` + +### Start service + +```bash +./bin/gs_interactive service start +``` + +### Stop service +```bash +./bin/gs_interactive service stop +``` + +### Restart service +```bash +./bin/gs_interactive service restart +``` + +### Get service status +```bash +./bin/gs_interactive service status +``` + +### Compile stored procedure +```bash +./bin/gs_interactive procedure compile -g modern -i ./examples/modern_graph/count_vertex_num.cypher +``` + +### Disable stored procedure +```bash +./bin/gs_interactive procedure disable -g modern -n count_vertex_num +``` + +### Enable stored procedure +```bash +./bin/gs_interactive procedure enable -g modern -n count_vertex_num +``` + +### Use user defined graph +```bash +./bin/gs_interactive service stop +./bin/gs_interactive database remove -g modern +./bin/gs_interactive database create -g test -c ./examples/modern_graph/modern_graph.yaml +./bin/gs_interactive database import -g test -c ./examples/modern_graph/bulk_load.yaml +./bin/gs_interactive service start -g test +``` \ No newline at end of file diff --git a/flex/interactive/bin/db_admin.sh b/flex/interactive/bin/db_admin.sh deleted file mode 100755 index 8f98befcc53d..000000000000 --- a/flex/interactive/bin/db_admin.sh +++ /dev/null @@ -1,654 +0,0 @@ -#!/bin/bash -# Copyright 2020 Alibaba Group Holding Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# The product name -DB_PROD_NAME="interactive" - -# colored error and info functions to wrap messages. 
-RED='\033[0;31m' -GREEN='\033[0;32m' -NC='\033[0m' # No Color -err() { - echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 -} - -info() { - echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" -} - -################## Some Util Functions ################## -function parse_yaml { - local prefix=$2 - local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$(echo @|tr @ '\034') - sed -ne "s|^\($s\):|\1|" \ - -e "s|^\($s\)\($w\)$s:$s[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \ - -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" $1 | - awk -F$fs '{ - indent = length($1)/2; - vname[indent] = $2; - for (i in vname) {if (i > indent) {delete vname[i]}} - if (length($3) > 0) { - vn=""; for (i=0; i/dev/null 2>&1 - pwd -P -)" -info "HOST_DB_HOME = ${HOST_DB_HOME}" - -#################### DEFINE CONSTANTS #################### -GRAPHSCOPE_GROUP_ID=1001 - -# the configuration directory -HOST_DB_CONF_DIR="${HOST_DB_HOME}/conf" -# the data directory -HOST_DB_DATA_DIR="${HOST_DB_HOME}/data" -# the log directory -HOST_DB_LOG_DIR="${HOST_DB_HOME}/logs" -HOST_DB_SERVER_OUTPUT_LOG="${HOST_DB_LOG_DIR}/server.log" -HOST_DB_COMPILER_OUTPUT_LOG="${HOST_DB_LOG_DIR}/compiler.log" -HOST_DB_INTERACTIVE_YAML="${HOST_DB_CONF_DIR}/interactive.yaml" -HOST_DB_EXAMPLE_DATASET_DIR=${HOST_DB_HOME}/"examples/sf0.1-raw/" -HOST_DB_RUNNING_FILE="${HOST_DB_HOME}/.running" -# will export DOCKER_DB_HOME, if not set, exist -get_docker_workspace_from_yaml "${HOST_DB_INTERACTIVE_YAML}" - -DOCKER_DB_GRAPHSCOPE_HOME="/home/graphscope/GraphScope" -DOCKER_DB_DATA_DIR="${DOCKER_DB_HOME}/data" -DOCKER_DB_LOG_DIR="${DOCKER_DB_HOME}/logs" -DOCKER_DB_CONF_DIR="${DOCKER_DB_HOME}/conf" -DOCKER_DB_IR_CONF_FILE="${DOCKER_DB_HOME}/conf/interactive.properties" -DOCKER_DB_GIE_HOME="${DOCKER_DB_GRAPHSCOPE_HOME}/interactive_engine/" -DOCKER_DB_INTERACTIVE_YAML="${DOCKER_DB_HOME}/conf/interactive.yaml" -DOCKER_DB_SERVER_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/bin/sync_server" -DOCKER_DB_COMPILER_BIN="com.alibaba.graphscope.GraphServer" -DOCKER_DB_GEN_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/bin/load_plan_and_gen.sh" -DOCKER_DB_SERVER_OUTPUT_LOG=${DOCKER_DB_LOG_DIR}/server.log -DOCKER_DB_COMPILER_OUTPUT_LOG=${DOCKER_DB_LOG_DIR}/compiler.log -export DOCKER_DB_CONNECTOR_PORT=7687 -DB_CONNECT_DEFAULT_PORT=7687 -# update the port by parsing the yaml file -DOCKER_DB_CONNECTOR_PORT=$(parse_yaml "${HOST_DB_INTERACTIVE_YAML}" | grep "compiler_endpoint_boltConnector_port" | awk -F "=" '{print $2}') -#remove "" and space -DOCKER_DB_CONNECTOR_PORT=$(echo "${DOCKER_DB_CONNECTOR_PORT}" | sed 's/^"//' | sed 's/"$//') - -EXAMPLE_DATA_SET_URL="https://github.com/GraphScope/gstest.git" - -################### IMAGE VERSION ################### -GIE_DB_IMAGE_VERSION="v0.0.1" -GIE_DB_IMAGE_NAME="registry.cn-hongkong.aliyuncs.com/graphscope/${DB_PROD_NAME}" -GIE_DB_CONTAINER_NAME="${DB_PROD_NAME}-server" - - -#################### DEFINE FUNCTIONS #################### -function check_running_containers_and_exit(){ - # check if there is any running containers - info "Check running containers and exit" - running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}") - if [ -n "${running_containers}" ]; then - err "There are running containers: ${running_containers}, please stop them first." 
- exit 1 - fi - info "finish check" -} - -function check_container_running(){ - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - # start the container - docker start "${GIE_DB_CONTAINER_NAME}" - fi -} - -function ensure_container_running(){ - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - # start the container - docker start "${GIE_DB_CONTAINER_NAME}" - fi -} - -function check_process_running_in_container(){ - local container_name=$1 - local process_name=$2 - local error_msg=$3 - local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}\') - if [ -z "${process_id}" ]; then - err "process ${process_name} is not running in container ${container_name}" - err "${error_msg}" - exit 1 - fi - info "process ${process_name} is running in container ${container_name}, process id is ${process_id}" -} - - -#################### DEFINE USAGE #################### -# parse the args and set the variables. -function usage() { - init_usage - start_usage - stop_usage - restart_usage - compile_usage - show_stored_procedure_usage - download_dataset_usage - destroy_usage -} - -function init_usage() { - cat << EOF - db_admin.sh init -p[---publish] - -v[--volume] - --version - Init the database, create the containers. --publish and --volume can be used multiple times. -EOF -} - -function start_usage() { - cat << EOF - db_admin.sh start -n [--name] -b [--bulk-load] -r[--root-data-dir] - Start the database with the given graph. graph schema file should be placed at ./data/{graph_name}/graph.yaml. - If mode is override, we need to clear the data directory first. -EOF -} - -function stop_usage() { - cat << EOF - db_admin.sh stop - Stop the database with the given graph. -EOF -} - -function restart_usage() { - cat << EOF - db_admin.sh restart - Restart the database with current running graph. 
-EOF -} - -function compile_usage(){ - cat << EOF - db_admin.sh compile -g[--graph] -i ${DOCKER_DB_COMPILER_OUTPUT_LOG} 2>&1 &" - cmd=${cmd}"\"" - info "Running cmd: ${cmd}" - eval ${cmd} - sleep 6 - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "check ${HOST_DB_COMPILER_OUTPUT_LOG} to see more details" - info "Successfuly start compiler" - info "DataBase service is running..., port is open on :${DOCKER_DB_CONNECTOR_PORT}" - - # if do_start success, we should write current args to ${HOST_DB_RUNNING_FILE} - echo "GRAPH_NAME=${GRAPH_NAME}" > ${HOST_DB_RUNNING_FILE} - echo "BULK_LOAD_FILE=${BULK_LOAD_FILE}" >> ${HOST_DB_RUNNING_FILE} - echo "ROOT_DATA_DIR=${root_data_dir}" >> ${HOST_DB_RUNNING_FILE} -# info "Successfuly write running args to ${HOST_DB_RUNNING_FILE}" -} - - -#################### Stop database #################### -function do_stop(){ - # stop the container - docker stop ${GIE_DB_CONTAINER_NAME} - info "Successfuly stop database" -} - - -#################### Get database status #################### -function do_status() { - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is running" - else - info "container ${GIE_DB_CONTAINER_NAME} is not running" - info "Please start database first" - fi - # the container is running but the process is not running - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} "check ${HOST_DB_SERVER_OUTPUT_LOG} to see more details" - check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "check ${HOST_DB_COMPILER_OUTPUT_LOG} to see more details" - info "Database service is running..., port is open on :${DOCKER_DB_CONNECTOR_PORT}" -} - - -#################### Download dataset #################### -function do_download_dataset(){ - git clone ${EXAMPLE_DATA_SET_URL} ${HOST_DB_EXAMPLE_DATASET_DIR} - info "Successfuly download dataset to: ${HOST_DB_EXAMPLE_DATASET_DIR}" -} - - -#################### Restart #################### -function do_restart() { - # if the container is not running, exit - if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "false" ]; then - info "container ${GIE_DB_CONTAINER_NAME} is not running" - info "Please start database first" - exit 1 - fi - info "Stopping database first..." - do_stop - info "Successfuly stop database" - # read args from cached file. 
- # get num lines in file ${HOST_DB_RUNNING_FILE} - num_lines=$(wc -l < ${HOST_DB_RUNNING_FILE}) - if [ ${num_lines} -ne 3 ]; then - err "Error: ${HOST_DB_RUNNING_FILE} should have 3 lines, but got ${num_lines}, something wrong with the file ${HOST_DB_RUNNING_FILE}" - exit 1 - fi - # read args from file - GRAPH_NAME=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - BULK_LOAD_FILE=$(sed -n '2p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - ROOT_DATA_DIR=$(sed -n '3p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) - do_start -n ${GRAPH_NAME} -b ${BULK_LOAD_FILE} -r ${ROOT_DATA_DIR} - info "Finish restart database" -} - -# the compiled dynamic libs will be placed at data/${graph_name}/plugins/ -# after compilation, the user need to write the cooresponding yaml, telling the compiler about -# the input and output of the stored procedure -function do_compile() { - # check args num == 4 - # start container - ensure_container_running - if [ $# -ne 4 ]; then - err "stored_procedure command need 2 args, but got $#" - compile_usage - exit 1 - fi - graph_name="" - file_path="" # file path - output_dir="" - - while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -g | --graph) - graph_name="$2" - info "graph_name = ${graph_name}" - shift # past argument - shift - ;; - -i | --input) - file_path="$2" - shift # past argument - shift - ;; - *) - err "unknown option $1" - compile_usage - exit 1 - ;; - esac - done - - # check graph_name - if [ -z "${graph_name}" ]; then - err "graph_name is empty" - compile_usage - exit 1 - fi - - # check file_path - if [ -z "${file_path}" ]; then - err "file_path is empty" - compile_usage - exit 1 - fi - - # get real file_path - file_name=$(basename "${file_path}") - real_file_path=$(realpath "${file_path}") - # check exists - if [ ! -f "${real_file_path}" ]; then - err "file ${real_file_path} not exist" - exit 1 - fi - # check graph dir exists - graph_dir="${HOST_DB_HOME}/data/${graph_name}" - if [ ! -d "${graph_dir}" ]; then - err "graph ${graph_name} not exist" - exit 1 - fi - mkdir -p "${graph_dir}/plugins" - - DOCKER_OUTPUT_DIR="${DOCKER_DB_HOME}/data/${graph_name}/plugins" - HOST_OUTPUT_DIR="${HOST_DB_HOME}/data/${graph_name}/plugins" - DOCKER_DB_GRAPH_SCHEMA="${DOCKER_DB_HOME}/data/${graph_name}/graph.json" - DOCKER_REAL_FILE_PATH="/tmp/${file_name}" - # docker cp file to container - cmd="docker cp ${real_file_path} ${GIE_DB_CONTAINER_NAME}:${DOCKER_REAL_FILE_PATH}" - eval ${cmd} || exit 1 - - cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \"" - cmd=${cmd}" ${DOCKER_DB_GEN_BIN}" - cmd=${cmd}" --engine_type=hqps" - cmd=${cmd}" --input=${DOCKER_REAL_FILE_PATH}" - cmd=${cmd}" --work_dir=/tmp/codegen/" - cmd=${cmd}" --ir_conf=${DOCKER_DB_IR_CONF_FILE}" - cmd=${cmd}" --graph_schema_path=${DOCKER_DB_GRAPH_SCHEMA}" - cmd=${cmd}" --gie_home=${DOCKER_DB_GIE_HOME}" - cmd=${cmd}" --output_dir=${DOCKER_OUTPUT_DIR}" - cmd=${cmd}" \"" - - echo "Running cmd: ${cmd}" - eval ${cmd} || exit 1 - # check output exists - # get the file_name of file_path - file_name="${file_name%.*}" - output_file="${HOST_OUTPUT_DIR}/lib${file_name}.so" - - if [ ! -f "${output_file}" ]; then - err "output file ${output_file} not exist, compilation failed" - exit 1 - fi - info "success generate dynamic lib ${output_file}, please create the cooresponding yaml file ${HOST_OUTPUT_DIR}/${file_name}.yaml." 
-} - -#################### Entry #################### -if [ $# -eq 0 ]; then - usage - exit 1 -fi - -while [[ $# -gt 0 ]]; do - key="$1" - - case $key in - -h | --help) - usage - exit - ;; - init) - shift - info "Start initiating database..." - do_init "$@" - exit 0 - ;; - start) - shift - info "Start database service..." - do_start "$@" - exit 0 - ;; - status) - shift - do_status "$@" - exit 0 - ;; - stop) - shift - do_stop "$@" - exit 0 - ;; - restart) - shift - do_restart # restart current graph - exit 0 - ;; - compile) - shift - do_compile "$@" - exit 0 - ;; - show_stored_procedure) - shift - do_show_stored_procedure "$@" - exit 0 - ;; - destroy) - shift - do_destroy "$@" - exit 0 - ;; - download_dataset) - shift - do_download_dataset - exit 0 - ;; - *) # unknown option - err "unknown option $1" - usage - exit 1 - ;; - esac -done - - - - diff --git a/flex/interactive/bin/gs_interactive b/flex/interactive/bin/gs_interactive new file mode 100755 index 000000000000..d80c06b5d42c --- /dev/null +++ b/flex/interactive/bin/gs_interactive @@ -0,0 +1,1993 @@ +#!/bin/bash +# Copyright 2020 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# The product name +DB_PROD_NAME="interactive" + +# colored error and info functions to wrap messages. 
+RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color +err() { + echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 +} + +info() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + +emph(){ + echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + +################## Some Util Functions ################## + +# source: https://github.com/mrbaseman/parse_yaml.git +function parse_yaml { + local prefix=$2 + local separator=${3:-_} + + local indexfix + # Detect awk flavor + if awk --version 2>&1 | grep -q "GNU Awk" ; then + # GNU Awk detected + indexfix=-1 + elif awk -Wv 2>&1 | grep -q "mawk" ; then + # mawk detected + indexfix=0 + fi + + local s='[[:space:]]*' sm='[ \t]*' w='[a-zA-Z0-9_]*' fs=${fs:-$(echo @|tr @ '\034')} i=${i:- } + cat $1 | \ + awk -F$fs "{multi=0; + if(match(\$0,/$sm\|$sm$/)){multi=1; sub(/$sm\|$sm$/,\"\");} + if(match(\$0,/$sm>$sm$/)){multi=2; sub(/$sm>$sm$/,\"\");} + while(multi>0){ + str=\$0; gsub(/^$sm/,\"\", str); + indent=index(\$0,str); + indentstr=substr(\$0, 0, indent+$indexfix) \"$i\"; + obuf=\$0; + getline; + while(index(\$0,indentstr)){ + obuf=obuf substr(\$0, length(indentstr)+1); + if (multi==1){obuf=obuf \"\\\\n\";} + if (multi==2){ + if(match(\$0,/^$sm$/)) + obuf=obuf \"\\\\n\"; + else obuf=obuf \" \"; + } + getline; + } + sub(/$sm$/,\"\",obuf); + print obuf; + multi=0; + if(match(\$0,/$sm\|$sm$/)){multi=1; sub(/$sm\|$sm$/,\"\");} + if(match(\$0,/$sm>$sm$/)){multi=2; sub(/$sm>$sm$/,\"\");} + } + print}" | \ + sed -e "s|^\($s\)?|\1-|" \ + -ne "s|^$s#.*||;s|$s#[^\"']*$||;s|^\([^\"'#]*\)#.*|\1|;t1;t;:1;s|^$s\$||;t2;p;:2;d" | \ + sed -ne "s|,$s\]$s\$|]|" \ + -e ":1;s|^\($s\)\($w\)$s:$s\(&$w\)\?$s\[$s\(.*\)$s,$s\(.*\)$s\]|\1\2: \3[\4]\n\1$i- \5|;t1" \ + -e "s|^\($s\)\($w\)$s:$s\(&$w\)\?$s\[$s\(.*\)$s\]|\1\2: \3\n\1$i- \4|;" \ + -e ":2;s|^\($s\)-$s\[$s\(.*\)$s,$s\(.*\)$s\]|\1- [\2]\n\1$i- \3|;t2" \ + -e "s|^\($s\)-$s\[$s\(.*\)$s\]|\1-\n\1$i- \2|;p" | \ + sed -ne "s|,$s}$s\$|}|" \ + -e ":1;s|^\($s\)-$s{$s\(.*\)$s,$s\($w\)$s:$s\(.*\)$s}|\1- {\2}\n\1$i\3: \4|;t1" \ + -e "s|^\($s\)-$s{$s\(.*\)$s}|\1-\n\1$i\2|;" \ + -e ":2;s|^\($s\)\($w\)$s:$s\(&$w\)\?$s{$s\(.*\)$s,$s\($w\)$s:$s\(.*\)$s}|\1\2: \3 {\4}\n\1$i\5: \6|;t2" \ + -e "s|^\($s\)\($w\)$s:$s\(&$w\)\?$s{$s\(.*\)$s}|\1\2: \3\n\1$i\4|;p" | \ + sed -e "s|^\($s\)\($w\)$s:$s\(&$w\)\(.*\)|\1\2:\4\n\3|" \ + -e "s|^\($s\)-$s\(&$w\)\(.*\)|\1- \3\n\2|" | \ + sed -ne "s|^\($s\):|\1|" \ + -e "s|^\($s\)\(---\)\($s\)||" \ + -e "s|^\($s\)\(\.\.\.\)\($s\)||" \ + -e "s|^\($s\)-${s}[\"']\(.*\)[\"']$s\$|\1$fs$fs\2|p;t" \ + -e "s|^\($s\)\($w\)$s:${s}[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p;t" \ + -e "s|^\($s\)-$s\(.*\)$s\$|\1$fs$fs\2|" \ + -e "s|^\($s\)\($w\)$s:${s}[\"']\?\(.*\)$s\$|\1$fs\2$fs\3|" \ + -e "s|^\($s\)[\"']\?\([^&][^$fs]\+\)[\"']$s\$|\1$fs$fs$fs\2|" \ + -e "s|^\($s\)[\"']\?\([^&][^$fs]\+\)$s\$|\1$fs$fs$fs\2|" \ + -e "s|$s\$||p" | \ + awk -F$fs "{ + gsub(/\t/,\" \",\$1); + if(NF>3){if(value!=\"\"){value = value \" \";}value = value \$4;} + else { + if(match(\$1,/^&/)){anchor[substr(\$1,2)]=full_vn;getline}; + indent = length(\$1)/length(\"$i\"); + vname[indent] = \$2; + value= \$3; + for (i in vname) {if (i > indent) {delete vname[i]; idx[i]=0}} + if(length(\$2)== 0){ vname[indent]= ++idx[indent] }; + vn=\"\"; for (i=0; i0)&&index(val, ref)==1){ + tmpval=assignment[val]; + sub(ref,full_vn,val); + if(match(val,\"$separator\$\")){ + gsub(ref,full_vn,tmpval); + } else if (length(tmpval) > 0) { + printf(\"%s=\\\"%s\\\"\n\", val, tmpval); + } + 
assignment[val]=tmpval; + } + } + } + } else if (length(value) > 0) { + printf(\"%s=\\\"%s\\\"\n\", full_vn, value); + } + }END{ + for(val in assignment){ + if(match(val,\"$separator\$\")) + printf(\"%s=\\\"%s\\\"\n\", val, assignment[val]); + } + }" +} + +# check if the file exists, if not, exit. +function check_file_exists(){ + if [ ! -f "$1" ]; then + err "file $1 not exists" + exit 1 + fi +} +function check_directory_exists(){ + if [ ! -d "$1" ]; then + err "directory $1 not exists" + exit 1 + fi +} + +HOST_DB_HOME="$( + cd "$(dirname "$0")/../" >/dev/null 2>&1 + pwd -P +)" +info "HOST_DB_HOME = ${HOST_DB_HOME}" +mkdir -p "${HOST_DB_HOME}/data" + +################### GET USER INFO ################### +# get uid +uid=$(id -u) +# get group id +gid=$(id -g) + + +#################### DEFINE CONSTANTS #################### + +# the log directory +# HOST_DB_INTERACTIVE_YAML="${HOST_DB_CONF_DIR}/interactive.yaml" +HOST_DB_RUNNING_FILE="${HOST_DB_HOME}/.running" +HOST_DB_ENV_FILE="${HOST_DB_HOME}/.env" + +DOCKER_DB_GRAPHSCOPE_HOME="/home/graphscope/GraphScope" +DOCKER_DB_GIE_HOME="${DOCKER_DB_GRAPHSCOPE_HOME}/interactive_engine/" +DOCKER_DB_SERVER_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/bin/sync_server" +DOCKER_DB_GRAPH_IMPORT_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/build/tests/rt_mutable_graph/test_graph_loading" +DOCKER_DB_COMPILER_BIN="com.alibaba.graphscope.GraphServer" +DOCKER_DB_GEN_BIN="${DOCKER_DB_GRAPHSCOPE_HOME}/flex/bin/load_plan_and_gen.sh" +HOST_DB_TMP_DIR="/tmp" + +#################### DEFINE DEFAULT CONSTATNS #################### +DATABASE_VERSION="v0.0.2" +DATABASE_DEFAULT_GRAPH_NAME="gs_interactive_default_graph" +DATABASE_CURRENT_GRAPH_NAME=${DATABASE_DEFAULT_GRAPH_NAME} +DATABASE_DEFAULT_GRAPH_DOCKER_PATH="/home/graphscope/${DATABASE_DEFAULT_GRAPH_NAME}" +DATABASE_DEFAULT_GRAPH_MOUNT_CMD="${HOST_DB_HOME}/examples/modern_graph/:${DATABASE_DEFAULT_GRAPH_DOCKER_PATH}" +DATABASE_VOLUMES="${DATABASE_DEFAULT_GRAPH_MOUNT_CMD}" +DATABASE_LOG_LEVEL="INFO" +DATABASE_PORTS="" + +## compiler related default configuration +DATABASE_COMPILER_PLANNER_IS_ON="true" +DATABASE_COMPILER_PLANNER_OPT="RBO" +DATABASE_COMPILER_PLANNER_RULES="FilterMatchRule,FilterIntoJoinRule,NotExistToAntiJoinRule" +DATABASE_COMPILER_ENDPOINT_ADDRESS="localhost" +DATABASE_COMPILER_BOLT_PORT="7687" +DATABASE_COMPILER_QUERY_TIMEOUT="20000" + +## hiactor related default configuration +DATABASE_COMPUTE_ENGINE_PORT="10000" +DATABASE_COMPUTE_ENGINE_SHARD_NUM=1 + +## directories +DATABASE_WORKSPACE="/home/graphscope/workspace/" +DATABASE_DATA_DIR_NAME="data" +DATABASE_LOG_DIR_NAME="logs" +DATABASE_CONF_DIR_NAME="conf" + + +################### IMAGE VERSION ################### +GIE_DB_IMAGE_VERSION="v0.0.2" +GIE_DB_IMAGE_NAME="registry.cn-hongkong.aliyuncs.com/graphscope/${DB_PROD_NAME}" +GIE_DB_CONTAINER_NAME="${DB_PROD_NAME}-server" + + +#################### Prepare uncreated directories #################### + +info "Finish create log dir" + +#################### DEFINE FUNCTIONS #################### + +function check_graph_name_valid() { + if [ $# -ne 1 ]; then + err "Expect graph name given." + exit 1 + fi + local graph_name=$1 + # check graph_name can be a valid directory name + if [[ ! "${graph_name}" =~ ^[a-zA-Z0-9_]+$ ]]; then + err "graph name [${graph_name}] is not valid, only [a-zA-Z0-9_] are allowed." 
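Stepping back to the parse_yaml helper above: it flattens nested YAML into shell variable assignments, joining nesting levels with the separator (default `_`), which is what the update_init_config_from_yaml / update_engine_config_from_yaml functions later eval and read back as plain variables. A small sketch of the idea, with hypothetical file contents:

```bash
cat > /tmp/example.yaml <<'EOF'
compiler:
  planner:
    is_on: true
EOF
eval "$(parse_yaml /tmp/example.yaml)"
echo "${compiler_planner_is_on}"   # prints: true
```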
+ exit 1 + fi +} +function check_running_containers_and_exit(){ + # check if there is any running containers + info "Check running containers and exit" + running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}") + if [[ -n "${running_containers}" ]]; then + err "There are running containers: ${running_containers}, please stop them first." + exit 1 + fi + info "finish check" +} + +function check_container_running(){ + if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then + info "container ${GIE_DB_CONTAINER_NAME} is running" + else + info "container ${GIE_DB_CONTAINER_NAME} is not running" + # start the container + docker start "${GIE_DB_CONTAINER_NAME}" + fi +} + +function ensure_container_running(){ + if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then + info "container ${GIE_DB_CONTAINER_NAME} is running" + else + info "container ${GIE_DB_CONTAINER_NAME} is not running" + # start the container + docker start "${GIE_DB_CONTAINER_NAME}" + fi +} + +function check_process_running_in_container(){ + local container_name=$1 + local process_name=$2 + local error_msg=$3 + local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}\') + if [ -z "${process_id}" ]; then + err "process ${process_name} is not running in container ${container_name}" + err "${error_msg}" + exit 1 + fi + info "process ${process_name} is running in container ${container_name}, process id is ${process_id}" +} + +function check_process_not_running_in_container(){ + local container_name=$1 + local process_name=$2 + local error_msg=$3 + local process_id=$(docker top "${container_name}" | grep "${process_name}" | awk '{print $2}\') + if [ -z "${process_id}" ]; then + info "process ${process_name} is not running in container ${container_name}" + else + err "process ${process_name} is running in container ${container_name}, process id is ${process_id}" + err "${error_msg}" + exit 1 + fi +} + +# check the given graph is locked or not. +function check_graph_not_running(){ + info "Check graph whether is not running" + if [ $# -ne 1 ]; then + err "Expect graph name given." + exit 1 + fi + local graph_name=$1 + # check whether .lock is presented in container's data/${graph_name}/ directory + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + local lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock" + info "Check lock file ${lock_file}" + # check lock_file whether exists in container, if not exists, exit 0, else exit 1 + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ ! 
-f ${lock_file} ]" +} + +function update_init_config_from_yaml(){ + if [ $# -ne 1 ]; then + err "Expect configuration file given" + exit 1 + fi + config_file=$1 + eval $(parse_yaml "${config_file}") + # update workspace if exists + if [[ -n "${workspace}" ]]; then + DATABASE_WORKSPACE="${workspace}" + fi + # update database version if exists + if [[ -n "${version}" ]]; then + DATABASE_VERSION="${version}" + fi + # append the found volumes to DATABASE_VOLUMES + # map the HOST_DB_HOME/data/ to ${DATABASE_WORKSPACE}/data + DATABASE_VOLUMES="${DATABASE_VOLUMES},${HOST_DB_HOME}/data:${DATABASE_WORKSPACE}/data" + + x=1 + while true; do + volume_x_key="volume_${x}" + volume_x=$(eval echo "\$${volume_x_key}") + if [ -z "${volume_x}" ]; then + break + fi + DATABASE_VOLUMES="${DATABASE_VOLUMES},${volume_x}" + x=$((x + 1)) + done + # append compiler port and engine port to DATABASE_PORTS + DATABASE_PORTS="${DATABASE_COMPILER_BOLT_PORT}:${DATABASE_COMPILER_BOLT_PORT}" + DATABASE_PORTS="${DATABASE_PORTS},${DATABASE_COMPUTE_ENGINE_PORT}:${DATABASE_COMPUTE_ENGINE_PORT}" +} + +function update_engine_config_from_yaml(){ + if [ $# -ne 1 ]; then + err "Expect configuration file given" + exit 1 + fi + config_file=$1 + eval $(parse_yaml "${config_file}") + if [[ -n "${log_level}" ]]; then + DATABASE_LOG_LEVEL="${log_level}" + fi + # default_graph + if [[ -n "${default_graph}" ]]; then + DATABASE_CURRENT_GRAPH_NAME="${default_graph}" + fi + # compiler + if [[ -n ${compiler_planner_is_on} ]]; then + DATABASE_COMPILER_PLANNER_IS_ON="${compiler_planner_is_on}" + fi + info "Found compiler planner opt: ${compiler_planner_is_on}, ${DATABASE_COMPILER_PLANNER_IS_ON}" + if [[ -n ${compiler_planner_opt} ]]; then + DATABASE_COMPILER_PLANNER_OPT="${compiler_planner_opt}" + fi + # append the founded compiler planner rules to DATABASE_COMPILER_PLANNER_RULES + x=1 + while true; do + compiler_planner_rules_x_key="compiler_planner_rules_${x}" + compiler_planner_rules_x=$(eval echo "\$${compiler_planner_rules_x_key}") + if [ -z "${compiler_planner_rules_x}" ]; then + break + fi + # check compiler_planner_rules_x present in DATABASE_COMPILER_PLANNER_RULES, if not, append + if [[ ! 
"${DATABASE_COMPILER_PLANNER_RULES}" =~ "${compiler_planner_rules_x}" ]]; then + DATABASE_COMPILER_PLANNER_RULES="${DATABASE_COMPILER_PLANNER_RULES},${compiler_planner_rules_x}" + fi + x=$((x + 1)) + done + if [[ -n "${compiler_endpoint_address}" ]]; then + DATABASE_COMPILER_ENDPOINT_ADDRESS="${compiler_endpoint_address}" + fi + if [[ -n "${compiler_endpoint_bolt_connector_port}" ]]; then + DATABASE_COMPILER_BOLT_PORT="${compiler_endpoint_bolt_connector_port}" + fi + if [[ -n "${compiler_query_timeout}" ]]; then + DATABASE_COMPILER_QUERY_TIMEOUT="${compiler_query_timeout}" + fi +} + +function amplify_graph_schema(){ + # two args, input schema_file and output schema_file + if [ $# -ne 2 ]; then + err "Expect two args, but got $#" + exit 1 + fi + input_schema_file=$1 + output_schema_file=$2 + # check input_schema_file exists + check_file_exists "${input_schema_file}" + # rm output_schema_file if exists + if [ -f "${output_schema_file}" ]; then + rm "${output_schema_file}" + fi + # read input_schema_file line by line, and append to output_schema_file + local cur_vertex_label_ind=0 + local cur_edge_label_ind=0 + local cur_prop_ind=0 + # the type id may not be specified in input_schema_file + local type_id_specified=false + local prop_id_specified=false + # if stored_type is not specified in input_schema_file's content, append it to output_schema_file + stored_type_specified=false + label_type="vertex" + IFS='' + while read -r line; do + if [[ "${line}" =~ ^[[:space:]]*# ]]; then + echo "found comment line ${line}" + else + # if line contains 'type_name', then prepend - type_id: 0 before it + if [[ "${line}" =~ vertex_types ]]; then + label_type="vertex" + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ edge_types ]]; then + label_type="edge" + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ type_id ]]; then + type_id_specified=true + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ property_id ]]; then + prop_id_specified=true + echo ${line} >> "${output_schema_file}" + elif [[ "${line}" =~ type_name ]] && [ "${type_id_specified}" == "false" ]; then + if [ "${label_type}" == "edge" ]; then + echo " - type_id: ${cur_edge_label_ind}" >> "${output_schema_file}" + cur_edge_label_ind=$((cur_edge_label_ind + 1)) + else + echo " - type_id: ${cur_vertex_label_ind}" >> "${output_schema_file}" + cur_vertex_label_ind=$((cur_vertex_label_ind + 1)) + fi + cur_prop_ind=0 + type_id_specified=false + echo "${line}" | sed 's/- / /g' >> "${output_schema_file}" + elif [[ "${line}" =~ property_name ]] && [ "${prop_id_specified}" == "false" ]; then + echo " - property_id: ${cur_prop_ind}" >> "${output_schema_file}" + cur_prop_ind=$((cur_prop_ind + 1)) + prop_id_specified=false + # replace the '- ' in ${line} and append to output_schema_file + echo "${line}" | sed 's/- / /g' >> "${output_schema_file}" + elif [[ "${line}" =~ store_type ]]; then + stored_type_specified=true + emph "stored type : ${stored_type_specified}" + echo "${line}" >> "${output_schema_file}" + else + echo "${line}" >> "${output_schema_file}" + fi + fi + done <<< $(cat "${input_schema_file}") + if [ "${stored_type_specified}" == "false" ]; then + emph "stored type : ${stored_type_specified}" + echo "store_type: mutable_csr" >> "${output_schema_file}" + fi + info "Finish amplify schema file ${input_schema_file} to ${output_schema_file}" +} + + +#################### DEFINE USAGE #################### + +function init_usage() { + cat << EOF + gs_interactive init -c [--config] + Init the database, create 
the containers. Specify the database version and volume mounting in the config yaml.
+EOF
+}
+
+function destroy_usage() {
+  cat << EOF
+  gs_interactive destroy
+    Destroy the current database and remove the container.
+EOF
+}
+
+function create_usage() {
+  cat << EOF
+  gs_interactive database create -g [--graph] <graph_name> -c [--config] <schema_file>
+    Create a graph in the database with the provided schema file.
+    The user should then import data into the created graph.
+EOF
+}
+
+function remove_usage() {
+  cat << EOF
+  gs_interactive database remove -g [--graph] <graph_name>
+    Remove the given graph from the database.
+EOF
+}
+
+function import_usage() {
+  cat << EOF
+  gs_interactive database import -g [--graph] <graph_name> -c [--config] <bulk_load_file>
+    Load the raw data specified in the bulk load file into the specified graph.
+EOF
+}
+
+function list_usage() {
+  cat << EOF
+  gs_interactive database list
+    List all graphs in the database.
+EOF
+}
+
+function database_usage(){
+  create_usage
+  remove_usage
+  import_usage
+  list_usage
+}
+
+
+function start_usage() {
+  cat << EOF
+  gs_interactive service start -g [--graph] <graph_name> -c [--config] <engine_config_file>
+    Start the graph service on the specified graph, with the provided engine config file.
+EOF
+}
+
+function stop_usage() {
+  cat << EOF
+  gs_interactive service stop
+    Stop the service running on the current graph.
+EOF
+}
+
+function restart_usage() {
+  cat << EOF
+  gs_interactive service restart -c [--config] <engine_config_file>
+    Restart the service on the currently running graph, optionally with a new engine config file.
+EOF
+}
+
+function get_log_usage() {
+  cat << EOF
+  gs_interactive service get_log -o [--output] <output_directory>
+    Fetch the logs of the server and compiler, and write them to the output directory.
+EOF
+}
+
+function services_usage(){
+  start_usage
+  stop_usage
+  restart_usage
+  get_log_usage
+}
+
+function compile_usage(){
+  cat << EOF
+  gs_interactive procedure compile -g [--graph] <graph_name> -i [--input] <input_file>
+                                   -n [--name] <procedure_name> -d [--description] <description>
+                                   --compile_only
+    Compile a .cypher/.cc file into a dynamic library, according to the schema of the graph.
+    The output library will be placed at ./data/{graph_name}/plugins.
+    If --compile_only is specified, the library will not be loaded into the graph.
+EOF
+}
+
+## The .enable and .disable files control enabling/disabling of stored procedures.
+
+function enable_proc_usage(){
+  cat << EOF
+  gs_interactive procedure enable -g [--graph] <graph_name> -n [--name] <procedure_names>
+                                  -c [--config] <stored_procedures.yaml>
+    Enable the stored procedures in the given graph.
+    stored_procedures.yaml lists one stored procedure name per line.
+EOF
+}
+
+function disable_proc_usage(){
+  cat << EOF
+  gs_interactive procedure disable -g [--graph] <graph_name> -n [--name] <procedure_names>
+                                   -c [--config] <stored_procedures.yaml> -a [--all]
+    Disable the stored procedures in the given graph.
+    stored_procedures.yaml lists one stored procedure name per line.
+EOF
+}
+
+function show_stored_procedure_usage(){
+  cat << EOF
+  gs_interactive procedure show -g [--graph] <graph_name>
+    Show all stored procedures for the given graph.
+EOF
+}
+
+
+function procedure_usage(){
+  compile_usage
+  enable_proc_usage
+  disable_proc_usage
+  show_stored_procedure_usage
+}
+
+# parse the args and dispatch to the corresponding sub-command.
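+# A typical end-to-end session, for reference. The graph name "movies" and
+# the file paths below are hypothetical; substitute your own:
+#
+#   gs_interactive init -c conf/interactive.yaml
+#   gs_interactive database create -g movies -c ./movies/graph.yaml
+#   gs_interactive database import -g movies -c ./movies/bulk_load.yaml
+#   gs_interactive service start -g movies -c conf/engine_config.yaml
+#   gs_interactive procedure compile -g movies -i ./movies/get_name.cypher
+#   gs_interactive service restart
+#   gs_interactive service stop
+#   gs_interactive destroy
+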
+function usage() { + init_usage + destroy_usage + database_usage + services_usage + procedure_usage +} + +################### Generate config file ################### +function do_gen_conf(){ + # receive only one args, the config file + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -o | --output) + output_config_file="$2" + shift + shift + ;; + *) + err "unknown option $1" + exit 1 + ;; + esac + done + + #if output_config_file exists, remove + if [ -f "${output_config_file}" ]; then + rm "${output_config_file}" + fi + + # echo directories + echo "directories:" >> ${output_config_file} + echo " workspace: ${DATABASE_WORKSPACE}" >> ${output_config_file} + echo " subdirs:" >> ${output_config_file} + echo " data: ${DATABASE_DATA_DIR_NAME}" >> ${output_config_file} + echo " logs: ${DATABASE_LOG_DIR_NAME}" >> ${output_config_file} + echo " conf: ${DATABASE_CONF_DIR_NAME}" >> ${output_config_file} + + # log level + echo "log_level: ${DATABASE_LOG_LEVEL}" >> ${output_config_file} + + # current graph + echo "default_graph: ${DATABASE_CURRENT_GRAPH_NAME}" >> ${output_config_file} + + + #compute_engine + echo "compute_engine:" >> ${output_config_file} + echo " type: hiactor" >> ${output_config_file} + echo " hosts:" >> ${output_config_file} + echo " - localhost:${DATABASE_COMPUTE_ENGINE_PORT}" >> ${output_config_file} + echo " shard_num: ${DATABASE_COMPUTE_ENGINE_SHARD_NUM}" >> ${output_config_file} + + + #compiler + echo "compiler:" >> ${output_config_file} + echo " planner:" >> ${output_config_file} + echo " is_on: ${DATABASE_COMPILER_PLANNER_IS_ON}" >> ${output_config_file} + echo " opt: ${DATABASE_COMPILER_PLANNER_OPT}" >> ${output_config_file} + # split compiler planner rules and put as sequences in yaml + echo " rules:" >> ${output_config_file} + IFS=',' read -ra RULES_ARRAY <<<"${DATABASE_COMPILER_PLANNER_RULES}" + for rule in "${RULES_ARRAY[@]}"; do + echo " - ${rule}" >> ${output_config_file} + done + echo " endpoint:" >> ${output_config_file} + echo " default_listen_address: ${DATABASE_COMPILER_ENDPOINT_ADDRESS}" >> ${output_config_file} + echo " bolt_connector:" >> ${output_config_file} + echo " port: ${DATABASE_COMPILER_BOLT_PORT}" >> ${output_config_file} + echo " gremlin_connector:" >> ${output_config_file} + echo " disabled: true" >> ${output_config_file} + echo " port: 8182" >> ${output_config_file} + echo " query_timeout: ${DATABASE_COMPILER_QUERY_TIMEOUT}" >> ${output_config_file} + info "Finish generate config file ${output_config_file}" +} + +function generate_real_engine_conf(){ + # expect less than three args + if [ $# -gt 2 ] || [ $# -eq 0 ]; then + err "Expect one or two args, but got $#" + exit 1 + fi + if [ $# -eq 1 ]; then + real_engine_config_file=$1 + info "engine config file is not specified, using default engine config" + do_gen_conf -o ${real_engine_config_file} + else + engine_config_file=$1 + real_engine_config_file=$2 + check_file_exists "${engine_config_file}" + update_engine_config_from_yaml "${engine_config_file}" + do_gen_conf -o ${real_engine_config_file} + fi +} + +function update_graph_yaml_with_procedure_enabling(){ + # expect one args, graph name + if [ $# -ne 1 ]; then + err "Expect one args, but got $#" + exit 1 + fi + graph_name=$1 + + # gather .enable and append to graph yaml + + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE} + fi + # copy graph_name's graph0.yaml to ${HOST_DB_TMP_DIR} + docker cp "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/graph0.yaml" "${HOST_DB_TMP_DIR}/graph0.yaml" + echo "" >> ${HOST_DB_TMP_DIR}/graph0.yaml + echo "stored_procedures:" >> ${HOST_DB_TMP_DIR}/graph0.yaml + echo " enable_lists:" >> ${HOST_DB_TMP_DIR}/graph0.yaml + # copy graph_name's .enable file to ${HOST_DB_TMP_DIR} + docker cp "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/plugins/.enable" "${HOST_DB_TMP_DIR}/.enable" || true + # get .enable and .disable file + local enable_file="${HOST_DB_TMP_DIR}/.enable" + any_stored_procedures=false + # check enable_file and disable_file exists + if [ ! -f "${enable_file}" ]; then + info "enable file ${enable_file} not exists, all stored procedure are enabled" + else + # cat all lines in enable_file and split by '\n' + while IFS= read -r line; do + echo "Found enable procedure: $line" + echo " - ${line}" >> ${HOST_DB_TMP_DIR}/graph0.yaml + any_stored_procedures=true + done < ${enable_file} + fi + if [ "${any_stored_procedures}" = false ]; then + echo " directory: not-a-directory" >> ${HOST_DB_TMP_DIR}/graph0.yaml + else + echo " directory: plugins" >> ${HOST_DB_TMP_DIR}/graph0.yaml + fi + + # copy graph0.yaml to container + docker cp "${HOST_DB_TMP_DIR}/graph0.yaml" "${GIE_DB_CONTAINER_NAME}:${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml" + info "Finish update graph yaml with procedure enabling, add stored proc.edures of size ${#ENABLED_ARRAY[@]}." +} + + +function check_database_initialized(){ + # check if the container is running + running_containers=$(docker ps -a --format "{{.Names}}" | grep "${GIE_DB_CONTAINER_NAME}") + if [[ -z "${running_containers}" ]]; then + err "Database is not initialized, please run init command first." + exit 1 + fi +} + +#################### Init database #################### +# Init the current data base. +# create a user with same user id in container +function do_init(){ + # check running containers and exit + check_running_containers_and_exit + info "Ok, no running instance found, start init database..." + # if no containers running, procede to init + +# check args num 1, and get the first args as CONFIG_FILE + if [ $# -eq 0 ]; then + err "init command need 1 args, but got $#" + init_usage + exit 1 + fi + + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -c | --config) + config_file="$2" + shift # past argument + shift + ;; + *) + err "unknown option $1" + init_usage + exit 1 + ;; + esac + done + + check_file_exists "${config_file}" + + # parse yaml config + # eval $(parse_yaml "${config_file}") + + # Parse the configuration presented in yaml, and override the default values. + update_init_config_from_yaml "${config_file}" + + #0. Found workspace + info "Found docker db home: ${DATABASE_WORKSPACE}" + # put docker_workspace into env + echo "export DATABASE_WORKSPACE=${DATABASE_WORKSPACE}" >> ${HOST_DB_ENV_FILE} + echo "export DATABASE_DATA_DIR_NAME=${DATABASE_DATA_DIR_NAME}" >> ${HOST_DB_ENV_FILE} + info "Found databse version: ${DATABASE_VERSION}" + + #2. 
Found mounting volumes from yaml file + mount_cmd="" + # split DATABASE_VOLUMES and append to mount_cmd + IFS=',' read -ra VOLUME_ARRAY <<<"${DATABASE_VOLUMES}" + for volume in "${VOLUME_ARRAY[@]}"; do + # split with : and check host path exists + volume_value_array=(${volume//:/ }) + # if volume_value_array length is not 2, error + if [ ${#volume_value_array[@]} -ne 2 ]; then + err "volume ${volume_value_array} is not valid, should be :" + exit 1 + fi + # get host_path + host_path=${volume_value_array[0]} + docker_path=${volume_value_array[1]} + # check host_path exists + info "Found host path: ${host_path}" + check_directory_exists "${host_path}" || (err "host path ${host_path} not exists" && exit 1) + mount_cmd="${mount_cmd} -v ${volume}" + done +# mount_cmd="${mount_cmd} -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro" + + info "Found docker volumes: ${mount_cmd}" + + #3. get mapped port + port_cmd="" + # split the DATABASE_PORTS and append to port_cmd + IFS=',' read -ra DATABASE_PORTS_ARRAY <<<"${DATABASE_PORTS}" + for ports in "${DATABASE_PORTS_ARRAY[@]}"; do + port_x_value_array=(${ports//:/ }) + # if volume_x_value_array length is not 2, error + if [ ${#port_x_value_array[@]} -ne 2 ]; then + err "port ${port_x_value_array} is not valid, should be :" + exit 1 + fi + # get host_path + host_port=${port_x_value_array[0]} + docker_port=${port_x_value_array[1]} + #check port are int + if ! [[ "${host_port}" =~ ^[0-9]+$ ]]; then + err "host port ${host_port} is not valid" + exit 1 + fi + if ! [[ "${docker_port}" =~ ^[0-9]+$ ]]; then + err "docker port ${docker_port} is not valid" + exit 1 + fi + port_cmd="${port_cmd} -p ${host_port}:${docker_port}" + done + info "Found docker port: ${port_cmd}" + + # get uid + local uid=$(id -u) + local gid=$(id -g) + # get group name + local group_name=$(id -gn) + # get username + local username=$(id -un) + + GIE_DB_IMAGE_NAME_TAG="${GIE_DB_IMAGE_NAME}:${DATABASE_VERSION}" + cmd="docker run -it -d --privileged --name ${GIE_DB_CONTAINER_NAME}" + # create user in container + cmd="${cmd} ${port_cmd} ${mount_cmd} ${GIE_DB_IMAGE_NAME_TAG} bash" + + info "Running cmd: ${cmd}" + eval ${cmd} || docker rm "${GIE_DB_CONTAINER_NAME}" + + info "Finish init database" + + # create the workspace directory in container + docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}" || exit 1 + docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "sudo chown -R graphscope:graphscope ${DATABASE_WORKSPACE}" || exit 1 + docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}/logs" || exit 1 + docker exec -u graphscope "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${DATABASE_WORKSPACE}/conf" || exit 1 + + # create default_graph + do_create -g ${DATABASE_DEFAULT_GRAPH_NAME} -c ${HOST_DB_HOME}/examples/modern_graph/modern_graph.yaml + # check whether do_import success + if ! 
do_import -g ${DATABASE_DEFAULT_GRAPH_NAME} -c ${HOST_DB_HOME}/examples/modern_graph/bulk_load.yaml; then
+    err "Failed to import default graph"
+    exit 1
+  fi
+  info "Successfully created and imported default graph: [${DATABASE_DEFAULT_GRAPH_NAME}]"
+}
+
+
+#################### Create graph ####################
+function do_create(){
+  while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+    -g | --graph)
+      graph_name="$2"
+      shift # past argument
+      shift
+      ;;
+    -c | --config)
+      schema_file="$2"
+      shift
+      shift
+      ;;
+    *)
+      err "unknown option $1"
+      create_usage
+      exit 1
+      ;;
+    esac
+  done
+  # check graph_name is set
+  if [ -z "${graph_name}" ]; then
+    err "graph name is not specified"
+    create_usage
+    exit 1
+  fi
+
+  if [ -z "${schema_file}" ]; then
+    err "graph schema is not specified, need graph_schema.yaml given by -c option"
+    create_usage
+    exit 1
+  fi
+
+  check_graph_name_valid "${graph_name}"
+  if [ -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+    err "graph [${graph_name}] has already been created."
+    exit 1
+  fi
+  check_file_exists "${schema_file}"
+  amplify_schema_file="${HOST_DB_TMP_DIR}/graph0.yaml"
+  # add some default, non-user-facing settings to the schema file.
+  amplify_graph_schema ${schema_file} ${amplify_schema_file}
+  # check whether the graph is running inside docker
+  check_graph_not_running ${graph_name} || (err "Can not create graph ${graph_name}, since a graph with the same name is running." && exit 1)
+  # create the graph directory in the docker's workspace
+  if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+    . ${HOST_DB_ENV_FILE}
+  fi
+  docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+  docker_graph_schema_file="${docker_graph_dir}/graph.yaml"
+  docker_graph_schema_file_back="${docker_graph_dir}/graph0.yaml" # used for later adding/removing stored procedures
+  # check whether docker_graph_schema_file exists in the container; if it exists, tell the user to remove it first
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_schema_file} ] && echo -e \"${RED} Graph ${graph_name} already exists, please remove it first. ${NC}\" && exit 1 || exit 0" || exit 1
+  # create the graph directory in the docker's workspace
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${docker_graph_dir}" || exit 1
+  # create plugins dir
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "mkdir -p ${docker_graph_dir}/plugins" || exit 1
+  # copy the schema file to the docker's workspace
+  docker cp "${amplify_schema_file}" "${GIE_DB_CONTAINER_NAME}:${docker_graph_schema_file}" || exit 1
+  docker cp "${amplify_schema_file}" "${GIE_DB_CONTAINER_NAME}:${docker_graph_schema_file_back}" || exit 1
+  info "Successfully created graph [${graph_name}]"
+  #TODO: support creating an empty graph
+}
+
+#################### Remove graph ####################
+function do_remove(){
+  while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+    -g | --graph)
+      graph_name="$2"
+      shift # past argument
+      shift
+      ;;
+    *)
+      err "unknown option $1"
+      remove_usage
+      exit 1
+      ;;
+    esac
+  done
+  # check graph_name is set
+  if [ -z "${graph_name}" ]; then
+    err "graph name is not specified"
+    remove_usage
+    exit 1
+  fi
+  check_graph_name_valid "${graph_name}"
+  # check if the graph is created
+  if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+    err "graph [${graph_name}] can not be removed, since it is not created."
+    exit 1
+  fi
+  # check whether the graph is running inside docker
+  if ! check_graph_not_running ${graph_name}; then
+    err "Can not remove graph ${graph_name}, since a graph with the same name is running!"
+    exit 1;
+  fi
+  if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+    . ${HOST_DB_ENV_FILE}
+  fi
+  docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}"
+  ### let the user confirm removing the graph
+  read -p "Are you sure you want to remove graph [${graph_name}]? [y/n] " -n 1 -r
+  echo
+  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+    info "Abort removing graph [${graph_name}]"
+    exit 1
+  fi
+
+  # rm -rf the graph directory in the docker's workspace
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "rm -rf ${docker_graph_dir}" || ( echo "Failed to delete graph ${graph_name}" && exit 1)
+  info "Successfully removed graph [${graph_name}]"
+  # if the removed graph is the current graph, reset current graph to the default graph
+  if [ "${DATABASE_CURRENT_GRAPH_NAME}" = "${graph_name}" ]; then
+    info "Removed current graph [${graph_name}], set current graph to default_graph: ${DATABASE_DEFAULT_GRAPH_NAME}"
+    echo "export DATABASE_CURRENT_GRAPH_NAME=${DATABASE_DEFAULT_GRAPH_NAME}" >> ${HOST_DB_ENV_FILE}
+  fi
+  # if the removed graph is recorded in .running, remove the file
+  if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+    local running_graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2)
+    if [ "${running_graph_name}" = "${graph_name}" ]; then
+      rm ${HOST_DB_RUNNING_FILE}
+    fi
+  fi
+}
+
+#################### Import ####################
+function do_import(){
+  while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+    -g | --graph)
+      graph_name="$2"
+      shift # past argument
+      shift
+      ;;
+    -c | --config)
+      bulk_load_file="$2"
+      shift
+      shift
+      ;;
+    *)
+      err "unknown option $1"
+      import_usage
+      exit 1
+      ;;
+    esac
+  done
+  # check graph_name is set
+  if [ -z "${graph_name}" ]; then
+    err "graph name is not specified"
+    import_usage
+    exit 1
+  fi
+  check_graph_name_valid "${graph_name}"
+  if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then
+    err "graph [${graph_name}] can not be found, please create it first"
+    exit 1
+  fi
+  info "Import data to graph [${graph_name}] from ${bulk_load_file}"
+  # check if the container is running
+  check_container_running
+  # check if the bulk_load_file exists
+  check_file_exists "${bulk_load_file}"
+  info "bulk_load_file ${bulk_load_file} exists"
+
+  check_graph_not_running ${graph_name} || (err "Can not import data to graph [${graph_name}], since it is already running." && exit 1)
+  if [ -f "${HOST_DB_RUNNING_FILE}" ]; then
+    . ${HOST_DB_ENV_FILE}
+  fi
+  # check graph_schema_file exists in container; if not, let the user create the graph first
+  docker_graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml"
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_schema_file} ] || (echo -e \"${RED}graph [${graph_name}] does not exist, please create it first.${NC}\" && exit 1)"
+  info "Graph Schema exists"
+  # copy the bulk_load_file to the container
+  bulk_load_file_name=$(basename "${bulk_load_file}")
+  docker_bulk_load_file="/tmp/${bulk_load_file_name}"
+  docker cp "${bulk_load_file}" "${GIE_DB_CONTAINER_NAME}:${docker_bulk_load_file}"
+
+  docker_graph_data_dir="${DATABASE_WORKSPACE}/data/${graph_name}/indices"
+  # currently we can only overwrite the indices, so if they exist, remove them first
+  docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_data_dir} ] && rm -rf ${docker_graph_data_dir} || exit 0" || exit 1
+
+  cmd="${DOCKER_DB_GRAPH_IMPORT_BIN} ${docker_graph_schema_file} ${docker_bulk_load_file} ${docker_graph_data_dir}"
+  info "Running cmd: ${cmd}"
+  # docker exec the cmd, fail if the cmd fails
+  if !
docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "${cmd} ||(echo -e \"${RED} Fail to import graph [${graph_name}]${NC}\" && exit 1 )"; then + err "Fail to import graph [${graph_name}]" + exit 1 + fi + info "Successfuly import data to graph [${graph_name}]" +} + +#################### List Graph #################### +function do_list() { + # check if the container is running + check_container_running + # get all graph names + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + host_data_dir="${HOST_DB_HOME}/${DATABASE_DATA_DIR_NAME}" + # get all graph names into a array + graph_names=($(ls ${host_data_dir})) + # if graph_names is empty, no graph exists + if [ ${#graph_names[@]} -eq 0 ]; then + info "No graph has been created." + exit 0 + fi + # print all graph names + for graph_name in "${graph_names[@]}"; do + echo "Found graph: [${graph_name}]" + done + +} + +#################### Destroy #################### +function do_destroy() { + + # if container not exists, exit + if ! docker ps -a --format '{{.Names}}' | grep -Eq "^${GIE_DB_CONTAINER_NAME}$"; then + info "Database has not been created, exit" + exit 0 + fi + + # let user confirm to destroy the database + read -p "Are you sure to destroy the database? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort destroying database" + exit 1 + fi + + info "Destroying database..." + docker stop "${GIE_DB_CONTAINER_NAME}" + docker rm "${GIE_DB_CONTAINER_NAME}" + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + # rm host data/* + sudo rm -rf ${HOST_DB_HOME}/data/* + + #rm .running + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + rm ${HOST_DB_RUNNING_FILE} + fi + if [ -f "${HOST_DB_ENV_FILE}" ]; then + rm ${HOST_DB_ENV_FILE} + fi + + info "Finish destroy database" +} + +#################### Start database #################### +function do_start(){ + check_database_initialized + if [ -f "${HOST_DB_ENV_FILE}" ];then + . ${HOST_DB_ENV_FILE} + fi + info "Starting database..." 
+ + # check whether the .running file exists, if exists, exit + local compiler_process_id=$(docker top "${GIE_DB_CONTAINER_NAME}" | grep "${DOCKER_DB_COMPILER_BIN}" | awk '{print $2}\') + local server_process_id=$(docker top "${GIE_DB_CONTAINER_NAME}" | grep "${DOCKER_DB_SERVER_BIN}" | awk '{print $2}\') + # if both process are running, exit + # if only one process is running, kill the process + if [ -n "${compiler_process_id}" ] && [ -n "${server_process_id}" ]; then + local old_graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) + err "Database is already running on [${old_graph_name}], please stop it first" + exit 1 + fi + if [ -n "${compiler_process_id}" ]; then + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "kill -9 ${compiler_process_id}" + fi + if [ -n "${server_process_id}" ]; then + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "kill -9 ${server_process_id}" + fi + + + # set trap to do_stop + trap do_stop SIGINT SIGTERM + + graph_name="" + engine_config_file="" + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + shift # past argument + shift + ;; + -c | --config) + engine_config_file="$2" + shift + shift + ;; + *) + err "unknown option $1" + start_usage + exit 1 + ;; + esac + done + # try parse default_graph from engine_config_file + # generate real engine config file, put it at /tmp/real_engine_config.yaml + if [ -z "${graph_name}" ]; then + graph_name=${DATABASE_CURRENT_GRAPH_NAME} + else + DATABASE_CURRENT_GRAPH_NAME=${graph_name} + info "Using user specified graph [${graph_name}]" + fi + check_graph_name_valid "${graph_name}" + if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it and import data first." + exit 1 + fi + + real_engine_config_file="/tmp/real_engine_config.yaml" + if [ -z "${engine_config_file}" ]; then + generate_real_engine_conf "${real_engine_config_file}" + else + generate_real_engine_conf "${engine_config_file}" "${real_engine_config_file}" + fi + + # copy engine config file to container + dst_engine_config_file="${DATABASE_WORKSPACE}/conf/engine_config.yaml" + docker cp "${real_engine_config_file}" "${GIE_DB_CONTAINER_NAME}:${dst_engine_config_file}" || (echo "fail to copy $engine_config_file to container" && exit 1) + + + # check if modern_graph exists in container, get the result as bool + docker_graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml" + wal_file="${DATABASE_WORKSPACE}/data/${graph_name}/indices/init_snapshot.bin" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "( [ -f ${docker_graph_schema_file} ] && echo \"true\" e) || echo \"false\"" > /tmp/graph_exists + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "( [ -f ${wal_file} ] && echo \"true\" e) || echo \"false\"" > /tmp/graph_loaded + graph_exists=$(cat /tmp/graph_exists) + graph_loaded=$(cat /tmp/graph_loaded) + if [ "${graph_exists}" = "false" ]; then + # if graph_name is default_graph, we should create it first + # otherwise, we should tell user to create it first + if [ "${graph_name}" != "${DATABASE_DEFAULT_GRAPH_NAME}" ]; then + err "graph [${graph_name}] not exists, please create it first" + exit 1 + fi + fi + + if [ "${graph_loaded}" = "false" ]; then + err "graph [${graph_name}] is empty, please import data first" + exit 1 + fi + + info "graph [${graph_name}] exists, start it" + + do_stop + ensure_container_running + # regenerate graph.yaml from graph0.yaml and override graph.yaml with stored procedure enable and disable + 
update_graph_yaml_with_procedure_enabling ${graph_name} + + # the bulk_load_file shoud place inside ${DATABASE_WORKSPACE}. and should use relative path + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + info "In start datebase, received graph_name = ${graph_name}, engine_config_file = ${engine_config_file}" + docker_server_log_path="${DATABASE_WORKSPACE}/logs/server.log" + graph_schema_file="${DATABASE_WORKSPACE}/data/${graph_name}/graph.yaml" + csr_data_dir="${DATABASE_WORKSPACE}/data/${graph_name}/indices" + cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \"" + cmd="${cmd} ${DOCKER_DB_SERVER_BIN} -c ${dst_engine_config_file}" + cmd="${cmd} -g ${graph_schema_file} --data-path ${csr_data_dir}" + cmd="${cmd} --gie-home ${DOCKER_DB_GIE_HOME}" + cmd="${cmd} > ${docker_server_log_path} 2>&1 & \"" + echo "Running cmd: ${cmd}" + # eval command, if fails exist + eval ${cmd} || (echo "Fail to launch hqps server" && exit 1) + sleep 4 + # check whether the process is running + check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} ", use gs_interactive service get_log -o [dir] to see get logs" + info "Successfuly start server" + + # start compiler + docker_compiler_log_path="${DATABASE_WORKSPACE}/logs/compiler.log" + cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c \"" + cmd=${cmd}"java -cp \"${DOCKER_DB_GIE_HOME}/compiler/target/libs/*:${DOCKER_DB_GIE_HOME}/compiler/target/compiler-0.0.1-SNAPSHOT.jar\" " + cmd=${cmd}" -Djna.library.path=${DOCKER_DB_GIE_HOME}/executor/ir/target/release" + cmd=${cmd}" -Dgraph.schema=${graph_schema_file}" + # should error be reported? + # cmd=${cmd}" -Dgraph.stored.procedures.uri=file:${docker_graph_plugin_dir}" + cmd=${cmd}" ${DOCKER_DB_COMPILER_BIN} ${dst_engine_config_file} > ${docker_compiler_log_path} 2>&1 &" + cmd=${cmd}"\"" + info "Running cmd: ${cmd}" + eval ${cmd} + sleep 6 + check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} ", use gs_interactive service get_log -o [dir] to see more details" + info "Successfuly start compiler" + # get cypher port from engine config file + # bolt_connector_port=$(parse_yaml "${engine_config_file}" | grep "compiler_endpoint_bolt_connector_port" | awk -F "=" '{print $2}') + emph "DataBase service is running, port is open on :${DATABASE_COMPILER_BOLT_PORT}" + + # if do_start success, we should write current args to ${HOST_DB_RUNNING_FILE} + echo "GRAPH_NAME=${graph_name}" > ${HOST_DB_RUNNING_FILE} + echo "ENGINE_CONFIG_FILE=${engine_config_file}" >> ${HOST_DB_RUNNING_FILE} + # create .lock file + docker_graph_lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "touch ${docker_graph_lock_file}" || exit 1 +} + + +#################### Stop database #################### +function do_stop(){ + # if container is not running, do nothing + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE} + else + info "No running database found, do nothing" + fi + # get graph_name from ${HOST_DB_RUNNING_FILE} + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + local graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) + docker_graph_lock_file="${DATABASE_WORKSPACE}/data/${graph_name}/.lock" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "rm -f ${docker_graph_lock_file}" || exit 1 + info "Successfuly remove ${docker_graph_lock_file} file" + else + info "No graph is running" + fi + + # stop the SERVER_BIN process and graph_server process + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "pkill -f ${DOCKER_DB_SERVER_BIN}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "pkill -f ${DOCKER_DB_COMPILER_BIN}" + sleep 6 + info "Successfuly stop database" +} + + +#################### Get database status #################### +function do_status() { + if [ "$(docker inspect -f '{{.State.Running}}' "${GIE_DB_CONTAINER_NAME}")" = "true" ]; then + info "container ${GIE_DB_CONTAINER_NAME} is running" + else + info "container ${GIE_DB_CONTAINER_NAME} is not running" + info "Please start database first" + fi + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + # the container is running but the process is not running + check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_SERVER_BIN} "The service is stopped or down. Use gs_interactive service get_log -o [dir] to see more details" + check_process_running_in_container ${GIE_DB_CONTAINER_NAME} ${DOCKER_DB_COMPILER_BIN} "The service is stopped or down. Use gs_interactive service get_log -o [dir] to see more details" + # get cypher port from engine config file in container + + docker_engine_config_file="${DATABASE_WORKSPACE}/conf/engine_config.yaml" + # copy the engine config file to host's tmp directory + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_engine_config_file}" "${HOST_DB_TMP_DIR}/engine_config.yaml" || exit 1 + eval $(parse_yaml "${HOST_DB_TMP_DIR}/engine_config.yaml") + emph "Database service is running, port is open on :${compiler_endpoint_bolt_connector_port}" + # print current running graph name + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + local graph_name=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) + emph "Current running graph is [${graph_name}]" + else + err "No graph is running" + fi +} + + + +#################### Restart #################### +function do_restart() { + # read args from cached file. + # get num lines in file ${HOST_DB_RUNNING_FILE} + num_lines=$(wc -l < ${HOST_DB_RUNNING_FILE}) + if [ ${num_lines} -ne 2 ]; then + err "Error: ${HOST_DB_RUNNING_FILE} should have 2 lines, but got ${num_lines}, something wrong with the file ${HOST_DB_RUNNING_FILE}" + exit 1 + fi + # read args from file + GRAPH_NAME=$(sed -n '1p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) + ENGINE_CONFIG_FILE=$(sed -n '2p' ${HOST_DB_RUNNING_FILE} | cut -d '=' -f 2) + # parse current args, override the args from file + info "Restarting database..." + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + GRAPH_NAME="$2" + shift # past argument + shift + ;; + -c | --config) + ENGINE_CONFIG_FILE="$2" + shift + shift + ;; + *) + err "unknown option $1" + restart_usage + exit 1 + ;; + esac + done + do_stop + info "Successfuly stop database" + do_start -g ${GRAPH_NAME} -c ${ENGINE_CONFIG_FILE} + info "Finish restarting database..." +} + +#################### Get log #################### +function do_log(){ + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE} + fi + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -o | --output) + directory="$2" + shift # past argument + shift + ;; + *) + err "unknown option $1" + get_log_usage + exit 1 + ;; + esac + done + # check directory is set + if [ -z "${directory}" ]; then + err "output directory is not specified" + get_log_usage + exit 1 + fi + # get log directory in container + docker_log_dir="${DATABASE_WORKSPACE}/logs" + # copy ${docker_log_dir}/compiler.log and ${docker_log_dir}/server.log to ${directory} + docker_compiler_log="${docker_log_dir}/compiler.log" + docker_server_log="${docker_log_dir}/server.log" + # docker cp + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_compiler_log}" "${directory}/compiler.log" || exit 1 + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_server_log}" "${directory}/server.log" || exit 1 + info "Successfuly get log to ${directory}, please check compiler.log and server.log" +} + +# the compiled dynamic libs will be placed at data/${graph_name}/plugins/ +# after compilation, the user need to write the cooresponding yaml, telling the compiler about +# the input and output of the stored procedure +function do_compile() { + ensure_container_running + if [ $# -lt 4 ]; then + err "compile stored_procedure command at least 4 args, but got $#" + compile_usage + exit 1 + fi + compile_only=false + + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + info "graph_name = ${graph_name}" + shift # past argument + shift + ;; + -i | --input) + file_path="$2" + shift # past argument + shift + ;; + -n | --name) + stored_procedure_name="$2" + shift + shift + ;; + -d | --description) + stored_procedure_desc="$2" + shift + shift + ;; + --compile_only) + compile_only=true + shift + shift + ;; + *) + err "unknown option $1" + compile_usage + exit 1 + ;; + esac + done + + # check graph_name + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort compile stored_procedure" + compile_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + # check graph exists + if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it first" + exit 1 + fi + + # check file_path + check_file_exists "${file_path}" + # get real file_path + file_name=$(basename "${file_path}") + # get file_name and assign to stored_procedure_name if stored_procedure_name is not set + if [ -z "${stored_procedure_name}" ]; then + stored_procedure_name="${file_name%.*}" + fi + real_file_path=$(realpath "${file_path}") + # check exists + if [ ! -f "${real_file_path}" ]; then + err "file ${real_file_path} not exist" + exit 1 + fi + + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE} + fi + + real_engine_config_file="/tmp/real_engine_config.yaml" + # update default graph name + DATABASE_CURRENT_GRAPH_NAME=${graph_name} + generate_real_engine_conf "${real_engine_config_file}" + # copy to container + docker_engine_config="${DATABASE_WORKSPACE}/conf/engine_config.yaml" + docker cp "${real_engine_config_file}" "${GIE_DB_CONTAINER_NAME}:${docker_engine_config}" || exit 1 + + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker_graph_schema="${docker_graph_dir}/graph.yaml" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED} Graph [${graph_name}] not exists, please create it first.${NC}\" && exit 1)" + + container_output_dir="${DATABASE_WORKSPACE}/data/${graph_name}/plugins" + cotainer_input_path="/tmp/${file_name}" + # docker cp file to container + cmd="docker cp ${real_file_path} ${GIE_DB_CONTAINER_NAME}:${cotainer_input_path}" + eval ${cmd} || exit 1 + + cmd="docker exec ${GIE_DB_CONTAINER_NAME} bash -c '" + cmd=${cmd}" ${DOCKER_DB_GEN_BIN}" + cmd=${cmd}" --engine_type=hqps" + cmd=${cmd}" --input=${cotainer_input_path}" + cmd=${cmd}" --work_dir=/tmp/codegen/" + cmd=${cmd}" --ir_conf=${docker_engine_config}" + cmd=${cmd}" --graph_schema_path=${docker_graph_schema}" + cmd=${cmd}" --gie_home=${DOCKER_DB_GIE_HOME}" + cmd=${cmd}" --output_dir=${container_output_dir}" + cmd=${cmd}" --procedure_name=${stored_procedure_name}" + if [ ! -z "${stored_procedure_desc}" ]; then + cmd=${cmd}" --procedure_desc=\"${stored_procedure_desc}\"" + fi + cmd=${cmd}" '" + + echo "Running cmd: ${cmd}" + eval ${cmd} || exit 1 + # check output exists + # remove the suffix of file_name + output_file="${HOST_DB_HOME}/data/${graph_name}/plugins/lib${stored_procedure_name}.so" + + if [ ! -f "${output_file}" ]; then + err "output file ${output_file} not exist, compilation failed" + exit 1 + fi + info "success generate dynamic lib ${output_file}." + + # if not compile_only, we should add the stored_procedure_name to .enable + docker_graph_enable_file="${docker_graph_dir}/plugins/.enable" + # copy container to host + rm -f /tmp/.enable + # if docker_graph_enable_file exists. copy it to host + docker exec "${GIE_DB_CONTAINER_NAME}" test -e "${docker_graph_enable_file}" && (docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable") + + if [ ! -f "/tmp/.enable" ]; then + touch "/tmp/.enable" + fi + # if compile_only equal to false + if [ "${compile_only}" = false ]; then + echo "${stored_procedure_name}" >> /tmp/.enable + fi + # copy back + docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1 +} + +function do_enable(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + shift # past argument + shift + ;; + -n | --name) + stored_procedure_names="$2" + shift + shift + ;; + -c | --config) + stored_procedure_names_yaml="$2" + shift + shift + ;; + *) + err "unknown option $1" + enable_proc_usage + exit 1 + ;; + esac + done + # check graph_name not empty + # check graph_name + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then + info "Abort compile stored_procedure" + enable_proc_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it first" + exit 1 + fi + + # --name and --config can not be set at the same time + if [ ! -z "${stored_procedure_names}" ] && [ ! -z "${stored_procedure_names_yaml}" ]; then + err "--name and --config can not be set at the same time" + enable_proc_usage + exit 1 + fi + # use stored_procedures_names_yaml if it is set + if [ ! -z "${stored_procedure_names_yaml}" ]; then + check_file_exists "${stored_procedure_names_yaml}" + # cat the file and get each line as a stored_procedure_name, join them with ',' + stored_procedure_names=$(< ${stored_procedure_names_yaml} tr '\n' ',' | sed 's/,$//') + fi + info "stored_procedure_names = ${stored_procedure_names}" + # add the names to .enable file for graph_name + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED} Graph ${graph_name} not exists, please create it first.${NC}\" && exit 1)" + docker_graph_plugin_dir="${docker_graph_dir}/plugins" + docker_graph_enable_file="${docker_graph_plugin_dir}/.enable" + rm -f /tmp/.enable + # copy the .enable file to host, and append the stored_procedure_names to it; if the stored_procedure_names already exists, do nothing + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || true + if [ ! -f "/tmp/.enable" ]; then + touch "/tmp/.enable" + fi + old_line_num=$(wc -l < /tmp/.enable) + # split the stored_procedure_names by ',' and append them to .enable file + IFS=',' read -ra stored_procedure_names_array <<< "${stored_procedure_names}" + for stored_procedure_name in "${stored_procedure_names_array[@]}"; do + # check if the stored_procedure_name already exists in .enable file + if grep -q "${stored_procedure_name}" "/tmp/.enable"; then + info "stored_procedure_name ${stored_procedure_name} already exists in .enable file, skip" + else + echo "${stored_procedure_name}" >> /tmp/.enable + fi + done + # copy the .enable file back to container + docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1 + new_line_num=$(wc -l < /tmp/.enable) + info "Successfuly enable stored_procedures ${stored_procedure_names} for graph [${graph_name}], ${old_line_num} -> ${new_line_num}" +} + +function do_disable(){ + disable_all=false + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + shift # past argument + shift + ;; + -n | --name) + stored_procedure_names="$2" + shift + shift + ;; + -c | --config) + stored_procedure_names_yaml="$2" + shift + shift + ;; + -a | --all) + disable_all=true + shift + ;; + *) + err "unknown option $1" + disable_proc_usage + exit 1 + ;; + esac + done + + # --name and --config can not be set at the same time + if [ ! -z "${stored_procedure_names}" ] && [ ! -z "${stored_procedure_names_yaml}" ] && [ "${disable_all}" = true ]; then + err "--name, --config and --all can not be set at the same time" + disable_proc_usage + exit 1 + fi + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . 
${HOST_DB_ENV_FILE} + fi + # check graph_name not empty + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort compile stored_procedure" + disable_proc_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + if [ ! -d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it first" + exit 1 + fi + + info "graph_name = ${graph_name}" + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo \"graph ${graph_name} not exists, please create it first\" && exit 1)" + docker_graph_plugin_dir="${docker_graph_dir}/plugins" + docker_graph_enable_file="${docker_graph_plugin_dir}/.enable" + echo "disable_all = ${disable_all}" + if [ "${disable_all}" = true ]; then + # clear the .enable file + info "disable all stored_procedures for graph [${graph_name}]" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "(rm \"\" > ${docker_graph_enable_file}) || exit 0" || exit 1 + info "Successfuly disable all stored_procedures for graph [${graph_name}]" + exit 0 + fi + + # use stored_procedures_names_yaml if it is set + if [ ! -z "${stored_procedure_names_yaml}" ]; then + check_file_exists "${stored_procedure_names_yaml}" + # cat the file and get each line as a stored_procedure_name, join them with ',' + stored_procedure_names=$(< ${stored_procedure_names_yaml} tr '\n' ',' | sed 's/,$//') + fi + info "stored_procedure_names = ${stored_procedure_names}" + # add the names to .enable file for graph_name + + # copy the .enable file to host, and remove the stored_procedure_names from it + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || exit 1 + old_line_num=$(wc -l < /tmp/.enable) + # split the stored_procedure_names by ',' and remove them from .enable file + IFS=',' read -ra stored_procedure_names_array <<< "${stored_procedure_names}" + for stored_procedure_name in "${stored_procedure_names_array[@]}"; do + sed -i "/${stored_procedure_name}/d" /tmp/.enable + done + # copy the .enable file back to container + docker cp "/tmp/.enable" "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" || exit 1 + new_line_num=$(wc -l < /tmp/.enable) + info "Successfuly disable stored_procedures ${stored_procedure_names} for graph [${graph_name}], ${old_line_num} -> ${new_line_num}" +} + +function do_show(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -g | --graph) + graph_name="$2" + shift + shift + ;; + *) + err "unknown option $1" + show_stored_procedure_usage + exit 1 + ;; + esac + done + if [ -f "${HOST_DB_RUNNING_FILE}" ]; then + . ${HOST_DB_ENV_FILE} + fi + # check graph_name + if [ -z "${graph_name}" ]; then + # let user confirm that no graph_name is specified, will use default graph. + read -p "No graph_name is specified, will use default graph, continue? [y/n]" -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + info "Abort compile stored_procedure" + show_stored_procedure_usage + exit 1 + fi + info "Using default graph [${DATABASE_DEFAULT_GRAPH_NAME}]" + graph_name=${DATABASE_DEFAULT_GRAPH_NAME} + fi + check_graph_name_valid "${graph_name}" + if [ ! 
-d "${HOST_DB_HOME}/data/${graph_name}" ]; then + err "graph [${graph_name}] can not be found, please create it first" + exit 1 + fi + info "graph_name = ${graph_name}" + docker_graph_dir="${DATABASE_WORKSPACE}/data/${graph_name}" + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -d ${docker_graph_dir} ] || (echo -e \"${RED}Graph [${graph_name}] not exists, please create it first. ${NC}\" && exit 1)" || exit 1 + docker_graph_plugin_dir="${docker_graph_dir}/plugins" + docker_graph_enable_file="${docker_graph_plugin_dir}/.enable" + # check if docker_graph_enable_file exists, if not ,exit + docker exec "${GIE_DB_CONTAINER_NAME}" bash -c "[ -f ${docker_graph_enable_file} ] || (echo -e \"${RED}Graph [${graph_name}] has no procedures registered. ${NC}\" && exit 1)" || exit 1 + docker cp "${GIE_DB_CONTAINER_NAME}:${docker_graph_enable_file}" "/tmp/.enable" || exit 1 + info "Enabled stored_procedures for graph: [${graph_name}]" + # iterate the .enable file and print the stored_procedure_name + while read -r line; do + emph "Procedure: ${line}" + done < /tmp/.enable +} + +function do_database(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + create) + shift + do_create "$@" + exit 0 + ;; + remove) + shift + do_remove "$@" + exit 0 + ;; + import) + shift + do_import "$@" + exit 0 + ;; + list) + shift + do_list "$@" + exit 0 + ;; + *) + err "unknown option $1" + database_usage + exit 1 + ;; + esac + done +} + +function do_service(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + start) + shift + do_start "$@" + exit 0 + ;; + stop) + shift + do_stop "$@" + exit 0 + ;; + restart) + shift + do_restart "$@" + exit 0 + ;; + status) + shift + do_status "$@" + exit 0 + ;; + get_log) + shift + do_log "$@" + exit 0 + ;; + *) + err "unknown option $1" + services_usage + exit 1 + ;; + esac + done +} + +function do_procedure(){ + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + compile) + shift + do_compile "$@" + exit 0 + ;; + enable) + shift + do_enable "$@" + exit 0 + ;; + disable) + shift + do_disable "$@" + exit 0 + ;; + show) + shift + do_show "$@" + exit 1 + ;; + *) + err "unknown option $1" + procedure_usage + exit 1 + ;; + esac + done + procedure_usage +} + +#################### Entry #################### +if [ $# -eq 0 ]; then + usage + exit 1 +fi + +while [[ $# -gt 0 ]]; do + key="$1" + + case $key in + -h | --help) + usage + exit + ;; + init) + shift + info "Start initiating database..." 
+ do_init "$@" + exit 0 + ;; + database) + shift + check_database_initialized + do_database "$@" + exit 0 + ;; + service) + shift + check_database_initialized + do_service "$@" + exit 0 + ;; + procedure) + shift + check_database_initialized + do_procedure "$@" + exit 0 + ;; + destroy) + shift + do_destroy "$@" + exit 0 + ;; + *) # unknown option + err "unknown option $1" + usage + exit 1 + ;; + esac +done + + + + diff --git a/flex/interactive/conf/engine_config.yaml b/flex/interactive/conf/engine_config.yaml new file mode 100644 index 000000000000..0bb4487bda46 --- /dev/null +++ b/flex/interactive/conf/engine_config.yaml @@ -0,0 +1,13 @@ +log_level: INFO # default INFO +default_graph: modern # configure the graph to be loaded while starting the service, if graph name not specified +compute_engine: + shard_num: 1 # the number of shared workers, default 1 +compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterMatchRule + - FilterIntoJoinRule + - NotExistToAntiJoinRule + query_timeout: 20000 # query timeout in milliseconds, default 2000 \ No newline at end of file diff --git a/flex/interactive/conf/interactive.properties b/flex/interactive/conf/interactive.properties deleted file mode 100755 index 9dbf43af1ece..000000000000 --- a/flex/interactive/conf/interactive.properties +++ /dev/null @@ -1,8 +0,0 @@ -engine.type: hiactor -hiactor.hosts: localhost:10000 -graph.store: exp -graph.schema: file:../data/ldbc/graph.json -graph.stored.procedures.uri: file:/tmp -graph.planner: {"isOn":true,"opt":"RBO","rules":["FilterIntoJoinRule", "FilterMatchRule", "NotMatchToAntiJoinRule"]} -gremlin.server.disabled: true -neo4j.bolt.server.port: 7687 \ No newline at end of file diff --git a/flex/interactive/conf/interactive.yaml b/flex/interactive/conf/interactive.yaml index 969b6ca80401..815c4752be24 100755 --- a/flex/interactive/conf/interactive.yaml +++ b/flex/interactive/conf/interactive.yaml @@ -1,23 +1,3 @@ ---- -version: 0.0.1 -directories: - workspace: /home/graphscope/interactive/ - subdirs: - data: data # by default data, relative to ${workspace} - conf: conf # by default conf, relative to ${workspace} - logs: logs # by default logs, relative to ${workspace} -logLevel: INFO # default INFO -default_graph: modern # configure the graph to be loaded while starting the service, if graph name not specified - # may include other configuration items of other engines -compute_engine: - type: hiactor - hosts: - - localhost:10000 # currently only one host can be specified - shared_num: 1 # the number of shared workers, default 1 -compiler: - planner: {"isOn":true,"opt":"RBO","rules":["FilterMatchRule"]} # Confirm这个配置 - endpoint: - default_listen_address: localhost # default localhost - bolt_connector: # for cypher, there may be other connectors, such as bolt_connector, https_connector - enabled: true # default false - port: 7687 +version: v0.0.2 +#volume: +# - {HOST_PATH}:${CONTAINER_PATH} \ No newline at end of file diff --git a/flex/interactive/data/ldbc/graph.json b/flex/interactive/data/ldbc/graph.json deleted file mode 100755 index f16dd1710336..000000000000 --- a/flex/interactive/data/ldbc/graph.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "entities": [ - { - "label": { - "id": 1, - "name": "software" - }, - "columns": [ - { - "key": { - "id": 4, - "name": "id" - }, - "data_type": 1, - "is_primary_key": false - }, - { - "key": { - "id": 0, - "name": "name" - }, - "data_type": 4, - "is_primary_key": false - }, - { - "key": { - "id": 2, - "name": "lang" - }, - "data_type": 4, - "is_primary_key": 
false - } - ] - }, - { - "label": { - "id": 0, - "name": "person" - }, - "columns": [ - { - "key": { - "id": 4, - "name": "id" - }, - "data_type": 1, - "is_primary_key": false - }, - { - "key": { - "id": 0, - "name": "name" - }, - "data_type": 4, - "is_primary_key": false - }, - { - "key": { - "id": 1, - "name": "age" - }, - "data_type": 1, - "is_primary_key": false - } - ] - } - ], - "relations": [ - { - "label": { - "id": 0, - "name": "knows" - }, - "entity_pairs": [ - { - "src": { - "id": 0, - "name": "person" - }, - "dst": { - "id": 0, - "name": "person" - } - } - ], - "columns": [ - { - "key": { - "id": 3, - "name": "weight" - }, - "data_type": 3, - "is_primary_key": false - } - ] - }, - { - "label": { - "id": 1, - "name": "created" - }, - "entity_pairs": [ - { - "src": { - "id": 0, - "name": "person" - }, - "dst": { - "id": 1, - "name": "software" - } - } - ], - "columns": [ - { - "key": { - "id": 3, - "name": "weight" - }, - "data_type": 3, - "is_primary_key": false - } - ] - } - ], - "is_table_id": true, - "is_column_id": false -} \ No newline at end of file diff --git a/flex/interactive/data/ldbc/graph.yaml b/flex/interactive/data/ldbc/graph.yaml deleted file mode 100755 index c37d4731b071..000000000000 --- a/flex/interactive/data/ldbc/graph.yaml +++ /dev/null @@ -1,70 +0,0 @@ -name: modern # then must have a modern dir under ${data} directory -store_type: mutable_csr # v6d, groot, gart -stored_procedures: - directory: plugins # default plugins, relative to ${workspace}/${name} -schema: - vertex_types: - - type_name: person - x_csr_params: - max_vertex_num: 100 - properties: - - property_id: 0 - property_name: id - property_type: - primitive_type: DT_SIGNED_INT64 - - property_id: 1 - property_name: name - property_type: - primitive_type: DT_STRING - - property_id: 2 - property_name: age - property_type: - primitive_type: DT_SIGNED_INT32 - primary_keys: - - id - - type_name: software - x_csr_params: - max_vertex_num: 100 - properties: - - property_id: 0 - property_name: id - property_type: - primitive_type: DT_SIGNED_INT64 - x_csr_params: - - property_id: 1 - property_name: name - property_type: - primitive_type: DT_STRING - - property_id: 2 - property_name: lang - property_type: - primitive_type: DT_STRING - primary_keys: - - id - edge_types: - - type_name: knows - x_csr_params: - incoming_edge_strategy: None - outgoing_edge_strategy: Multiple - vertex_type_pair_relations: - source_vertex: person - destination_vertex: person - relation: MANY_TO_MANY - properties: - - property_id: 0 - property_name: weight - property_type: - primitive_type: DT_DOUBLE - - type_name: created - x_csr_params: - incoming_edge_strategy: None - outgoing_edge_strategy: Single - vertex_type_pair_relations: - source_vertex: person - destination_vertex: software - relation: ONE_TO_MANY - properties: - - property_id: 0 - property_name: weight - property_type: - primitive_type: DT_DOUBLE diff --git a/flex/interactive/docker/interactive-runtime.Dockerfile b/flex/interactive/docker/interactive-runtime.Dockerfile index 1b8ee02de93e..399b67f91d4a 100755 --- a/flex/interactive/docker/interactive-runtime.Dockerfile +++ b/flex/interactive/docker/interactive-runtime.Dockerfile @@ -9,4 +9,5 @@ RUN cd /home/graphscope/ && git clone -b main --single-branch https://github.com cd GraphScope/flex && mkdir build && cd build && cmake .. -DBUILD_DOC=OFF && sudo make -j install # install graphscope GIE -RUN . /home/graphscope/.cargo/env && cd /home/graphscope/GraphScope/interactive_engine/compiler && make build +RUN . 
/home/graphscope/.cargo/env && cd /home/graphscope/GraphScope/interactive_engine && \
+    mvn clean install -DskipTests -Drevision=0.0.1-SNAPSHOT -Pexperimental
diff --git a/flex/interactive/examples/modern_graph b/flex/interactive/examples/modern_graph
deleted file mode 120000
index 8ed59122aab3..000000000000
--- a/flex/interactive/examples/modern_graph
+++ /dev/null
@@ -1 +0,0 @@
-../../storages/rt_mutable_graph/modern_graph/
\ No newline at end of file
diff --git a/flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml b/flex/interactive/examples/modern_graph/bulk_load.yaml
similarity index 96%
rename from flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml
rename to flex/interactive/examples/modern_graph/bulk_load.yaml
index 8d9085d81aca..739302554d6e 100644
--- a/flex/storages/rt_mutable_graph/modern_graph/bulk_load.yaml
+++ b/flex/interactive/examples/modern_graph/bulk_load.yaml
@@ -2,7 +2,7 @@ graph: modern
 loading_config:
   data_source:
     scheme: file # file, oss, s3, hdfs; only file is supported now
-    # location: # specify it or use FLEX_DATA_DIR env.
+    #location: /home/graphscope/gs_interactive_default_graph/
   import_option: init # append, overwrite, only init is supported now
   format:
     type: csv
diff --git a/flex/interactive/examples/modern_graph/count_vertex_num.cypher b/flex/interactive/examples/modern_graph/count_vertex_num.cypher
new file mode 100644
index 000000000000..cca16c40269d
--- /dev/null
+++ b/flex/interactive/examples/modern_graph/count_vertex_num.cypher
@@ -0,0 +1 @@
+MATCH(v:person { id: $personId}) RETURN COUNT(v);
\ No newline at end of file
diff --git a/flex/interactive/examples/modern_graph/get_person_name.cypher b/flex/interactive/examples/modern_graph/get_person_name.cypher
new file mode 100644
index 000000000000..ce4ee61d64ea
--- /dev/null
+++ b/flex/interactive/examples/modern_graph/get_person_name.cypher
@@ -0,0 +1 @@
+MATCH(p : person {id: $personId}) RETURN p.name;
\ No newline at end of file
diff --git a/flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml b/flex/interactive/examples/modern_graph/modern_graph.yaml
similarity index 92%
rename from flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml
rename to flex/interactive/examples/modern_graph/modern_graph.yaml
index 7823b3fd7561..7d6308bb96b0 100644
--- a/flex/storages/rt_mutable_graph/modern_graph/modern_graph.yaml
+++ b/flex/interactive/examples/modern_graph/modern_graph.yaml
@@ -1,9 +1,5 @@
 name: modern # then must have a modern dir under ${data} directory
 store_type: mutable_csr # v6d, groot, gart
-stored_procedures:
-  directory: plugins # default plugins, relative to ${workspace}/${name}
-  enable_lists:
-    - ldbc_ic1
 schema:
   vertex_types:
     - type_id: 0
diff --git a/flex/storages/rt_mutable_graph/modern_graph/person.csv b/flex/interactive/examples/modern_graph/person.csv
similarity index 100%
rename from flex/storages/rt_mutable_graph/modern_graph/person.csv
rename to flex/interactive/examples/modern_graph/person.csv
diff --git a/flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv b/flex/interactive/examples/modern_graph/person_created_software.csv
similarity index 100%
rename from flex/storages/rt_mutable_graph/modern_graph/person_created_software.csv
rename to flex/interactive/examples/modern_graph/person_created_software.csv
diff --git a/flex/storages/rt_mutable_graph/modern_graph/person_knows_person.csv b/flex/interactive/examples/modern_graph/person_knows_person.csv
similarity index 100%
rename from
flex/storages/rt_mutable_graph/modern_graph/person_knows_person.csv rename to flex/interactive/examples/modern_graph/person_knows_person.csv diff --git a/flex/storages/rt_mutable_graph/modern_graph/software.csv b/flex/interactive/examples/modern_graph/software.csv similarity index 100% rename from flex/storages/rt_mutable_graph/modern_graph/software.csv rename to flex/interactive/examples/modern_graph/software.csv diff --git a/flex/interactive/examples/movies/ACTED_IN.csv b/flex/interactive/examples/movies/ACTED_IN.csv new file mode 100644 index 000000000000..e9494b38ca6d --- /dev/null +++ b/flex/interactive/examples/movies/ACTED_IN.csv @@ -0,0 +1,173 @@ +start|end +1|0 +2|0 +3|0 +4|0 +8|0 +1|9 +2|9 +3|9 +4|9 +1|10 +2|10 +3|10 +4|10 +1|11 +12|11 +13|11 +16|15 +17|15 +18|15 +19|15 +20|15 +21|15 +22|15 +23|15 +24|15 +25|15 +26|15 +28|15 +16|29 +30|29 +31|29 +32|29 +33|29 +34|29 +16|37 +22|37 +38|37 +39|37 +40|37 +41|37 +42|37 +43|37 +44|37 +49|46 +47|46 +40|46 +48|46 +50|46 +20|46 +51|46 +17|52 +53|52 +54|52 +22|52 +60|56 +22|56 +57|56 +58|56 +59|56 +63|62 +64|62 +58|62 +65|62 +71|67 +34|67 +54|67 +68|67 +69|67 +70|67 +71|73 +34|73 +74|73 +75|73 +76|73 +77|73 +71|78 +34|78 +80|78 +82|81 +34|81 +83|81 +84|81 +71|85 +86|85 +12|85 +1|87 +88|87 +89|87 +90|87 +51|92 +93|92 +94|92 +70|92 +60|95 +80|95 +89|95 +98|97 +99|97 +89|97 +1|100 +101|100 +102|100 +103|100 +71|105 +4|105 +106|105 +107|105 +71|111 +112|111 +113|111 +114|111 +4|116 +117|116 +118|116 +119|116 +120|116 +122|121 +123|121 +124|121 +125|121 +126|121 +127|121 +120|121 +127|128 +129|128 +64|128 +120|128 +71|130 +131|130 +132|130 +42|130 +65|130 +133|130 +134|130 +135|130 +138|137 +139|137 +19|137 +140|137 +133|137 +17|141 +142|141 +24|141 +143|141 +71|144 +19|144 +145|144 +146|144 +134|144 +146|147 +53|147 +94|147 +148|147 +71|150 +53|150 +17|152 +142|152 +17|154 +155|154 +1|154 +60|157 +140|157 +71|159 +160|159 +148|159 +71|161 +71|162 +164|162 +165|162 +77|162 +163|162 +146|162 diff --git a/flex/interactive/examples/movies/DIRECTED.csv b/flex/interactive/examples/movies/DIRECTED.csv new file mode 100644 index 000000000000..8063205f7546 --- /dev/null +++ b/flex/interactive/examples/movies/DIRECTED.csv @@ -0,0 +1,45 @@ +start|end +5|0 +6|0 +5|9 +6|9 +5|10 +6|10 +14|11 +27|15 +35|29 +45|37 +27|46 +55|52 +61|56 +66|62 +72|67 +72|73 +79|78 +27|81 +71|85 +91|87 +59|92 +96|95 +99|97 +104|100 +108|105 +5|105 +6|105 +115|111 +25|116 +5|121 +6|121 +25|128 +136|130 +115|137 +142|141 +115|144 +149|147 +151|150 +153|152 +156|154 +158|157 +96|159 +151|161 +166|162 diff --git a/flex/interactive/examples/movies/FOLLOWS.csv b/flex/interactive/examples/movies/FOLLOWS.csv new file mode 100644 index 000000000000..f8bb4bce99a4 --- /dev/null +++ b/flex/interactive/examples/movies/FOLLOWS.csv @@ -0,0 +1,4 @@ +start|end +170|169 +168|169 +167|168 diff --git a/flex/interactive/examples/movies/Movie.csv b/flex/interactive/examples/movies/Movie.csv new file mode 100644 index 000000000000..500cfd6e0aac --- /dev/null +++ b/flex/interactive/examples/movies/Movie.csv @@ -0,0 +1,39 @@ +id|released|tagline|title +0|1999|Welcome to the Real World|The Matrix +9|2003|Free your mind|The Matrix Reloaded +10|2003|Everything that has a beginning has an end|The Matrix Revolutions +11|1997|Evil has its winning ways|The Devil's Advocate +15|1992|In the heart of the nation's capital, in a courthouse of the U.S. 
government, one man will stop at nothing to keep his honor, and one will stop at nothing to find the truth.|A Few Good Men +29|1986|I feel the need, the need for speed.|Top Gun +37|2000|The rest of his life begins now.|Jerry Maguire +46|1986|For some, it's the last real taste of innocence, and the first real taste of life. But for everyone, it's the time that memories are made of.|Stand By Me +52|1997|A comedy from the heart that goes for the throat.|As Good as It Gets +56|1998|After life there is more. The end is just the beginning.|What Dreams May Come +62|1999|First loves last. Forever.|Snow Falling on Cedars +67|1998|At odds in life... in love on-line.|You've Got Mail +73|1993|What if someone you never met, someone you never saw, someone you never knew was the only someone for you?|Sleepless in Seattle +78|1990|A story of love, lava and burning desire.|Joe Versus the Volcano +81|1998|Can two friends sleep together and still love each other in the morning?|When Harry Met Sally +85|1996|In every life there comes a time when that thing you dream becomes that thing you do|That Thing You Do +87|2000|Pain heals, Chicks dig scars... Glory lasts forever|The Replacements +92|2006|Based on the extraordinary true story of one man's fight for freedom|RescueDawn +95|1996|Come as you are|The Birdcage +97|1992|It's a hell of a thing, killing a man|Unforgiven +100|1995|The hottest data on earth. In the coolest head in town|Johnny Mnemonic +105|2012|Everything is connected|Cloud Atlas +111|2006|Break The Codes|The Da Vinci Code +116|2006|Freedom! Forever!|V for Vendetta +121|2008|Speed has no limits|Speed Racer +128|2009|Prepare to enter a secret world of assassins|Ninja Assassin +130|1999|Walk a mile you'll never forget.|The Green Mile +137|2008|400 million people were waiting for the truth.|Frost/Nixon +141|1992|He didn't want law. He wanted justice.|Hoffa +144|1995|Houston, we have a problem.|Apollo 13 +147|1996|Don't Breathe. Don't Look Back.|Twister +150|2000|At the edge of the world, his journey begins.|Cast Away +152|1975|If he's crazy, what does that make you?|One Flew Over the Cuckoo's Nest +154|2003|nan|Something's Gotta Give +157|1999|One robot's 200 year journey to become an ordinary man.|Bicentennial Man +159|2007|A stiff drink. A little mascara. A lot of nerve. Who said they couldn't bring down the Soviet empire.|Charlie Wilson's War +161|2004|This Holiday Season... Believe|The Polar Express +162|1992|Once in a lifetime you get a chance to do something different.|A League of Their Own diff --git a/flex/interactive/examples/movies/PRODUCED.csv b/flex/interactive/examples/movies/PRODUCED.csv new file mode 100644 index 000000000000..5a58e7fd9f51 --- /dev/null +++ b/flex/interactive/examples/movies/PRODUCED.csv @@ -0,0 +1,16 @@ +start|end +7|0 +7|9 +7|10 +45|37 +27|81 +72|81 +110|105 +5|116 +6|116 +7|116 +7|121 +5|128 +6|128 +7|128 +156|154 diff --git a/flex/interactive/examples/movies/Person.csv b/flex/interactive/examples/movies/Person.csv new file mode 100644 index 000000000000..ccec81a744cf --- /dev/null +++ b/flex/interactive/examples/movies/Person.csv @@ -0,0 +1,134 @@ +id|born|name +1|1964|Keanu Reeves +2|1967|Carrie-Anne Moss +3|1961|Laurence Fishburne +4|1960|Hugo Weaving +5|1967|Lilly Wachowski +6|1965|Lana Wachowski +7|1952|Joel Silver +8|1978|Emil Eifrem +12|1975|Charlize Theron +13|1940|Al Pacino +14|1944|Taylor Hackford +16|1962|Tom Cruise +17|1937|Jack Nicholson +18|1962|Demi Moore +19|1958|Kevin Bacon +20|1966|Kiefer Sutherland +21|1971|Noah Wyle +22|1968|Cuba Gooding Jr. 
+23|1957|Kevin Pollak +24|1943|J.T. Walsh +25|1967|James Marshall +26|1948|Christopher Guest +27|1947|Rob Reiner +28|1961|Aaron Sorkin +30|1957|Kelly McGillis +31|1959|Val Kilmer +32|1962|Anthony Edwards +33|1933|Tom Skerritt +34|1961|Meg Ryan +35|1944|Tony Scott +36|1941|Jim Cash +38|1969|Renee Zellweger +39|1962|Kelly Preston +40|1974|Jerry O'Connell +41|1970|Jay Mohr +42|1961|Bonnie Hunt +43|1971|Regina King +44|1996|Jonathan Lipnicki +45|1957|Cameron Crowe +47|1970|River Phoenix +48|1971|Corey Feldman +49|1972|Wil Wheaton +50|1966|John Cusack +51|1942|Marshall Bell +53|1963|Helen Hunt +54|1963|Greg Kinnear +55|1940|James L. Brooks +57|1960|Annabella Sciorra +58|1929|Max von Sydow +59|1942|Werner Herzog +60|1951|Robin Williams +61|1956|Vincent Ward +63|1970|Ethan Hawke +64|1971|Rick Yune +65|1940|James Cromwell +66|1953|Scott Hicks +68|1968|Parker Posey +69|1973|Dave Chappelle +70|1967|Steve Zahn +71|1956|Tom Hanks +72|1941|Nora Ephron +74|1956|Rita Wilson +75|1953|Bill Pullman +76|1949|Victor Garber +77|1962|Rosie O'Donnell +79|1950|John Patrick Stanley +80|1956|Nathan Lane +82|1948|Billy Crystal +83|1956|Carrie Fisher +84|1949|Bruno Kirby +86|1977|Liv Tyler +88|1970|Brooke Langton +89|1930|Gene Hackman +90|1968|Orlando Jones +91|1950|Howard Deutch +93|1974|Christian Bale +94|1954|Zach Grenier +96|1931|Mike Nichols +98|1930|Richard Harris +99|1930|Clint Eastwood +101|1947|Takeshi Kitano +102|1968|Dina Meyer +103|1958|Ice-T +104|1953|Robert Longo +106|1966|Halle Berry +107|1949|Jim Broadbent +108|1965|Tom Tykwer +109|1969|David Mitchell +110|1961|Stefan Arndt +112|1939|Ian McKellen +113|1976|Audrey Tautou +114|1971|Paul Bettany +115|1954|Ron Howard +117|1981|Natalie Portman +118|1946|Stephen Rea +119|1940|John Hurt +120|1967|Ben Miles +122|1985|Emile Hirsch +123|1960|John Goodman +124|1946|Susan Sarandon +125|1966|Matthew Fox +126|1980|Christina Ricci +127|1982|Rain +129|2003|Naomie Harris +131|1957|Michael Clarke Duncan +132|1953|David Morse +133|1968|Sam Rockwell +134|1955|Gary Sinise +135|1959|Patricia Clarkson +136|1959|Frank Darabont +138|1938|Frank Langella +139|1969|Michael Sheen +140|1960|Oliver Platt +142|1944|Danny DeVito +143|1965|John C. 
Reilly
+145|1950|Ed Harris
+146|1955|Bill Paxton
+148|1967|Philip Seymour Hoffman
+149|1943|Jan de Bont
+151|1951|Robert Zemeckis
+153|1932|Milos Forman
+155|1946|Diane Keaton
+156|1949|Nancy Meyers
+158|1958|Chris Columbus
+160|1967|Julia Roberts
+163|1954|Madonna
+164|1956|Geena Davis
+165|1963|Lori Petty
+166|1943|Penny Marshall
+167|2003|Paul Blythe
+168|2003|Angela Scope
+169|2003|Jessica Thompson
+170|2003|James Thompson
diff --git a/flex/interactive/examples/movies/REVIEWED.csv b/flex/interactive/examples/movies/REVIEWED.csv
new file mode 100644
index 000000000000..ab81554f3d8e
--- /dev/null
+++ b/flex/interactive/examples/movies/REVIEWED.csv
@@ -0,0 +1,10 @@
+start|end|review|rating
+169|105|yes|95
+169|87|bravo|65
+170|87|good|100
+168|87|bad|62
+169|97|cool|85
+169|95|poor|45
+169|111|bad|68
+170|111|bad|65
+169|37|bravo|92
diff --git a/flex/interactive/examples/movies/WROTE.csv b/flex/interactive/examples/movies/WROTE.csv
new file mode 100644
index 000000000000..794dde4cb6e2
--- /dev/null
+++ b/flex/interactive/examples/movies/WROTE.csv
@@ -0,0 +1,11 @@
+start|end
+28|15
+36|29
+45|37
+72|81
+109|105
+5|116
+6|116
+5|121
+6|121
+156|154
diff --git a/flex/interactive/examples/movies/graph.yaml b/flex/interactive/examples/movies/graph.yaml
new file mode 100644
index 000000000000..c90fa0d7fc5a
--- /dev/null
+++ b/flex/interactive/examples/movies/graph.yaml
@@ -0,0 +1,67 @@
+name: movies
+schema:
+  vertex_types:
+    - type_name: Movie
+      properties:
+        - property_name: id
+          property_type:
+            primitive_type: DT_SIGNED_INT64
+        - property_name: released
+          property_type:
+            primitive_type: DT_SIGNED_INT32
+        - property_name: tagline
+          property_type:
+            primitive_type: DT_STRING
+        - property_name: title
+          property_type:
+            primitive_type: DT_STRING
+      primary_keys:
+        - id
+    - type_name: Person
+      properties:
+        - property_name: id
+          property_type:
+            primitive_type: DT_SIGNED_INT64
+        - property_name: born
+          property_type:
+            primitive_type: DT_SIGNED_INT32
+        - property_name: name
+          property_type:
+            primitive_type: DT_STRING
+      primary_keys:
+        - id
+  edge_types:
+    - type_name: ACTED_IN
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Movie
+          relation: MANY_TO_MANY
+    - type_name: DIRECTED
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Movie
+          relation: MANY_TO_MANY
+    - type_name: REVIEW
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Movie
+          relation: MANY_TO_MANY
+      properties:
+        - property_name: rating
+          property_type:
+            primitive_type: DT_SIGNED_INT32
+    - type_name: FOLLOWS
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Person
+          relation: MANY_TO_MANY
+    - type_name: WROTE
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Movie
+          relation: MANY_TO_MANY
+    - type_name: PRODUCED
+      vertex_type_pair_relations:
+        - source_vertex: Person
+          destination_vertex: Movie
+          relation: MANY_TO_MANY
diff --git a/flex/interactive/examples/movies/import.yaml b/flex/interactive/examples/movies/import.yaml
new file mode 100644
index 000000000000..778a1d9c1e1e
--- /dev/null
+++ b/flex/interactive/examples/movies/import.yaml
@@ -0,0 +1,115 @@
+graph: movies
+loading_config:
+  data_source:
+    scheme: file # file, oss, s3, hdfs; only file is supported now
+    location: /home/graphscope/movies/
+  import_option: init # append, overwrite, only init is supported now
+  format:
+    type: csv
+    metadata:
+      delimiter: "|" # other loading configurations go here
+      header_row: true # whether to
use the first row as the header + quoting: false + quote_char: '"' + double_quote: true + escape_char: '\' + escaping: false + block_size: 4MB + batch_reader: true +vertex_mappings: + - type_name: Person # must align with the schema + inputs: + - Person.csv + - type_name: Movie + inputs: + - Movie.csv +edge_mappings: + - type_triplet: + edge: ACTED_IN + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - ACTED_IN.csv + - type_triplet: + edge: DIRECTED + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - DIRECTED.csv + - type_triplet: + edge: FOLLOWS + source_vertex: Person + destination_vertex: Person + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - FOLLOWS.csv + - type_triplet: + edge: PRODUCED + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - PRODUCED.csv + - type_triplet: + edge: REVIEW + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + column_mappings: + - column: + index: 3 + name: rating + property: rating + inputs: + - REVIEWED.csv + - type_triplet: + edge: WROTE + source_vertex: Person + destination_vertex: Movie + source_vertex_mappings: + - column: + index: 0 + name: id + destination_vertex_mappings: + - column: + index: 1 + name: id + inputs: + - WROTE.csv diff --git a/flex/storages/rt_mutable_graph/loading_config.cc b/flex/storages/rt_mutable_graph/loading_config.cc index 39e99ecb13e0..cfd8336a4dd5 100644 --- a/flex/storages/rt_mutable_graph/loading_config.cc +++ b/flex/storages/rt_mutable_graph/loading_config.cc @@ -62,11 +62,11 @@ static bool fetch_src_dst_column_mapping(const Schema& schema, YAML::Node node, return false; } } - + return true; } else { - LOG(ERROR) << "No primary key column mapping for [" << key << "]"; + LOG(WARNING) << "No primary key column mapping for [" << key << "]"; + return false; } - return true; } // Function to parse memory size represented as a string @@ -124,7 +124,10 @@ static bool parse_column_mappings( std::string property_name; // property name is optional. 
 if (!get_scalar(node[i], "property", property_name)) {
-      LOG(ERROR) << "Expect property name for column mapping";
+      LOG(ERROR) << "Expect property name for column mapping when parsing "
+                    "column mapping for label: "
+                 << label_name << ", column_id: " << column_id
+                 << ", column_name: " << column_name;
       return false;
     }
     if (!condition(label_name, property_name)) {
@@ -301,17 +304,18 @@ static bool parse_edge_files(
   if (!fetch_src_dst_column_mapping(schema, node, src_label_id,
                                     "source_vertex_mappings", src_columns)) {
-    LOG(ERROR) << "Field [source_vertex_mappings] is not set for edge ["
-               << src_label << "->[" << edge_label << "]->" << dst_label
-               << "]";
-    return false;
+    LOG(WARNING) << "Field [source_vertex_mappings] is not set for edge ["
+                 << src_label << "->[" << edge_label << "]->" << dst_label
+                 << "], using default choice: column_id 0";
+    src_columns.push_back(0);
   }
   if (!fetch_src_dst_column_mapping(schema, node, dst_label_id,
                                     "destination_vertex_mappings", dst_columns)) {
-    LOG(ERROR) << "Field [destination_vertex_mappings] is not set for edge["
-               << src_label << "->[" << edge_label << "]->" << dst_label;
-    return false;
+    LOG(WARNING) << "Field [destination_vertex_mappings] is not set for edge["
+                 << src_label << "->[" << edge_label << "]->" << dst_label
+                 << "], using default choice: column_id 1";
+    dst_columns.push_back(1);
   }
   VLOG(10) << "src: " << src_label << ", dst: " << dst_label
diff --git a/flex/storages/rt_mutable_graph/schema.cc b/flex/storages/rt_mutable_graph/schema.cc
index 95e5e94fabd7..a552020e815a 100644
--- a/flex/storages/rt_mutable_graph/schema.cc
+++ b/flex/storages/rt_mutable_graph/schema.cc
@@ -705,7 +705,8 @@ static bool parse_schema_config_file(const std::string& path, Schema& schema) {
     return false;
   }
   if (!expect_config(graph_node, "store_type", std::string("mutable_csr"))) {
-    return false;
+    LOG(WARNING) << "store_type is not set properly, use default value: "
+                 << "mutable_csr";
   }
   auto schema_node = graph_node["schema"];
@@ -723,21 +724,60 @@ static bool parse_schema_config_file(const std::string& path, Schema& schema) {
       return false;
     }
   }
+  // get the directory of path
+  auto parent_dir = std::filesystem::path(path).parent_path().string();
   if (graph_node["stored_procedures"]) {
     auto stored_procedure_node = graph_node["stored_procedures"];
     auto directory = stored_procedure_node["directory"].as<std::string>();
     // check is directory
     if (!std::filesystem::exists(directory)) {
-      LOG(WARNING) << "plugin directory - " << directory << " not found...";
+      LOG(ERROR) << "plugin directory - " << directory
+                 << " not found, try with parent dir: " << parent_dir;
+      directory = parent_dir + "/" + directory;
+      if (!std::filesystem::exists(directory)) {
+        LOG(ERROR) << "plugin directory - " << directory << " not found...";
+        return true;
+      }
     }
+    schema.SetPluginDir(directory);
     std::vector<std::string> files_got;
     if (!get_sequence(stored_procedure_node, "enable_lists", files_got)) {
       LOG(ERROR) << "stored_procedures is not set properly";
+      return true;
     }
+    std::vector<std::string> all_procedure_yamls = get_yaml_files(directory);
+    std::vector<std::string> all_procedure_names;
+    {
+      // get all procedure names
+      for (auto& f : all_procedure_yamls) {
+        YAML::Node procedure_node = YAML::LoadFile(f);
+        if (!procedure_node || !procedure_node.IsMap()) {
+          LOG(ERROR) << "procedure is not set properly";
+          return false;
+        }
+        std::string procedure_name;
+        if (!get_scalar(procedure_node, "name", procedure_name)) {
+          LOG(ERROR) << "name is not set properly for " << f;
+          return false;
+        }
+        all_procedure_names.push_back(procedure_name);
+      }
+    }
+
     for (auto& f : files_got) {
-      if (!std::filesystem::exists(f)) {
-        LOG(ERROR) << "plugin - " << f << " file not found...";
+      auto real_file = directory + "/" + f;
+      if (!std::filesystem::exists(real_file)) {
+        LOG(ERROR) << "plugin - " << real_file << " file not found...";
+        // it seems that f is not the filename but the plugin name; try to find
+        // the plugin in the directory
+        if (std::find(all_procedure_names.begin(), all_procedure_names.end(),
+                      f) == all_procedure_names.end()) {
+          LOG(ERROR) << "plugin - " << f << " not found...";
+        } else {
+          VLOG(1) << "plugin - " << f << " found...";
+          schema.EmplacePlugin(f);
+        }
       } else {
         schema.EmplacePlugin(std::filesystem::canonical(f));
       }
@@ -757,6 +797,10 @@ void Schema::EmplacePlugin(const std::string& plugin) {
   plugin_list_.emplace_back(plugin);
 }
+void Schema::SetPluginDir(const std::string& dir) { plugin_dir_ = dir; }
+
+std::string Schema::GetPluginDir() const { return plugin_dir_; }
+
 // check whether prop in vprop_names, or is the primary key
 bool Schema::vertex_has_property(const std::string& label,
                                  const std::string& prop) const {
diff --git a/flex/storages/rt_mutable_graph/schema.h b/flex/storages/rt_mutable_graph/schema.h
index c0a017088d79..81e6bff50ce7 100644
--- a/flex/storages/rt_mutable_graph/schema.h
+++ b/flex/storages/rt_mutable_graph/schema.h
@@ -148,6 +148,10 @@ class Schema {
   void EmplacePlugin(const std::string& plugin_name);
+  void SetPluginDir(const std::string& plugin_dir);
+
+  std::string GetPluginDir() const;
+
  private:
   label_t vertex_label_to_index(const std::string& label);
@@ -169,6 +173,7 @@ class Schema {
   std::map ie_strategy_;
   std::vector max_vnum_;
   std::vector<std::string> plugin_list_;
+  std::string plugin_dir_;
 };
 } // namespace gs
diff --git a/flex/storages/rt_mutable_graph/types.h b/flex/storages/rt_mutable_graph/types.h
index 40c68b7d74a2..2b73eeaf83f4 100644
--- a/flex/storages/rt_mutable_graph/types.h
+++ b/flex/storages/rt_mutable_graph/types.h
@@ -35,7 +35,7 @@ static constexpr const char* DT_SIGNED_INT32 = "DT_SIGNED_INT32";
 static constexpr const char* DT_SIGNED_INT64 = "DT_SIGNED_INT64";
 static constexpr const char* DT_DOUBLE = "DT_DOUBLE";
 static constexpr const char* DT_STRING = "DT_STRING";
-static constexpr const char* DT_DATE = "DT_DATE";
+static constexpr const char* DT_DATE = "DT_DATE32";
 } // namespace gs
diff --git a/flex/tests/hqps/hqps_cypher_test.sh b/flex/tests/hqps/hqps_cypher_test.sh
index da5ce441b545..1cb4837dfccf 100644
--- a/flex/tests/hqps/hqps_cypher_test.sh
+++ b/flex/tests/hqps/hqps_cypher_test.sh
@@ -19,24 +19,29 @@ SERVER_BIN=${FLEX_HOME}/build/bin/sync_server
 GIE_HOME=${FLEX_HOME}/../interactive_engine/
 #
-if [ $# -lt 1 ]; then
-  echo "only receives: $# args, need 1"
-  echo "Usage: $0 <GS_TEST_DIR>"
+if [ $# -lt 2 ]; then
+  echo "only received: $# args, need 2"
+  echo "Usage: $0 <GS_TEST_DIR> <INTERACTIVE_WORKSPACE>"
   exit 1
 fi
 GS_TEST_DIR=$1
+INTERACTIVE_WORKSPACE=$2
 if [ ! -d ${GS_TEST_DIR} ]; then
   echo "GS_TEST_DIR: ${GS_TEST_DIR} not exists"
   exit 1
 fi
+if [ ! -d ${INTERACTIVE_WORKSPACE} ]; then
+  echo "INTERACTIVE_WORKSPACE: ${INTERACTIVE_WORKSPACE} does not exist"
+  exit 1
+fi
-GRAPH_CONFIG_PATH=${FLEX_HOME}/interactive/conf/interactive.yaml
-GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml
+ENGINE_CONFIG_PATH=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml
+ORI_GRAPH_SCHEMA_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml
+GRAPH_SCHEMA_YAML=${INTERACTIVE_WORKSPACE}/data/ldbc/graph.yaml
 GRAPH_BULK_LOAD_YAML=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_bulk_load.yaml
-COMPILER_GRAPH_SCHEMA=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json
+COMPILER_GRAPH_SCHEMA=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/audit_graph_schema.yaml
 GRAPH_CSR_DATA_DIR=${HOME}/csr-data-dir/
-HQPS_IR_CONF=/tmp/hqps.ir.properties
 # check if GRAPH_SCHEMA_YAML exists
 if [ ! -f ${GRAPH_SCHEMA_YAML} ]; then
   echo "GRAPH_SCHEMA_YAML: ${GRAPH_SCHEMA_YAML} not found"
@@ -79,21 +84,6 @@ kill_service(){
 # kill service when exit
 trap kill_service EXIT
-create_ir_conf(){
-  rm ${HQPS_IR_CONF} || true
-  echo "engine.type: hiactor" >> ${HQPS_IR_CONF}
-  echo "hiactor.hosts: localhost:10000" >> ${HQPS_IR_CONF}
-  echo "graph.store: exp" >> ${HQPS_IR_CONF}
-  echo "graph.schema: ${GS_TEST_DIR}/flex/ldbc-sf01-long-date/ldbc_schema_csr_ic.json" >> ${HQPS_IR_CONF}
-  echo "graph.planner.is.on: true" >> ${HQPS_IR_CONF}
-  echo "graph.planner.opt: RBO" >> ${HQPS_IR_CONF}
-  echo "graph.planner.rules: FilterMatchRule,NotMatchToAntiJoinRule" >> ${HQPS_IR_CONF}
-  echo "gremlin.server.disabled: true" >> ${HQPS_IR_CONF}
-  echo "neo4j.bolt.server.port: 7687" >> ${HQPS_IR_CONF}
-
-  echo "Finish generate HQPS_IR_CONF"
-  cat ${HQPS_IR_CONF}
-}
 # start engine service and load ldbc graph
 start_engine_service(){
@@ -105,9 +95,9 @@ start_engine_service(){
   # export FLEX_DATA_DIR
   export FLEX_DATA_DIR=${GS_TEST_DIR}/flex/ldbc-sf01-long-date
   export ENGINE_TYPE=hiactor
-  cmd="${SERVER_BIN} -c ${GRAPH_CONFIG_PATH} -g ${GRAPH_SCHEMA_YAML} "
+  cmd="${SERVER_BIN} -c ${ENGINE_CONFIG_PATH} -g ${GRAPH_SCHEMA_YAML} "
   cmd="${cmd} --data-path ${GRAPH_CSR_DATA_DIR} -l ${GRAPH_BULK_LOAD_YAML} "
-  cmd="${cmd} -i ${HQPS_IR_CONF} -z ${COMPILER_GRAPH_SCHEMA} --gie-home ${GIE_HOME}"
+  cmd="${cmd} --gie-home ${GIE_HOME}"
   echo "Start engine service with command: ${cmd}"
   ${cmd} &
@@ -122,10 +112,12 @@ start_engine_service(){
 start_compiler_service(){
   echo "try to start compiler service"
   pushd ${GIE_HOME}/compiler
-  cmd="make run graph.schema:=${COMPILER_GRAPH_SCHEMA} config.path=${HQPS_IR_CONF}"
+  cmd="make run graph.schema=${COMPILER_GRAPH_SCHEMA} config.path=${ENGINE_CONFIG_PATH}"
   echo "Start compiler service with command: ${cmd}"
   ${cmd} &
   sleep 5
+  # check that the GraphServer process is running
+  ps -ef | grep "com.alibaba.graphscope.GraphServer" | grep -v grep
   info "Start compiler service success"
   popd
 }
@@ -151,7 +143,6 @@ run_simple_test(){
 }
 kill_service
-create_ir_conf
 start_engine_service
 start_compiler_service
 run_ldbc_test
diff --git a/flex/utils/property/types.h b/flex/utils/property/types.h
index 629e2a79a0f0..56b178297756 100644
--- a/flex/utils/property/types.h
+++ b/flex/utils/property/types.h
@@ -153,6 +153,28 @@ struct Any {
     return AnyConverter::to_any(value);
   }
+  bool operator==(const Any& other) const {
+    if (type == other.type) {
+      if (type == PropertyType::kInt32) {
+        return value.i == other.value.i;
+      } else if (type == PropertyType::kInt64) {
+        return value.l == other.value.l;
+      } else if (type == PropertyType::kDate) {
+        return value.d.milli_second == other.value.d.milli_second;
+      } else if (type == PropertyType::kString) {
+        return value.s == other.value.s;
+      } else if (type == PropertyType::kEmpty) {
+        return true;
+      } else if (type == PropertyType::kDouble) {
+        return value.db == other.value.db;
+      } else {
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
+
   PropertyType type;
   AnyValue value;
 };
diff --git a/flex/utils/yaml_utils.cc b/flex/utils/yaml_utils.cc
new file mode 100644
index 000000000000..2413e3b07fab
--- /dev/null
+++ b/flex/utils/yaml_utils.cc
@@ -0,0 +1,32 @@
+
+/** Copyright 2020 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "flex/utils/yaml_utils.h"
+namespace gs {
+std::vector<std::string> get_yaml_files(const std::string& plugin_dir) {
+  std::filesystem::path dir_path = plugin_dir;
+  std::vector<std::string> res_yaml_files;
+
+  for (auto& entry : std::filesystem::directory_iterator(dir_path)) {
+    if (entry.is_regular_file() && ((entry.path().extension() == ".yaml") ||
+                                    (entry.path().extension() == ".yml"))) {
+      res_yaml_files.emplace_back(entry.path());
+    }
+  }
+  return res_yaml_files;
+}
+
+} // namespace gs
diff --git a/flex/utils/yaml_utils.h b/flex/utils/yaml_utils.h
index ea0f37d8b7aa..fc4572fa1c40 100644
--- a/flex/utils/yaml_utils.h
+++ b/flex/utils/yaml_utils.h
@@ -21,7 +21,12 @@
 #include
 #include
+#include "glog/logging.h"
+
 namespace gs {
+
+std::vector<std::string> get_yaml_files(const std::string& plugin_dir);
+
 namespace config_parsing {
 template <typename T>
 bool get_scalar(YAML::Node node, const std::string& key, T& value) {
diff --git a/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java b/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java
index b9994a984a92..bdcedca48c1d 100644
--- a/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java
+++ b/interactive_engine/common/src/main/java/com/alibaba/graphscope/groot/common/util/IrSchemaParser.java
@@ -38,7 +38,7 @@ public static IrSchemaParser getInstance() {
     private IrSchemaParser() {}
-    public String parse(GraphSchema graphSchema) {
+    public String parse(GraphSchema graphSchema, boolean isColumnId) {
         List<GraphVertex> vertices = graphSchema.getVertexList();
         List<GraphEdge> edges = graphSchema.getEdgeList();
         List entities = new ArrayList();
@@ -49,7 +49,7 @@ public String parse(GraphSchema graphSchema) {
         schemaMap.put("entities", entities);
         schemaMap.put("relations", relations);
         schemaMap.put("is_table_id", true);
-        schemaMap.put("is_column_id", true);
+        schemaMap.put("is_column_id", isColumnId);
         return JSON.toJson(schemaMap);
     }
diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java
index 267113925e02..badc9894fca2 100644
--- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java
+++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/procedure/GraphStoredProcedures.java
@@ -20,11 +20,14 @@ import com.google.common.collect.Maps;
 import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.InputStream;
 import java.util.Map;
 public class GraphStoredProcedures implements StoredProcedures {
+    private static Logger logger = LoggerFactory.getLogger(GraphStoredProcedures.class);
     private final Map<String, StoredProcedureMeta> storedProcedureMetaMap;
     public GraphStoredProcedures(MetaDataReader reader) throws Exception {
@@ -32,6 +35,7 @@
         for (InputStream inputStream : reader.getStoredProcedures()) {
             StoredProcedureMeta createdMeta = StoredProcedureMeta.Deserializer.perform(inputStream);
             this.storedProcedureMetaMap.put(createdMeta.getName(), createdMeta);
+            logger.debug("Got stored procedure: {} from reader", createdMeta.getName());
             inputStream.close();
         }
     }
diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java
index 09ffb4b5eede..bde5b87175f5 100644
--- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java
+++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/LocalMetaDataReader.java
@@ -57,8 +57,13 @@ public List<InputStream> getStoredProcedures() throws IOException {
                         GraphConfig.GRAPH_STORED_PROCEDURES_ENABLE_LISTS.get(configs));
         List<InputStream> procedureInputs = Lists.newArrayList();
         if (enableProcedureList.isEmpty()) {
+            logger.info("Load all procedures in {}", procedurePath);
             for (File file : procedureDir.listFiles()) {
-                procedureInputs.add(new FileInputStream(file));
+                // only consider .yaml or .yml procedure config files
+                logger.info("Found procedure config {}", file.getName());
+                if (file.getName().endsWith(".yaml") || file.getName().endsWith(".yml")) {
+                    procedureInputs.add(new FileInputStream(file));
+                }
             }
         } else {
             Map<String, InputStream> procedureInputMap =
@@ -81,8 +86,12 @@ private Map<String, InputStream> getProcedureNameWithInputStream(File procedureDir)
             throws IOException {
         Map<String, InputStream> procedureInputMap = Maps.newHashMap();
         for (File file : procedureDir.listFiles()) {
+            if (!file.getName().endsWith(".yaml") && !file.getName().endsWith(".yml")) {
+                continue;
+            }
             String procedureName = getProcedureName(file);
             procedureInputMap.put(procedureName, new FileInputStream(file));
+            logger.debug("load procedure {}", procedureName);
         }
         return procedureInputMap;
     }
diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java
index 9e5229b7a44c..bdec124ad9fe 100644
--- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java
+++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/IrGraphSchema.java
@@ -36,6 +36,7 @@ public class IrGraphSchema implements GraphSchema {
     private final boolean isColumnId;
     public IrGraphSchema(MetaDataReader dataReader) throws Exception {
+        this.isColumnId = false;
         SchemaInputStream schemaInputStream = dataReader.getGraphSchema();
         String content =
                 new String(
@@ -44,19 +45,19 @@ public IrGraphSchema(MetaDataReader dataReader) throws Exception {
         switch (schemaInputStream.getFormatType()) {
             case YAML:
                 this.graphSchema = Utils.buildSchemaFromYaml(content);
-                this.schemeJson = IrSchemaParser.getInstance().parse(this.graphSchema);
+                this.schemeJson =
+                        IrSchemaParser.getInstance().parse(this.graphSchema, this.isColumnId);
                 break;
             case JSON:
             default:
                 this.graphSchema = Utils.buildSchemaFromJson(content);
                 this.schemeJson = content;
         }
-        this.isColumnId = false;
     }
     public IrGraphSchema(GraphSchema graphSchema, boolean isColumnId) {
         this.graphSchema = graphSchema;
-        this.schemeJson = IrSchemaParser.getInstance().parse(graphSchema);
+        this.schemeJson = IrSchemaParser.getInstance().parse(graphSchema, isColumnId);
         this.isColumnId = isColumnId;
     }
diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java
index 58990d564689..264d66742a03 100644
--- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java
+++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/schema/Utils.java
@@ -86,17 +86,19 @@ public static final void builderGraphElementFromYaml(
                 "type_id not exist in yaml config");
         List<GraphProperty> propertyList = Lists.newArrayList();
         List propertyNodes = (List) elementMap.get("properties");
-        for (Object property : propertyNodes) {
-            if (property instanceof Map) {
-                Map propertyMap = (Map) property;
-                String propertyName = (String) propertyMap.get("property_name");
-                int propertyId = (int) propertyMap.get("property_id");
-                propNameToIdMap.put(propertyName, propertyId);
-                propertyList.add(
-                        new DefaultGraphProperty(
-                                propertyId,
-                                propertyName,
-                                toDataType(propertyMap.get("property_type"))));
+        if (propertyNodes != null) {
+            for (Object property : propertyNodes) {
+                if (property instanceof Map) {
+                    Map propertyMap = (Map) property;
+                    String propertyName = (String) propertyMap.get("property_name");
+                    int propertyId = (int) propertyMap.get("property_id");
+                    propNameToIdMap.put(propertyName, propertyId);
+                    propertyList.add(
+                            new DefaultGraphProperty(
+                                    propertyId,
+                                    propertyName,
+                                    toDataType(propertyMap.get("property_type"))));
+                }
             }
         }
         List primaryKeyNodes = (List) elementMap.get("primary_keys");
@@ -149,6 +151,8 @@ public static DataType toDataType(Object type) {
             return DataType.DOUBLE;
         case "DT_STRING":
             return DataType.STRING;
+        case "DT_DATE32":
+            return DataType.DATE;
         default:
             throw new UnsupportedOperationException(
                     "unsupported primitive type: " + value);
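For reference, a minimal sketch of how the new gs::get_yaml_files helper introduced above can be exercised, mirroring the way schema.cc scans a plugin directory for stored-procedure configs. This is an illustration only: the plugin directory path below is hypothetical, and the snippet assumes the flex/utils headers and a C++17 toolchain are available.

#include <iostream>
#include <string>
#include <vector>

#include "flex/utils/yaml_utils.h"

int main() {
  // Every *.yaml / *.yml regular file under the plugin directory is a
  // candidate stored-procedure config; schema.cc later reads each file's
  // "name" field to resolve enable_lists entries.
  std::vector<std::string> configs = gs::get_yaml_files("/tmp/plugins");
  for (const std::string& path : configs) {
    std::cout << "candidate procedure config: " << path << "\n";
  }
  return 0;
}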