-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] Support building or writing graph from/to GAR format data (#1185)
What do these changes do? ------------------------- The [GAR format](https://github.com/alibaba/GraphAr) is a graph data format that keeps the CSR/CSC structure information, so it is easy to build the CSR/CSC of a graph from GAR format data. This PR adds support for loading a property graph from GAR format data and for writing an in-memory property graph to GAR format data. The PR mainly includes: - Add `GARFragmentLoader` and `GARArrowFragmentBuilder` to support loading an arrow property fragment from GAR format data. - Add `ArrowFragmentWriter` to support dumping an arrow property graph to GAR format data. - Add the [`GraphAr`](https://github.com/alibaba/GraphAr) project as a submodule to provide the API for accessing GAR format data. Remaining items: - [x] Add `GraphAr` as a submodule and use a compile option to check whether to compile with GraphAr - [x] Add tests to check the correctness of loading and writing Related issue number -------------------- apache/incubator-graphar#40 apache/incubator-graphar#39 --------- Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com> Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Tao He <linzhu.ht@alibaba-inc.com>
- Loading branch information
1 parent
55a22bc
commit 0eda206
Showing
26 changed files
with
2,680 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/** Copyright 2020-2023 Alibaba Group Holding Limited. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
#ifndef MODULES_GRAPH_FRAGMENT_GAR_FRAGMENT_BUILDER_H_ | ||
#define MODULES_GRAPH_FRAGMENT_GAR_FRAGMENT_BUILDER_H_ | ||
|
||
#ifdef ENABLE_GAR | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include "arrow/api.h" | ||
#include "arrow/io/api.h" | ||
|
||
#include "grape/worker/comm_spec.h" | ||
|
||
#include "client/client.h" | ||
|
||
#include "graph/loader/fragment_loader_utils.h" | ||
|
||
namespace vineyard { | ||
|
||
template <typename OID_T, typename VID_T, | ||
typename VERTEX_MAP_T = | ||
ArrowVertexMap<typename InternalType<OID_T>::type, VID_T>> | ||
class GARFragmentBuilder | ||
: public ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T> { | ||
using Base = ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T>; | ||
|
||
using oid_t = OID_T; | ||
using vid_t = VID_T; | ||
using internal_oid_t = typename InternalType<oid_t>::type; | ||
using eid_t = property_graph_types::EID_TYPE; | ||
using label_id_t = property_graph_types::LABEL_ID_TYPE; | ||
using vertex_map_t = VERTEX_MAP_T; | ||
using nbr_unit_t = property_graph_utils::NbrUnit<vid_t, eid_t>; | ||
using vid_array_t = vineyard::ArrowArrayType<vid_t>; | ||
|
||
public: | ||
explicit GARFragmentBuilder(vineyard::Client& client, | ||
std::shared_ptr<vertex_map_t> vm_ptr) | ||
: ArrowFragmentBaseBuilder<oid_t, vid_t, vertex_map_t>(client), | ||
client_(client), | ||
vm_ptr_(vm_ptr) {} | ||
|
||
vineyard::Status Build(vineyard::Client& client) override; | ||
|
||
boost::leaf::result<void> Init( | ||
fid_t fid, fid_t fnum, | ||
std::vector<std::shared_ptr<arrow::Table>>&& vertex_tables, | ||
std::vector<EdgeTableInfo>&& csr_edge_tables, | ||
std::vector<EdgeTableInfo>&& csc_edge_tables, bool directed = true, | ||
int concurrency = 1); | ||
|
||
boost::leaf::result<void> SetPropertyGraphSchema( | ||
PropertyGraphSchema&& schema); | ||
|
||
private: | ||
// | prop_0 | prop_1 | ... | | ||
boost::leaf::result<void> initVertices( | ||
std::vector<std::shared_ptr<arrow::Table>>&& vertex_tables); | ||
|
||
// | src_id(generated) | dst_id(generated) | prop_0 | prop_1 | ||
// | ... | | ||
boost::leaf::result<void> initEdges( | ||
std::vector<EdgeTableInfo>&& csr_edge_tables, | ||
std::vector<EdgeTableInfo>&& csc_edge_tables, int concurrency); | ||
|
||
vineyard::Client& client_; | ||
std::vector<vid_t> ivnums_, ovnums_, tvnums_; | ||
|
||
std::vector<std::shared_ptr<arrow::Table>> vertex_tables_; | ||
std::vector<std::shared_ptr<vid_array_t>> ovgid_lists_; | ||
std::vector<typename ArrowFragment<OID_T, VID_T>::ovg2l_map_t> ovg2l_maps_; | ||
|
||
std::vector<std::shared_ptr<arrow::Table>> edge_tables_; | ||
std::vector<std::shared_ptr<arrow::Int64Array>> offset_arrays_; | ||
|
||
std::vector<std::vector<std::shared_ptr<PodArrayBuilder<nbr_unit_t>>>> | ||
ie_lists_, oe_lists_; | ||
std::vector<std::vector<std::shared_ptr<arrow::Int64Array>>> | ||
ie_offsets_lists_, oe_offsets_lists_; | ||
|
||
std::shared_ptr<vertex_map_t> vm_ptr_; | ||
|
||
IdParser<vid_t> vid_parser_; | ||
}; | ||
|
||
} // namespace vineyard | ||
|
||
#endif // ENABLE_GAR | ||
#endif // MODULES_GRAPH_FRAGMENT_GAR_FRAGMENT_BUILDER_H_ |
Oops, something went wrong.