Skip to content

Commit

Permalink
Implements varint + delta encoding on CSR to optimize the fragment memory usage (#1372)
Browse files Browse the repository at this point in the history

Related issue number
--------------------

- Fixes #1373
- Part of alibaba/GraphScope#2628

---------

Signed-off-by: vegetableysm <yuanshumin.ysm@alibaba-inc.com>
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
Co-authored-by: Tao He <linzhu.ht@alibaba-inc.com>
  • Loading branch information
vegetableysm and sighingnow committed May 26, 2023
1 parent a0b6fe3 commit 0c9c16e
Show file tree
Hide file tree
Showing 33 changed files with 1,279 additions and 354 deletions.
23 changes: 17 additions & 6 deletions modules/graph/fragment/arrow_fragment.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,16 @@ limitations under the License.

namespace vineyard {

template <typename OID_T, typename VID_T, typename VERTEX_MAP_T>
template <typename OID_T, typename VID_T, typename VERTEX_MAP_T, bool COMPACT>
class ArrowFragmentBaseBuilder;

template <typename OID_T, typename VID_T,
typename VERTEX_MAP_T =
ArrowVertexMap<typename InternalType<OID_T>::type, VID_T>>
ArrowVertexMap<typename InternalType<OID_T>::type, VID_T>,
bool COMPACT = false>
class BasicArrowFragmentBuilder
: public ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T> {
using Base = ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T>;
: public ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T, COMPACT> {
using Base = ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T, COMPACT>;

using oid_t = OID_T;
using vid_t = VID_T;
Expand All @@ -71,7 +72,7 @@ class BasicArrowFragmentBuilder
public:
explicit BasicArrowFragmentBuilder(vineyard::Client& client,
std::shared_ptr<vertex_map_t> vm_ptr)
: ArrowFragmentBaseBuilder<oid_t, vid_t, vertex_map_t>(client),
: ArrowFragmentBaseBuilder<oid_t, vid_t, vertex_map_t, COMPACT>(client),
client_(client),
vm_ptr_(vm_ptr) {}

Expand All @@ -81,7 +82,7 @@ class BasicArrowFragmentBuilder
fid_t fid, fid_t fnum,
std::vector<std::shared_ptr<arrow::Table>>&& vertex_tables,
std::vector<std::shared_ptr<arrow::Table>>&& edge_tables,
bool directed = true, int concurrency = 1);
bool directed = true, int concurrency = 1, bool compact_edges = false);

boost::leaf::result<void> SetPropertyGraphSchema(
PropertyGraphSchema&& schema) {
Expand Down Expand Up @@ -115,6 +116,16 @@ class BasicArrowFragmentBuilder
std::vector<std::vector<std::shared_ptr<FixedInt64Builder>>>
ie_offsets_lists_, oe_offsets_lists_;

// Builders for the compact edge storage: uint8 buffers for the incoming /
// outgoing edge lists, plus int64 offset arrays that accompany them.
// NOTE(review): presumably indexed as [v_label][e_label] like the existing
// ie/oe lists - confirm against the builder implementation.
std::vector<std::vector<std::shared_ptr<FixedUInt8Builder>>>
compact_ie_lists_, compact_oe_lists_;
std::vector<std::vector<std::shared_ptr<FixedInt64Builder>>>
compact_ie_offsets_lists_, compact_oe_offsets_lists_;

// Raw, non-owning pointers for the compact byte buffers and their offset
// arrays (const uint8_t* / const int64_t*).
// NOTE(review): presumably these alias the data held by the builders above;
// the code that populates them is not visible here - confirm.
std::vector<std::vector<const uint8_t*>> compact_ie_ptr_lists_,
compact_oe_ptr_lists_;
std::vector<std::vector<const int64_t*>> compact_ie_offsets_ptr_lists_,
compact_oe_offsets_ptr_lists_;

std::shared_ptr<vertex_map_t> vm_ptr_;

IdParser<vid_t> vid_parser_;
Expand Down
73 changes: 61 additions & 12 deletions modules/graph/fragment/arrow_fragment.vineyard-mod
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,17 @@ inline std::string generate_name_with_suffix(
return prefix + "_" + std::to_string(v_label) + "_" + std::to_string(e_label);
}

template <typename OID_T, typename VID_T, typename VERTEX_MAP_T>
template <typename OID_T, typename VID_T, typename VERTEX_MAP_T, bool COMPACT>
class ArrowFragmentBaseBuilder;

template <typename OID_T, typename VID_T,
typename VERTEX_MAP_T =
ArrowVertexMap<typename InternalType<OID_T>::type, VID_T>>
ArrowVertexMap<typename InternalType<OID_T>::type, VID_T>,
bool COMPACT = false>
class [[vineyard]] ArrowFragment
: public ArrowFragmentBase,
public vineyard::BareRegistered<
ArrowFragment<OID_T, VID_T, VERTEX_MAP_T>> {
ArrowFragment<OID_T, VID_T, VERTEX_MAP_T, COMPACT>> {
public:
using oid_t = OID_T;
using vid_t = VID_T;
Expand All @@ -91,6 +92,7 @@ class [[vineyard]] ArrowFragment
using nbr_t = property_graph_utils::Nbr<vid_t, eid_t>;
using nbr_unit_t = property_graph_utils::NbrUnit<vid_t, eid_t>;
using adj_list_t = property_graph_utils::AdjList<vid_t, eid_t>;
using compact_adj_list_t = property_graph_utils::EncodedAdjList<vid_t, eid_t>;
using raw_adj_list_t = property_graph_utils::RawAdjList<vid_t, eid_t>;
using vertex_map_t = VERTEX_MAP_T;
using vertex_t = grape::Vertex<vid_t>;
Expand Down Expand Up @@ -433,8 +435,9 @@ class [[vineyard]] ArrowFragment
vid_parser_.GetOffset(v.GetValue()));
}

inline adj_list_t GetIncomingAdjList(const vertex_t& v,
label_id_t e_label) const {
template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<!COMPACT_, adj_list_t>::type
GetIncomingAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
Expand All @@ -445,8 +448,25 @@ class [[vineyard]] ArrowFragment
flatten_edge_tables_columns_[e_label]);
}

inline raw_adj_list_t GetIncomingRawAdjList(const vertex_t& v,
label_id_t e_label) const {
template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<COMPACT_, compact_adj_list_t>::type
GetIncomingAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
const int64_t* offset_array =
compact_ie_offsets_ptr_lists_[v_label][e_label];

const uint8_t* ptr = compact_ie_ptr_lists_[v_label][e_label];

return compact_adj_list_t(ptr, offset_array[v_offset],
offset_array[v_offset + 1],
flatten_edge_tables_columns_[e_label]);
}

template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<!COMPACT_, raw_adj_list_t>::type
GetIncomingRawAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
Expand All @@ -456,8 +476,9 @@ class [[vineyard]] ArrowFragment
&ie[offset_array[v_offset + 1]]);
}

inline adj_list_t GetOutgoingAdjList(const vertex_t& v,
label_id_t e_label) const {
template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<!COMPACT_, adj_list_t>::type
GetOutgoingAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
Expand All @@ -468,8 +489,25 @@ class [[vineyard]] ArrowFragment
flatten_edge_tables_columns_[e_label]);
}

inline raw_adj_list_t GetOutgoingRawAdjList(const vertex_t& v,
label_id_t e_label) const {
template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<COMPACT_, compact_adj_list_t>::type
GetOutgoingAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
const int64_t* offset_array =
compact_oe_offsets_ptr_lists_[v_label][e_label];

const uint8_t* ptr = compact_oe_ptr_lists_[v_label][e_label];

return compact_adj_list_t(ptr, offset_array[v_offset],
offset_array[v_offset + 1],
flatten_edge_tables_columns_[e_label]);
}

template <bool COMPACT_ = COMPACT>
inline typename std::enable_if<!COMPACT_, raw_adj_list_t>::type
GetOutgoingRawAdjList(const vertex_t& v, label_id_t e_label) const {
vid_t vid = v.GetValue();
label_id_t v_label = vid_parser_.GetLabelId(vid);
int64_t v_offset = vid_parser_.GetOffset(vid);
Expand Down Expand Up @@ -685,6 +723,7 @@ class [[vineyard]] ArrowFragment

[[shared]] fid_t fid_, fnum_;
[[shared]] bool directed_;
[[shared]] bool compact_edges_;
[[shared]] bool is_multigraph_;
[[shared]] property_graph_types::LABEL_ID_TYPE vertex_label_num_;
[[shared]] property_graph_types::LABEL_ID_TYPE edge_label_num_;
Expand Down Expand Up @@ -715,6 +754,16 @@ class [[vineyard]] ArrowFragment
std::vector<std::vector<const int64_t*>> ie_offsets_ptr_lists_,
oe_offsets_ptr_lists_;

// Compact edge storage: UInt8 arrays for the incoming / outgoing edge lists
// and the Int64 offset arrays that go with them. Marked [[shared]] like the
// other persisted members of this class.
[[shared]] List<List<std::shared_ptr<UInt8Array>>> compact_ie_lists_,
compact_oe_lists_;
[[shared]] List<List<std::shared_ptr<Int64Array>>> compact_ie_offsets_lists_,
compact_oe_offsets_lists_;

// Raw pointers read by the compact GetIncomingAdjList / GetOutgoingAdjList
// overloads, indexed as [v_label][e_label].
// NOTE(review): presumably these point into the arrays declared above; the
// code that fills them is not visible here - confirm.
std::vector<std::vector<const uint8_t*>> compact_ie_ptr_lists_,
compact_oe_ptr_lists_;
std::vector<std::vector<const int64_t*>> compact_ie_offsets_ptr_lists_,
compact_oe_offsets_ptr_lists_;

std::vector<std::vector<std::vector<fid_t>>> idst_, odst_, iodst_;
std::vector<std::vector<std::vector<fid_t*>>> idoffset_, odoffset_,
iodoffset_;
Expand All @@ -726,7 +775,7 @@ class [[vineyard]] ArrowFragment
[[shared]] json schema_json_;
PropertyGraphSchema schema_;

friend class ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T>;
friend class ArrowFragmentBaseBuilder<OID_T, VID_T, VERTEX_MAP_T, COMPACT>;

template <typename _OID_T, typename _VID_T, typename VDATA_T,
typename EDATA_T, typename _VERTEX_MAP_T>
Expand Down
Loading

0 comments on commit 0c9c16e

Please sign in to comment.