In [1]:
import datetime
import webbrowser
from pathlib import Path

import pandas as pd
import pyvis

from graphreduce.node import GraphReduceNode
from graphreduce.enum import ComputeLayerEnum, PeriodUnit
from graphreduce.graph_reduce import GraphReduce

# define the Node-level feature implementations, if any

In [2]:
class CustomerNode(GraphReduceNode):
    """Customer node used as the parent of the graph.

    Every hook below is intentionally a no-op that returns ``None``: this
    example defines no customer-level filters, annotations, reductions or
    labels.
    """

    def do_annotate(self):
        """No-op: no annotations are defined for customers."""

    def do_filters(self):
        """No-op: no filters are defined for customers."""

    def do_clip_cols(self):
        """No-op: no column clipping is defined for customers."""

    def do_slice_data(self):
        """No-op: no data slicing is defined for customers."""

    def do_post_join_annotate(self):
        """No-op: no post-join annotations are defined for customers."""

    def do_reduce(self, reduce_key, *args, **kwargs):
        """No-op: no reduction is defined for customers; returns ``None``."""

    def do_labels(self, reduce_key, *args, **kwargs):
        """No-op: no labels are defined for customers; returns ``None``."""
    

In [3]:
class OrderNode(GraphReduceNode):
    """Order node: the relation that is aggregated per reduce key.

    Only ``do_reduce`` does real work; the remaining hooks are intentional
    no-ops that return ``None``.
    """

    def do_annotate(self):
        """No-op: no annotations are defined for orders."""

    def do_filters(self):
        """No-op: no filters are defined for orders."""

    def do_clip_cols(self):
        """No-op: no column clipping is defined for orders."""

    def do_slice_data(self):
        """No-op: no data slicing is defined for orders."""

    def do_post_join_annotate(self):
        """No-op: no post-join annotations are defined for orders."""

    def do_reduce(self, reduce_key, *args, **kwargs):
        """Count orders per value of ``reduce_key``.

        Args:
            reduce_key: Un-prefixed name of the column to group by; it is
                passed through ``self.colabbr`` before grouping.
            *args, **kwargs: Accepted and ignored so the signature matches
                ``CustomerNode.do_reduce`` and tolerates extra arguments.

        Returns:
            A dataframe with the group key restored as a column (via
            ``reset_index``) and one aggregate column named
            ``self.colabbr(f'{self.pk}_count')`` holding the count of the
            primary-key column in each group.
        """
        # NOTE(review): colabbr presumably prepends the node prefix (the final
        # output shows columns such as `order_id_count`) — confirm in graphreduce.
        group_col = self.colabbr(reduce_key)
        count_col = self.colabbr(f'{self.pk}_count')
        return self.prep_for_features().groupby(group_col).agg(
            **{
                count_col: pd.NamedAgg(column=self.colabbr(self.pk), aggfunc='count')
            }
        ).reset_index()

    def do_labels(self, key, *args, **kwargs):
        """No-op: no labels are defined for orders; returns ``None``.

        Extra positional/keyword arguments are accepted and ignored, like
        ``CustomerNode.do_labels``.
        """

# Instantiate the nodes. For each node:
# - specify the primary key
# - specify the prefix applied to its columns
# - specify the path to its data
# - specify the file format
# - select a compute layer
# - NOTE: when the compute layer is Spark, a Spark context object is also needed

In [4]:
# Both nodes read CSV files from dat/ and use the pandas compute layer.
cust = CustomerNode(
    pk='id',
    prefix='cust',
    fpath='dat/cust.csv',
    fmt='csv',
    compute_layer=ComputeLayerEnum.pandas,
)
order = OrderNode(
    pk='id',
    prefix='order',
    fpath='dat/orders.csv',
    fmt='csv',
    compute_layer=ComputeLayerEnum.pandas,
)


# instantiate the graph reduce object

In [5]:
# Build the GraphReduce object with the customer node as parent, using the
# same pandas compute layer as the nodes.
# NOTE(review): the parameter meanings noted below are inferred from their
# names — confirm against the graphreduce documentation.
gr = GraphReduce(
    cut_date=datetime.datetime(2023, 5, 6),
    compute_period_val=365,  # together with the unit below: 365 days
    compute_period_unit=PeriodUnit.day,
    parent_node=cust,
    compute_layer=ComputeLayerEnum.pandas,
    has_labels=False,  # labels are off; label_period_* presumably unused here — TODO confirm
    label_period_val=30,
    label_period_unit=PeriodUnit.day,
    dynamic_propagation=True
)

In [6]:
# Echo the object to confirm it was constructed (shows the default repr).
gr

<graphreduce.graph_reduce.GraphReduce at 0x13447bd60>

In [7]:
# Register both nodes with the graph before connecting them.
gr.add_node(cust)
gr.add_node(order)

# Connect customers (parent) to orders (relation): the customer 'id' column
# pairs with the order 'customer_id' column.
# NOTE(review): reduce=True presumably requests that orders be aggregated
# before being joined to the parent — the do_transformations log shows
# "reducing relation OrderNode"; confirm against the graphreduce docs.
gr.add_entity_edge(
    parent_node=cust,
    relation_node=order,
    parent_key='id',
    relation_key='customer_id',
    relation_type='parent_child',
    reduce=True
)

In [9]:
# Plot the graph to cust_order.html (the log line emitted by this call confirms the path).
gr.plot_graph('cust_order.html',  notebook=True)

2023-07-22 14:26:40 [info     ] plotted graph at cust_order.html


In [10]:
# Open the plotted graph in the default browser. `webbrowser` works on any OS,
# whereas the `open` shell command only exists on macOS. The path is resolved
# to an absolute file:// URI because webbrowser.open expects a URL.
# The trailing semicolon suppresses the boolean return value in the cell output.
webbrowser.open(Path('cust_order.html').resolve().as_uri());

In [12]:
# Run the full pipeline. Per the log output of this cell, it hydrates the graph
# attributes and data, checks prefix uniqueness, runs filters / clip cols /
# annotations for each node, then traverses depth-first from CustomerNode,
# reducing OrderNode (with dynamic propagation) and joining it to CustomerNode.
gr.do_transformations()

2023-07-22 14:38:03 [info     ] hydrating graph attributes
2023-07-22 14:38:03 [info     ] hydrating attributes for CustomerNode
2023-07-22 14:38:03 [info     ] hydrating attributes for OrderNode
2023-07-22 14:38:03 [info     ] hydrating graph data
2023-07-22 14:38:03 [info     ] checking for prefix uniqueness
2023-07-22 14:38:03 [info     ] running filters, clip cols, and annotations for CustomerNode
2023-07-22 14:38:03 [info     ] running filters, clip cols, and annotations for OrderNode
2023-07-22 14:38:03 [info     ] depth-first traversal through the graph from source: CustomerNode
2023-07-22 14:38:03 [info     ] reducing relation OrderNode
2023-07-22 14:38:03 [info     ] doing dynamic propagation on node OrderNode
2023-07-22 14:38:03 [info     ] joining OrderNode to CustomerNode


In [13]:
# Inspect the parent node's dataframe after the join: one row per customer with
# the prefixed order aggregates (e.g. order_id_count) attached.
# NOTE(review): the extra order_*_min/max/sum/first columns presumably come from
# dynamic propagation rather than OrderNode.do_reduce — confirm.
# NOTE(review): order_ts_first for customer 1 is 2023-05-12, which is after the
# cut_date of 2023-05-06; presumably no date filtering is applied because the
# nodes define no filters — verify this is intended.
gr.parent_node.df

Unnamed: 0,cust_id,cust_name,order_customer_id,order_id_count,order_id_min,order_id_max,order_id_sum,order_customer_id_min,order_customer_id_max,order_customer_id_sum,order_ts_first
0,1,wes,1,2,1,2,3,1,1,2,2023-05-12
1,2,john,2,2,3,4,7,2,2,4,2023-01-01
