In [1]:
import datetime

import pandas as pd
from graphreduce.node import GraphReduceNode, DynamicNode
from graphreduce.graph_reduce import GraphReduce
from graphreduce.enum import ComputeLayerEnum as GraphReduceComputeLayerEnum, PeriodUnit



In [2]:
# Build the GraphReduce graph dynamically: one node per CSV file, with edges taken from a table of discovered relationships.

In [3]:
# Load the table of discovered relationships between the CSV files. Its
# from_name/to_name and from_key/to_key columns are read later in this
# notebook to add edges to the graph.
labels = pd.read_csv('odsc_east_labels.csv')

In [4]:
# Display the relationship table (one row per from-table -> to-table edge).
labels

Unnamed: 0.1,Unnamed: 0,from_name,from_identifier,from_object_str,from_rows,to_name,to_identifier,to_object_str,to_rows,from_key,to_key,weight,discovery_mechanism
0,0,notification_interactions.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,18,notification_interaction_types.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,3,interaction_type_id,id,,constraint
1,1,notification_interactions.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,18,notifications.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,17,notification_id,id,,constraint
2,2,order_products.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,19,orders.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,9,order_id,id,,constraint
3,4,orders.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,9,cust.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,4,customer_id,id,,constraint
4,5,notifications.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,17,cust.csv,/Users/wesmadrigal/projects/graphreduce/tests/...,<Entity (identifier=/Users/wesmadrigal/project...,4,customer_id,id,,constraint


In [5]:
# Directory that holds the sample CSV files. It is defined once so the notebook
# can be pointed at a different checkout by editing a single line.
DATA_DIR = '/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data'

# Prefix assigned to each table, keyed by file name. The prefix is passed to the
# node built for that file and shows up on that table's columns in the final
# output frame (e.g. `cu_id`, `ord_ts_max`).
TABLE_PREFIXES = {
    'cust.csv': 'cu',
    'orders.csv': 'ord',
    'order_products.csv': 'op',
    'notifications.csv': 'notif',
    'notification_interactions.csv': 'ni',
    'notification_interaction_types.csv': 'nit',
}

# Full file path -> per-file options. The keys, their order and the values are
# the same as when every path was spelled out by hand.
files = {
    f'{DATA_DIR}/{file_name}': {'prefix': prefix}
    for file_name, prefix in TABLE_PREFIXES.items()
}

In [6]:
# Build one DynamicNode per CSV file, keyed by the bare file name
# (e.g. 'cust.csv') so that nodes can be looked up by the names that appear
# in the relationship table.
#
# Every node shares the same settings apart from its path and prefix:
# primary key column 'id', pandas compute layer, and a 730-day compute period.
#
# NOTE(review): date_key is None for every node, while the graph is later given
# a cut_date of 2023-09-01 and the final output contains timestamps after that
# date. Presumably no time-based filtering happens without a date key; confirm
# that this is intended.
gr_nodes = {
    csv_path.rsplit('/', 1)[-1]: DynamicNode(
        fpath=csv_path,
        fmt='csv',
        pk='id',
        prefix=file_opts['prefix'],
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day,
    )
    for csv_path, file_opts in files.items()
}



In [9]:
# Show the constructed nodes, keyed by file name.
gr_nodes

{'cust.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/cust.csv fmt=csv>,
 'orders.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/orders.csv fmt=csv>,
 'order_products.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/order_products.csv fmt=csv>,
 'notifications.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notifications.csv fmt=csv>,
 'notification_interactions.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interactions.csv fmt=csv>,
 'notification_interaction_types.csv': <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interaction_types.csv fmt=csv>}

In [10]:
# Settings for the graph, rooted at the customer table.
# Automatic feature generation is switched on with a hop limit of 1 "front"
# and 2 "back".
# No label is configured: the label_node / label_operation / label_field /
# label_period_val / label_period_unit arguments were left out by the author
# (previously present only as commented-out lines targeting a 60-day count of
# orders.csv ids).
graph_settings = dict(
    name='cust_dynamic_graph',
    parent_node=gr_nodes['cust.csv'],
    fmt='csv',
    cut_date=datetime.datetime(2023, 9, 1),
    compute_layer=GraphReduceComputeLayerEnum.pandas,
    auto_features=True,
    auto_feature_hops_front=1,
    auto_feature_hops_back=2,
)

gr = GraphReduce(**graph_settings)

In [11]:
# Register every relationship from the table as an edge in the graph.
# For each row the "to" table is passed as the parent node and the "from"
# table as the relation node, joined on to_key / from_key; reduce=True is
# passed for every edge.
edge_columns = zip(
    labels['to_name'],
    labels['from_name'],
    labels['to_key'],
    labels['from_key'],
)
for to_name, from_name, to_key, from_key in edge_columns:
    gr.add_entity_edge(
        parent_node=gr_nodes[to_name],
        relation_node=gr_nodes[from_name],
        parent_key=to_key,
        relation_key=from_key,
        reduce=True
    )

In [12]:
# Call reload() on every node registered in the graph before the
# transformations are run.
# NOTE(review): what reload() resets is not visible in this notebook; confirm
# it is still needed on a fresh kernel.
for node in gr.nodes():
    node.reload()
    

In [13]:
# Run the graph's transformations. The log output shows it hydrating graph
# attributes and data, checking prefix uniqueness, and then running filters,
# normalize and annotations per node.
gr.do_transformations()

2024-04-23 09:01:06 [info     ] hydrating graph attributes
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating attributes for DynamicNode
2024-04-23 09:01:06 [info     ] hydrating graph data
2024-04-23 09:01:06 [info     ] checking for prefix uniqueness
2024-04-23 09:01:06 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interaction_types.csv fmt=csv>
2024-04-23 09:01:06 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interactions.csv fmt=csv

In [14]:
# Raise the column display limit to 200 so that wide frames are shown in full.
pd.options.display.max_columns = 200

In [15]:
# Inspect the parent node's dataframe after the transformations: one row per
# customer, with columns from each table carrying that table's prefix
# (cu_, notif_, ni_, ord_, op_).
gr.parent_node.df

Unnamed: 0,cu_id,cu_name,notif_customer_id,notif_id_count,notif_customer_id_count,notif_ts_first,notif_ts_min,notif_ts_max,ni_notification_id_min,ni_notification_id_max,ni_notification_id_sum,ni_id_count_min,ni_id_count_max,ni_id_count_sum,ni_notification_id_count_min,ni_notification_id_count_max,ni_notification_id_count_sum,ni_interaction_type_id_count_min,ni_interaction_type_id_count_max,ni_interaction_type_id_count_sum,ni_ts_first_first,ni_ts_first_min,ni_ts_first_max,ni_ts_min_first,ni_ts_min_min,ni_ts_min_max,ni_ts_max_first,ni_ts_max_min,ni_ts_max_max,ord_customer_id,ord_id_count,ord_customer_id_count,ord_ts_first,ord_ts_min,ord_ts_max,op_order_id_min,op_order_id_max,op_order_id_sum,op_id_count_min,op_id_count_max,op_id_count_sum,op_order_id_count_min,op_order_id_count_max,op_order_id_count_sum,op_product_id_count_min,op_product_id_count_max,op_product_id_count_sum
0,1,wes,1,6,6,2022-08-05,2022-08-05,2023-06-23,101.0,106.0,621.0,1.0,3.0,14.0,1.0,3.0,14.0,1.0,3.0,14.0,2022-08-06,2022-08-06,2023-05-15,2022-08-06,2022-08-06,2023-05-15,2022-08-08,2022-08-08,2023-05-15,1,3,3,2023-05-12,2023-05-12,2023-09-02,1.0,6.0,9.0,2.0,4.0,10.0,2.0,4.0,10.0,2.0,4.0,10.0
1,2,john,2,7,7,2022-09-05,2022-09-05,2023-05-22,107.0,110.0,434.0,1.0,1.0,4.0,1.0,1.0,4.0,1.0,1.0,4.0,2023-06-01,2023-06-01,2023-06-04,2023-06-01,2023-06-01,2023-06-04,2023-06-01,2023-06-01,2023-06-04,2,3,3,2023-01-01,2022-08-05,2023-10-15,3.0,4.0,7.0,4.0,4.0,8.0,4.0,4.0,8.0,4.0,4.0,8.0
2,3,ryan,3,2,2,2023-06-12,2023-06-12,2023-09-01,,,0.0,,,0.0,,,0.0,,,0.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,3,1,1,2023-06-01,2023-06-01,2023-06-01,5.0,5.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,4,tianji,4,2,2,2024-02-01,2024-02-01,2024-02-15,,,0.0,,,0.0,,,0.0,,,0.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,4,2,2,2024-01-01,2024-01-01,2024-02-01,,,0.0,,,0.0,,,0.0,,,0.0
