In [1]:
import datetime

import pandas as pd
from graphreduce.node import GraphReduceNode, DynamicNode
from graphreduce.graph_reduce import GraphReduce
from graphreduce.enum import ComputeLayerEnum as GraphReduceComputeLayerEnum, PeriodUnit



In [2]:
%load_ext watermark

In [3]:
%watermark

Last updated: 2024-04-23T08:31:13.034675-04:00

Python implementation: CPython
Python version       : 3.9.18
IPython version      : 8.3.0

Compiler    : Clang 15.0.0 (clang-1500.1.0.2.5)
OS          : Darwin
Release     : 23.1.0
Machine     : arm64
Processor   : arm
CPU cores   : 16
Architecture: 64bit



In [4]:
class CustomerNode(GraphReduceNode):
    def do_filters(self):
        pass
    def do_annotate(self):
        pass
    def do_normalize(self):
        pass
    def do_reduce(self, reduce_key):
        pass
    def do_post_join_annotate(self):
        pass
    def do_labels(self, reduce_key):
        pass

In [5]:
cust = CustomerNode(fpath='/Users/wesmadrigal/projects/graphreduce/data/cust_data/cust.csv',
        fmt='csv',
        pk='id',
        prefix='cu',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
)



In [6]:
class NotificationInteractionNode(GraphReduceNode):
    def do_filters(self):
        pass
    def do_annotate(self):
        pass
    def do_normalize(self):
        pass
    def do_reduce(self, reduce_key):
        return self.prep_for_features().groupby(self.colabbr(reduce_key)).agg(**{
            'num_interactions': pd.NamedAgg(column=self.colabbr('id'), aggfunc='count'),
            'first_interaction':pd.NamedAgg(column=self.colabbr('ts'), aggfunc='min'),
            'last_interaction':pd.NamedAgg(column=self.colabbr('ts'),aggfunc='max')
        }).reset_index()
    def do_post_join_annotate(self):
        pass
    def do_labels(self, reduce_key):
        pass

In [7]:
class NotificationNode(GraphReduceNode):
    def do_filters(self):
        pass
    def do_annotate(self):
        pass
    def do_normalize(self):
        pass
    def do_reduce(self, reduce_key):
        return self.prep_for_features().groupby(self.colabbr(reduce_key)).agg(**{
            'num_notifications': pd.NamedAgg(column='ni_notification_id',aggfunc='count'),
            'num_interactions': pd.NamedAgg(column='num_interactions', aggfunc='sum'),
            'first_notification': pd.NamedAgg(column=self.colabbr('ts'), aggfunc='min'),
            'last_notification':pd.NamedAgg(column=self.colabbr('ts'),aggfunc='max')
        }).reset_index()
    def do_post_join_annotate(self):
        pass
    def do_labels(self):
        pass

In [8]:
class OrderProductNode(GraphReduceNode):
    def do_filters(self):
        pass
    def do_annotate(self):
        pass
    def do_normalize(self):
        pass
    def do_reduce(self, reduce_key):
        return self.prep_for_features().groupby(self.colabbr(reduce_key)).agg(**{
            'num_products': pd.NamedAgg(column=self.colabbr('product_id'),aggfunc='count')
        }).reset_index()
    def do_post_join_annotate(self):
        pass
    def do_labels(self):
        pass

In [9]:
class OrderNode(GraphReduceNode):
    def do_filters(self):
        pass
    def do_annotate(self):
        pass
    def do_normalize(self):
        pass
    def do_reduce(self, reduce_key):
        return self.prep_for_features().groupby(self.colabbr(reduce_key)).agg(**{
            'num_orders': pd.NamedAgg(column='op_order_id',aggfunc='count'),
            'num_products_ordered': pd.NamedAgg(column='num_products', aggfunc='sum'),
            'first_order': pd.NamedAgg(column=self.colabbr('ts'), aggfunc='min'),
            'last_order': pd.NamedAgg(column=self.colabbr('ts'), aggfunc='max')
        }).reset_index()
    def do_post_join_annotate(self):
        pass
    def do_labels(self):
        pass

In [10]:
cust = CustomerNode(fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/cust.csv',
        fmt='csv',
        pk='id',
        prefix='cu',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
)
orders = OrderNode(fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/orders.csv',
        fmt='csv',
        pk='id',
        prefix='ord',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
                  )
op = OrderProductNode(fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/order_products.csv',
        fmt='csv',
        pk='id',
        prefix='op',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
                  )
nit = DynamicNode(fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interaction_types.csv',
        fmt='csv',
        pk='id',
        prefix='nit',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
                  )

ni = NotificationInteractionNode(fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interactions.csv',
        fmt='csv',
        pk='id',
        prefix='ni',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
                  )
notif = NotificationNode(
    fpath='/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notifications.csv',
        fmt='csv',
        pk='id',
        prefix='not',
        date_key=None,
        compute_layer=GraphReduceComputeLayerEnum.pandas,
        compute_period_val=730,
        compute_period_unit=PeriodUnit.day
)



In [11]:
gr = GraphReduce(
    name='cust',
    parent_node=cust,
    fmt='csv',
    cut_date=datetime.datetime(2023,9,1),
    compute_layer=GraphReduceComputeLayerEnum.pandas,
    #auto_features=False,
    #auto_feature_hops_front=1,
    #auto_feature_hops_back=2,
    #label_node=gr_nodes['orders.csv'],
    #label_operation='count',
    #label_field='id',
    #label_period_val=60,
    #label_period_unit=PeriodUnit.day
)

In [12]:
# Add relation from notification_interaction_types.csv to notification_interactions.csv
gr.add_entity_edge(
        parent_node=ni,
        relation_node=nit,
        parent_key='interaction_type_id',
        relation_key='id',
        reduce=False
    )

In [13]:
# Add relation from notification_interactions.csv to notifications.csv
gr.add_entity_edge(
        parent_node=notif,
        relation_node=ni,
        parent_key='id',
        relation_key='notification_id',
        reduce=True
    )

In [14]:
# Add relation from order_products.csv to orders.csv
gr.add_entity_edge(
        parent_node=orders,
        relation_node=op,
        parent_key='id',
        relation_key='order_id',
        reduce=True
    )


In [15]:
# Add relation from notifications.csv to cust.csv
gr.add_entity_edge(
        parent_node=cust,
        relation_node=notif,
        parent_key='id',
        relation_key='customer_id',
        reduce=True
    )

In [16]:
# Add relation from orders.csv to cust.csv

In [17]:
gr.add_entity_edge(
        parent_node=cust,
        relation_node=orders,
        parent_key='id',
        relation_key='customer_id',
        reduce=True
    )

In [18]:
gr.plot_graph('customer_graph.html')

2024-04-23 08:31:26 [info     ] plotted graph at customer_graph.html


In [19]:
from IPython.display import IFrame


In [20]:
IFrame(src='./customer_graph.html', width=600, height=600)

In [21]:
gr.do_transformations()

2024-04-23 08:31:36 [info     ] hydrating graph attributes
2024-04-23 08:31:36 [info     ] hydrating attributes for NotificationInteractionNode
2024-04-23 08:31:36 [info     ] hydrating attributes for DynamicNode
2024-04-23 08:31:36 [info     ] hydrating attributes for NotificationNode
2024-04-23 08:31:36 [info     ] hydrating attributes for OrderNode
2024-04-23 08:31:36 [info     ] hydrating attributes for OrderProductNode
2024-04-23 08:31:36 [info     ] hydrating attributes for CustomerNode
2024-04-23 08:31:36 [info     ] hydrating graph data
2024-04-23 08:31:36 [info     ] checking for prefix uniqueness
2024-04-23 08:31:36 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_interactions.csv fmt=csv>
2024-04-23 08:31:36 [info     ] running filters, normalize, and annotations for <GraphReduceNode: fpath=/Users/wesmadrigal/projects/graphreduce/tests/data/cust_data/notification_inte

In [22]:
gr.parent_node.df

Unnamed: 0,cu_id,cu_name,ord_customer_id,num_orders,num_products_ordered,first_order,last_order,not_customer_id,num_notifications,num_interactions,first_notification,last_notification
0,1,wes,1,3,10.0,2023-05-12,2023-09-02,1,6,14.0,2022-08-05,2023-06-23
1,2,john,2,2,8.0,2022-08-05,2023-10-15,2,4,4.0,2022-09-05,2023-05-22
2,3,ryan,3,1,1.0,2023-06-01,2023-06-01,3,0,0.0,2023-06-12,2023-09-01
3,4,tianji,4,0,0.0,2024-01-01,2024-02-01,4,0,0.0,2024-02-01,2024-02-15
