# Basic usage for defining and instantiating a single node
# with SQL dialects.  We're using `sqlite` here!

In [1]:
import pandas as pd
import sqlite3
import json
import os
import typing


# examples for using SQL engines and dialects
from graphreduce.node import SQLNode
from graphreduce.graph_reduce import GraphReduce
from graphreduce.enum import SQLOpType, ComputeLayerEnum
from graphreduce.models import sqlop



In [2]:
# let's use the customer data from the codebase
# as an example with the `sqlite` engine

In [3]:
!ls dat/

cust.csv                      order_events.csv
cust.db                       order_products.csv
notification_interactions.csv orders.csv
notifications.csv             products.csv


In [4]:
# Load the raw customer CSV into a DataFrame for a quick look at the data.
# NOTE: the name `cust` is rebound to a CustNode instance in a later cell.
cust = pd.read_csv('dat/cust.csv')

In [5]:
cust.head(2)

Unnamed: 0,id,name
0,1,wes
1,2,john


In [6]:
len(cust)

2

In [34]:
# Remove any database left over from a previous run so the tables are
# rebuilt from the CSV files. Done in Python rather than with `!rm` so the
# cell is portable and stays silent when the file is already absent.
if os.path.exists('dat/cust.db'):
    os.remove('dat/cust.db')

In [35]:
# Path of the sqlite database file that the CSV tables are loaded into.
dbfile = 'dat/cust.db'
# sqlite3.connect creates the database file when it does not exist yet.
# This connection is later handed to the node as its `client`.
conn = sqlite3.connect(dbfile)

In [36]:
# Names of every CSV file found in the data directory.
files = list(filter(lambda entry: entry.endswith('.csv'), os.listdir('dat/')))

In [37]:
# Load every CSV into the sqlite database, one table per file. The table
# name is the part of the filename before the first '.'; an existing table
# with that name is replaced.
for f in files:
    df = pd.read_csv(f"dat/{f}")
    name = f.partition('.')[0]
    df.to_sql(name=name, con=conn, if_exists='replace', index=False)

In [38]:
# Confirm the load by listing the tables recorded in sqlite's schema
# catalog (`sqlite_master`).
pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", conn)

Unnamed: 0,name
0,notification_interactions
1,order_products
2,products
3,orders
4,notifications
5,cust
6,order_events


In [39]:
pd.read_sql_query("select * from notification_interactions", conn).head()

Unnamed: 0,id,notification_id,interaction_type_id,ts
0,1000,101,1500,2022-08-06
1,1001,101,1600,2022-08-07
2,1002,101,1700,2022-08-08
3,1003,102,1500,2023-01-01
4,1004,102,1600,2023-01-02


In [13]:
# create different nodes for these different tables

In [14]:
class CustNode(SQLNode):
    """Example SQLNode subclass used for the customer data.

    Only the annotate and filter stages emit SQL operations; every other
    stage is a no-op that returns None.
    """

    def do_annotate(self) -> typing.Union[sqlop, typing.List[sqlop]]:
        """Select all columns plus the length of the (prefixed) name column."""
        annotate_op = sqlop(
            optype=SQLOpType.select,
            opval=f"*, LENGTH({self.colabbr('name')}) as {self.colabbr('name_length')}",
        )
        return [annotate_op]

    def do_filters(self) -> typing.Union[sqlop, typing.List[sqlop]]:
        """Keep only rows whose (prefixed) id column is below 3."""
        filter_op = sqlop(
            optype=SQLOpType.where,
            opval=f"{self.colabbr('id')} < 3",
        )
        return [filter_op]

    def do_normalize(self):
        """No normalization is defined for this node."""
        return None

    def do_reduce(self, reduce_key):
        """No reduction is defined for this node; `reduce_key` is unused."""
        return None

    def do_post_join_annotate(self):
        """No post-join annotation is defined for this node."""
        return None

    def do_post_join_filters(self):
        """No post-join filtering is defined for this node."""
        return None

In [16]:
# Build the node on top of the `cust` table, reusing the open sqlite
# connection as its client. This rebinds `cust`, which previously held
# the raw DataFrame.
cust = CustNode(
    fpath='cust',
    prefix='cust',
    client=conn,
    compute_layer=ComputeLayerEnum.sqlite,
    columns=['id', 'name'],
)



In [17]:
cust.do_data()

[sqlop(optype=<SQLOpType.select: 'select'>, opval='id as cust_id,name as cust_name')]

In [18]:
print(cust.build_query(cust.do_data()))


        SELECT id as cust_id,name as cust_name
        FROM cust
        WHERE true
        


In [19]:
cust.create_ref(
    cust.build_query(cust.do_data()),
    cust.do_data
)

'CustNode_cust_do_data'

In [20]:
pd.read_sql_query("select * from CustNode_cust_do_data", conn)

Unnamed: 0,cust_id,cust_name
0,1,wes
1,2,john


In [21]:
cust._cur_data_ref

'CustNode_cust_do_data'

In [22]:
cust._temp_refs

{'do_data': 'CustNode_cust_do_data'}

In [23]:
print(cust.build_query(cust.do_annotate()))


        SELECT *, LENGTH(cust_name) as cust_name_length
        FROM CustNode_cust_do_data
        WHERE true
        


In [24]:
cust.create_ref(cust.build_query(cust.do_annotate()), cust.do_annotate)

'CustNode_cust_do_annotate'

In [25]:
pd.read_sql_query(f"select * from {cust._cur_data_ref}", conn)

Unnamed: 0,cust_id,cust_name,cust_name_length
0,1,wes,3
1,2,john,4


In [26]:
cust._cur_data_ref

'CustNode_cust_do_annotate'

In [27]:
cust._temp_refs

{'do_data': 'CustNode_cust_do_data',
 'do_annotate': 'CustNode_cust_do_annotate'}

In [28]:
# CustNode.do_normalize returns None, so there are no SQL ops for this stage.
# As the log line in the output shows, the node then keeps using the current
# data ref rather than creating a new one.
cust.create_ref(cust.build_query(cust.do_normalize()), cust.do_normalize)

2024-07-01 21:11:05 [info     ] no sql was provided for do_normalize so using current data ref


'CustNode_cust_do_annotate'

In [29]:
cust._temp_refs

{'do_data': 'CustNode_cust_do_data',
 'do_annotate': 'CustNode_cust_do_annotate',
 'do_normalize': 'CustNode_cust_do_annotate'}

In [30]:
cust.create_ref(cust.build_query(cust.do_filters()), cust.do_filters)

'CustNode_cust_do_filters'

In [31]:
pd.read_sql_query(f"select * from {cust._cur_data_ref}", conn)

Unnamed: 0,cust_id,cust_name,cust_name_length
0,1,wes,3
1,2,john,4
