# Nuclio - Process user location signal

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Define environment variables

In [78]:
%%nuclio env

# DB Access
V3IO_API=${V3IO_FRAMESD}
V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Customers
COSTUMERS_STREAM=customers_stream
COSTUMERS_TABLE=customers
STORES_TABLE=/stores

# Predictions
PREDICTIONS_STREAM=predictions_stream
PREDICTION_SERVER=http://35.158.112.89:32064

%nuclio: setting 'V3IO_API' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'COSTUMERS_STREAM' environment variable
%nuclio: setting 'COSTUMERS_TABLE' environment variable
%nuclio: setting 'STORES_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_STREAM' environment variable
%nuclio: setting 'PREDICTION_SERVER' environment variable


%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line


In [79]:
# nuclio: ignore
!echo ${V3IO_API}

framesd.default-tenant.svc:8080


### Base image

In [6]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Set cron trigger to read from stream

In [61]:
%nuclio config spec.triggers.secs.kind = "cron"
%nuclio config spec.triggers.secs.attributes.interval = "1m"

%nuclio: setting spec.triggers.secs.kind to 'cron'
%nuclio: setting spec.triggers.secs.attributes.interval to '1m'


### Install packages

In [16]:
%%nuclio cmd
pip install v3io_frames
pip install v3io==0.1.1
pip install requests
pip install pandas



## Function code

In [62]:
import json
import os
import requests
import time

# Data handling
import pandas as pd

# DB
import v3io
import v3io.dataplane
import v3io.logger
import v3io_frames as v3f

### Init context

In [89]:
def init_context(context):
    """Populate ``context`` with the clients and names the handler relies on.

    Attributes set on ``context``:
        v3c: object returned by ``new_container('bigdata')`` on a new v3io
            data-plane session (created with a ``'DEBUG'`` logger).
        v3f: v3io_frames client created for ``http://<V3IO_API>``.
        customers, stores: values of ``COSTUMERS_TABLE`` / ``STORES_TABLE``.
        predictions, customers_stream: values of ``PREDICTIONS_STREAM`` /
            ``COSTUMERS_STREAM``.
        prediction_server: value of ``PREDICTION_SERVER``, or ``None`` when
            that variable is not set.

    Raises:
        KeyError: if ``V3IO_API`` or any of the table/stream variables is
            missing from the environment.
    """
    # DB clients
    dataplane_logger = v3io.logger.Logger('DEBUG')
    context.v3c = v3io.dataplane.Context(dataplane_logger).new_session().new_container('bigdata')
    context.v3f = v3f.Client(f"http://{os.environ['V3IO_API']}")

    # DB tables and streams: (context attribute, environment variable).
    # The spelling "COSTUMERS" matches the variables declared in the env cell.
    required_names = (
        ('customers', 'COSTUMERS_TABLE'),
        ('stores', 'STORES_TABLE'),
        ('predictions', 'PREDICTIONS_STREAM'),
        ('customers_stream', 'COSTUMERS_STREAM'),
    )
    for attribute, env_var in required_names:
        setattr(context, attribute, os.environ[env_var])

    # NOTE: batching state (stream_idx / MAX_BATCH) is currently disabled.

    # Prediction server (optional: stays None when the variable is unset)
    context.prediction_server = os.getenv('PREDICTION_SERVER')

### Helper functions

In [81]:
def is_customer_in_store(customer, context) -> bool:
    """Tell whether the customer's reported location is a key in the stores table.

    Args:
        customer: mapping or row indexed by ``'location'``.
        context: object exposing the frames client (``v3f``) and the stores
            table name (``stores``).

    Returns:
        True when the KV read filtered on ``__name`` yields a non-empty result.

    The (customer, context) parameter order is what allows the handler to call
    this through ``DataFrame.apply(..., args=[context], axis=1)``.
    """
    name_filter = '__name=="{}"'.format(customer['location'])
    matching_stores = context.v3f.read('kv', context.stores, filter=name_filter)
    return not matching_stores.empty

In [82]:
def update_customer_location(context, customer_id: str, location: str):
    """Write a customer's new location to the customers KV table.

    Issues a frames ``update`` command keyed by ``customer_id`` that sets
    ``location`` and is conditioned on ``id==<customer_id>``. The result of the
    command is discarded; the function returns ``None``.
    """
    # NOTE(review): both values are interpolated unquoted - confirm this is
    # what the expression syntax expects for string locations.
    update_args = {
        'key': customer_id,
        'expression': 'location={}'.format(location),
        'condition': 'id=={}'.format(customer_id),
    }
    context.v3f.execute('kv', context.customers, 'update', args=update_args)



In [83]:
def update_store_count(customer, context, is_add=True):
    """Add one to, or subtract one from, the ``customers`` counter of a store item.

    Args:
        customer: mapping or row whose ``'location'`` entry names the store item.
        context: object exposing the v3io container client (``v3c``) and the
            stores table path (``stores``).
        is_add: ``True`` increments the counter, ``False`` decrements it.

    Returns:
        The ``status_code`` of the update-item response.
    """
    print(f'updating {customer}')
    operator = '+' if is_add else '-'
    store_item_path = v3io.common.helpers.url_join(context.stores, customer['location'])

    # The counter has to be read from the same attribute it is written to;
    # the previous expression read the misspelled attribute "cutsomers".
    update_request = v3io.dataplane.UpdateItemInput(
        store_item_path,
        expression=f'customers = customers {operator} 1',
    )
    response = context.v3c.update_item(update_request)
    return response.status_code

In [84]:
def save_predictions(context, customer_id: str, prediction: pd.DataFrame):
    """Write a predictions frame to the TSDB table named by ``context.predictions``.

    Args:
        context: object exposing the frames client (``v3f``) and the target
            name (``predictions``).
        customer_id: accepted but not used by this function.
        prediction: frame handed to the frames client unchanged.
    """
    frames_client = context.v3f
    frames_client.write('tsdb', context.predictions, prediction)

### Handler

In [85]:
# Upper bound, in seconds, for a single call to the prediction server, so an
# unresponsive server cannot block the handler indefinitely.
PREDICTION_REQUEST_TIMEOUT = 10


def handler(context, event):
    """Forward customers who are currently inside a store to the prediction server.

    Reads the last minute of records from shard ``'0'`` of the customers
    stream, keeps the rows whose ``location`` matches a store (see
    ``is_customer_in_store``) and POSTs ``{'id': ..., 'store': ...}`` for each
    of them to ``context.prediction_server``.

    Args:
        context: object prepared by ``init_context`` (uses ``v3f``,
            ``customers_stream``, ``prediction_server`` and ``logger``).
        event: triggering event; not used.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: if a POST fails or exceeds
            ``PREDICTION_REQUEST_TIMEOUT`` seconds.
    """
    # Get latest customer locations from the customers stream.
    # NOTE(review): `as_iterator=True` is passed through unchanged, yet the
    # result is used as a DataFrame below (.shape / .empty) - confirm the flag.
    customers_stream = context.v3f.read('stream', context.customers_stream, seek='time', shard_id='0', start='now-1m', as_iterator=True)
    print(customers_stream.shape[0])

    # Updating the customers table and the per-store counters
    # (update_customer_location / update_store_count) is currently disabled.

    if customers_stream.empty:
        return

    # Keep only the customers whose location is a known store. The mask is
    # computed separately so the frame returned by the read is not mutated.
    in_store = customers_stream.apply(is_customer_in_store, args=(context,), axis=1)
    customers_in_store = customers_stream.assign(is_store=in_store)[in_store]

    context.logger.debug(customers_in_store)

    # One request per customer; a plain loop because only the side effect matters.
    for _, customer in customers_in_store.iterrows():
        payload = {'id': str(int(customer['id'])), 'store': str(customer['location'])}
        requests.post(context.prediction_server, json=payload, timeout=PREDICTION_REQUEST_TIMEOUT)

In [91]:
# nuclio: ignore
init_context(context)

In [92]:
# nuclio: ignore
event = nuclio.Event()
handler(context, event)

100


In [75]:
# nuclio: ignore
context.v3f.read('tsdb', 'predictions')

Unnamed: 0_level_0,prediction,customer_id,product_id,prediction_num,store
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-29 12:07:32.498,0.253893,2222.0,11520.0,0,3311311311233302303
2019-04-29 12:07:32.235,0.310562,547.0,37496.0,2,3311311311222000003
2019-04-29 12:07:32.368,0.585238,508.0,25825.0,1,3311311313011133120
2019-04-29 12:07:32.368,0.599259,508.0,21482.0,0,3311311313011133120
2019-04-29 12:07:32.235,0.294836,547.0,4294.0,3,3311311311222000003
2019-04-29 12:07:32.498,0.124962,2222.0,24221.0,3,3311311311233302303
2019-04-29 12:07:32.235,0.330082,547.0,30727.0,1,3311311311222000003
2019-04-29 12:07:32.235,0.482901,547.0,27608.0,0,3311311311222000003
2019-04-29 12:07:32.368,0.567981,508.0,17317.0,3,3311311313011133120
2019-04-29 12:07:32.498,0.181556,2222.0,32935.0,2,3311311311233302303


In [90]:
%nuclio deploy -n process_user_location -p recommendation_engine -c

%nuclio: ['deploy', '-n', 'process_user_location', '-p', 'recommendation_engine', '-c', '/User/tutorials/demos/location_based_recommendation/process_user_location.ipynb']
%nuclio: [nuclio.deploy] 2019-04-29 12:13:30,505 (info) Building processor image
%nuclio: [nuclio.deploy] 2019-04-29 12:13:35,561 (info) Pushing image
%nuclio: [nuclio.deploy] 2019-04-29 12:13:35,561 (info) Build complete
%nuclio: [nuclio.deploy] 2019-04-29 12:13:39,598 (info) Function deploy complete
%nuclio: [nuclio.deploy] 2019-04-29 12:13:39,603 done updating process-user-location, function address: 35.158.112.89:32345
%nuclio: function deployed


In [69]:
%nuclio show

%nuclio: notebook process-user-location exported
Config:
apiVersion: nuclio.io/v1
kind: Function
metadata:
  annotations:
    nuclio.io/generated_by: function generated at 17-04-2019 by iguazio from /User/tutorials/demos/location_based_recommendation/process_user_location.ipynb
  labels: {}
  name: process-user-location
spec:
  build:
    commands:
    - pip install v3io_frames
    - pip install v3io==0.1.1
    - pip install requests
    - pip install pandas
    functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDE5LTA0LTE3IDEyOjEwCgppbXBvcnQganNvbgppbXBvcnQgb3MKaW1wb3J0IHJlcXVlc3RzCmltcG9ydCB0aW1lCgppbXBvcnQgcGFuZGFzIGFzIHBkCgppbXBvcnQgdjNpbwppbXBvcnQgdjNpby5kYXRhcGxhbmUKaW1wb3J0IHYzaW8ubG9nZ2VyCmltcG9ydCB2M2lvX2ZyYW1lcyBhcyB2M2YKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICAKICAgIHYzYyA9IHYzaW8uZGF0YXBsYW5lLkNvbnRleHQodjNpby5sb2dnZXIuTG9nZ2VyKCdERUJVRycpKS5uZXdfc2Vzc2lvbigpLm5ld19jb250YWluZXIoJ2JpZ2RhdGEnKQogICAgc2V0YXR0cihjb250ZXh0LCAndjNjJywgdjNjKQogI