# Nuclio - Process user location signal

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Define environment variables

In [11]:
%%nuclio env

# DB Access
V3IO_API=${V3IO_FRAMESD}
V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Customers
COSTUMERS_STREAM=customers_stream
COSTUMERS_TABLE=customers
STORES_TABLE=/stores

# Predictions
PREDICTIONS_STREAM=predictions_stream
PREDICTION_SERVER=prediction-server:8080

%nuclio: setting 'V3IO_API' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'COSTUMERS_STREAM' environment variable
%nuclio: setting 'COSTUMERS_TABLE' environment variable
%nuclio: setting 'STORES_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_STREAM' environment variable
%nuclio: setting 'PREDICTION_SERVER' environment variable


%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line


In [10]:
# nuclio: ignore
# Debug aid: print the host name and resolved IP address of the machine
# running this notebook.
import socket

hostname = socket.gethostname()
IPAddr = socket.gethostbyname(hostname)
print(f"Your Computer Name is:{hostname}")
print(f"Your Computer IP Address is:{IPAddr}")

Your Computer Name is:jupyter-wtyegwoi5t-ht71d-6f7cc8d58d-hptsw
Your Computer IP Address is:10.233.92.90


### Base image

In [6]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Set cron trigger to read from stream

In [61]:
# Configure a cron trigger named "secs" that fires once per minute.
# The handler reads the stream with start='now-1m', so the interval here and
# that read window should be changed together.
%nuclio config spec.triggers.secs.kind = "cron"
%nuclio config spec.triggers.secs.attributes.interval = "1m"

%nuclio: setting spec.triggers.secs.kind to 'cron'
%nuclio: setting spec.triggers.secs.attributes.interval to '1m'


### Install packages

In [153]:
%%nuclio cmd
pip install v3io_frames
pip install v3io==0.1.1 --upgrade
pip install requests
pip install pandas

Requirement already up-to-date: v3io==0.1.1 in /User/.pythonlibs/lib/python3.6/site-packages (0.1.1)


## Function code

In [62]:
import json
import os
import requests
import time

# Data handling
import pandas as pd

# DB
import v3io
import v3io.dataplane
import v3io.logger
import v3io_frames as v3f

### Init context

In [163]:
def init_context(context):
    """Attach DB clients, table/stream names and the prediction URL to ``context``.

    Configuration is read from environment variables (the ``COSTUMERS_*``
    spelling matches the names set in the environment cell):

    * ``V3IO_API``            -> ``context.v3f`` (frames client, ``http://`` is prepended)
    * ``COSTUMERS_TABLE``     -> ``context.customers``
    * ``STORES_TABLE``        -> ``context.stores``
    * ``PREDICTIONS_STREAM``  -> ``context.predictions``
    * ``COSTUMERS_STREAM``    -> ``context.customers_stream``
    * ``PREDICTION_SERVER``   -> ``context.prediction_server`` (optional)

    ``context.v3c`` is additionally set to a v3io dataplane container handle
    for the ``bigdata`` container.

    Args:
        context: object that accepts attribute assignment (the nuclio context).

    Raises:
        KeyError: if any of the required variables above is not set.
    """
    # DB contexts
    context.v3c = (
        v3io.dataplane.Context(v3io.logger.Logger('DEBUG'))
        .new_session()
        .new_container('bigdata')
    )
    context.v3f = v3f.Client('http://' + os.environ['V3IO_API'])

    # DB tables and streams
    context.customers = os.environ['COSTUMERS_TABLE']
    context.stores = os.environ['STORES_TABLE']
    context.predictions = os.environ['PREDICTIONS_STREAM']
    context.customers_stream = os.environ['COSTUMERS_STREAM']

    # Prediction server. The value is later handed to requests.post(), which
    # rejects URLs without a scheme, so a bare "host:port" gets the same
    # "http://" prefix that V3IO_API receives above. A value that already
    # carries a scheme is kept untouched; a missing variable stays None.
    prediction_server = os.getenv('PREDICTION_SERVER')
    if prediction_server and '://' not in prediction_server:
        prediction_server = 'http://' + prediction_server
    context.prediction_server = prediction_server

### Helper functions

In [247]:
def is_customer_in_store(customer, context) -> bool:
    """Tell whether the customer's ``location`` is a key in the stores table.

    Args:
        customer: mapping-like record exposing a ``'location'`` entry.
        context: object exposing ``v3f`` (frames client) and ``stores`` (table name).

    Returns:
        True when the KV lookup by name yields at least one row, else False.
    """
    name_filter = '__name=="{}"'.format(customer['location'])
    matching_stores = context.v3f.read('kv', context.stores, filter=name_filter)

    if matching_stores.empty:
        return False
    return True

In [373]:
def is_customer_out_of_store(context, new_customer_locations):
    """Return the store locations the reporting customers were last recorded in.

    For every customer id in ``new_customer_locations`` the previously stored
    location is read from the customers KV table; locations that are stores
    (per ``is_customer_in_store``) are returned so the caller can decrement
    those stores' counters.

    Args:
        context: object exposing ``v3f`` (frames client) and ``customers`` (table name).
        new_customer_locations: DataFrame with a numeric ``'id'`` column.

    Returns:
        pandas.Series of store locations. It is empty (never ``None``) when
        the batch is empty or no stored record matches, so callers can always
        iterate or ``.apply`` over the result.
    """
    no_stores = pd.Series([], dtype=object, name='location')
    if new_customer_locations.empty:
        return no_stores

    # Build the key list explicitly instead of slicing the repr of a list of
    # numpy strings: that repr is not stable across numpy versions.
    customer_ids = new_customer_locations['id'].values.astype('int').astype('str')
    quoted_ids = ', '.join(f"'{customer_id}'" for customer_id in customer_ids)
    filter_line = f'__name IN ({quoted_ids})'

    old_customer_locations = context.v3f.read(
        'kv', context.customers, columns=['location'], filter=filter_line)
    # Row-wise apply on an empty frame does not yield a boolean mask.
    if old_customer_locations.empty:
        return no_stores

    was_in_store = old_customer_locations.apply(
        is_customer_in_store, args=(context,), axis=1)
    return old_customer_locations.loc[was_in_store.astype(bool), 'location']

In [375]:
def update_customer_location(context, customer_id: str, location: str):
    """Set the ``location`` attribute of one record in the customers KV table.

    Args:
        context: object exposing ``v3f`` (frames client) and ``customers`` (table name).
        customer_id: key of the record to update.
        location: value written into the record's ``location`` attribute.
    """
    update_args = {
        'key': customer_id,
        'expression': 'SET location="{}"'.format(location),
        'condition': '',
    }
    context.v3f.execute('kv', context.customers, 'update', args=update_args)

In [376]:
def update_store_count(customer, context, is_add=True):
    """Increment or decrement the ``customers`` counter of a store record.

    Args:
        customer: mapping-like record whose ``'location'`` is the store key.
        context: object exposing ``v3f`` (frames client) and ``stores`` (table name).
        is_add: truthy to add one to the counter, falsy to subtract one.
    """
    if is_add:
        sign = '+'
    else:
        sign = '-'

    update_args = {
        'key': customer['location'],
        'expression': 'SET customers=customers' + sign + '1',
        'condition': '',
    }
    context.v3f.execute('kv', context.stores, 'update', args=update_args)

In [377]:
def save_predictions(context, customer_id: str, prediction: pd.DataFrame):
    """Write a predictions frame to the TSDB table named by ``context.predictions``.

    Args:
        context: object exposing ``v3f`` (frames client) and ``predictions`` (table name).
        customer_id: accepted for the caller's convenience; not used by the body.
        prediction: frame handed to the frames client as-is.
    """
    frames_client = context.v3f
    target_table = context.predictions
    frames_client.write('tsdb', target_table, prediction)

### Handler

In [389]:
def handler(context, event):
    """Process the customer-location records of the last minute.

    Steps, in order:

    1. Read the recent records from the customers stream.
    2. Decrement the counter of every store a reporting customer was
       previously recorded in.
    3. Store each customer's new location.
    4. For customers whose new location is a store: increment that store's
       counter and POST ``{'id', 'store'}`` to the prediction server.

    Args:
        context: nuclio context carrying the attributes set by ``init_context``.
        event: triggering event; not read by this function.
    """
    # NOTE(review): `as_iterator=True` is passed through unchanged; everything
    # below treats the result as a single DataFrame — confirm against the
    # frames client in use.
    customers_stream = context.v3f.read('stream', context.customers_stream, seek='time', shard_id='0', start='now-1m', as_iterator=True)

    # Nothing arrived in the window, so there is nothing to update.
    if customers_stream.empty:
        return

    # Has anyone moved out of any store? The helper yields bare location
    # values, while update_store_count expects a record with a 'location'
    # entry, so each value is wrapped before the call. A None result (no
    # stores to update) is tolerated.
    stores_to_update = is_customer_out_of_store(context, customers_stream)
    if stores_to_update is not None:
        for location in stores_to_update:
            update_store_count({'location': location}, context, False)

    # Update every customer's new location.
    for _, customer in customers_stream.iterrows():
        update_customer_location(context, str(int(customer['id'])), customer['location'])

    # Keep only the customers that are now inside a store.
    in_store = customers_stream.apply(is_customer_in_store, args=(context,), axis=1)
    customers_in_store = customers_stream[in_store.astype(bool)]

    # Update the customer count of those stores.
    for _, customer in customers_in_store.iterrows():
        update_store_count(customer, context)

    context.logger.debug(customers_in_store)

    # Ask the prediction server for predictions for each in-store customer.
    for _, customer in customers_in_store.iterrows():
        requests.post(
            context.prediction_server,
            json={'id': str(int(customer['id'])), 'store': str(customer['location'])})

In [382]:
# nuclio: ignore
# Populate `context` with the clients and table/stream names the handler reads.
# NOTE(review): `context` is not defined in the cells shown here — presumably
# supplied by the nuclio notebook extension; confirm.
init_context(context)

In [383]:
# nuclio: ignore
# Run the handler once locally with a default-constructed event.
event = nuclio.Event()
handler(context, event)

In [75]:
# nuclio: ignore
# Inspect the 'predictions' TSDB table.
# NOTE(review): this reads the literal table name 'predictions', whereas
# context.predictions is taken from PREDICTIONS_STREAM — confirm which table
# is intended.
context.v3f.read('tsdb', 'predictions')

Unnamed: 0_level_0,prediction,customer_id,product_id,prediction_num,store
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-29 12:07:32.498,0.253893,2222.0,11520.0,0,3311311311233302303
2019-04-29 12:07:32.235,0.310562,547.0,37496.0,2,3311311311222000003
2019-04-29 12:07:32.368,0.585238,508.0,25825.0,1,3311311313011133120
2019-04-29 12:07:32.368,0.599259,508.0,21482.0,0,3311311313011133120
2019-04-29 12:07:32.235,0.294836,547.0,4294.0,3,3311311311222000003
2019-04-29 12:07:32.498,0.124962,2222.0,24221.0,3,3311311311233302303
2019-04-29 12:07:32.235,0.330082,547.0,30727.0,1,3311311311222000003
2019-04-29 12:07:32.235,0.482901,547.0,27608.0,0,3311311311222000003
2019-04-29 12:07:32.368,0.567981,508.0,17317.0,3,3311311313011133120
2019-04-29 12:07:32.498,0.181556,2222.0,32935.0,2,3311311311233302303


In [12]:
%nuclio deploy -n process_user_location -p recommendation_engine -c

[nuclio.deploy] 2019-07-07 11:38:14,332 (info) Building processor image
[nuclio.deploy] 2019-07-07 11:38:41,622 (info) Pushing image
[nuclio.deploy] 2019-07-07 11:38:53,781 (info) Build complete
[nuclio.deploy] 2019-07-07 11:38:57,847 (info) Function deploy complete
[nuclio.deploy] 2019-07-07 11:38:57,854 done creating process-user-location, function address: 18.197.206.39:30185
%nuclio: function deployed
