# Nuclio - prediction function

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Set environment variables

In [44]:
# Iguazio access
# Each variable is filled from the ${...} variable referenced on its line.
# FRAMESD is read by init_context to build the frames client address.
%nuclio env FRAMESD=${V3IO_FRAMESD}
%nuclio env V3IO_USERNAME=${V3IO_USERNAME}
%nuclio env V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Model handling
# MODEL_FILEPATH is set twice, with different flags and different values;
# init_context loads the model from whichever value is in effect.
# /tmp/mlmodel/ is the directory the "Get the model" build commands download into.
# NOTE(review): presumably -c applies only to the deployed function's
# configuration and -l only to the local notebook session - confirm against
# the nuclio-jupyter documentation.
%nuclio env MODEL_FILE=lgb.model
%nuclio env -c MODEL_FILEPATH=/tmp/mlmodel/${MODEL_FILE}
%nuclio env -l MODEL_FILEPATH=models/trained/${MODEL_FILE}

# Function variables
# Read by init_context: how many products to keep per request, the table
# holding customer features, and the table receiving the predictions.
%nuclio env NUM_OF_PRODUCTS_TO_RETURN=4
%nuclio env CUSTOMERS_TABLE=customers
%nuclio env PREDICTIONS_TABLE=predictions

%nuclio: setting 'FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'MODEL_FILE' environment variable
%nuclio: setting 'MODEL_FILEPATH' environment variable
%nuclio: setting 'NUM_OF_PRODUCTS_TO_RETURN' environment variable
%nuclio: setting 'CUSTOMERS_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_TABLE' environment variable


### Base image

In [3]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Installations

In [4]:
%%nuclio cmd
pip install requests
pip install pandas
pip install lightgbm
pip install v3io_frames



### Get the model

In [364]:
%%nuclio cmd -c 
apt-get update && apt-get install -y wget
mkdir -p /tmp/mlmodel
wget -O /tmp/mlmodel/${MODEL_FILE} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081/bigdata/tutorials/demos/location_based_recommendation/models/trained/${MODEL_FILE}
        

### Imports

In [22]:
# Util
import datetime
import io
import json
import os

import requests

# Function
import pandas as pd
import lightgbm as lgb

# DB
import v3io_frames as v3f

## Function code

### Init context

In [43]:
def init_context(context):
    """Attach the DB client, table names, model and settings to the context.

    Everything is configured through environment variables: FRAMESD,
    CUSTOMERS_TABLE, PREDICTIONS_TABLE, MODEL_FILEPATH and
    NUM_OF_PRODUCTS_TO_RETURN. A missing variable raises KeyError.

    Args:
        context: function context object; it gains the attributes
            ``client``, ``customers_table``, ``predictions``, ``model``
            and ``k``.
    """
    env = os.environ

    # DB access: frames client plus the two tables the handler works with
    context.client = v3f.Client(f"http://{env['FRAMESD']}")
    context.customers_table = env['CUSTOMERS_TABLE']
    context.predictions = env['PREDICTIONS_TABLE']

    # Trained LightGBM model, loaded once from the configured file
    context.model = lgb.Booster(model_file=env['MODEL_FILEPATH'])

    # Number of products kept per request
    context.k = int(env['NUM_OF_PRODUCTS_TO_RETURN'])

### Format dataframe for prediction

In [24]:
def prepare_df(df):
    """Build the per-product feature frame for a single customer.

    Args:
        df: customer DataFrame. The 'products' column of its first row
            holds a JSON document describing the products' features.

    Returns:
        The DataFrame that pandas.read_json parses from that JSON.

    Raises:
        KeyError: if ``df`` has no 'products' column.
        IndexError: if ``df`` has no rows.
    """
    # Only the first row's 'products' cell is used
    products_json = df['products'].iloc[0]

    # Passing a literal JSON string to read_json is deprecated in recent
    # pandas releases; a file-like buffer is accepted by every version.
    # Non-string values are handed over unchanged, as before.
    if isinstance(products_json, str):
        products_json = io.StringIO(products_json)

    return pd.read_json(products_json)

In [353]:
def handler(context, event):
    """Recommend products for one customer and persist the top predictions.

    Args:
        context: function context prepared by init_context; its ``client``,
            ``customers_table``, ``predictions``, ``model`` and ``k``
            attributes are used.
        event: incoming event whose body provides 'id' (the customer id,
            must be convertible to int) and 'store'.

    Returns:
        None. Nothing is written when the customer has no stored features
        or when no prediction exceeds the confidence threshold.

    Raises:
        KeyError: if 'id' or 'store' is missing from the event body.
        ValueError, TypeError: if the customer id is not an integer.
    """
    # Predictions at or below this score are treated as noise
    min_confidence = 0.01

    # The id comes from the request and is interpolated into the filter
    # expression below, so force it to an integer before using it.
    customer_id = int(event.body['id'])
    store = event.body['store']

    # Get user parameters
    customer = context.client.read(
        'kv', context.customers_table, filter=f'id=={customer_id}'
    ).reset_index()

    # Do we have features for the user?
    if customer.empty:
        return

    # Create features df for the user and score every product
    features = prepare_df(customer)
    features["prediction"] = context.model.predict(features)

    # Eliminate low confidence results, then keep the top k products.
    # The sort is not done in place: the filtered frame is a slice.
    confident = features.loc[features.prediction > min_confidence,
                             ['prediction', 'product_id']]
    best_products = confident.sort_values(by='prediction', ascending=False).iloc[:context.k, :]

    # Nothing confident enough to recommend: nothing to persist
    if best_products.empty:
        return

    # prediction_num is the 0-based rank of each product within this request
    best_products = (
        best_products
        .reset_index(drop=True)
        .rename_axis('prediction_num')
        .reset_index()
    )
    best_products['customer_id'] = customer_id
    best_products['store'] = store
    best_products['time'] = datetime.datetime.now()
    best_products = best_products.set_index(['time', 'store', 'prediction_num'])
    print(best_products)

    # Save results to DB
    context.client.write('tsdb', context.predictions, best_products)

In [365]:
# nuclio: ignore
init_context(context)

In [366]:
# nuclio: ignore
event = nuclio.Event(body={'id': '1232', 'store': '03311311313011021022'})
handler(context, event)

                                                                prediction  \
time                       store                prediction_num               
2019-04-25 14:51:11.222339 03311311313011021022 0                 0.474077   
                                                1                 0.275197   

                                                                product_id  \
time                       store                prediction_num               
2019-04-25 14:51:11.222339 03311311313011021022 0                    47209   
                                                1                    22035   

                                                                customer_id  
time                       store                prediction_num               
2019-04-25 14:51:11.222339 03311311313011021022 0                      1232  
                                                1                      1232  


In [367]:
# nuclio: ignore
# Read back the last hour of stored predictions to check the handler's write.
# The table name is taken from the context (set by init_context from
# PREDICTIONS_TABLE) so this check follows the configured table.
context.client.read('tsdb', query=f'select * from {context.predictions}', step='1m', start="now-1h", end='now',multi_index=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,product_id,prediction,customer_id
time,prediction_num,store,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-25 14:46:12,0,3311311313011021022,47209.0,0.474077,1232.0
2019-04-25 14:50:12,0,3311311313011021022,47209.0,0.474077,1232.0
2019-04-25 14:46:12,1,3311311313011021022,22035.0,0.275197,1232.0
2019-04-25 14:50:12,1,3311311313011021022,22035.0,0.275197,1232.0


In [368]:
%nuclio deploy -n prediction_server -p recommendation_engine -c

[nuclio.deploy] 2019-04-25 14:51:39,810 (info) Building processor image


INFO:nuclio.deploy:(info) Building processor image


[nuclio.deploy] 2019-04-25 14:51:45,900 (info) Pushing image


INFO:nuclio.deploy:(info) Pushing image


[nuclio.deploy] 2019-04-25 14:51:45,901 (info) Build complete


INFO:nuclio.deploy:(info) Build complete


[nuclio.deploy] 2019-04-25 14:51:49,940 (info) Function deploy complete


INFO:nuclio.deploy:(info) Function deploy complete


[nuclio.deploy] 2019-04-25 14:51:49,947 done updating prediction-server, function address: 3.121.211.71:32432


INFO:nuclio.deploy:done updating prediction-server, function address: 3.121.211.71:32432


%nuclio: function deployed
