# Nuclio - prediction function

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Set environment variables

In [2]:
# Iguazio access
# Forward the platform connection settings and credentials into the function's
# environment; the values are taken from the variables of the same (or similar)
# name that are expected to exist in the notebook environment.
%nuclio env FRAMESD=${V3IO_FRAMESD}
%nuclio env V3IO_USERNAME=${V3IO_USERNAME}
%nuclio env V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Model handling
# MODEL_FILEPATH is defined twice with different flags so that the path differs
# between the built function (/tmp/mlmodel/...) and the notebook (models/trained/...).
# NOTE(review): presumably -c applies only to the deployed configuration and -l only
# to the local session - confirm against the nuclio-jupyter documentation.
%nuclio env MODEL_FILE=lgb.model
%nuclio env -c MODEL_FILEPATH=/tmp/mlmodel/${MODEL_FILE}
%nuclio env -l MODEL_FILEPATH=models/trained/${MODEL_FILE}

# Function variables
# NUM_OF_PRODUCTS_TO_RETURN: how many top-scoring products the handler keeps.
# CUSTOMERS_TABLE: name of the table the handler reads the customer row from.
%nuclio env NUM_OF_PRODUCTS_TO_RETURN=4
%nuclio env CUSTOMERS_TABLE=customers

%nuclio: setting 'FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'MODEL_FILE' environment variable
%nuclio: setting 'MODEL_FILEPATH' environment variable
%nuclio: setting 'NUM_OF_PRODUCTS_TO_RETURN' environment variable
%nuclio: setting 'CUSTOMERS_TABLE' environment variable


### Base image

In [3]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Installations

In [4]:
%%nuclio cmd
pip install requests
pip install pandas
pip install lightgbm
pip install v3io_frames



### Get the model

In [21]:
%%nuclio cmd -c 
apt-get update && apt-get install -y wget
mkdir -p /tmp/mlmodel
wget -O /tmp/mlmodel/${MODEL_FILE} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081/bigdata/tutorials/demos/location_based_recommendation/models/trained/${MODEL_FILE}
        

### Imports

In [22]:
# Util
import datetime
import io
import json
import os

import requests

# Function
import lightgbm as lgb
import pandas as pd

# DB
import v3io_frames as v3f

## Function code

### Init context

In [23]:
def init_context(context):
    """Attach the objects shared by all invocations to the Nuclio context.

    Configuration is read from environment variables:

    * ``FRAMESD`` - address appended to ``http://`` to build the frames client URL
    * ``CUSTOMERS_TABLE`` - name of the customers table
    * ``MODEL_FILEPATH`` - path of the LightGBM model file to load
    * ``NUM_OF_PRODUCTS_TO_RETURN`` - number of products to return, parsed as int

    After the call the context carries ``client``, ``customers_table``,
    ``model`` and ``k``.

    Raises:
        KeyError: if one of the environment variables is not set.
        ValueError: if ``NUM_OF_PRODUCTS_TO_RETURN`` is not an integer.
    """
    env = os.environ

    # DB access: frames client plus the table it will be queried for
    context.client = v3f.Client('http://' + env['FRAMESD'])
    context.customers_table = env['CUSTOMERS_TABLE']

    # Trained model, loaded once so that every request reuses it
    context.model = lgb.Booster(model_file=env['MODEL_FILEPATH'])

    # Number of top products kept by the handler
    context.k = int(env['NUM_OF_PRODUCTS_TO_RETURN'])

### Format dataframe for prediction

In [24]:
def prepare_df(df):
    """Build the per-product features frame for a customer.

    Args:
        df: DataFrame whose first row holds, in the ``products`` column, a
            JSON document describing the products to score.

    Returns:
        The DataFrame produced by ``pd.read_json`` from that JSON document.

    Raises:
        KeyError: if ``df`` has no ``products`` column.
        IndexError: if ``df`` has no rows.
    """
    # Extract features col (value stored in the first row)
    raw = df.loc[:, ['products']].values[0][0]

    # Passing a literal JSON string to read_json is deprecated in recent pandas
    # releases, so hand it over as a file-like object instead. Anything that is
    # not text is passed through untouched.
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    source = io.StringIO(raw) if isinstance(raw, str) else raw

    # Create features df
    return pd.read_json(source)

In [25]:
def handler(context, event):
    """Return the top-scoring products for the customer named in the event.

    Args:
        context: object carrying ``client``, ``customers_table``, ``model``
            and ``k`` (as set by ``init_context``).
        event: object whose ``body`` holds an ``id`` entry. The body may be an
            already-parsed dict, or a JSON document as ``str``/``bytes``.

    Returns:
        A JSON string. When the customer has no row it is ``'{}'``. Otherwise
        it is ``json.dumps`` applied to the frame's ``to_json()`` text, i.e. the
        payload is JSON-encoded twice; this is kept as-is because existing
        callers may rely on it.

    Raises:
        KeyError: if the body has no ``id`` entry.
    """
    # The body is only a dict when it has already been parsed; accept raw JSON
    # text/bytes as well instead of failing on the subscript below.
    body = event.body
    if isinstance(body, (bytes, bytearray)):
        body = body.decode('utf-8')
    if isinstance(body, str):
        body = json.loads(body)

    # Get user parameters
    # NOTE(review): the id is interpolated into the filter expression without
    # validation or escaping; consider restricting it to the expected format.
    df = context.client.read('kv', context.customers_table, filter=f'id=={body["id"]}').reset_index()

    # Do we have features for the user?
    if df.empty:
        return json.dumps({})

    # Create features df for the user
    df = prepare_df(df)

    # Predict
    df["prediction"] = context.model.predict(df)

    # Eliminate low confidence results, then keep the top products.
    # A non-mutating chain avoids sorting a slice in place.
    best_products = (
        df.loc[df.prediction > 0.01, ['prediction', 'product_id']]
        .sort_values(by='prediction', ascending=False)
        .iloc[:context.k, :]
    )

    return json.dumps(best_products.to_json())

In [26]:
# nuclio: ignore
# Local check: run the initialiser against the notebook's `context` object so the
# handler can be exercised below without deploying.
# NOTE(review): `context` is not defined in any visible cell; presumably it is
# provided by the nuclio notebook integration - confirm.
init_context(context)

In [27]:
# nuclio: ignore
# Local smoke test: build an event with a dict body and call the handler directly.
# The cell output is the handler's return value (a JSON string).
event = nuclio.Event(body={'id': '1232'})
handler(context, event)

'"{\\"prediction\\":{\\"17167\\":0.4740774152,\\"566\\":0.2751968181},\\"product_id\\":{\\"17167\\":47209,\\"566\\":22035}}"'

In [28]:
%nuclio deploy -n prediction_server -p recommendation_engine -c

[nuclio.deploy] 2019-04-25 04:58:23,129 (info) Building processor image
[nuclio.deploy] 2019-04-25 04:58:34,269 (info) Pushing image
[nuclio.deploy] 2019-04-25 04:58:35,285 (info) Build complete
[nuclio.deploy] 2019-04-25 04:58:43,362 (info) Function deploy complete
[nuclio.deploy] 2019-04-25 04:58:43,368 done updating prediction-server, function address: 3.121.211.71:32432
%nuclio: function deployed
