# Nuclio - prediction function

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Set environment variables

In [2]:
# Iguazio access
# FRAMESD is the address init_context uses to build the Frames client URL;
# the values are taken from the notebook's own environment variables.
%nuclio env FRAMESD=${V3IO_FRAMESD}
%nuclio env V3IO_USERNAME=${V3IO_USERNAME}
%nuclio env V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Model handling
# MODEL_FILEPATH is what init_context passes to lgb.Booster(model_file=...).
# NOTE(review): -c / -l presumably scope the value to the deployed function
# config vs. the local notebook run -- the /tmp/mlmodel path matches where the
# `%%nuclio cmd -c` cell downloads the model. Confirm against nuclio-jupyter docs.
%nuclio env MODEL_FILE=lgb.model
%nuclio env -c MODEL_FILEPATH=/tmp/mlmodel/${MODEL_FILE}
%nuclio env -l MODEL_FILEPATH=models/trained/${MODEL_FILE}

# Function variables
# Read by init_context: number of top products to keep and the table names
# used for the customers lookup and the predictions write.
%nuclio env NUM_OF_PRODUCTS_TO_RETURN=4
%nuclio env CUSTOMERS_TABLE=customers
%nuclio env PREDICTIONS_TABLE=predictions

%nuclio: setting 'FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'MODEL_FILE' environment variable
%nuclio: setting 'MODEL_FILEPATH' environment variable
%nuclio: setting 'NUM_OF_PRODUCTS_TO_RETURN' environment variable
%nuclio: setting 'CUSTOMERS_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_TABLE' environment variable


### Base image

In [3]:
# Base image for the function build (sets spec.build.baseImage).
# The handler uses f-strings, so the image must provide Python 3.6 or newer.
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Installations

In [None]:
%%nuclio cmd
pip install requests
pip install pandas
pip install lightgbm
pip install v3io_frames

### Get the model

In [6]:
%%nuclio cmd -c 
apt-get update && apt-get install -y wget
mkdir -p /tmp/mlmodel
wget -O /tmp/mlmodel/${MODEL_FILE} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081/bigdata/tutorials/demos/location_based_recommendation/models/trained/${MODEL_FILE}
        

### Imports

In [4]:
# Util
import datetime
import io
import json
import os

import requests

# Function
import lightgbm as lgb
import pandas as pd

# DB
import v3io_frames as v3f

## Function code

### Init context

In [5]:
def init_context(context):
    """Attach everything the handler needs to the function context.

    Reads its configuration from environment variables and stores on
    ``context``:

    * ``client`` -- v3io_frames client built from ``FRAMESD``
    * ``customers_table`` -- value of ``CUSTOMERS_TABLE``
    * ``predictions`` -- value of ``PREDICTIONS_TABLE``
    * ``model`` -- LightGBM booster loaded from ``MODEL_FILEPATH``
    * ``k`` -- ``NUM_OF_PRODUCTS_TO_RETURN`` converted to ``int``

    Raises:
        KeyError: if any of the environment variables above is missing.
    """
    env = os.environ

    # DB access
    context.client = v3f.Client('http://' + env['FRAMESD'])

    # Table names used by the handler for the lookup and for the write
    context.customers_table = env['CUSTOMERS_TABLE']
    context.predictions = env['PREDICTIONS_TABLE']

    # Trained model, loaded once here rather than on every event
    context.model = lgb.Booster(model_file=env['MODEL_FILEPATH'])

    # How many of the best-scoring products the handler keeps
    context.k = int(env['NUM_OF_PRODUCTS_TO_RETURN'])

### Format dataframe for prediction

In [6]:
def prepare_df(df):
    """Build the feature frame stored in a customer record.

    Only the first row of ``df`` is used: its ``products`` cell is parsed
    with ``pd.read_json`` and the resulting DataFrame is returned.

    Args:
        df: DataFrame with a ``products`` column whose first value is a JSON
            document (the handler passes the rows read for one customer).

    Returns:
        The DataFrame parsed from the first row's ``products`` value.

    Raises:
        KeyError: if ``df`` has no ``products`` column.
        IndexError: if ``df`` has no rows.
    """
    # Extract features col
    keep_cols = ['products']
    products = df.loc[:, keep_cols]

    # First row, first (only) column
    raw_products = products.values[0][0]

    # Passing a literal JSON string straight to read_json is deprecated in
    # recent pandas and may be mistaken for a path/URL; a file-like buffer is
    # accepted by old and new versions alike. Non-string values are passed
    # through unchanged.
    if isinstance(raw_products, str):
        raw_products = io.StringIO(raw_products)

    # Create features df
    return pd.read_json(raw_products)

In [7]:
def handler(context, event):
    """Score products for one customer and store the best predictions.

    Reads ``id`` and ``store`` from ``event.body``, looks the customer up in
    the customers KV table, scores the customer's product features with
    ``context.model`` and writes the top ``context.k`` products (prediction
    above 0.01) to the predictions TSDB table, indexed by
    ``time`` / ``store`` / ``prediction_num``.

    When the lookup returns no rows nothing is predicted or written.
    The function always returns ``None``.
    """
    # Get user
    customer_id = event.body['id']
    store = event.body['store']
    context.logger.debug(f'Predicting for: {customer_id} in {store}')

    # Get user parameters
    customer_rows = context.client.read(
        'kv',
        context.customers_table,
        filter=f'id=={customer_id}',
    ).reset_index()

    # No stored features for this user: nothing to do
    if customer_rows.empty:
        return

    # Create features df for the user and score every row of it
    scored = prepare_df(customer_rows)
    scored["prediction"] = context.model.predict(scored)

    # Eliminate low confidence results and order best-first
    is_confident = scored.prediction > 0.01
    ranked = scored.loc[is_confident, ['prediction', 'product_id']].sort_values(
        by='prediction', ascending=False
    )

    # Keep top products. The first reset_index moves the old row labels into
    # an 'index' column; the second one adds the 0-based rank as 'level_0',
    # which becomes 'prediction_num'. The 'index' column is then discarded.
    best_products = (
        ranked.iloc[:context.k, :]
        .reset_index()
        .reset_index()
        .rename(columns={'level_0': 'prediction_num'})
        .assign(
            customer_id=int(customer_id),
            store=store,
            time=datetime.datetime.now(),
        )
        .drop('index', axis=1)
        .set_index(['time', 'store', 'prediction_num'])
    )
    context.logger.debug(f'Predicted:\n{best_products}')

    # Save results to DB
    context.client.write('tsdb', context.predictions, best_products)

In [8]:
# nuclio: ignore
# Local test: attach the client, table names, model and k to `context`.
# NOTE(review): `context` is not defined anywhere in this notebook --
# presumably it is injected by the `import nuclio` cell; confirm.
init_context(context)

In [9]:
# nuclio: ignore
# Local test: call the handler with a sample event carrying the two body
# keys it reads ('id' and 'store'). Requires init_context to have run first.
event = nuclio.Event(body={'id': '1232', 'store': '03311311313011021022'})
handler(context, event)

In [10]:
# nuclio: ignore
# Local check: read back the last hour of the predictions TSDB table.
# The table name is hard-coded here and matches the PREDICTIONS_TABLE value set above.
context.client.read('tsdb', query='select * from predictions', step='1m', start="now-1h", end='now',multi_index=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,customer_id,product_id,prediction
time,prediction_num,store,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-29 10:39:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:40:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:41:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:42:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:43:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:44:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:45:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:46:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:47:29,3,03311311313011021022,1232.0,33984.0,0.069884
2019-04-29 10:48:29,3,03311311313011021022,1232.0,33984.0,0.069884


In [23]:
# Deploy this notebook as a Nuclio function.
# NOTE(review): -n / -p presumably give the function name and the project;
# the meaning of -c is not visible here -- confirm against nuclio-jupyter docs.
%nuclio deploy -n prediction_server -p recommendation_engine -c

%nuclio: ['deploy', '-n', 'prediction_server', '-p', 'recommendation_engine', '-c', '/User/tutorials/demos/location_based_recommendation/prediction.ipynb']
%nuclio: [nuclio.deploy] 2019-04-29 11:45:38,481 (info) Building processor image
%nuclio: [nuclio.deploy] 2019-04-29 11:45:48,574 (info) Pushing image
%nuclio: [nuclio.deploy] 2019-04-29 11:45:56,650 (info) Build complete
%nuclio: [nuclio.deploy] 2019-04-29 11:46:00,685 (info) Function deploy complete
%nuclio: [nuclio.deploy] 2019-04-29 11:46:00,690 done updating prediction-server, function address: 35.158.112.89:32064
%nuclio: function deployed
