# Nuclio - prediction function

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Set environment variables

In [2]:
import os

In [3]:
# nuclio: ignore
# Local-only defaults: both table paths live under the current user's
# directory (falling back to 'iguazio' when V3IO_USERNAME is not set).
_v3io_user = os.getenv('V3IO_USERNAME', 'iguazio')
os.environ.update(
    CUSTOMERS_TABLE=os.path.join(_v3io_user, 'customers'),
    PREDICTIONS_TABLE=os.path.join(_v3io_user, 'predictions'),
)

In [4]:
# Iguazio access
%nuclio env FRAMESD=${V3IO_FRAMESD}
%nuclio env V3IO_USERNAME=${V3IO_USERNAME}
%nuclio env V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Model handling
%nuclio env MODEL_FILE=lgb.model
%nuclio env -c MODEL_FILEPATH=/tmp/mlmodel/${MODEL_FILE}
%nuclio env -l MODEL_FILEPATH=models/trained/${MODEL_FILE}

# Function variables
%nuclio env NUM_OF_PRODUCTS_TO_RETURN=4
%nuclio env CUSTOMERS_TABLE= ${CUSTOMERS_TABLE}
%nuclio env PREDICTIONS_TABLE= ${PREDICTIONS_TABLE}

%nuclio: setting 'FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'MODEL_FILE' environment variable
%nuclio: setting 'MODEL_FILEPATH' environment variable
%nuclio: setting 'NUM_OF_PRODUCTS_TO_RETURN' environment variable
%nuclio: setting 'CUSTOMERS_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_TABLE' environment variable


### Base image

In [5]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Installations

After installing packages during a working session, restart the kernel so that Jupyter can load the new packages.

In [6]:
%%nuclio cmd -c
pip install requests
pip install pandas
pip install lightgbm
pip install v3io_frames

### Get the model

In [7]:
# nuclio: ignore
# Publish the locally trained model to the shared data directory.
# This cell does not check anything: it creates the target directory under
# the /v3io mount and copies the model file into it.
# Shared directory path (relative to the /v3io mount) for the current user's models.
os.environ['MODEL_SHARED_FILEPATH'] = f'/users/{os.environ["V3IO_USERNAME"]}/recommendation_demo/models/'
# Absolute local path of the model: current working directory + MODEL_FILEPATH.
os.environ['FULL_LOCAL_MODEL_PATH'] = f'{os.path.join(os.getcwd(), os.environ["MODEL_FILEPATH"])}'
# Create the shared directory if needed, then copy the model file into it.
!mkdir -p /v3io${MODEL_SHARED_FILEPATH}
!cp ${FULL_LOCAL_MODEL_PATH} /v3io${MODEL_SHARED_FILEPATH} 

In [8]:
%nuclio env MODEL_SHARED_FILEPATH = ${MODEL_SHARED_FILEPATH}

%nuclio: setting 'MODEL_SHARED_FILEPATH' environment variable


In [9]:
%%nuclio cmd -c 
apt-get update && apt-get install -y wget
mkdir -p /tmp/mlmodel
wget -O /tmp/mlmodel/${MODEL_FILE} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081${MODEL_SHARED_FILEPATH}${MODEL_FILE}

### Imports

In [10]:
# Util
import json
import requests
import datetime

# Function
import pandas as pd
import lightgbm as lgb

# DB
import v3io_frames as v3f

## Function code

### Init context

In [11]:
def init_context(context):
    """Prepare the function context: DB client, table names, model and settings.

    Attributes set on ``context``:

    * ``client``          - v3io_frames client bound to the ``users`` container.
    * ``customers_table`` - value of the ``CUSTOMERS_TABLE`` environment variable.
    * ``predictions``     - value of the ``PREDICTIONS_TABLE`` environment variable.
    * ``model``           - LightGBM booster loaded from ``MODEL_FILEPATH``.
    * ``k``               - ``NUM_OF_PRODUCTS_TO_RETURN`` as an int.

    The predictions TSDB table is deleted and re-created on every call, so
    previously stored predictions are discarded each time this runs.

    Raises
    ------
    KeyError
        If one of the required environment variables is missing.
    ValueError
        If ``NUM_OF_PRODUCTS_TO_RETURN`` is not an integer.
    """
    # Define DB
    client = v3f.Client('framesd:8081', container='users')
    setattr(context, 'client', client)

    setattr(context, 'customers_table', os.environ['CUSTOMERS_TABLE'])

    setattr(context, 'predictions', os.environ['PREDICTIONS_TABLE'])

    # Reset the predictions table. Both steps are best-effort: a failure is
    # logged and initialization continues. Only `Exception` is caught so that
    # KeyboardInterrupt / SystemExit are not swallowed, and the underlying
    # error is included in the log instead of being discarded.
    try:
        # NOTE(review): if_missing=1 presumably means "ignore a missing table"
        # in the frames API - confirm against the v3io_frames version in use.
        context.client.delete('tsdb', context.predictions, if_missing=1)
    except Exception as exc:
        context.logger.debug(f'couldnt delete {context.predictions}: {exc}')
    try:
        context.client.create('tsdb', context.predictions, attrs={'rate': '1/s'})
    except Exception as exc:
        context.logger.debug(f'couldnt create {context.predictions}: {exc}')

    # define Model
    model = lgb.Booster(model_file=os.environ['MODEL_FILEPATH'])
    setattr(context, 'model', model)

    # vars
    setattr(context, 'k', int(os.environ['NUM_OF_PRODUCTS_TO_RETURN']))

### Format dataframe for prediction

In [12]:
def prepare_df(df):
    """Turn a customer's stored ``products`` JSON into a features DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Customer record(s). Only the ``products`` cell of the first row is
        used; it is expected to hold a JSON document.
        NOTE(review): presumably one record per candidate product - confirm
        against the code that writes the customers table.

    Returns
    -------
    pandas.DataFrame
        The frame ``pd.read_json`` parses from that JSON document.

    Raises
    ------
    KeyError
        If ``df`` has no ``products`` column.
    IndexError
        If ``df`` has no rows.
    """
    from io import StringIO  # local import: only needed here

    # Extract features col (first row only)
    products_json = df['products'].iloc[0]

    # Create features df.
    # A plain string handed to read_json may be interpreted as a path/URL, and
    # literal JSON strings are deprecated in recent pandas releases; wrapping
    # the text in StringIO makes it unambiguously "JSON content".
    if isinstance(products_json, str):
        products_json = StringIO(products_json)
    return pd.read_json(products_json)

In [13]:
def handler(context, event):
    """Predict the top products for a customer and store them in the TSDB.

    Parameters
    ----------
    context
        Function context prepared by ``init_context``; this handler reads
        ``client``, ``customers_table``, ``predictions``, ``model``, ``k`` and
        ``logger`` from it.
    event
        Incoming event whose ``body`` is a mapping with an ``id`` (customer
        id, must be convertible to ``int``) and a ``store``.

    Returns
    -------
    None
        Results are written to the predictions table, not returned. Nothing
        is written when the customer has no stored features or when no
        product scores above the confidence threshold.

    Raises
    ------
    KeyError
        If ``id`` or ``store`` is missing from the event body.
    ValueError
        If ``id`` is not an integer.
    """
    # Get user.
    # The id comes from the request and is interpolated into the filter
    # expression below, so force it to an int first; anything else is rejected
    # before it can reach the query.
    customer_id = int(event.body['id'])
    store = event.body['store']
    context.logger.debug(f'Predicting for: {customer_id} in {store}')

    # Get user parameters
    customer = context.client.read('kv', context.customers_table, filter=f'id=={customer_id}').reset_index()

    # Do we have features for the user?
    if customer.empty:
        return

    # Create features df for the user
    features = prepare_df(customer)

    # Predict
    features["prediction"] = context.model.predict(features)

    # Eliminate low confidence results, then keep the top-k products.
    # The chain builds new frames instead of sorting a slice in place, and
    # numbers the kept rows 0..k-1 (best first) as `prediction_num`.
    confident = features.loc[features.prediction > 0.01, ['prediction', 'product_id']]
    best_products = (
        confident
        .sort_values(by='prediction', ascending=False)
        .iloc[:context.k, :]
        .reset_index(drop=True)
        .rename_axis('prediction_num')
        .reset_index()
    )

    # Nothing confident enough to recommend: skip the DB write entirely.
    if best_products.empty:
        context.logger.debug(f'No confident predictions for: {customer_id} in {store}')
        return

    best_products['customer_id'] = customer_id
    best_products['store'] = store
    # NOTE(review): naive local timestamp - confirm the TSDB expects this
    # rather than UTC.
    best_products['time'] = datetime.datetime.now()
    best_products = best_products.set_index(['time', 'store', 'prediction_num', 'product_id'])
    context.logger.debug(f'Predicted:\n{best_products}')

    # Save results to DB
    context.client.write('tsdb', context.predictions, best_products)

In [14]:
# nuclio: ignore
init_context(context)

In [16]:
# nuclio: ignore
event = nuclio.Event(body={'id': '1232', 'store': '03311311313011021022'})
handler(context, event)

In [19]:
# nuclio: ignore
context.client.read('tsdb', context.predictions)

Unnamed: 0_level_0,customer_id,prediction,prediction_num,product_id,store
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-08-11 12:34:28.806,1232.0,0.195523,2,24852,3311311313011021022
2019-08-11 12:34:28.806,1232.0,0.188855,3,5884,3311311313011021022
2019-08-11 12:34:28.806,1232.0,0.307525,0,1940,3311311313011021022
2019-08-11 12:34:28.806,1232.0,0.200307,1,9387,3311311313011021022


In [18]:
%nuclio deploy -n prediction_server -p recommendation_engine -c

[nuclio.deploy] 2019-08-11 12:34:44,484 (info) Building processor image
[nuclio.deploy] 2019-08-11 12:34:48,553 (info) Pushing image
[nuclio.deploy] 2019-08-11 12:35:03,785 (info) Build complete
[nuclio.deploy] 2019-08-11 12:35:08,840 (info) Function deploy complete
[nuclio.deploy] 2019-08-11 12:35:08,847 done updating prediction-server, function address: 3.120.15.118:32692
%nuclio: function deployed
