# Nuclio - Generator

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Define environment variables

In [2]:
# nuclio: ignore
import os

# Publish the KV table path and the stream path for the cells below.
# Both live under the current user's directory; 'iguazio' is used when
# V3IO_USERNAME is not set.
os.environ.update(
    CUSTOMERS=os.path.join(os.getenv('V3IO_USERNAME', 'iguazio'), 'customers'),
    DATA_STREAM=os.path.join(os.getenv('V3IO_USERNAME', 'iguazio'), 'customers_stream'),
)

In [3]:
%%nuclio env

# Iguazio access
V3IO_FRAMESD=${V3IO_FRAMESD}
V3IO_USERNAME=${V3IO_USERNAME}
V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Function variables (NOTE(review): values appear to be taken verbatim after "=", so london_locations is left unquoted - confirm)
BATCH_SIZE=100
DATA_STREAM=${DATA_STREAM}
CUSTOMERS=${CUSTOMERS}
london_locations=03311311313011311011000321002320,03311311311233323013031101320003,03311311313010023000032330133111,03311311311222300331010333220231
ACCURACY=20
BASE_ACCURACY=14

%nuclio: setting 'V3IO_FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'BATCH_SIZE' environment variable
%nuclio: setting 'DATA_STREAM' environment variable
%nuclio: setting 'CUSTOMERS' environment variable
%nuclio: setting 'london_locations' environment variable
%nuclio: setting 'ACCURACY' environment variable
%nuclio: setting 'BASE_ACCURACY' environment variable


%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line


### Base image

In [4]:
# Base image used to build the function. The function code in this notebook
# uses f-strings, so the image has to provide Python 3.6 or newer.
# NOTE(review): Python 3.6 and Debian jessie are both end-of-life - consider a
# newer tag once the installed packages are confirmed to support it.
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Set cron trigger

In [5]:
# Declare a trigger called "secs" of kind "cron" with a one-minute interval.
# NOTE(review): presumably every tick invokes `handler` once; the name "secs"
# no longer matches the "1m" interval - confirm whether that is intentional.
%nuclio config spec.triggers.secs.kind = "cron"
%nuclio config spec.triggers.secs.attributes.interval = "1m"

%nuclio: setting spec.triggers.secs.kind to 'cron'
%nuclio: setting spec.triggers.secs.attributes.interval to '1m'


### Install packages

When installing packages while working, please **restart the kernel** so that Jupyter can load the new packages.

In [6]:
%%nuclio cmd -c

# General
# NOTE(review): none of the packages below is version-pinned; pin them for reproducible builds.
pip install pandas
# json ships with the Python standard library, so there is nothing to install for it.

# DB
pip install v3io_frames

# Function
pip install faker

## Function code

### Imports

In [7]:
import os
import itertools
import random
from datetime import datetime

# Data handling
import pandas as pd

# DB
import v3io_frames as v3f

# Function
import faker
from faker.providers import BaseProvider

### Helper classes definitions

In [8]:
class LocationProvider(BaseProvider):
    '''
    Faker provider that fabricates quadtree location keys below a base cell.

    A key is a string of digits 0-3; a longer key shares its prefix with the
    cell it is derived from.

    Background on quadtree geohashing:
        @{http://tech.taskrabbit.com/blog/2015/06/09/elasticsearch-geohash-vs-geotree/}
        @{http://mapzen.github.io/leaflet-spatial-prefix-tree/}
        @{http://blog.notdot.net/2009/11/Damn-Cool-Algorithms-Spatial-indexing-with-Quadtrees-and-Hilbert-Curves}
    '''
    def location(self, location_base: str, base_acc: int=10, acc: int=20):
        '''
        Build a random key that starts with a prefix of ``location_base``.

        :param location_base: key of the cell to generate a location in
        :param base_acc: number of leading characters kept from ``location_base``
        :param acc: target length of the returned key
        :return: the kept prefix followed by random digits 0-3 up to ``acc``
                 characters; when the prefix is already ``acc`` characters or
                 longer it is returned unchanged
        '''
        prefix = location_base[:base_acc]
        # range() of a non-positive count is empty, so nothing is appended then.
        padding = (str(random.randint(0, 3)) for _ in range(acc - len(prefix)))
        return prefix + ''.join(padding)

### Init context

In [9]:
def init_context(context):
    '''
    Prepare the state that `handler` reads from ``context``.

    Attributes set on ``context``:
        v3f                      -- v3io_frames client for the 'users' container
        customers_table          -- path of the KV table holding the customers
        customers_stream         -- path of the TSDB table the batches are written to
        faker                    -- Faker instance with LocationProvider registered
        locations                -- base location keys parsed from 'london_locations'
        customers                -- range over the row count of the customers table
        location_accuracy_params -- [BASE_ACCURACY, ACCURACY] as ints

    Afterwards the TSDB table is deleted and re-created. Both steps remain
    best-effort (a failure does not abort initialisation), but the reason for
    the failure is now logged.

    :param context: nuclio function context; must expose ``logger``
    '''
    ##########
    # Setups #
    ##########
    default_user = os.getenv('V3IO_USERNAME', 'iguazio')

    # DB Contexts
    # NOTE(review): the address is hard-coded although V3IO_FRAMESD is passed
    # to the function - confirm whether it should come from the environment.
    v3c_frames = v3f.Client('framesd:8081', container='users')
    context.v3f = v3c_frames

    # DB Tables
    customers_table = os.getenv('CUSTOMERS', os.path.join(default_user, 'customers'))
    context.customers_table = customers_table

    # The environment cell of this notebook defines DATA_STREAM, while this
    # function used to look only at CUSTOMERS_STREAM. CUSTOMERS_STREAM keeps
    # priority for backward compatibility; DATA_STREAM is honoured next.
    customers_stream = (
        os.getenv('CUSTOMERS_STREAM')
        or os.getenv('DATA_STREAM')
        or os.path.join(default_user, 'customers_stream')
    )
    context.customers_stream = customers_stream

    # Function
    fakers = faker.Faker()
    fakers.add_provider(LocationProvider)
    context.faker = fakers

    # Drop whitespace and quote characters around each key so that a quoted
    # or space-separated value does not leak them into the generated keys.
    raw_locations = os.getenv('london_locations', '')
    context.locations = [key.strip(' \t\r\n"\'') for key in raw_locations.split(',')]

    # Only the number of rows is used: customer ids are 0 .. rows-1.
    customers = v3c_frames.read('kv', customers_table)
    context.customers = range(customers.shape[0])

    context.location_accuracy_params = [
        int(os.getenv('BASE_ACCURACY', 14)),
        int(os.getenv('ACCURACY', 20)),
    ]

    ###########
    # Actions #
    ###########
    try:
        # NOTE(review): if_missing=1 is presumably the "ignore" option - confirm.
        v3c_frames.delete('tsdb', customers_stream, if_missing=1)
    except Exception as exc:
        context.logger.debug(f'Couldnt delete customers stream: {exc}')

    try:
        v3c_frames.create('tsdb', customers_stream, attrs={'rate':'1/s'})
    except Exception as exc:
        # Without the table every later write fails, so report this loudly.
        context.logger.error(f'Couldnt create customers stream: {exc}')

### Helper functions

In [10]:
def create_customer(context):
    '''
    Produce one synthetic customer sighting.

    A customer id and a base location are drawn at random from
    ``context.customers`` and ``context.locations``; the base location is
    expanded by ``context.faker.location`` using
    ``context.location_accuracy_params``.

    :param context: object prepared by `init_context`
    :return: dict with the keys 'timestamp' (current local time), 'id' and
             'location'
    '''
    # The id is drawn before the location so the random sequence is unchanged.
    chosen_id = random.choice(context.customers)
    base_key = random.choice(context.locations)
    position = context.faker.location(base_key, *context.location_accuracy_params)
    return {'timestamp': datetime.now(), 'id': chosen_id, 'location': position}

In [11]:
def create_batch(context, batch_size: int):
    '''
    Generate ``batch_size`` customer sightings as a DataFrame.

    :param context: object prepared by `init_context`, passed on to
                    `create_customer`
    :param batch_size: number of records to generate; zero or a negative
                       value yields an empty frame instead of raising
    :return: DataFrame indexed by ('timestamp', 'id'); the remaining record
             fields are its columns
    '''
    index_columns = ['timestamp', 'id']
    records = [create_customer(context) for _ in range(batch_size)]
    batch = pd.DataFrame(records)
    if not records:
        # A frame built from no records has no columns, so set_index would
        # raise KeyError; give it the index columns first.
        batch = batch.reindex(columns=index_columns)
    return batch.set_index(index_columns)

### Handler

In [12]:
def handler(context, event):
    '''
    Generate one batch of customer sightings and write it to the TSDB table.

    The batch size is read from the BATCH_SIZE environment variable on every
    invocation (default 100).

    :param context: object prepared by `init_context`; ``logger``, ``v3f`` and
                    ``customers_stream`` are used
    :param event: triggering event; its content is not used
    '''
    # Create customers
    batch_size = int(os.getenv('BATCH_SIZE', 100))
    customers = create_batch(context, batch_size)
    # len() gives the number of rows; DataFrame.count() would return a
    # per-column Series and garble the message.
    context.logger.debug(f'created {len(customers)} customers')
    context.logger.debug(f'example:\n{customers.head(1)}')

    # Send to stream
    context.v3f.write('tsdb', context.customers_stream, [customers])

In [13]:
# nuclio: ignore
# Local smoke test: run init_context inside the notebook.
# NOTE(review): `context` is not defined in any cell shown here - presumably it
# is provided by the nuclio import; confirm.
init_context(context)

In [14]:
# nuclio: ignore
# Local smoke test: call the handler once with an empty-bodied event.
# The output saved with this cell is a WriteError ("No TSDB schema file
# found ..."), which suggests the TSDB table did not exist when the write ran.
event = nuclio.Event(body='')
handler(context, event)

WriteError: error in _write: <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.UNKNOWN
	details = "backend Write failed: failed to create adapter: No TSDB schema file found at 'v3io-webapi:8081/users/admin/customers_stream'."
	debug_error_string = "{"created":"@1578682828.977882999","description":"Error received from peer ipv4:10.233.51.149:8081","file":"src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"backend Write failed: failed to create adapter: No TSDB schema file found at 'v3io-webapi:8081/users/admin/customers_stream'.","grpc_status":2}"
>

In [None]:
# NOTE(review): presumably -n is the function name and -p the project; confirm
# the meaning of -c against the nuclio-jupyter documentation.
%nuclio deploy -n location_generator -p recommendation_engine -c