# Streaming enrichment Example

This example creates and deploys a Nuclio function which is triggered by incoming event-messages to a V3IO-Stream.
The function enriches the original event-message with data from a V3IO-KV table,
and writes the enriched message to an output V3IO-Stream.


### Parameter definition

In [8]:
import nuclio
import os
import requests

# --- Nuclio project / function identity ---
NUCLIO_PROJ_NAME = 'examples'
NUCLIO_FUNC_NAME = 'enrich-stream'

# --- V3IO data locations ---
CONTAINER_NAME = 'users'
TABLE_NAME = 'cars'            # KV table the function queries for enrichment data
TABLE_KEY = 'CarID'            # key attribute name used in the KV lookup
OUTPUT_STREAM_NAME = 'stream2'

# --- V3IO endpoint and credentials, taken from the environment ---
V3IO_API = os.getenv('V3IO_API')
# Mandatory: raises KeyError right here if the access key is not set.
V3IO_ACCESS_KEY = os.environ['V3IO_ACCESS_KEY']
V3IO_USERNAME = os.getenv('V3IO_USERNAME')
# Read the password from the environment like the other credentials instead of
# relying on a secret committed in the notebook; the literal is kept only as a
# backward-compatible fallback for environments that do not define it.
V3IO_PASSWORD = os.getenv('V3IO_PASSWORD', 'data123')

# --- Input stream and trigger settings ---
INPUT_STREAM_NAME = 'stream1'
# Field of the incoming message whose value is looked up in the KV table.
INPUT_STREAM_SEARCH_KEY = 'CarID'
INPUT_STREAM_URL = f'http://{V3IO_API}/{CONTAINER_NAME}/{INPUT_STREAM_NAME}/'
INPUT_STREAM_PARTITIONS = [0, 1, 2]
# NOTE(review): 0 workers is passed to the trigger as-is -- confirm that this
# is the intended value for 'numContainerWorkers'.
INPUT_STREAM_NUM_CONTAINER_WORKERS = 0
INPUT_STREAM_SEEK_TO = 'earliest'
INPUT_STREAM_READ_BATCH_SIZE = 64
INPUT_STREAM_POLLING_INTERVAL_MS = 250



### Define a list of environment variables to be set for our Nuclio-function

In [9]:
# Environment variables for the deployed function, rendered as 'NAME=value'
# strings; the function code reads each of them back through os.environ.
NUCLIO_ENV = [
    f'{env_name}={env_value}'
    for env_name, env_value in (
        ('V3IO_API', V3IO_API),
        ('CONTAINER_NAME', CONTAINER_NAME),
        ('TABLE_NAME', TABLE_NAME),
        ('TABLE_KEY', TABLE_KEY),
        ('INPUT_STREAM_NAME', INPUT_STREAM_NAME),
        ('INPUT_STREAM_SEARCH_KEY', INPUT_STREAM_SEARCH_KEY),
        ('OUTPUT_STREAM_NAME', OUTPUT_STREAM_NAME),
        ('V3IO_ACCESS_KEY', V3IO_ACCESS_KEY),
    )
]

### Define trigger configuration for our Nuclio-function

In [10]:
# Function-config override that registers a single v3ioStream trigger, named
# after the input stream, under the 'spec.triggers' key.
NUCLIO_TRIGGER_CONF = {
    'spec.triggers': {
        INPUT_STREAM_NAME: dict(
            kind='v3ioStream',
            url=INPUT_STREAM_URL,
            username=V3IO_USERNAME,
            password=V3IO_PASSWORD,
            # Stream-reading attributes, taken from the INPUT_STREAM_* constants.
            attributes=dict(
                partitions=INPUT_STREAM_PARTITIONS,
                numContainerWorkers=INPUT_STREAM_NUM_CONTAINER_WORKERS,
                seekTo=INPUT_STREAM_SEEK_TO,
                readBatchSize=INPUT_STREAM_READ_BATCH_SIZE,
                pollingIntervalMs=INPUT_STREAM_POLLING_INTERVAL_MS,
            ),
        ),
    },
}

### Define build commands for our Nuclio-function

In [11]:
# Build command handed to nuclio.deploy_code through its `cmd` argument; it
# installs the `requests` package that the function code imports.
NUCLIO_CMD = 'pip install requests'

### Define the Nuclio-function code

In [12]:
# Source code of the Nuclio function, kept as a string so it can be passed to
# nuclio.deploy_code(code=...). The environment variables it reads are the
# ones listed in NUCLIO_ENV.
NUCLIO_CODE = '''
import requests
import json
import base64
import os

# Upper bound, in seconds, for every V3IO HTTP call, so that an unresponsive
# endpoint cannot block a worker indefinitely (requests has no default timeout).
REQUEST_TIMEOUT_SECONDS = 10


def init_context(context):
    """Copy the deployment environment variables into context.user_data.config.

    Raises KeyError if any of the expected variables is missing.
    """
    # env -> config
    context.user_data.config = {
        'v3io_api': os.environ['V3IO_API'],
        'container_name': os.environ['CONTAINER_NAME'],
        'table': {
            'name': os.environ['TABLE_NAME'],
            'key': os.environ['TABLE_KEY']
        },
        'input_stream': {
            'name': os.environ['INPUT_STREAM_NAME'],
            'search_key': os.environ['INPUT_STREAM_SEARCH_KEY']
        },
        'output_stream_name': os.environ['OUTPUT_STREAM_NAME'],
        'v3io_access_key': os.environ['V3IO_ACCESS_KEY'],
    }


def handler(context, event):
    """Enrich one incoming stream message and write it to the output stream.

    The event body is decoded and parsed as JSON, the value of the configured
    search key is looked up in the KV table, the result is stored under the
    'enrichment' key of the message, and the message is put on the output
    stream.
    """
    config = context.user_data.config
    event_body = event.body
    context.logger.debug(f'Incoming event body: {event_body}')

    msg = json.loads(event_body.decode())
    search_value = msg[config['input_stream']['search_key']]
    search_key = config['table']['key']
    context.logger.debug(f'Search key: {search_key} for value: {search_value}')

    enrichment_data = _search_kv(search_key, search_value, config)
    context.logger.debug(f'Search result: {enrichment_data}')

    msg['enrichment'] = enrichment_data
    context.logger.debug(f'Output message: {msg}')

    response = _put_records([msg], config)
    context.logger.debug(response)
    if not response.ok:
        # A rejected write used to be visible only at debug level; report it
        # without failing the event.
        context.logger.warn(
            f'PutRecords failed with status {response.status_code}: {response.text}')


def _get_url(v3io_api, container_name, collection_name):
    """Return the HTTP URL of a collection (table or stream) in a container."""
    return f'http://{v3io_api}/{container_name}/{collection_name}/'


def _get_headers(v3io_function, v3io_access_key):
    """Return the request headers for the given V3IO operation name."""
    return {
        'Content-Type': "application/json",
        'X-v3io-function': v3io_function,
        'cache-control': "no-cache",
        'x-v3io-session-key': v3io_access_key
    }


def _search_kv(key, value, config):
    """Issue a GetItem request for key == value against the configured table.

    Returns the 'Item' entry of the JSON reply, or an empty dict when the
    reply has no 'Item'.
    """
    v3io_api = config['v3io_api']
    container_name = config['container_name']
    table_name = config['table']['name']
    v3io_access_key = config['v3io_access_key']

    url = _get_url(v3io_api, container_name, table_name)
    payload = {"Key": {key: {"S": value}}}
    headers = _get_headers("GetItem", v3io_access_key)

    reply = requests.request("POST", url, json=payload, headers=headers,
                             timeout=REQUEST_TIMEOUT_SECONDS)
    json_response = json.loads(reply.text)

    response = {}
    if 'Item' in json_response:
        response = json_response['Item']

    return response


def _put_records(items, config):
    """Issue a PutRecords request with the items and return the HTTP response."""
    v3io_api = config['v3io_api']
    container_name = config['container_name']
    output_stream_name = config['output_stream_name']
    v3io_access_key = config['v3io_access_key']

    records = _items_to_records(items)
    url = _get_url(v3io_api, container_name, output_stream_name)
    headers = _get_headers("PutRecords", v3io_access_key)

    return requests.request("POST", url, json=records, headers=headers,
                            timeout=REQUEST_TIMEOUT_SECONDS)


def _item_to_b64(item):
    """Serialize the item to JSON and return it base64-encoded, as a str."""
    item_string = json.dumps(item)
    return base64.b64encode(item_string.encode('utf-8')).decode('utf-8')


def _items_to_records(items):
    """Wrap the items in a PutRecords payload: one base64 'Data' entry each."""
    return {'Records': [{'Data': _item_to_b64(item)} for item in items]}
'''

### Deploy the code

In [None]:
# Deploy the function from the definitions above; the value returned by
# deploy_code is kept in `addr`.
addr = nuclio.deploy_code(
    code=NUCLIO_CODE,
    name=NUCLIO_FUNC_NAME,
    project=NUCLIO_PROJ_NAME,
    verbose=True,
    create_new=True,
    env=NUCLIO_ENV,
    config=NUCLIO_TRIGGER_CONF,
    cmd=NUCLIO_CMD,
)

# invoke the generated function 
#resp = requests.get(addr)
#print(resp.text)