# 0. Setup

Create an MLRun project and the streams relevant for our scenario.

![Model deployment with streaming Real-time operational Pipeline](../../assets/images/model-deployment-with-streaming.png)

### Load configurations

In [1]:
from os import getenv, path
from urllib.parse import urljoin

# Base name shared by the project and by the data paths derived from it
PROJECT_BASE_NAME = 'model-deployment-with-streaming'
# Name of the platform user, read from the environment (None when the variable is unset)
V3IO_USERNAME = getenv("V3IO_USERNAME")

Data path under which the stream data and key-value (KV) tables are stored

In [2]:
# Per-user root of the demo's data: <username>/examples/<project base name>/data
DATA_PATH = path.join(
    V3IO_USERNAME,
    "examples",
    PROJECT_BASE_NAME,
    "data",
)

All data in the platform is stored in user-defined data containers. In this case, we use the predefined "users" container. For more information, refer to the [Data containers, collections, and objects documentation](https://www.iguazio.com/docs/latest-release/concepts/containers-collections-objects).

In [3]:
# Data container that every stream and table path in this notebook is resolved against
CONTAINER = "users"

Define the web-API endpoint and the configuration (path and shard count) of each stream

In [4]:
# Address of the web-API service used to reach the data layer
WEB_API = 'http://v3io-webapi:8081'
# Web-API URL extended with the data container name
WEB_API_USERS = urljoin(WEB_API, CONTAINER)

# One entry per stream, keyed by stream name. Every stream sits directly
# under DATA_PATH and is created with 8 shards.
STREAM_CONFIGS = {
    stream: {'path': path.join(DATA_PATH, stream), 'shard_count': 8}
    for stream in (
        'generated-stream',
        'incoming-events-stream',
        'enriched-events-stream',
        'serving-stream',
        'inference-stream',
    )
}

When we stream data, we associate the records with a specific partition key to ensure that similar records are assigned to the same shard. For more information, see the [stream sharding and partitioning description](https://www.iguazio.com/docs/latest-release/concepts/streams/#stream-sharding-and-partitioning).

In [5]:
# Record attribute used as the partition key when writing to the streams
PARTITION_ATTR = 'user_id'

Target path for storing the raw data as Parquet files

In [6]:
# Target directory for the raw events stored as parquet files, rooted at '/User'.
# DATA_PATH starts with the username, so only the FIRST occurrence is swapped
# for '/User'; an unbounded replace would also rewrite later path components
# whenever the username happens to appear in them (e.g. a user named "data").
PARQUET_TARGET_PATH = path.join(DATA_PATH.replace(V3IO_USERNAME, '/User', 1), 'events-pq')

Target path to store the enrichment table (a key-value table)

In [7]:
# Location of the key-value table used for enrichment, under the demo's data root
ENRICHMENT_TABLE_PATH = path.join(
    DATA_PATH,
    "enrichment-table",
)

Target path to store the calculated features

In [8]:
# Location of the table that receives the calculated features, under the demo's data root
FEATURE_TABLE_PATH = path.join(
    DATA_PATH,
    "feature-table",
)

## Create V3IO Client

In [9]:
import v3io.dataplane

In [10]:
# Data-plane client used below to delete and create the streams.
# The endpoint comes from WEB_API rather than a repeated URL literal, so the
# client and the configuration published to the project cannot drift apart.
v3io_client = v3io.dataplane.Client(endpoint=WEB_API)

## Manage Streams

#### Delete all streams

In [11]:
# Issue a delete request for every configured stream. Failures do not raise
# (RaiseForStatus.never); the status code and body of each response are printed instead.
for stream_name, stream_config in STREAM_CONFIGS.items():
    resp = v3io_client.delete_stream(
        container=CONTAINER,
        path=stream_config['path'],
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    )
    print(f'Delete Stream call for stream {stream_name} returned with status {resp.status_code}, and content: {resp.body.decode("utf-8")}')

Delete Stream call for stream generated-stream returned with status 204, and content: 
Delete Stream call for stream incoming-events-stream returned with status 204, and content: 
Delete Stream call for stream enriched-events-stream returned with status 204, and content: 
Delete Stream call for stream serving-stream returned with status 204, and content: 
Delete Stream call for stream inference-stream returned with status 204, and content: 


#### Create all streams

In [12]:
# Create every configured stream with its shard count. Failures do not raise
# (RaiseForStatus.never); the status code and body of each response are printed instead.
for stream_name, stream_config in STREAM_CONFIGS.items():
    # Show the container-relative path being created
    print(stream_config['path'])
    resp = v3io_client.create_stream(
        container=CONTAINER,
        path=stream_config['path'],
        shard_count=stream_config['shard_count'],
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    )
    print(f'Create Stream call for stream {stream_name} returned with status {resp.status_code}, and content: {resp.body.decode("utf-8")}')

iguazio/examples/model-deployment-with-streaming/data/generated-stream
Create Stream call for stream generated-stream returned with status 204, and content: 
iguazio/examples/model-deployment-with-streaming/data/incoming-events-stream
Create Stream call for stream incoming-events-stream returned with status 204, and content: 
iguazio/examples/model-deployment-with-streaming/data/enriched-events-stream
Create Stream call for stream enriched-events-stream returned with status 204, and content: 
iguazio/examples/model-deployment-with-streaming/data/serving-stream
Create Stream call for stream serving-stream returned with status 204, and content: 
iguazio/examples/model-deployment-with-streaming/data/inference-stream
Create Stream call for stream inference-stream returned with status 204, and content: 


## Set Up the MLRun Project

In [13]:
from mlrun import new_project

# Project name is the base name, suffixed with the username when one is set
# in the environment (empty or missing values are dropped before joining).
name_parts = [PROJECT_BASE_NAME, getenv('V3IO_USERNAME')]
project_name = '-'.join(part for part in name_parts if part)
# Directory handed to new_project: ./conf resolved against the current working directory
project_path = path.abspath('conf')
project = new_project(project_name, project_path, init_git=True)

print(f'Project path: {project_path}\nProject name: {project_name}')

Project path: /User/work/tutorials/demos/model-deployment-with-streaming/conf
Project name: model-deployment-with-streaming-iguazio


In [14]:
from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io

# Pipeline artifacts go to ./artifacts, resolved against the current working directory
project.artifact_path = path.abspath("artifacts")
# Keep an already-configured MLRun DB path / API URL, otherwise fall back to the
# default service address. The attribute is assigned in both cases.
mlrun_db = mlconf.dbpath or 'http://mlrun-api:8080'
mlconf.dbpath = mlrun_db

print(f'Artifacts path: {project.artifact_path}\nMLRun DB path: {mlconf.dbpath}')

Artifacts path: /User/work/tutorials/demos/model-deployment-with-streaming/artifacts
MLRun DB path: http://mlrun-api:8080


In [15]:
# Copy the notebook-level settings into the project parameters, in this order
shared_params = {
    'PROJECT_BASE_NAME': PROJECT_BASE_NAME,
    'STREAM_CONFIGS': STREAM_CONFIGS,
    'CONTAINER': CONTAINER,
    'WEB_API': WEB_API,
    'WEB_API_USERS': WEB_API_USERS,
    'PARTITION_ATTR': PARTITION_ATTR,
    'PARQUET_TARGET_PATH': PARQUET_TARGET_PATH,
    'ENRICHMENT_TABLE_PATH': ENRICHMENT_TABLE_PATH,
    'FEATURE_TABLE_PATH': FEATURE_TABLE_PATH,
}
for param_name, param_value in shared_params.items():
    project.params[param_name] = param_value

In [16]:
from IPython.display import display, Markdown, JSON
# Render a markdown heading followed by an expanded JSON view of the project parameters
config_heading = Markdown('### Shared configuration:')
config_view = JSON(project.params, expanded=True)
display(config_heading, config_view)

### Shared configuration:

<IPython.core.display.JSON object>

### Save the Project

In [17]:
# Save the project now that its artifact path and parameters have been set
project.save()