# Nuclio - Generator function

## Environment

In [2]:
# nuclio: ignore
import nuclio

### Configurations

In [3]:
%%nuclio config 

# Trigger
spec.triggers.secs.kind = "cron"
spec.triggers.secs.attributes.interval = "10s"

# Base image
spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.triggers.secs.kind to 'cron'
%nuclio: setting spec.triggers.secs.attributes.interval to '10s'
%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Mount / wget

In [4]:
import os

In [5]:
# nuclio: ignore
os.environ['configurations_path'] = os.path.join('~', 'netops', 'tutorials', 'demos', 'netops', 'configurations')

In [6]:
%nuclio mount /configurations ${configurations_path}

mounting volume path /configurations as ~/netops/tutorials/demos/netops/configurations


### Setups

In [None]:
%%nuclio cmd

# Build-time setup commands for the function image.

# Utils
pip install pyarrow
pip install pyyaml --upgrade
pip install pandas
pip install pytimeparse

# Igz DB
pip install v3io_frames --upgrade

# Function
pip install -i https://test.pypi.org/simple/ v3io-generator
pip install faker

# Commands
# -p keeps this cell re-runnable: plain mkdir fails once the directory exists
mkdir -p /configurations

### Variables

In [304]:
%%nuclio env

# DB Config
V3IO_FRAMESD=${V3IO_FRAMESD}
V3IO_USERNAME=${V3IO_USERNAME}
V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Deployment
SAVE_DEPLOYMENT=0
DEPLOYMENT_TABLE=netops_devices

# Metrics
SAVE_TO=/v3io/bigdata/netops_metrics_parquet
INITIAL_TIMESTAMP=-1
SECS_TO_GENERATE=10

# Save as
SAVE_TO_TSDB=0

%nuclio: setting 'V3IO_FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'SAVE_DEPLOYMENT' environment variable
%nuclio: setting 'DEPLOYMENT_TABLE' environment variable
%nuclio: setting 'SAVE_TO' environment variable
%nuclio: setting 'INITIAL_TIMESTAMP' environment variable
%nuclio: setting 'SECS_TO_GENERATE' environment variable
%nuclio: setting 'SAVE_TO_TSDB' environment variable


%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line


In [222]:
%nuclio env -c METRICS_CONFIGURATION_FILEPATH=/configurations/metrics_configuration.yaml
%nuclio env -l METRICS_CONFIGURATION_FILEPATH=configurations/metrics_configuration.yaml

%nuclio: setting 'METRICS_CONFIGURATION_FILEPATH' environment variable


## Function

In [293]:
# import os # Already imported earlier
import time
import yaml
import pandas as pd
import datetime
import itertools

# DB Connection
import v3io_frames as v3f

# Data generator
from v3io_generator import metrics_generator, deployment_generator

### Helper functions

In [224]:
def _create_deployment():
    """Generate a new synthetic deployment table.

    Builds a three-level hierarchy (company -> data_center -> device) with
    two entries per level, using the faker instance supplied by the
    deployment generator, and returns whatever ``generate_deployment()``
    produces.
    """
    print('creating deployment')

    # Meta-data factory and the faker it exposes
    generator = deployment_generator.deployment_generator()
    fake = generator.get_faker()

    # Hierarchy levels, outermost first, paired with the faker provider
    # that produces the values for that level
    hierarchy = (
        ('company', fake.company),
        ('data_center', fake.street_name),
        ('device', fake.msisdn),
    )
    for level_name, provider in hierarchy:
        generator.add_level(name=level_name, number=2, level_type=provider)

    # Materialize the meta-data
    return generator.generate_deployment()

In [225]:
def _is_deployment_exist(path):
    # Checking shared path for the devices table
    return os.path.exists(f'/v3io/bigdata/{path}')

In [226]:
def _get_deployment_from_kv(client, path):
    print(f'Retrieving deployment from {path}')
    context.logger.debug(f'Retrieving deployment from {path}')
    # Read the devices table from our KV store
    deployment_df = client.read(backend='kv', table=path)
    
    # Reset index to column
    deployment_df.index.name = 'device'
    deployment_df = deployment_df.reset_index()
    return deployment_df

In [227]:
def _save_deployment_to_kv(path, df, client=v3f.Client('framesd:8081')):
    # Save deployment to our KV store
    client.write(backend='kv', table='netops_devices',dfs=df, index_cols=['device'])

In [228]:
def get_or_create_deployment(path, save_to_cloud=False, client=None):
    """Return the deployment table, loading it when possible.

    When a client is supplied and the table at ``path`` already exists it
    is read from the KV store. Otherwise a new deployment is generated
    and, if a client is supplied and ``save_to_cloud`` is truthy, written
    to the KV store before being returned.
    """
    # Existing deployment: just load it
    if client and _is_deployment_exist(path):
        return _get_deployment_from_kv(client, path)

    # Nothing stored (or no client): build a new one
    fresh_deployment = _create_deployment()
    if client and save_to_cloud:
        _save_deployment_to_kv(path, fresh_deployment, client)
    return fresh_deployment

In [250]:
def set_indexes(df):
    """Return ``df`` indexed by timestamp, company, data_center and device."""
    index_columns = ['timestamp', 'company', 'data_center', 'device']
    return df.set_index(index_columns)

In [251]:
def save_metrics_to_tsdb(context, metrics: pd.DataFrame):
    """Write ``metrics`` to the TSDB table named by ``context.metrics_table``.

    The frame is re-indexed with ``set_indexes`` and handed to
    ``context.v3f.write``.
    """
    print('Saving metrics to TSDB')

    # Fix indexes
    indexed_metrics = set_indexes(metrics)

    context.v3f.write('tsdb', context.metrics_table, indexed_metrics)

In [299]:
def save_metrics_to_parquet(context, metrics: pd.DataFrame):
    """Write ``metrics`` as a timestamped Parquet file.

    The ``timestamp`` column of the passed frame is cast to
    ``datetime64[ms]`` in place, the frame is re-indexed with
    ``set_indexes`` and then written to
    ``<context.metrics_table>/<YYYYmmddTHHMMSS>.parquet``.
    """
    print('Saving metrics to Parquet')

    # Timestamps are cast from ns to ms before writing to Parquet;
    # the first rows are printed before and after the cast
    print(metrics.loc[:1, 'timestamp'])
    metrics['timestamp'] = metrics['timestamp'].astype('datetime64[ms]')
    print(metrics.loc[:1, 'timestamp'])

    # Fix indexes
    indexed_metrics = set_indexes(metrics)

    # Save parquet under a name derived from the current local time
    file_name = '{}.parquet'.format(time.strftime('%Y%m%dT%H%M%S'))
    target_path = os.path.join(context.metrics_table, file_name)
    with open(target_path, 'wb+') as parquet_file:
        indexed_metrics.to_parquet(parquet_file)

In [300]:
def is_deployment_initialized(context):
    """Return True when ``context`` already has a ``metric_generator`` attribute."""
    generator_present = hasattr(context, 'metric_generator')
    return generator_present

### Init context

In [305]:
def _env_flag(name, default):
    """Read environment variable ``name`` as a boolean.

    Environment values are always strings, so they are compared textually:
    ``1``, ``true``, ``yes`` and ``on`` (case-insensitive) are True, any
    other value is False. ``default`` is returned (as a bool) when the
    variable is not set.
    """
    raw_value = os.getenv(name)
    if raw_value is None:
        return bool(default)
    return raw_value.strip().lower() in ('1', 'true', 'yes', 'on')


def init_context(context):
    """Prepare ``context`` for ``handler``.

    Environment variables read:
        SAVE_TO_TSDB: boolean flag (default on). Selects TSDB or Parquet.
        SAVE_TO: metrics table / directory (default ``netops_metrics``).
        V3IO_FRAMESD, V3IO_ACCESS_KEY, V3IO_USERNAME: frames connection
            settings, used only in TSDB mode.
        SECS_TO_GENERATE: seconds of data per invocation (default 10).
        DEPLOYMENT_TABLE: deployment table name (default ``netops_devices``).
        SAVE_DEPLOYMENT: boolean flag (default off), forwarded to
            ``get_or_create_deployment`` as ``save_to_cloud``.
        METRICS_CONFIGURATION_FILEPATH: YAML file with the metrics
            configuration.
        INITIAL_TIMESTAMP: integer passed to the generator as
            ``initial_timestamp`` (default: current epoch seconds).

    Attributes set on ``context``:
        metrics_table, write, secs_to_generate (int), metric_generator, v3f.

    Raises:
        KeyError: in TSDB mode when V3IO_ACCESS_KEY or V3IO_USERNAME is unset.
    """
    # Get saving configuration. The raw value is a string, so it has to be
    # parsed; comparing it to the integer 1 was always False when set.
    save_to_tsdb = _env_flag('SAVE_TO_TSDB', True)
    print(f'SAVE_TO_TSDB: {os.getenv("SAVE_TO_TSDB")}-{type(save_to_tsdb)}-{save_to_tsdb}')

    # Set metrics table
    metrics_table = os.getenv('SAVE_TO', 'netops_metrics')
    setattr(context, 'metrics_table', metrics_table)

    # TSDB based demo
    if save_to_tsdb:
        print('Saving to TSDB-')
        # Create our DB client
        client = v3f.Client(address='http://' + os.getenv('V3IO_FRAMESD', 'framesd:8081'),
                            container='bigdata',
                            password=os.environ['V3IO_ACCESS_KEY'],
                            user=os.environ['V3IO_USERNAME'])

        # Create TSDB table if needed
        client.create('tsdb', metrics_table, attrs={'rate': '1/s'}, if_exists=1)

        # Set saving function
        setattr(context, 'write', save_metrics_to_tsdb)

    else:
        print('Saving to Parquet-')
        # Set empty client for verification purposes
        client = None

        # Create saving directory
        os.makedirs(metrics_table, exist_ok=True)

        # Set saving function
        setattr(context, 'write', save_metrics_to_parquet)

    # Set batch length; stored as int so consumers do not get a string
    secs_to_generate = int(os.getenv('SECS_TO_GENERATE', 10))
    setattr(context, 'secs_to_generate', secs_to_generate)

    # Load or create the deployment. SAVE_DEPLOYMENT is parsed as a flag
    # because the raw string '0' is truthy.
    deployment_df = get_or_create_deployment(os.getenv('DEPLOYMENT_TABLE', 'netops_devices'),
                                             _env_flag('SAVE_DEPLOYMENT', False),
                                             client)

    # Constant starting value for every metric column
    deployment_df['cpu_utilization'] = 70
    deployment_df['latency'] = 0
    deployment_df['packet_loss'] = 0
    deployment_df['throughput'] = 290

    # Get metrics configuration.
    # NOTE(review): switched from yaml.load(f) to yaml.safe_load(f). The
    # bare call builds arbitrary Python objects and is rejected by newer
    # PyYAML releases that require an explicit Loader. Confirm the
    # configuration file uses only plain YAML types.
    with open(os.getenv('METRICS_CONFIGURATION_FILEPATH', '/configurations/metrics_configuration.yaml'), 'r') as f:
        metrics_configuration = yaml.safe_load(f)

    # Create metrics generator.
    # NOTE(review): the notebook sets INITIAL_TIMESTAMP=-1, which is passed
    # through unchanged - confirm how the generator interprets it.
    initial_timestamp = int(os.getenv('INITIAL_TIMESTAMP', time.time()))
    met_gen = metrics_generator.Generator_df(metrics_configuration,
                                             user_hierarchy=deployment_df,
                                             initial_timestamp=initial_timestamp)
    setattr(context, 'metric_generator', met_gen)

    # Set client
    setattr(context, 'v3f', client)


### Handler

In [306]:
def handler(context, event):
    """Generate one batch of metrics and persist it.

    Args:
        context: Object prepared by ``init_context``; must expose
            ``metric_generator``, ``secs_to_generate`` and ``write``.
        event: Triggering event; not used.

    Returns:
        None. The generated metrics are passed to ``context.write``.
    """
    # Take the clock once so the window is exactly secs_to_generate long;
    # two separate now() calls let the end time drift from the start.
    window_start = datetime.datetime.now()
    window_end = window_start + datetime.timedelta(seconds=int(context.secs_to_generate))

    # Create metrics based on the YAML configuration and deployment
    metrics = context.metric_generator.generate_range(start_time=window_start,
                                                      end_time=window_end,
                                                      as_df=True,
                                                      as_iterator=False)

    # Save generated metrics
    context.write(context, metrics)

## Test

In [307]:
# nuclio: ignore
# Local smoke test: initialise the context, then invoke the handler once
# with an empty-body event.
# NOTE(review): `context` is not defined in any visible cell - presumably it
# is provided by the nuclio import; confirm before running on a fresh kernel.
init_context(context)
event = nuclio.Event(body='')
output = handler(context, event)
# handler has no return statement, so this displays nothing (None)
output

SAVE_TO_TSDB: 0-<class 'bool'>-False
Saving to Parquet-
creating deployment
Saving metrics to Parquet
0   2019-05-14 07:40:33.851568
1   2019-05-14 07:40:33.851568
Name: timestamp, dtype: datetime64[ns]
0   2019-05-14 07:40:33.851
1   2019-05-14 07:40:33.851
Name: timestamp, dtype: datetime64[ns]




# Deploy

In [311]:
%nuclio deploy -p netops -n generator -c

%nuclio: ['deploy', '-p', 'netops', '-n', 'generator', '-c', '/User/netops/tutorials/demos/netops/Nuclio-Generator.ipynb']
%nuclio: [nuclio.deploy] 2019-05-14 07:42:08,676 (info) Building processor image
%nuclio: [nuclio.deploy] 2019-05-14 07:42:32,926 (info) Pushing image
%nuclio: [nuclio.deploy] 2019-05-14 07:42:36,968 (info) Build complete
%nuclio: [nuclio.deploy] 2019-05-14 07:43:09,314 (warn) Create function failed failed, setting function status
%nuclio: [nuclio.deploy] 2019-05-14 07:43:09,314
%nuclio: Error - NuclioFunction in error state (
%nuclio: Error - context deadline exceeded
%nuclio:     .../platform/kube/controller/nucliofunction.go:122
%nuclio: 
%nuclio: Call stack:
%nuclio: Failed to wait for function resources to be available
%nuclio:     .../platform/kube/controller/nucliofunction.go:122
%nuclio: )
%nuclio:     .../nuclio/nuclio/pkg/platform/kube/deployer.go:185
%nuclio: 
%nuclio: Call stack:
%nuclio: NuclioFunction in error state (
%nuclio: Error - context deadline

%nuclio: cannot deploy
error: cannot deploy
