# Generate simulated infrastructure telemetry 

In [1]:
# Install requiered packages if needed (only once)
!pip install pytimeparse
!pip install -i https://test.pypi.org/simple/ v3io-generator --upgrade
!pip install faker
!pip install pyarrow --upgrade

Looking in indexes: https://test.pypi.org/simple/
Requirement already up-to-date: v3io-generator in /User/.pythonlibs/lib/python3.6/site-packages (0.0.27.dev0)
Requirement already up-to-date: pyarrow in /User/.pythonlibs/lib/python3.6/site-packages (0.13.0)


In [2]:
import os
import time
import yaml
import pandas as pd
import datetime
import itertools

# DB Connection
import v3io_frames as v3f

# Data generator
from v3io_generator import metrics_generator, deployment_generator

General definitions

In [3]:
%env SAVE_TO_KV = True                 
%env DEPLOYMENT_TABLE = netops_devices 

env: SAVE_TO_KV=True
env: DEPLOYMENT_TABLE=netops_devices


## Create Metadata
the following section will create a list of devices which are scattered in multiple datacenters

In [4]:
def _create_deployment():
    print('creating deployment')
    # Create meta-data factory
    dep_gen = deployment_generator.deployment_generator()
    faker=dep_gen.get_faker()

    # Design meta-data
    dep_gen.add_level(name='company',number=2,level_type=faker.company)
    dep_gen.add_level('data_center',number=2,level_type=faker.street_name)
    dep_gen.add_level('device',number=2,level_type=faker.msisdn)

    # Create meta-data
    deployment_df = dep_gen.generate_deployment()
    return deployment_df

In [5]:
def _is_deployment_exist(path):
    # Checking shared path for the devices table
    return os.path.exists(f'/v3io/bigdata/{path}')

In [6]:
def _get_deployment_from_kv(path):
    print(f'Retrieving deployment from {path}')
    # Read the devices table from our KV store
    deployment_df = client.read(backend='kv', table=path)
    
    # Reset index to column
    deployment_df.index.name = 'device'
    deployment_df = deployment_df.reset_index()
    return deployment_df

In [7]:
def _save_deployment_to_kv(path, df, client=v3f.Client('framesd:8081')):
    # Save deployment to our KV store
    client.write(backend='kv', table='netops_devices',dfs=df, index_cols=['device'])

In [8]:
def get_or_create_deployment(path, save_to_cloud=False, client=v3f.Client('framesd:8081')):
    if _is_deployment_exist(path):
        # Get deployment from KV
        deployment_df = _get_deployment_from_kv(path)
    else:
        # Create deployment
        deployment_df = _create_deployment()
        
        if save_to_cloud:
            _save_deployment_to_kv(path, deployment_df, client)

    return deployment_df

In [9]:
# Create our DB client
client = v3f.Client('framesd:8081')

In [10]:
deployment_df = get_or_create_deployment(os.environ['DEPLOYMENT_TABLE'], os.environ['SAVE_TO_KV'])
deployment_df

Retrieving deployment from netops_devices


Unnamed: 0,device,company,data_center
0,1416256267840,Washington-Lambert,Tamara_Greens
1,9909218583192,Ortega__Nelson_and_Parrish,Gallagher_Springs
2,9999032632997,Ortega__Nelson_and_Parrish,Chen_Rest
3,243526550889,Washington-Lambert,Nicole_Trail
4,2588823083436,Ortega__Nelson_and_Parrish,Chen_Rest
5,6842603445464,Ortega__Nelson_and_Parrish,Gallagher_Springs
6,9025508674926,Washington-Lambert,Tamara_Greens
7,5079628775825,Washington-Lambert,Nicole_Trail


Read from our KV to make sure we have backup

In [11]:
# verify the table is written 
client.read(backend='kv', table='netops_devices')

Unnamed: 0_level_0,company,data_center
device,Unnamed: 1_level_1,Unnamed: 2_level_1
6842603445464,Ortega__Nelson_and_Parrish,Gallagher_Springs
1416256267840,Washington-Lambert,Tamara_Greens
9909218583192,Ortega__Nelson_and_Parrish,Gallagher_Springs
9999032632997,Ortega__Nelson_and_Parrish,Chen_Rest
243526550889,Washington-Lambert,Nicole_Trail
2588823083436,Ortega__Nelson_and_Parrish,Chen_Rest
9025508674926,Washington-Lambert,Tamara_Greens
5079628775825,Washington-Lambert,Nicole_Trail


## Add initial values

In [12]:
deployment_df['cpu_utilization'] = 70
deployment_df['latency'] = 0
deployment_df['packet_loss'] = 0
deployment_df['throughput'] = 290
deployment_df.head()

Unnamed: 0,device,company,data_center,cpu_utilization,latency,packet_loss,throughput
0,1416256267840,Washington-Lambert,Tamara_Greens,70,0,0,290
1,9909218583192,Ortega__Nelson_and_Parrish,Gallagher_Springs,70,0,0,290
2,9999032632997,Ortega__Nelson_and_Parrish,Chen_Rest,70,0,0,290
3,243526550889,Washington-Lambert,Nicole_Trail,70,0,0,290
4,2588823083436,Ortega__Nelson_and_Parrish,Chen_Rest,70,0,0,290


## Generate simulated metrics per device
Metrics schema (describe simulated values) is read from `metrics_configuration.yaml`

In [13]:
# Load metrics configuration from YAML file
with open('configurations/metrics_configuration.yaml', 'r') as f:
    metrics_configuration = yaml.load(f)

# Create metrics generator based on YAML configuration
met_gen = metrics_generator.Generator_df(metrics_configuration, user_hierarchy=deployment_df, initial_timestamp=time.time())
metrics = met_gen.generate_range(start_time=datetime.datetime.now(),
                                 end_time=datetime.datetime.now()+datetime.timedelta(hours=1),
                                 as_df=True,
                                 as_iterator=True)

  This is separate from the ipykernel package so we can avoid doing imports until


In [14]:
df = pd.concat(itertools.chain(metrics))
df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error,is_error
timestamp,data_center,company,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-06-17 14:39:24.404731,Tamara_Greens,Washington-Lambert,1416256267840,68.340178,False,4.736444,False,3.086756,False,255.270204,False,False
2019-06-17 14:39:24.404731,Gallagher_Springs,Ortega__Nelson_and_Parrish,9909218583192,79.825575,False,5.369881,False,0.0,False,244.680344,False,False
2019-06-17 14:39:24.404731,Chen_Rest,Ortega__Nelson_and_Parrish,9999032632997,74.625916,False,0.0,False,0.8105,False,229.439649,False,False
2019-06-17 14:39:24.404731,Nicole_Trail,Washington-Lambert,243526550889,64.806974,False,0.0,False,1.177091,False,240.797342,False,False
2019-06-17 14:39:24.404731,Chen_Rest,Ortega__Nelson_and_Parrish,2588823083436,57.56172,False,2.732402,False,3.374598,False,238.602076,False,False


## Save to Iguazio Time-series Database

In [15]:
# uncomment the line below if you want to reset the TSDB table 
client.delete(backend='tsdb', table='netops_metrics_jupyter')

In [16]:
# create a new table, need to specify estimated sample rate
client.create(backend='tsdb', table='netops_metrics_jupyter', attrs={'rate': '1/m'})

In [17]:
# write the dataframe into the time-seried DB, note the company,data_center,device indexes are automatically converted to search optimized labels
client.write(backend='tsdb', table='netops_metrics_jupyter', dfs=df)

## Verify that the data was written

In [18]:
client.read(backend='tsdb', query='select avg(cpu_utilization), avg(latency) , avg(packet_loss) , avg(throughput)  from netops_metrics_jupyter group by company, data_center, device',
            start="now-1d", end='now+1d', multi_index=True, step='5m').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,avg(cpu_utilization),avg(latency),avg(packet_loss),avg(throughput)
time,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-06-17 14:34:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,68.078416,1.423847,0.897193,244.903743
2019-06-17 14:39:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,68.533801,2.367653,0.84491,246.186991
2019-06-17 14:44:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,70.377798,2.096841,0.455499,252.081668
2019-06-17 14:49:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,87.306579,47.314863,21.452884,159.983865
2019-06-17 14:54:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,67.573655,2.729573,0.856969,250.914966
2019-06-17 14:59:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,69.896618,1.819913,0.666666,251.590818
2019-06-17 15:04:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,69.737311,2.025129,0.734578,249.674377
2019-06-17 15:09:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,72.079085,1.60698,0.915787,255.811956
2019-06-17 15:14:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,72.180327,1.475628,1.772953,252.170768
2019-06-17 15:19:46,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,84.834458,45.07605,25.396808,166.740806


### Save the generated dataset to parquet for future reproducability 

In [19]:
# craete directory if doesnt exist 
!mkdir data

mkdir: cannot create directory ‘data’: File exists


In [20]:
import pyarrow as pa
from pyarrow import parquet as pq

In [21]:
#write the dataframe into a parquet (on iguazio file system)
version = '1.0'
filepath = 'data/netops_metrics.v{}.parquet'.format(version)
pq.write_table(pa.Table.from_pandas(df), filepath)

### Reading the data from parquet into the time-series DB
if we want to reproduce the same results we can rebuild the TSDB from the saved parquet file

In [22]:
# uncomment the line below if you want to reset the TSDB table 
client.delete(backend='tsdb', table='netops_metrics_jupyter')
client.create(backend='tsdb', table='netops_metrics_jupyter', attrs={'rate': '1/m'})

In [23]:
# read the parquet into memory and print the head 
pqdf = pq.read_table(filepath).to_pandas()
pqdf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error,is_error
timestamp,data_center,company,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-06-17 14:39:24.404731,Tamara_Greens,Washington-Lambert,1416256267840,68.340178,False,4.736444,False,3.086756,False,255.270204,False,False
2019-06-17 14:39:24.404731,Gallagher_Springs,Ortega__Nelson_and_Parrish,9909218583192,79.825575,False,5.369881,False,0.0,False,244.680344,False,False
2019-06-17 14:39:24.404731,Chen_Rest,Ortega__Nelson_and_Parrish,9999032632997,74.625916,False,0.0,False,0.8105,False,229.439649,False,False
2019-06-17 14:39:24.404731,Nicole_Trail,Washington-Lambert,243526550889,64.806974,False,0.0,False,1.177091,False,240.797342,False,False
2019-06-17 14:39:24.404731,Chen_Rest,Ortega__Nelson_and_Parrish,2588823083436,57.56172,False,2.732402,False,3.374598,False,238.602076,False,False


In [24]:
# write the dataframe into the time-seried DB, uncomment the line below
client.write(backend='tsdb', table='netops_metrics_jupyter', dfs=pqdf)

In [25]:
# verify the table is written 
client.read(backend='tsdb', query='select avg(cpu_utilization) , avg(latency) , avg(packet_loss) , avg(throughput)  from netops_metrics_jupyter group by company, data_center, device',
            start="now-1d", end='now+1d', multi_index=True, step='5m').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,avg(cpu_utilization),avg(latency),avg(packet_loss),avg(throughput)
time,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-06-17 14:34:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,67.675289,1.139078,0.797778,239.154164
2019-06-17 14:39:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,68.178614,2.367653,0.857688,246.453351
2019-06-17 14:44:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,70.86944,2.096841,0.440598,253.142512
2019-06-17 14:49:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,86.95739,47.429515,21.464396,159.796418
2019-06-17 14:54:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,67.901543,2.638625,0.839577,249.824
2019-06-17 14:59:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,70.016103,1.796209,0.666666,252.405314
2019-06-17 15:04:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,69.325575,2.14181,0.734578,249.794588
2019-06-17 15:09:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,72.047269,1.490299,0.915787,255.727727
2019-06-17 15:14:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,72.510986,1.475628,1.772953,251.61278
2019-06-17 15:19:54,Ortega__Nelson_and_Parrish,Chen_Rest,9999032632997,85.081636,45.07605,25.412336,166.853232
