# Generate simulated infrastructure telemetry 

In [None]:
# Install requiered packages if needed (only once)
!pip install pytimeparse
!pip install -i https://test.pypi.org/simple/ v3io-generator --upgrade
!pip install faker

In [1]:
import os
import time

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import v3io_frames as v3f
import yaml
from v3io_generator import metrics_generator, deployment_generator

## Create Metadata
The following section creates a list of devices that are spread across multiple data centers.

In [2]:
# Build the metadata factory and take the Faker instance it exposes
dep_gen = deployment_generator.deployment_generator()
faker = dep_gen.get_faker()

# Hierarchy levels in the order they are registered:
# (level name, Faker provider used to generate that level's values)
hierarchy_levels = [
    ('company', faker.company),
    ('data_center', faker.street_name),
    ('device', faker.msisdn),
]
for level_name, name_provider in hierarchy_levels:
    dep_gen.add_level(name=level_name, number=2, level_type=name_provider)

# Materialize the deployment metadata as a dataframe
deployment_df = dep_gen.generate_deployment()

Write the device list to a key/value table

In [3]:
# Address of the frames service that all reads and writes below go through
frames_address = 'framesd:8081'
client = v3f.Client(frames_address)

In [4]:
# Optional reset: uncomment the line below to delete the existing 'netops_devices' key/value table
#client.delete(backend='kv', table='netops_devices')

In [5]:
# Store the device metadata in the key/value table, using the 'device' column as the index
client.write(
    backend='kv',
    table='netops_devices',
    dfs=deployment_df,
    index_cols=['device'],
)

In [6]:
# Sanity check: read the key/value table back and display it
client.read(
    backend='kv',
    table='netops_devices',
)

Unnamed: 0_level_0,company,data_center
__name,Unnamed: 1_level_1,Unnamed: 2_level_1
5728297877286,Boyd-Faulkner,Barrett_Crossing
8566530842848,Boyd-Faulkner,Barrett_Crossing
7542919702594,Boyd-Faulkner,Nancy_Mall
1184469177528,Wheeler__Shelton_and_Hill,Dean_Park
3916108545629,Boyd-Faulkner,Nancy_Mall
2368397418989,Wheeler__Shelton_and_Hill,Dean_Park
847959813128,Wheeler__Shelton_and_Hill,Cindy_Rapids
4963294562966,Wheeler__Shelton_and_Hill,Cindy_Rapids


## Add initial values

In [7]:
# Starting value for every metric column; each value is applied to all devices
initial_values = {
    'cpu_utilization': 70,
    'latency': 0,
    'packet_loss': 0,
    'throughput': 290,
}
# Add the columns in place, in the order listed above
for metric_name, start_value in initial_values.items():
    deployment_df[metric_name] = start_value
deployment_df.head()

Unnamed: 0,company,data_center,device,cpu_utilization,latency,packet_loss,throughput
0,Wheeler__Shelton_and_Hill,Dean_Park,2368397418989,70,0,0,290
1,Wheeler__Shelton_and_Hill,Dean_Park,1184469177528,70,0,0,290
2,Wheeler__Shelton_and_Hill,Cindy_Rapids,847959813128,70,0,0,290
3,Wheeler__Shelton_and_Hill,Cindy_Rapids,4963294562966,70,0,0,290
4,Boyd-Faulkner,Barrett_Crossing,8566530842848,70,0,0,290


## Generate simulated metrics per device
The metrics schema (which describes the simulated values) is read from `metrics_configuration.yaml`.

In [8]:
# Load the metrics configuration from the YAML file.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader is deprecated (and an error on PyYAML >= 6), and the default loader can
# construct arbitrary Python objects from the file.
with open('metrics_configuration.yaml', 'r') as f:
    metrics_configuration = yaml.safe_load(f)

# Create the metrics generator from the YAML configuration, using the deployment
# dataframe as the device hierarchy and the current time as the first timestamp
met_gen = metrics_generator.Generator_df(metrics_configuration, user_hierarchy=deployment_df, initial_timestamp=time.time())
# `metrics` is consumed with next() in the following cell
metrics = met_gen.generate()


In [9]:
# Number of batches to pull from the metrics generator
num_samples = 1000

# Pull the batches one at a time, then stack them into a single dataframe
sample_frames = []
for sample_idx in range(num_samples):
    sample_frames.append(next(metrics))
df = pd.concat(sample_frames)
df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error,is_error
timestamp,data_center,device,company,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-03-05 14:38:47.338574,Dean_Park,2368397418989,Wheeler__Shelton_and_Hill,62.7,False,0.0,False,0.0,False,222.27,False,False
2019-03-05 14:38:47.338574,Dean_Park,1184469177528,Wheeler__Shelton_and_Hill,67.19,False,2.13,False,1.0,False,242.92,False,False
2019-03-05 14:38:47.338574,Cindy_Rapids,847959813128,Wheeler__Shelton_and_Hill,73.02,False,0.4,False,0.0,False,222.61,False,False
2019-03-05 14:38:47.338574,Cindy_Rapids,4963294562966,Wheeler__Shelton_and_Hill,61.16,False,0.0,False,0.0,False,265.66,False,False
2019-03-05 14:38:47.338574,Barrett_Crossing,8566530842848,Boyd-Faulkner,78.61,False,4.87,False,0.0,False,289.78,False,False
2019-03-05 14:38:47.338574,Barrett_Crossing,5728297877286,Boyd-Faulkner,85.61,False,0.0,False,0.0,False,258.25,False,False
2019-03-05 14:38:47.338574,Nancy_Mall,3916108545629,Boyd-Faulkner,55.59,False,9.96,False,0.0,False,257.54,False,False
2019-03-05 14:38:47.338574,Nancy_Mall,7542919702594,Boyd-Faulkner,66.63,False,0.0,False,1.0,False,268.52,False,False
2019-03-05 14:38:53.338574,Dean_Park,2368397418989,Wheeler__Shelton_and_Hill,65.12,False,1.74,False,3.0,False,253.52,False,False
2019-03-05 14:38:53.338574,Dean_Park,1184469177528,Wheeler__Shelton_and_Hill,96.6,False,0.0,False,0.0,False,240.68,False,False


## Save to Iguazio Time-series Database

In [10]:
# Optional reset: uncomment the line below to delete the existing 'netops_metrics_jupyter' TSDB table
#client.delete(backend='tsdb', table='netops_metrics_jupyter')

In [11]:
# create a new table, need to specify estimated sample rate
# Table attributes: 'rate' is the estimated sample rate, which must be given when creating the table
tsdb_table_attrs = {'rate': '1/m'}
client.create(backend='tsdb', table='netops_metrics_jupyter', attrs=tsdb_table_attrs)

In [12]:
# Write the dataframe into the time-series DB.
# Note: the company, data_center and device indexes are automatically converted
# to search-optimized labels.
client.write(
    backend='tsdb',
    table='netops_metrics_jupyter',
    dfs=df,
)

## Verify that the data was written

In [13]:
# Per-device averages of every metric, grouped by the three hierarchy labels
verification_query = (
    'select avg(cpu_utilization) as cpu_utilization, '
    'avg(latency) as latency, '
    'avg(packet_loss) as packet_loss, '
    'avg(throughput) as throughput '
    'from netops_metrics_jupyter '
    'group by company, data_center, device'
)
# Query a window of one day either side of now, in 5-minute steps, and show the first rows
client.read(
    backend='tsdb',
    query=verification_query,
    start="now-1d",
    end='now+1d',
    multi_index=True,
    step='5m',
).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,latency,packet_loss,throughput
time,data_center,device,company,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-03-05 14:34:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,71.093333,1.08,0.111111,254.581111
2019-03-05 14:39:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,69.5232,2.2654,0.7,251.5794
2019-03-05 14:44:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,82.4496,31.3854,17.6,194.6722
2019-03-05 14:49:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,72.8818,13.5588,6.46,218.545
2019-03-05 14:54:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,83.358,49.1876,23.38,159.5008
2019-03-05 14:59:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,81.3818,35.3046,15.82,186.931
2019-03-05 15:04:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,73.2438,13.233,6.62,220.3578
2019-03-05 15:09:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,70.21,1.7898,0.78,251.9336
2019-03-05 15:14:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,72.3982,2.1098,0.6,248.3038
2019-03-05 15:19:37,Barrett_Crossing,8566530842848,Boyd-Faulkner,69.9516,1.3732,0.66,252.0036


### Save the generated dataset to parquet for future reproducibility

In [15]:
# craete directory if doesnt exist 
!mkdir data

In [16]:
import pyarrow as pa

In [None]:
# Write the dataframe to a versioned parquet file (on the Iguazio file system).
# write_table is called through the explicitly imported pyarrow.parquet module
# (pq): `pa.parquet` is only available as an attribute once that submodule has
# been imported somewhere.
version = '1.0'
filepath = 'data/netops_metrics.v{}.parquet'.format(version)
pq.write_table(pa.Table.from_pandas(df), filepath)

### Reading the data from parquet into the time-series DB
If we want to reproduce the same results, we can rebuild the TSDB from the saved parquet file.

In [16]:
# Optional reset: uncomment the two lines below to delete and re-create the TSDB table
#client.delete(backend='tsdb', table='netops_metrics_jupyter')
#client.create(backend='tsdb', table='netops_metrics_jupyter', attrs={'rate': '1/m'})

In [14]:
# Read the parquet file (path set in the write cell above) into memory and show
# the first rows. read_table is called through the explicitly imported
# pyarrow.parquet module (pq): `pa.parquet` is only available as an attribute
# once that submodule has been imported somewhere.
pqdf = pq.read_table(filepath).to_pandas()
pqdf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error,is_error
timestamp,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-02-23 14:07:28.219701,Brennan-Meyer,Bender_Mountain,8056353454828,73.94,False,0.0,False,0.0,False,219.86,False,False
2019-02-23 14:07:28.219701,Brennan-Meyer,Bender_Mountain,8654196541788,73.78,False,0.0,False,1.0,False,271.04,False,False
2019-02-23 14:07:28.219701,Brennan-Meyer,Mccoy_Union,5082949640375,64.45,False,0.0,False,0.0,False,223.61,False,False
2019-02-23 14:07:28.219701,Brennan-Meyer,Mccoy_Union,3879474364771,77.6,False,0.0,False,0.0,False,260.23,False,False
2019-02-23 14:07:28.219701,Robinson__Andrews_and_Smith,Mills_Fields,1569394356868,60.26,False,0.43,False,2.0,False,276.79,False,False


In [17]:
# To write the parquet dataframe into the time-series DB, uncomment the line below
#client.write(backend='tsdb', table='netops_metrics_jupyter', dfs=pqdf)