# Using Python Client

In [1]:
import asyncio
import getpass
import logging
import os

import pandas as pd

from ga_api import System, Repository, HyperLogLog
# Layout of every log record: marker, time, level, source location, message.
LOG_FORMAT = '▸ %(asctime)s %(levelname)s %(filename)s:%(funcName)s(%(lineno)d) - %(message)s'
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

# No level is passed, so the root logger keeps its default level.
# Add level=logging.INFO to the call below to see INFO messages as well.
logging.basicConfig(format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)

In [2]:
# Open the API repository handle used by every cell below.
#
# Connection settings are read from the environment so that no secret is
# stored in the notebook. The password is prompted for when GA_API_PASSWORD
# is not set. To target another server, set GA_API_URL, e.g.
#   GA_API_URL=https://rdlab-214.genie-analytics.com/api
#
# NOTE(review): the 2nd-4th positional arguments are assumed to be the user
# name, a domain/tenant name and the password - confirm against
# ga_api.Repository before relying on the environment variable names.
# NOTE(review): ssl=False is kept from the original call; it presumably
# relaxes TLS checking for the lab host - confirm before using elsewhere.
repo = Repository(
    os.environ.get('GA_API_URL', 'https://192.168.11.222/api'),
    os.environ.get('GA_API_USER', 'api'),
    os.environ.get('GA_API_DOMAIN', 'default'),
    os.environ.get('GA_API_PASSWORD') or getpass.getpass('GA API password: '),
    ssl=False,
    burst=4,
)

## Ad Hoc Query

In [3]:
adhoc = await repo.set_adhoc(300, {
    'scope': {
        'i-field': '@controller.home',
        'i-entry': [
            [True,'t:0:0:1',0]
        ],
        'mode': '%directional'
    },
    'metric': [
        ['$sum', {
            'field':'@flow.bytes'
        }]
    ]
})

### Point Query

In [4]:
df_point = await adhoc.read_data('2022-03-11 10:00:00', 'hour', columns=['forward','opposite'])
print(df_point)

    forward    opposite
0  49406924  1461267597


In [5]:
df_point_0800 = await adhoc.read_data('2022-03-11 10:00:00+0800', 'hour', columns=['forward','opposite'])
df_point_0900 = await adhoc.read_data('2022-03-11 11:00:00+0900', 'hour', columns=['forward','opposite'])
print(df_point_0800)
print(df_point_0900)

    forward    opposite
0  49406924  1461267597
    forward    opposite
0  49406924  1461267597


### Range Query

In [6]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-11 11:00:00', freq='5T', inclusive='left')
df_range = await adhoc.read_data(dts, 'hour', columns=['timstamp','forward','opposite'])
print(df_range)

              timstamp    forward    opposite
0  2022-03-11 10:00:00   49406924  1461267597
1  2022-03-11 10:05:00   51059216   474717295
2  2022-03-11 10:10:00   65980104   399852436
3  2022-03-11 10:15:00  136451534   769131598
4  2022-03-11 10:20:00  177329937   643387464
5  2022-03-11 10:25:00  267064304   793054430
6  2022-03-11 10:30:00  157039040   619663619
7  2022-03-11 10:35:00  459774214   939979866
8  2022-03-11 10:40:00  820108989   857197053
9  2022-03-11 10:45:00  478451960   549668690
10 2022-03-11 10:50:00   79747664   427091909
11 2022-03-11 10:55:00   46992840   521694738


### Sample Query

In [7]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-27 10:00:00', freq='D')
df_range = await adhoc.read_data(dts, 'hour', columns=['timstamp','forward','opposite'])
print(df_range)

              timstamp      forward      opposite
0  2022-03-11 10:00:00   49406924.0  1.461268e+09
1  2022-03-12 10:00:00   14763142.0  4.053517e+07
2  2022-03-13 10:00:00   19757650.0  3.465118e+07
3  2022-03-14 10:00:00   68562400.0  3.158328e+08
4  2022-03-15 10:00:00   60450191.0  1.267742e+09
5  2022-03-16 10:00:00   56088660.0  3.076066e+08
6  2022-03-17 10:00:00  114197889.0  1.144716e+09
7  2022-03-18 10:00:00   66442039.0  5.955546e+08
8  2022-03-19 10:00:00   70044985.0  7.544690e+07
9  2022-03-20 10:00:00          NaN           NaN
10 2022-03-21 10:00:00   74811007.0  3.237817e+08
11 2022-03-22 10:00:00  165016596.0  5.291642e+08
12 2022-03-23 10:00:00          NaN           NaN
13 2022-03-24 10:00:00          NaN           NaN
14 2022-03-25 10:00:00          NaN           NaN
15 2022-03-26 10:00:00          NaN           NaN
16 2022-03-27 10:00:00          NaN           NaN


## Store Query

In [8]:
dset_conf = {
    'name':'example',
    'series':'hour',
    'freq':300,
    'retainDepth':86400*7,
    'retainSize':10000000,
    'run':False
}
# delete the dataset
await repo.del_dataset(100, missing_ok=True)
# create the dataset
dset = await repo.get_dataset(100, dset_conf)
# get the dataset
dset = await repo.get_dataset(100, dset_conf)
# delete the dataset
await repo.del_dataset(100, missing_ok=True)
# create the dataset
dset = await repo.set_dataset(100, dset_conf)
# update the dataset
dset = await repo.set_dataset(100, dset_conf)

In [9]:
pipe_conf = {
    'name':'example',
    'pipeline': {
        'scope': {
            'i-field': '@controller.home',
            'i-entry': [
                [True,'t:0:0:1',0]
            ],
            'mode': '%directional'
        },
        'bucket': [
            ['$distinctTuple', {
                'fields':['@flow.protocol'],
                'unwind':[False],
                'sizeLimit':256,
                'sortedBy':['$sum', {
                    'field':'@flow.bytes'
                }],
                'sortedDir':1,
                'nullOpt':False
            }]
        ],
        'metric': [
            ['$sum', {
                'field':'@flow.bytes'
            }]
        ]
    }
}
# delete the pipeline
await dset.del_pipeline(1001, missing_ok=True)
# create the pipeline
pipe = await dset.get_pipeline(1001, pipe_conf)
# get the pipeline
pipe = await dset.get_pipeline(1001, pipe_conf)
# delete the pipeline
await dset.del_pipeline(1001, missing_ok=True)
# create the pipeline
pipe = await dset.set_pipeline(1001, pipe_conf)
# updatge the pipeline
pipe = await dset.set_pipeline(1001, pipe_conf)

await asyncio.sleep(1)

In [10]:
# Timestamps for the store queries below: every 5 minutes over
# [12:00, 18:00) on 2022-03-09, i.e. 72 slots.
# 'min' replaces the deprecated 'T' minute alias, which newer pandas rejects.
dts = pd.date_range(start='2022-03-09 12:00:00', end='2022-03-09 18:00:00', freq='5min', inclusive='left')
# Alternative two-week range:
# dts = pd.date_range(start='2022-03-04 00:00:00', end='2022-03-18 00:00:00', freq='5min', inclusive='left')

In [11]:
# Call patch_data on the dataset for every timestamp in `dts`.
# NOTE(review): presumably this (re)computes the stored data for those
# slots - confirm against the ga_api documentation.
df_patch = await dset.patch_data(dts)

In [12]:
# Tally the values in column '1001' (the pipeline id as a string), counting
# missing values too. The saved output shows the value 1 for all 72 slots.
df_patch['1001'].value_counts(dropna=False)

1    72
Name: 1001, dtype: int64

In [13]:
# Call poll_data on the dataset for every timestamp in `dts`.
# NOTE(review): presumably this reports the per-slot state of the stored
# data - confirm against the ga_api documentation.
df_poll = await dset.poll_data(dts)

In [14]:
# Tally the values in column '1001' (the pipeline id as a string), counting
# missing values too. The saved output shows the value 1 for all 72 slots.
df_poll['1001'].value_counts(dropna=False)

1    72
Name: 1001, dtype: int64

In [15]:
# Read the stored pipeline results for every timestamp in `dts`.
# The names in `columns` label the columns of the returned frame.
df_data = await pipe.read_data(dts, columns=['timestamp','protocol','forward','opposite'])

In [16]:
# Show the rows that contain at least one missing value.
# should be 30
# NOTE(review): the saved output below lists 10 rows (14:30-15:15), not 30 -
# confirm which count is expected.
df_data[df_data.isna().any(axis=1)]

Unnamed: 0,timestamp,protocol,forward,opposite
134,2022-03-09 14:30:00,!all,,
135,2022-03-09 14:35:00,!all,,
136,2022-03-09 14:40:00,!all,,
137,2022-03-09 14:45:00,!all,,
138,2022-03-09 14:50:00,!all,,
139,2022-03-09 14:55:00,!all,,
140,2022-03-09 15:00:00,!all,,
141,2022-03-09 15:05:00,!all,,
142,2022-03-09 15:10:00,!all,,
143,2022-03-09 15:15:00,!all,,


In [17]:
# Display the full result frame (pandas truncates the middle rows).
df_data

Unnamed: 0,timestamp,protocol,forward,opposite
0,2022-03-09 12:00:00,!all,73327384.0,1.281103e+09
1,2022-03-09 12:00:00,6,56400693.0,1.139092e+09
2,2022-03-09 12:00:00,17,8017419.0,1.410606e+08
3,2022-03-09 12:00:00,1,4935120.0,1.751560e+05
4,2022-03-09 12:00:00,50,3974152.0,7.749200e+05
...,...,...,...,...
212,2022-03-09 17:55:00,!all,71612452.0,6.332233e+08
213,2022-03-09 17:55:00,6,38237044.0,5.335505e+08
214,2022-03-09 17:55:00,50,11737600.0,8.406992e+06
215,2022-03-09 17:55:00,17,11708783.0,9.106803e+07


## HyperLogLog

In [18]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-11 11:00:00', freq='5T', inclusive='left')
adhoc = await repo.set_adhoc(300, {
    'scope': {
        'i-field': '@controller.home',
        'i-entry': [
            [True,'t:0:0:1',0]
        ],
        'mode': '%directional'
    },
    'metric': [
        ['$distinct', {
            'fields':['@flow.addr.dst']
        }]
    ]
})
df = await adhoc.read_data(dts, 'hour', columns=['ts','forward','opposite'])
print(df)

                    ts forward opposite
0  2022-03-11 10:00:00   h:265    h:293
1  2022-03-11 10:05:00   h:265    h:337
2  2022-03-11 10:10:00   h:264    h:320
3  2022-03-11 10:15:00   h:288    h:346
4  2022-03-11 10:20:00   h:308    h:450
5  2022-03-11 10:25:00   h:316    h:449
6  2022-03-11 10:30:00   h:338    h:434
7  2022-03-11 10:35:00   h:335    h:391
8  2022-03-11 10:40:00   h:278    h:365
9  2022-03-11 10:45:00   h:313    h:380
10 2022-03-11 10:50:00   h:281    h:372
11 2022-03-11 10:55:00   h:263    h:367


In [19]:
# Combine the per-slot values of each column into one value per column.
# The saved output (h:1558 / h:2501) is smaller than the arithmetic total of
# the per-slot figures, so `+` on these objects is presumably a sketch union
# (distinct count over the whole hour), not numeric addition - confirm.
df2 = df.set_index('ts').sum()
df2

forward     h:1558
opposite    h:2501
dtype: object

In [20]:
# Convert each combined value to a number via HyperLogLog.value
# (the saved output is an int64 series).
df2.apply(HyperLogLog.value)

forward     1558
opposite    2501
dtype: int64

In [21]:
# Close the repository handle now that the walkthrough is finished.
await repo.close()