# Using the Python Client

In [4]:
import asyncio
import getpass
import logging
import os

import pandas as pd

from ga_api import System, Repository, HyperLogLog, FieldTable as ftbl, pprint
# Configure the root logger once for the whole notebook: INFO threshold, and a
# '▸'-prefixed line carrying timestamp, level, and file:function(line) of the caller.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='▸ %(asctime)s %(levelname)s %(filename)s:%(funcName)s(%(lineno)d) - %(message)s',
)

In [5]:
# Connection settings are read from the environment so that no secret is stored
# in the notebook. Set GA_API_URL to target a different server; when
# GA_API_PASSWORD is unset the password is prompted for interactively.
GA_API_URL = os.environ.get('GA_API_URL', 'https://163.13.240.232/api')
GA_API_PASSWORD = os.environ.get('GA_API_PASSWORD') or getpass.getpass('GA API password: ')

# NOTE(review): the fourth positional argument looks like the account password,
# and ssl=False presumably turns off TLS certificate verification — confirm both
# against the ga_api client before using this outside a lab network.
repo = Repository(GA_API_URL, 'root', 'default', GA_API_PASSWORD, ssl=False, burst=4, retry=1)
# Alternative deployment previously used here (password deliberately omitted):
# repo = Repository('https://192.168.11.222/api', 'api', 'default', GA_API_PASSWORD, ssl=False, burst=4)

In [6]:
# List every dataset in the repository; the result is rendered as the table below.
await repo.show_datasets()

▸ 2022-06-03 18:21:55 INFO ga_api_client.py:_authenticate(106) - bringup
▸ 2022-06-03 18:21:56 INFO ga_api_client.py:_authenticate(149) - authenticate done
▸ 2022-06-03 18:21:56 INFO ga_api_client.py:get_all_datasets(355) - status=200 rlt=[{'_id': 1, 'tenant': 0, 'config': {'name': 'performance', 'series': 'full', 'freq': 300, 'retainDepth': 2592000, 'retainSize': 10000000000, 'run': True}, 'status': {'created': '2022-05-27T05:30:55.552Z', 'modified': '2022-06-02T10:40:28.854Z', 'nextTick': '2022-06-03T10:20:00.000Z', 'oldest': '2022-05-23T07:05:00.000Z', 'size': 53593637}}, {'_id': 999, 'tenant': 0, 'config': {'name': 'test1', 'series': 'full', 'freq': 60, 'retainDepth': 360000, 'retainSize': 1000000000, 'run': False}, 'status': {'created': '2022-05-31T07:37:09.606Z', 'modified': '2022-05-31T07:37:09.606Z', 'nextTick': None, 'oldest': '2022-05-31T07:36:00.000Z', 'size': 116}}]


Unnamed: 0,name,series,freq,retainDepth,retainSize,run,created,modified,nextTick,oldest,size
1,performance,full,300,2592000,10000000000,True,2022-05-27T05:30:55.552Z,2022-06-02T10:40:28.854Z,2022-06-03T10:20:00.000Z,2022-05-23T07:05:00.000Z,53593637
999,test1,full,60,360000,1000000000,False,2022-05-31T07:37:09.606Z,2022-05-31T07:37:09.606Z,,2022-05-31T07:36:00.000Z,116


In [9]:
# Fetch a handle to dataset id 1 (named 'performance' in the dataset listing).
dset = await repo.get_dataset(1)

▸ 2022-06-03 18:22:49 INFO ga_api_client.py:get_dataset(376) - status=200 dsid=1


In [10]:
# List the pipelines that belong to the dataset held in `dset`.
await dset.show_pipelines()

▸ 2022-06-03 18:23:00 INFO ga_api_client.py:get_all_pipelines(404) - status=200 dsid=1


Unnamed: 0,name,created,modified,oldest,size
100,top_ott,2022-05-30T07:05:32.109Z,2022-06-02T10:40:28.924Z,2022-05-23T07:05:00.000Z,15202359
101,top_cdn,2022-05-30T07:05:32.142Z,2022-06-02T10:40:29.031Z,2022-05-23T07:05:00.000Z,23541891


In [12]:
await dset.del_pipeline(101)

▸ 2022-06-03 18:23:16 INFO ga_api_client.py:delete_pipeline(447) - status=204 dsid=1 plid=101


## Adhoc Query

In [None]:
adhoc = await repo.set_adhoc(300, {
    'scope': {
        'i-field': '@controller.home',
        'i-entry': [
            [True,'t:0:0:1',0]
        ],
        'mode': '%directional'
    },
    'metric': [
        ['$sum', {
            'field':'@flow.bytes'
        }]
    ]
})

### Point Query

In [None]:
df_point = await adhoc.read_data('2022-03-11 10:00:00', 'hour', columns=['forward','opposite'])
print(df_point)

In [None]:
df_point_0800 = await adhoc.read_data('2022-03-11 10:00:00+0800', 'hour', columns=['forward','opposite'])
df_point_0900 = await adhoc.read_data('2022-03-11 11:00:00+0900', 'hour', columns=['forward','opposite'])
print(df_point_0800)
print(df_point_0900)

### Range Query

In [None]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-11 11:00:00', freq='5T', inclusive='left')
df_range = await adhoc.read_data(dts, 'hour', columns=['timstamp','forward','opposite'])
print(df_range)

### Sample Query

In [None]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-27 10:00:00', freq='D')
df_range = await adhoc.read_data(dts, 'hour', columns=['timstamp','forward','opposite'])
print(df_range)

## Store Query

In [None]:
# Configuration for the scratch dataset (id 100) used in this section.
dset_conf = {
    'name':'example',
    'series':'hour',
    'freq':300,
    'retainDepth':86400*7,  # 7 * 86400; presumably seven days in seconds — confirm units
    'retainSize':10000000,
    'run':False
}
# The calls below walk through both ways of obtaining a dataset handle. Their
# order matters: each delete resets the state for the call that follows it.

# delete the dataset (missing_ok=True is passed so an absent dataset is tolerated)
await repo.del_dataset(100, missing_ok=True)
# create the dataset (presumably get_dataset with a config creates it when absent — confirm)
dset = await repo.get_dataset(100, dset_conf)
# get the dataset (same call again, now that dataset 100 exists)
dset = await repo.get_dataset(100, dset_conf)
# delete the dataset again so set_dataset starts from a clean state
await repo.del_dataset(100, missing_ok=True)
# create the dataset via set_dataset
dset = await repo.set_dataset(100, dset_conf)
# update the dataset (same call again on the now-existing dataset)
dset = await repo.set_dataset(100, dset_conf)

In [None]:
# Configuration for the example pipeline (id 1001): same scope and '$sum' metric
# as the ad hoc query, plus a '$distinctTuple' bucket on '@flow.protocol'.
pipe_conf = {
    'name':'example',
    'pipeline': {
        'scope': {
            'i-field': '@controller.home',
            'i-entry': [
                [True,'t:0:0:1',0]
            ],
            'mode': '%directional'
        },
        'bucket': [
            ['$distinctTuple', {
                'fields':['@flow.protocol'],
                'unwind':[False],
                'sizeLimit':256,
                # NOTE(review): presumably ranks tuples by summed bytes; the
                # meaning of sortedDir=1 (ascending vs. descending) is not
                # visible here — confirm against the API reference.
                'sortedBy':['$sum', {
                    'field':'@flow.bytes'
                }],
                'sortedDir':1,
                'nullOpt':False
            }]
        ],
        'metric': [
            ['$sum', {
                'field':'@flow.bytes'
            }]
        ]
    }
}
# The calls below mirror the dataset walkthrough; their order matters because
# each delete resets the state for the call that follows it.

# delete the pipeline (missing_ok=True is passed so an absent pipeline is tolerated)
await dset.del_pipeline(1001, missing_ok=True)
# create the pipeline (presumably get_pipeline with a config creates it when absent — confirm)
pipe = await dset.get_pipeline(1001, pipe_conf)
# get the pipeline (same call again, now that pipeline 1001 exists)
pipe = await dset.get_pipeline(1001, pipe_conf)
# delete the pipeline again so set_pipeline starts from a clean state
await dset.del_pipeline(1001, missing_ok=True)
# create the pipeline via set_pipeline
pipe = await dset.set_pipeline(1001, pipe_conf)
# update the pipeline (same call again on the now-existing pipeline)
pipe = await dset.set_pipeline(1001, pipe_conf)

# Pause for one second; presumably gives the server time to settle before the
# following cells use the pipeline — confirm whether this is still required.
await asyncio.sleep(1)

In [None]:
# Five-minute timestamps for the six days starting 2022-03-04; the end bound is
# excluded by inclusive='left'. '5min' replaces the '5T' alias, which newer
# pandas releases deprecate.
dts = pd.date_range(start='2022-03-04 00:00:00', end='2022-03-10 00:00:00', freq='5min', inclusive='left')
# Alternative windows kept for manual experimentation:
# dts = pd.date_range(start='2022-03-09 12:00:00', end='2022-03-09 18:00:00', freq='5min', inclusive='left')
# dts = pd.date_range(start='2022-03-04 00:00:00', end='2022-03-18 00:00:00', freq='5min', inclusive='left')

In [None]:
# Call patch_data on the dataset for every timestamp in dts and keep the result for inspection.
df_patch = await dset.patch_data(dts)

In [None]:
# Tally the values in column '1001' (presumably keyed by the example pipeline's id), NaN included.
df_patch['1001'].value_counts(dropna=False)

In [None]:
# Call poll_data on the dataset for the same timestamps and keep the result for inspection.
df_poll = await dset.poll_data(dts)

In [None]:
# Tally the values in column '1001' of the poll result, NaN included.
df_poll['1001'].value_counts(dropna=False)

In [None]:
# Read the example pipeline's data for every timestamp in dts.
# columns= presumably supplies the labels for the returned columns — confirm.
df_data = await pipe.read_data(dts, columns=['timestamp','protocol','forward','opposite'])

In [None]:
# Show the rows of the patch result whose '1001' column equals 0.
df_patch[df_patch['1001'] == 0]

In [None]:
# Show the rows that have a missing value in at least one column.
# Original author's note: should be 30 (presumably the expected number of such rows — confirm).
df_data[df_data.isna().any(axis=1)]

In [None]:
# Show the rows whose protocol is the literal '!all' (presumably an all-protocols aggregate row — confirm).
df_data[df_data.protocol=='!all']

## HyperLogLog

In [None]:
dts = pd.date_range(start='2022-03-11 10:00:00', end='2022-03-11 11:00:00', freq='5T', inclusive='left')
adhoc = await repo.set_adhoc(300, {
    'scope': {
        'i-field': '@controller.home',
        'i-entry': [
            [True,'t:0:0:1',0]
        ],
        'mode': '%directional'
    },
    'metric': [
        ['$distinct', {
            'fields':['@flow.addr.dst']
        }]
    ]
})
df = await adhoc.read_data(dts, 'hour', columns=['ts','forward','opposite'])
print(df)

In [None]:
# Index by the 'ts' column, then sum each remaining column down the rows.
# NOTE(review): presumably every cell holds a HyperLogLog sketch and addition
# merges sketches across timestamps — confirm against the ga_api client.
df2 = df.set_index('ts').sum()
df2

In [None]:
# Apply HyperLogLog.value to each summed entry (presumably yields the estimated distinct count — confirm).
df2.apply(HyperLogLog.value)

In [None]:
# Close the repository client at the end of the walkthrough.
await repo.close()