# Test Python Client

In [1]:
import logging
import os

import pandas as pd

from src.ga_api_package.ga_api_client import System, Repository, HyperLogLog, pprint
# Root-logger layout: timestamp, level, and the emitting file/function/line ahead of the message.
# No level is passed (the override below is commented out), so logging's default level applies.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='▸ %(asctime)s %(levelname)s %(filename)s:%(funcName)s(%(lineno)d) - %(message)s',
    # level=logging.INFO
)

## System

In [2]:
# Connection settings for the System API client. Each value can be overridden via an
# environment variable so credentials do not have to be edited into (or committed with)
# the notebook; the literals are the original lab values, kept as fallbacks so the
# notebook behaves exactly as before when the variables are unset.
# NOTE(review): the fallback password is still a plain-text secret — remove it once
# GA_SYSTEM_PASSWORD is provided wherever this notebook runs.
system = System(
    os.environ.get('GA_SYSTEM_URL', 'https://rdlab-214.genie-analytics.com:8443/api'),
    os.environ.get('GA_SYSTEM_USER', 'api'),
    os.environ.get('GA_SYSTEM_PASSWORD', 'api123!@#'),
)

In [3]:
# Fetch the system descriptor and pretty-print it; `system_desc['config']` is reused
# by the set_system cell that follows.
system_desc = await system.get_system()
pprint(system_desc)

{
    "config":{
        "burst":1,
        "retry":1,
        "maxTasks":65536
    },
    "status":{
        "created":"2022-02-17T10:18:06.714Z",
        "modified":"2022-03-17T08:30:14.982Z",
        "size":637,
        "maxId":36
    }
}

In [4]:
# Send the config that was just read straight back through set_system (no fields are
# changed here) and print what the call returns.
system_conf = await system.set_system(system_desc['config'])
pprint(system_conf)

{
    "config":{
        "burst":1,
        "retry":1,
        "maxTasks":65536
    },
    "status":{
        "created":"2022-02-17T10:18:06.714Z",
        "modified":"2022-03-17T08:33:57.144Z",
        "size":637,
        "maxId":36
    }
}

In [5]:
# List all tenants and pretty-print the result.
pprint(await system.get_all_tenants())

[
    {
        "_id":0,
        "config":{
            "limit":10000000000
        },
        "status":{
            "created":"2022-03-14T17:46:28.860Z",
            "modified":"2022-03-17T08:30:15.087Z",
            "size":0
        }
    }
]

In [6]:
# Create tenant 0 with the given config; the returned descriptor's 'config' is reused
# by the update_tenant cell that follows.
# NOTE(review): the recorded output shows a `created` timestamp older than this run, so
# tenant 0 apparently already existed — confirm how create_tenant treats an existing id.
tenant_desc = await system.create_tenant(0, {
    "limit":10000000000
})
pprint(tenant_desc)

{
    "_id":0,
    "config":{
        "limit":10000000000
    },
    "status":{
        "created":"2022-03-14T17:46:28.860Z",
        "modified":"2022-03-17T08:30:15.087Z",
        "size":0
    }
}

In [7]:
# Update tenant 0 using the config taken from the descriptor returned by create_tenant
# (unchanged), then print what the call returns.
tenant_conf = await system.update_tenant(0, tenant_desc['config'])
pprint(tenant_conf)

{
    "_id":0,
    "config":{
        "limit":10000000000
    },
    "status":{
        "created":"2022-03-14T17:46:28.860Z",
        "modified":"2022-03-17T08:33:57.322Z",
        "size":0
    }
}

In [8]:
# Read tenant 0 back and print it. Note this rebinds `tenant_conf`, replacing the value
# assigned by the update_tenant cell.
tenant_conf = await system.get_tenant(0)
pprint(tenant_conf)

{
    "_id":0,
    "config":{
        "limit":10000000000
    },
    "status":{
        "created":"2022-03-14T17:46:28.860Z",
        "modified":"2022-03-17T08:33:57.322Z",
        "size":0
    }
}

In [9]:
# Tenant deletion is left commented out in this notebook; uncomment to delete tenant 0.
# await system.delete_tenant(0)

## Repository

In [10]:
# Connection settings for the Repository API client. Each value can be overridden via an
# environment variable so credentials do not have to be edited into (or committed with)
# the notebook; the literals are the original lab values, kept as fallbacks so the
# notebook behaves exactly as before when the variables are unset.
# NOTE(review): the fallback password is still a plain-text secret — remove it once
# GA_REPO_PASSWORD is provided wherever this notebook runs.
repo = Repository(
    os.environ.get('GA_REPO_URL', 'https://rdlab-214.genie-analytics.com/api'),
    os.environ.get('GA_REPO_USER', 'api@default'),
    os.environ.get('GA_REPO_PASSWORD', 'api123!@#'),
)

## Using Dataset

In [11]:
# delete all datasets to have a clean environment
# (left commented out; uncomment only when removing every dataset is really intended)
# await repo.del_all_datasets()

In [12]:
dset_id = 13579
dset_conf = {
    'name':'example2',
    'series':'hour',
    'freq':300,
    'retainDepth':86400*7,
    'retainSize':10000000,
    'run':False
}
# create dataset
dset = await repo.set_dataset(dset_id, dset_conf)
# update dataset
dset = await repo.set_dataset(dset_id, dset_conf)
# get dataset
dset = await repo.get_dataset(dset_id)

In [13]:
pipe_conf = {
    'name':'example',
    'pipeline': {
        'scope': {
            'i-field': '@controller.home',
            'i-entry': [
                [True,'t:0:0:1',0]
            ],
            'mode': '%directional'
        },
        'metric': [
            ['$sum', {
                'field':'@flow.bytes'
            }]
        ]
    }
}
# create pipeline
pipe = await dset.set_pipeline(1001, pipe_conf)
# update pipeline
pipe = await dset.set_pipeline(1001, pipe_conf)
# get pipeline
pipe = await dset.get_pipeline(1001)

In [14]:
# patch dataset data
dts = pd.date_range('2022-03-11 12:00:00+08:00', end='2022-03-11 13:00:00+08:00', freq='5T')
print(await dset.patch_data(dts))

                           1001
timestamp                      
2022-03-11 12:00:00+08:00     1
2022-03-11 12:05:00+08:00     1
2022-03-11 12:10:00+08:00     1
2022-03-11 12:15:00+08:00     1
2022-03-11 12:20:00+08:00     1
2022-03-11 12:25:00+08:00     1
2022-03-11 12:30:00+08:00     1
2022-03-11 12:35:00+08:00     1
2022-03-11 12:40:00+08:00     1
2022-03-11 12:45:00+08:00     1
2022-03-11 12:50:00+08:00     1
2022-03-11 12:55:00+08:00     1
2022-03-11 13:00:00+08:00     1


In [15]:
# poll dataset data for the same timestamps (`dts`) that were passed to patch_data
print(await dset.poll_data(dts))

                           1001
timestamp                      
2022-03-11 12:00:00+08:00     1
2022-03-11 12:05:00+08:00     1
2022-03-11 12:10:00+08:00     1
2022-03-11 12:15:00+08:00     1
2022-03-11 12:20:00+08:00     1
2022-03-11 12:25:00+08:00     1
2022-03-11 12:30:00+08:00     1
2022-03-11 12:35:00+08:00     1
2022-03-11 12:40:00+08:00     1
2022-03-11 12:45:00+08:00     1
2022-03-11 12:50:00+08:00     1
2022-03-11 12:55:00+08:00     1
2022-03-11 13:00:00+08:00     1


In [16]:
# read dataset data
# Reads the pipeline's data for the `dts` timestamps, requesting the three named columns,
# and prints the resulting frame.
df = await pipe.read_data(dts, columns=['ts','forward','opposite'])
print(df)

                          ts   forward   opposite
0  2022-03-11 12:00:00+08:00  58809744  349717483
1  2022-03-11 12:05:00+08:00  44572192  562492493
2  2022-03-11 12:10:00+08:00  49195654  620673105
3  2022-03-11 12:15:00+08:00  46527559  845228917
4  2022-03-11 12:20:00+08:00  34982007  614685028
5  2022-03-11 12:25:00+08:00  57100159  911658581
6  2022-03-11 12:30:00+08:00  31518823  625671660
7  2022-03-11 12:35:00+08:00  37538293  508386394
8  2022-03-11 12:40:00+08:00  46701163  831596278
9  2022-03-11 12:45:00+08:00  47010361  428794089
10 2022-03-11 12:50:00+08:00  47534735  696814504
11 2022-03-11 12:55:00+08:00  38657516  474305205
12 2022-03-11 13:00:00+08:00  43047044  689049150


In [17]:
# delete pipeline 1001 (the id used with set_pipeline / get_pipeline earlier)
await dset.del_pipeline(1001)

In [18]:
# delete all pipelines of this dataset
await dset.del_all_pipelines()

In [19]:
# delete the dataset created earlier in this notebook (id held in `dset_id`)
await repo.del_dataset(dset_id)

In [20]:
# delete all datasets
# (left commented out; uncomment only when removing every dataset is really intended)
# await repo.del_all_datasets()

### HyperLogLog

In [21]:
adhoc = await repo.set_adhoc(300, {
    'scope': {
        'i-field': '@controller.home',
        'i-entry': [
            [True,'t:0:0:1',0]
        ],
        'mode': '%directional'
    },
    'metric': [
        ['$distinct', {
            'fields':['@flow.addr.dst']
        }]
    ]
})
dts = pd.date_range('2022-03-11 12:00:00+08:00', end='2022-03-11 13:00:00+08:00', freq='5T')
df = await adhoc.read_data(dts, 'hour', columns=['ts','forward','opposite'])
print(df)

                          ts        forward       opposite
0  2022-03-11 12:00:00+08:00  <HyperLogLog>  <HyperLogLog>
1  2022-03-11 12:05:00+08:00  <HyperLogLog>  <HyperLogLog>
2  2022-03-11 12:10:00+08:00  <HyperLogLog>  <HyperLogLog>
3  2022-03-11 12:15:00+08:00  <HyperLogLog>  <HyperLogLog>
4  2022-03-11 12:20:00+08:00  <HyperLogLog>  <HyperLogLog>
5  2022-03-11 12:25:00+08:00  <HyperLogLog>  <HyperLogLog>
6  2022-03-11 12:30:00+08:00  <HyperLogLog>  <HyperLogLog>
7  2022-03-11 12:35:00+08:00  <HyperLogLog>  <HyperLogLog>
8  2022-03-11 12:40:00+08:00  <HyperLogLog>  <HyperLogLog>
9  2022-03-11 12:45:00+08:00  <HyperLogLog>  <HyperLogLog>
10 2022-03-11 12:50:00+08:00  <HyperLogLog>  <HyperLogLog>
11 2022-03-11 12:55:00+08:00  <HyperLogLog>  <HyperLogLog>
12 2022-03-11 13:00:00+08:00  <HyperLogLog>  <HyperLogLog>


In [22]:
# Index by timestamp and sum each remaining column, leaving one HyperLogLog per column
# (see the output below).
# NOTE(review): this relies on HyperLogLog supporting `+`; presumably the sum merges the
# per-interval sketches — confirm in the client library.
df2 = df.set_index('ts').sum()
df2

forward     <HyperLogLog>
opposite    <HyperLogLog>
dtype: object

In [23]:
# Call HyperLogLog.value on each summed object to turn it into an integer
# (the output's dtype is int64).
df2.apply(HyperLogLog.value)

forward     1056
opposite    1114
dtype: int64

In [24]:
# Close the repository client.
# NOTE(review): `system` is never closed anywhere in this notebook — confirm whether
# System needs an equivalent close() call.
await repo.close()