# BULKI_Store Demo

## Starting Server

In [50]:

# Set RUST_LOG=debug in the kernel's environment.
# NOTE(review): presumably consumed by the Rust-side logging of BULKI_Store — confirm.
%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
Let's start


In [51]:
# Optional (disabled): launch the BULKI_Store server from inside the notebook.
# NOTE(review): if this is re-enabled, the mpirun path is machine-specific, and
# passing env={'RUST_LOG': 'debug'} to Popen REPLACES the whole environment
# (PATH etc. are dropped) — merge with os.environ instead.
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [52]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


bkc.init()  # initialise the bkstore client before any other bkc call
dim_size = 15

n_elements = dim_size**5
print("5D array size = {} elements".format(n_elements))

# Two random 5D operands of identical shape (dim_size along every axis).
shape_5d = (dim_size,) * 5
rd1 = np.random.rand(*shape_5d)
rd2 = np.random.rand(*shape_5d)

# Index that keeps only the first two rows of the trailing 2D plane.
preview_5d = (slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, None))

print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[preview_5d])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[preview_5d])

# A smaller 3D array; it later becomes the payload of the container object.
shape_3d = (dim_size,) * 3
arr3 = np.random.rand(*shape_3d)
preview_3d = (slice(0, 1), slice(0, 2), slice(0, None))
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[preview_3d])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.85145864 0.00792319 0.95261065 0.61995173 0.03597683 0.75336802
     0.83681386 0.14653973 0.79083424 0.10668189 0.70232073 0.46730604
     0.4576554  0.48172623 0.63273956]
    [0.7660509  0.49724709 0.08379918 0.65618082 0.1593293  0.91335306
     0.25145069 0.25613072 0.32150132 0.27294604 0.5855969  0.91566433
     0.64381021 0.12548458 0.856787  ]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.82130492 0.72050228 0.51056194 0.96958949 0.36870883 0.97647234
     0.82174002 0.82823948 0.53608928 0.20676045 0.62776169 0.30974611
     0.94277592 0.52523779 0.96963767]
    [0.14220489 0.70166777 0.58417155 0.29077307 0.54502693 0.04387415
     0.49914566 0.03459516 0.69316477 0.88370183 0.80681569 0.10982847
     0.69772312 0.38980817 0.8571029 ]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.21500062 0.38815759 0.78434518 0.42522725 0.2000731  0.98673409
   0.

## Client-Side NDArray Arithmetic Operation

In [53]:
# Add the two 5D operands through the client API; the preview below shows the
# element-wise sums of the rd1/rd2 previews.
arr4 = bkc.polymorphic_add(rd1, rd2)

arr4_preview = (slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, None))
print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:", arr4[arr4_preview], sep="\n")

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[1.67276356 0.72842547 1.46317259 1.58954122 0.40468566 1.72984036
     1.65855387 0.97477921 1.32692352 0.31344234 1.33008242 0.77705215
     1.40043132 1.00696402 1.60237723]
    [0.90825579 1.19891486 0.66797073 0.94695388 0.70435623 0.9572272
     0.75059636 0.29072587 1.01466608 1.15664786 1.39241259 1.0254928
     1.34153333 0.51529275 1.7138899 ]]]]]


## Remote Array Slicing

In [54]:
# Slice arr4 through the store's slicing call.
# NOTE(review): a negative step does not follow NumPy semantics here. Per the
# printed result, slice(0, 10, -2) selects indices 9, 7, 5, 3, 1 of the last
# axis (the 0..10 window walked backwards), whereas NumPy would return nothing.
slice_spec = [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
arr5 = bkc.array_slicing(arr4, slice_spec)

arr5_preview = (slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, None))
print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:", arr5[arr5_preview], sep="\n")

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[0.31344234 0.97477921 1.72984036 1.58954122 0.72842547]
    [1.15664786 0.29072587 0.9572272  0.94695388 1.19891486]]]]]


## Remote Array Arithmetic Operation

In [55]:
# Double every element of the sliced array via the store's arithmetic call.
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:", arr6, sep="\n")

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[0.62688468 1.94955841 3.45968071 3.17908244 1.45685094]
    [2.31329573 0.58145174 1.91445441 1.89390777 2.39782972]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [56]:
# Create one container object (payload: arr3) with six NDArray sub-objects:
# the four arrays built above plus two larger 2D "GNN" arrays.

# Random six-digit tag that namespaces this run's sub-object names.
# Drawn as an integer: slicing str(float) could leak a "." or an exponent
# into the name whenever the random value was below 100000.
name_id = "{:06d}".format(np.random.randint(0, 1_000_000))

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

# Single source of truth for the sub-objects, in creation order. The key list,
# the per-array metadata and the payload list are all derived from it, so a
# sub-object can no longer be registered with another array's shape.
sub_arrays = [
    ("arr6", arr6),
    ("arr5", arr5),
    ("arr4", arr4),
    ("arr3", arr3),
    ("gnn_arr1", gnn_arr1),
    ("gnn_arr2", gnn_arr2),
]
sub_names = ["{}/{}".format(name_id, label) for label, _array in sub_arrays]

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": "container",
        "type": "container",
        "keys": sub_names,
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=[
        {
            "name": sub_name,
            "type": "array",
            "shape": array.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        }
        for sub_name, (_label, array) in zip(sub_names, sub_arrays)
    ],
    array_data_list=[array for _label, array in sub_arrays],
)
# Seven ids come back: the container first, then one per sub-object.
print(obj_ids)

[32120020439695301487090392415755557, 32120020439695412167554838968032549, 32120020439695670421971875196722469, 32120020439695817995924469168102693, 32120020439704856900520591143361829, 32120020439705170495169848500706597, 32120020439707052063065371169938725]


In [57]:
# Show the tag that prefixes every sub-object name of this run.
print(name_id)

870785


## Retrieve Metadata

### Retrieving a single metadata attribute from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [58]:
# Benchmark: fetch ONE metadata attribute per request from the container,
# addressed by its numeric object id. "title" was never set on the container,
# so that request comes back with an empty metadata dict.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

preview = []  # first response per key; printed only after timing has stopped
start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute
    )
    if i < len(meta_keys):
        preview.append(result)
# Take the elapsed time once, right after the loop: both rates must be derived
# from the same interval, and console output must not be billed to the store.
elapsed = time.perf_counter() - start

for response in preview:
    print(response)

# One attribute per request, so MetaPS equals TPS in this scenario.
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120020439695301487090392415755557, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'keys': ['870785/arr6', '870785/arr5', '870785/arr4', '870785/arr3', '870785/gnn_arr1', '870785/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'name': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1323.148916651761 MetaPS: 1323.1472470342546


In [59]:
# Same benchmark as the previous cell, but the container is addressed by its
# name ("container") instead of its numeric id; the responses carry the same
# obj_id either way.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

preview = []  # first response per key; printed only after timing has stopped
start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id="container",
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute
    )
    if i < len(meta_keys):
        preview.append(result)
# Take the elapsed time once, right after the loop: both rates must be derived
# from the same interval, and console output must not be billed to the store.
elapsed = time.perf_counter() - start

for response in preview:
    print(response)

# One attribute per request, so MetaPS equals TPS in this scenario.
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120020439695301487090392415755557, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'keys': ['870785/arr6', '870785/arr5', '870785/arr4', '870785/arr3', '870785/gnn_arr1', '870785/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'name': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695301487090392415755557, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1265.5455602605255 MetaPS: 1265.5425054477143


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

There is a slight drop in TPS, but a large improvement in MetaPS, because each request now returns several attributes at once.

In [60]:
# Benchmark: fetch ALL listed metadata attributes of the container in a single
# request, repeated n_requests times.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so TPS
# and MetaPS are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Each request carries len(meta_keys) attributes, hence the MetaPS multiplier.
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'keys': ['870785/arr6',
                       '870785/arr5',
                       '870785/arr4',
                       '870785/arr3',
                       '870785/gnn_arr1',
                       '870785/gnn_arr2'],
              'name': 'container',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32120020439695301487090392415755557,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=1203.0017220550392, MetaPS: 7217.993770328177 


### Retrieving a single attribute from a single sub-object

In [61]:
# Benchmark: fetch ONE metadata attribute per request from a single sub-object.
# obj_ids[1] is the first sub-object id returned by create_objects.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

preview = []  # first response per key; printed only after timing has stopped
start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=obj_ids[1],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute
    )
    if i < len(meta_keys):
        preview.append(result)
# Take the elapsed time once, right after the loop: both rates must be derived
# from the same interval, and console output must not be billed to the store.
elapsed = time.perf_counter() - start

for response in preview:
    print(response)

# One attribute per request, so MetaPS equals TPS in this scenario.
print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120020439695412167554838968032549, 'metadata': {'name': '870785/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695412167554838968032549, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695412167554838968032549, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695412167554838968032549, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695412167554838968032549, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32120020439695412167554838968032549, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1366.3425231722258, MetaPS: 1366.3402976672912


### Retrieving multiple attributes from a single sub-object

In [62]:
# Benchmark: fetch ALL listed metadata attributes of one sub-object
# (obj_ids[1]) in a single request, repeated n_requests times.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so TPS
# and MetaPS are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Each request carries len(meta_keys) attributes, hence the MetaPS multiplier.
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '870785/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32120020439695412167554838968032549,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1269.7737641492013, MetaPS: 7618.631052662649


### Retrieving multiple attributes from multiple sub-objects

In [63]:
# Benchmark: one request to the container returns the listed attributes for
# each of its sub-objects.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retrieving MULTIPLE attributes for every sub-object
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so TPS
# and MetaPS are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)

# Count the attributes actually delivered per request instead of assuming a
# fixed number of sub-objects (the earlier hard-coded factor was 3, although
# six sub-objects are created in this notebook).
attrs_per_request = sum(
    len(sub["metadata"]) for sub in (result["sub_obj_metadata"] or [])
)
print(
    "Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32120020439695301487090392415755557,
 'sub_obj_metadata': [{'metadata': {'name': '870785/arr3',
                                    'shape': [15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '870785/arr3',
                       'obj_id': 32120020439704856900520591143361829},
                      {'metadata': {'name': '870785/gnn_arr2',
                                    'shape': [1, 100000],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '870785/gnn_arr2',
                       'obj_id': 32120020439707052063065371169938725},
                      {'metadata': {'name': '870785/ar

### Retrieving different sets of attributes from multiple sub-objects

In [64]:
# Benchmark: one request asks for a DIFFERENT attribute set per sub-object,
# keyed by sub-object name.
sub_meta_keys = {
    "{}/arr5".format(name_id): ["name", "type", "voffset"],
    "{}/arr4".format(name_id): ["type", "vcount"],
}
# Attributes requested per call (3 + 2 = 5), derived from the request itself.
attrs_per_request = sum(len(keys) for keys in sub_meta_keys.values())
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=sub_meta_keys,  # different set of metadata per sub-object
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so TPS
# and MetaPS are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32120020439695301487090392415755557,
 'sub_obj_metadata': [{'metadata': {'name': '870785/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '870785/arr5',
                       'obj_id': 32120020439695670421971875196722469},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '870785/arr4',
                       'obj_id': 32120020439695817995924469168102693}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1164.6894206159523, MetaPS: 5823.440634783997


### Retrieving an array slice of a single object

In [65]:
# Benchmark: fetch a tiny slice of the container's own 3D array.
# The negative step returns the selected window in reverse order (see the
# printed slice: elements 1 and 0 of the first row).
region = [slice(0, 1), slice(0, 1), slice(0, 2, -1)]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so both
# rates are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the 1/6 factor in Sample/s is carried over unchanged; its
# meaning is not evident from this notebook — confirm the intended definition.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        n_requests / elapsed, (n_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.38815759, 0.21500062]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=976.0848615649411, Sample/s = 162.6806588267564


In [66]:
# Benchmark: fetch a larger slab of the container's 3D array — rows 12..13 of
# the first axis, with the other two axes taken in full.
region = [
    slice(12, 14, None),
    slice(None),
    slice(None),
]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so both
# rates are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the 1/6 factor in Sample/s is carried over unchanged; its
# meaning is not evident from this notebook — confirm the intended definition.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        n_requests / elapsed, (n_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.13971352, 0.344912  , 0.70589283, 0.12456286, 0.31616676,
         0.14178969, 0.83472996, 0.10697274, 0.63169268, 0.5869323 ,
         0.30328417, 0.1337475 , 0.22274836, 0.65543588, 0.7808997 ],
        [0.86444773, 0.11851404, 0.86942868, 0.39203953, 0.08136141,
         0.21682425, 0.28476913, 0.86307273, 0.79602988, 0.33951306,
         0.86460166, 0.00413574, 0.45673864, 0.76562139, 0.59876452],
        [0.09699953, 0.78196393, 0.39856955, 0.36148485, 0.59131208,
         0.31240976, 0.03668813, 0.84166418, 0.24085435, 0.73096499,
         0.58172256, 0.87425856, 0.34585079, 0.96079691, 0.9433824 ],
        [0.71177256, 0.80332992, 0.14899462, 0.31178127, 0.35485289,
         0.85309387, 0.2107575 , 0.18355561, 0.40310336, 0.0342445 ,
         0.66580666, 0.21719523, 0.09868609, 0.0798657 , 0.3653543 ],
        [0.9182919 , 0.77512651, 0.28366292, 0.7951347 , 0.24686877,
         0.00644946, 0.56700381, 0.4060698 , 0.18023441, 0.75260158,
         0.076

### Retrieving array slices of multiple sub-objects (Large subarray included)

In [67]:
# Benchmark: a single request returns a slice of the container's own array
# plus one slice from each of four named sub-objects.
region = [slice(0, 1), slice(0, 2), slice(0, 2, -1)]
sub_obj_regions = [
    (
        "{}/arr5".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
    (
        "{}/arr4".format(name_id),
        [slice(0, 2, -1), slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    ),
    (
        "{}/arr3".format(name_id),
        [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
    ),
    (
        "{}/arr6".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so both
# rates are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the 4/6 divisor in Sample/s is carried over unchanged; its
# meaning is not evident from this notebook — confirm the intended definition.
print(
    "\n retrieving array slices of a container and its sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, (n_requests / (4 / 6)) / elapsed
    )
)

{'array_slice': array([[[0.38815759, 0.21500062],
        [0.51290228, 0.34568329]]]),
 'sub_obj_slices': [{'array': array([[[[[0.97477921, 0.31344234],
          [0.29072587, 1.15664786]]]]]),
                     'id': 32120020439695670421971875196722469,
                     'name': '870785/arr5'},
                    {'array': array([[[[[0.8097401 , 1.49375079]]]],



       [[[[0.72842547, 1.67276356]]]]]),
                     'id': 32120020439695817995924469168102693,
                     'name': '870785/arr4'},
                    {'array': array([[[0.88765268, 0.54672274, 0.98673409, 0.42522725, 0.38815759],
        [0.20896375, 0.81137161, 0.21298959, 0.06669897, 0.51290228]]]),
                     'id': 32120020439704856900520591143361829,
                     'name': '870785/arr3'},
                    {'array': array([[[[[1.94955841, 0.62688468],
          [0.58145174, 2.31329573]]]]]),
                     'id': 32120020439695412167554838968032549,
                     '

### Retrieving slices of multiple sub-objects (only GNN arrays)

In [68]:
# Benchmark: a single request returns six slices taken from the two GNN
# sub-objects; region=None skips the container's own array (the response
# carries 'array_slice': None).
gnn_arr1_name = "{}/gnn_arr1".format(name_id)
gnn_arr2_name = "{}/gnn_arr2".format(name_id)
sub_obj_regions = [
    (gnn_arr1_name, [slice(0, 20), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 10, -1), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 15, -1), slice(0, 2)]),
    # The same gnn_arr2 window is deliberately requested three times.
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing, and reuse one elapsed value so both
# rates are computed over the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n retrieving array slices of GNN sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.16624375],
       [0.09193581],
       [0.79017957],
       [0.19551637],
       [0.259979  ],
       [0.28815188],
       [0.81531725],
       [0.63885858],
       [0.72362413],
       [0.10604527],
       [0.95976747],
       [0.83775038],
       [0.73223697],
       [0.50949976],
       [0.91839263],
       [0.94956843],
       [0.21504789],
       [0.06508718],
       [0.85484235],
       [0.75350255]]),
                     'id': 32120020439705170495169848500706597,
                     'name': '870785/gnn_arr1'},
                    {'array': array([[0.10604527],
       [0.72362413],
       [0.63885858],
       [0.81531725],
       [0.28815188],
       [0.259979  ],
       [0.19551637],
       [0.79017957],
       [0.09193581],
       [0.16624375]]),
                     'id': 32120020439705170495169848500706597,
                     'name': '870785/gnn_arr1'},
                    {'array': array([[0.91839263, 0.483419

## Closing the Store

In [69]:
# Optional (disabled): stop the server again. Only meaningful when the
# commented-out launch cell near the top was used to create `server_process`.
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")