# BULKI_Store Demo

## Starting Server

In [70]:
# Set RUST_LOG=debug in this kernel's environment using the IPython %env magic.
# NOTE(review): presumably read by the Rust-based BULKI_Store components to
# enable debug logging — confirm which process actually consumes it.
%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
env: RUST_LOG=debug
Let's start


In [71]:
# Disabled: optional launch of the server from inside the notebook.
# NOTE(review): the mpirun path below is machine-specific (Homebrew on macOS),
# and passing env={...} to Popen replaces the whole environment rather than
# extending it — revisit both before re-enabling.
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [1]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


# Initialise the BULKI_Store client before any other bkc call in this notebook.
bkc.init()

# Edge length shared by every axis of the demo arrays.
dim_size = 15
shape_5d = (dim_size,) * 5
shape_3d = (dim_size,) * 3

print("5D array size = {} elements".format(dim_size**5))

# Two random 5D operands for the arithmetic demo (drawn in this order: rd1, rd2).
rd1 = np.random.rand(*shape_5d)
rd2 = np.random.rand(*shape_5d)

# Preview only the first two rows of the last two axes to keep the output short.
print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[:1, :1, :1, :2, :])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[:1, :1, :1, :2, :])

# A smaller 3D array, used later as the container object's own data.
arr3 = np.random.rand(*shape_3d)
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[:1, :2, :])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.52247269 0.77778029 0.15351166 0.89299104 0.1023923  0.52751194
     0.85338196 0.95815383 0.49929548 0.61646084 0.82078631 0.15420129
     0.89174536 0.14580323 0.91852354]
    [0.94034599 0.83270734 0.15984929 0.50767306 0.22507279 0.82199182
     0.38865491 0.10077313 0.56038047 0.00373694 0.96652496 0.91124554
     0.51997611 0.44493788 0.67523469]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.99874613 0.53077038 0.02678497 0.04152164 0.64095635 0.3746379
     0.44449203 0.79953359 0.73165791 0.60451205 0.78808039 0.06174522
     0.9382103  0.54597625 0.97281119]
    [0.80957214 0.63637043 0.00824206 0.12996641 0.55501843 0.78909795
     0.56508667 0.68657011 0.35606286 0.7292414  0.76355382 0.97528064
     0.08803698 0.47605655 0.27814141]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.99487847 0.05262452 0.85029812 0.15565602 0.14166324 0.16234166
   0.2

## Client-Side NDArray Arithmetic Operation

In [2]:
# Element-wise sum of the two 5D arrays through the client API.
arr4 = bkc.polymorphic_add(rd1, rd2)

# Show the shape, then the first two rows of the last two axes.
arr4_preview = arr4[0:1, 0:1, 0:1, 0:2, 0:]
print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:")
print(arr4_preview)

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[1.52121883 1.30855066 0.18029662 0.93451268 0.74334865 0.90214984
     1.29787399 1.75768742 1.2309534  1.22097289 1.60886669 0.21594651
     1.82995566 0.69177948 1.89133473]
    [1.74991813 1.46907776 0.16809135 0.63763947 0.78009122 1.61108977
     0.95374158 0.78734324 0.91644333 0.73297834 1.73007879 1.88652617
     0.60801309 0.92099444 0.95337609]]]]]


## Remote Array Slicing

In [3]:
# Region to extract from arr4: one slice object per axis.
# The last axis uses a negative step; the captured output shows the selected
# elements coming back in reverse order.
arr5_region = [
    slice(0, 1),
    slice(0, 1),
    slice(0, 1),
    slice(0, 2),
    slice(0, 10, -2),
]
arr5 = bkc.array_slicing(arr4, arr5_region)

arr5_preview = arr5[0:1, 0:1, 0:1, 0:2, 0:]
print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:")
print(arr5_preview)

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[1.22097289 1.75768742 0.90214984 0.93451268 1.30855066]
    [0.73297834 0.78734324 1.61108977 0.63763947 1.46907776]]]]]


## Remote Array Arithmetic Operation

In [4]:
# Double every element of the sliced array through the client API.
arr6 = bkc.times_two(arr5)

# arr6 is small enough to print in full.
arr6_shape = arr6.shape
print("arr6.shape=", arr6_shape)
print("arr6:")
print(arr6)

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[2.44194578 3.51537485 1.80429968 1.86902536 2.61710132]
    [1.46595668 1.57468647 3.22217954 1.27527895 2.93815552]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [5]:
# Create one container object (whose own data is the 3D array `arr3`) together
# with six NDArray sub-objects, each carrying its own metadata attributes.

# Short pseudo-random tag so object names differ between notebook runs.
name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

# Single source of truth for the sub-objects: (name, array) pairs in creation
# order. Names, shapes and data are all derived from this list so they cannot
# drift apart (the hand-written metadata previously recorded arr6.shape for
# the arr5 entry).
sub_arrays = [
    ("{}/arr6".format(name_id), arr6),
    ("{}/arr5".format(name_id), arr5),
    ("{}/arr4".format(name_id), arr4),
    ("{}/arr3".format(name_id), arr3),
    ("{}/gnn_arr1".format(name_id), gnn_arr1),
    ("{}/gnn_arr2".format(name_id), gnn_arr2),
]

# Per-array metadata; every array gets the same demo vcount/voffset/vdim values.
array_meta_list = [
    {
        "name": sub_name,
        "type": "array",
        "shape": sub_array.shape,
        "vcount": 100,
        "voffset": 0,
        "vdim": 0,
    }
    for sub_name, sub_array in sub_arrays
]

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": f"container{name_id}",
        "type": "container",
        "keys": [sub_name for sub_name, _ in sub_arrays],
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=array_meta_list,
    array_data_list=[sub_array for _, sub_array in sub_arrays],
)
# The captured output shows seven ids: the container first, then one per sub-object.
print(obj_ids)

[32126597793653240402443980249552359, 32126597793656929751258726454842855, 32126597793657261792652057521739239, 32126597793657483153580946331325927, 32126597793670488108152915860182503, 32126597793670746362569952088872423, 32126597793674214350455813779543527]


In [6]:
# Show the random tag embedded in this run's object names.
print(name_id)

478702


## Retrieve Metadata

### Retrieving a single metadata attribute from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [7]:
# Benchmark: fetch ONE metadata attribute per request from the container,
# addressing it by numeric object id.
# "title" is not among the attributes set on the container, so the last key
# exercises the missing-attribute path (the captured output shows an empty
# metadata dict for it).
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
num_requests = 1000

# Keep the first response for each key and print after timing, so console I/O
# does not distort the measurement.
previews = []
start = time.perf_counter()
for i in range(num_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        previews.append(result)
# Sample the clock once so both reported rates use the same interval.
elapsed = time.perf_counter() - start

for preview in previews:
    print(preview)

# One attribute per request, so MetaPS equals TPS here.
tps = num_requests / elapsed
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32126597793653240402443980249552359, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'keys': ['478702/arr6', '478702/arr5', '478702/arr4', '478702/arr3', '478702/gnn_arr1', '478702/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'name': 'container478702'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1127.8291158880083 MetaPS: 1127.827902816999


In [8]:
# Benchmark: fetch ONE metadata attribute per request from the container,
# this time addressing it by name ("container<name_id>") rather than by id.
# "title" is not among the attributes set on the container, so the last key
# exercises the missing-attribute path.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
num_requests = 1000

# Keep the first response for each key and print after timing, so console I/O
# does not distort the measurement.
previews = []
start = time.perf_counter()
for i in range(num_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=f"container{name_id}",
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        previews.append(result)
# Sample the clock once so both reported rates use the same interval.
elapsed = time.perf_counter() - start

for preview in previews:
    print(preview)

# One attribute per request, so MetaPS equals TPS here.
tps = num_requests / elapsed
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32126597793653240402443980249552359, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'keys': ['478702/arr6', '478702/arr5', '478702/arr4', '478702/arr3', '478702/gnn_arr1', '478702/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'name': 'container478702'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32126597793653240402443980249552359, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1106.8756963654146 MetaPS: 1106.8742358463503


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

Batching causes a slight drop in TPS but a large improvement in MetaPS.

In [9]:
# Benchmark: fetch ALL listed metadata attributes of the container in one request.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so TPS and MetaPS share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        num_requests / elapsed, num_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'keys': ['478702/arr6',
                       '478702/arr5',
                       '478702/arr4',
                       '478702/arr3',
                       '478702/gnn_arr1',
                       '478702/gnn_arr2'],
              'name': 'container478702',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32126597793653240402443980249552359,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=977.1980353079815, MetaPS: 5863.175917709007 


### Retrieving single attributes from a single sub-object

In [11]:
# Benchmark: fetch ONE metadata attribute per request from a single sub-object
# (obj_ids[1]; the captured output identifies it as the "<name_id>/arr6" array).
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

# Keep the first response for each key and print after timing, so console I/O
# does not distort the measurement.
previews = []
start = time.perf_counter()
for i in range(num_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=obj_ids[1],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        previews.append(result)
# Sample the clock once so both reported rates use the same interval.
elapsed = time.perf_counter() - start

for preview in previews:
    print(preview)

# One attribute per request, so MetaPS equals TPS here.
tps = num_requests / elapsed
print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32126597793656929751258726454842855, 'metadata': {'name': '478702/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793656929751258726454842855, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32126597793656929751258726454842855, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32126597793656929751258726454842855, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32126597793656929751258726454842855, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32126597793656929751258726454842855, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1085.5489842415282, MetaPS: 1085.5475794612928


### Retrieving multiple attributes from a single sub-object

In [12]:
# Benchmark: fetch ALL listed metadata attributes of one sub-object (obj_ids[1])
# in a single request.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so TPS and MetaPS share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '478702/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32126597793656929751258726454842855,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1019.2867939751247, MetaPS: 6115.708874085408


### Retrieving multiple attributes from multiple sub-objects

In [13]:
# Benchmark: one request to the container that returns the listed attributes
# for its sub-objects (passing a list as sub_meta_keys applies it to each one).
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retrieving MULTIPLE attributes for every sub-object
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so TPS and MetaPS share the same interval.
elapsed = time.perf_counter() - start

# Count the attributes actually returned per request instead of assuming a
# fixed number of sub-objects. 'sub_obj_metadata' can be None in other
# responses, hence the `or []` guard.
attrs_per_request = sum(
    len(sub_obj["metadata"]) for sub_obj in (result["sub_obj_metadata"] or [])
)

pprint(result)
print(
    "Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32126597793653240402443980249552359,
 'sub_obj_metadata': [{'metadata': {'name': '478702/arr5',
                                    'shape': [1, 1, 1, 2, 5],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '478702/arr5',
                       'obj_id': 32126597793657261792652057521739239},
                      {'metadata': {'name': '478702/arr4',
                                    'shape': [15, 15, 15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '478702/arr4',
                       'obj_id': 32126597793657483153580946331325927},
                      {'metadata': {'name': '47870

### Retrieving different sets of attributes from multiple sub-objects

In [14]:
# Benchmark: one request to the container asking for a DIFFERENT attribute set
# from each named sub-object (sub-object name -> list of attribute keys).
sub_meta_keys = {
    "{}/arr5".format(name_id): ["name", "type", "voffset"],
    "{}/arr4".format(name_id): ["type", "vcount"],
}
num_requests = 1000

# Attributes requested per call, derived from the request itself rather than
# hard-coded (3 + 2 = 5 for the selection above).
attrs_per_request = sum(len(keys) for keys in sub_meta_keys.values())

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=sub_meta_keys,  # different metadata set per sub-object
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so TPS and MetaPS share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32126597793653240402443980249552359,
 'sub_obj_metadata': [{'metadata': {'name': '478702/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '478702/arr5',
                       'obj_id': 32126597793657261792652057521739239},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '478702/arr4',
                       'obj_id': 32126597793657483153580946331325927}]}

 Multiple Metadata retrieval of a single sub-object : TPS=934.9972134911612, MetaPS: 4674.981898854724


### Retrieving an array slice of a single object

In [15]:
# Benchmark: fetch a tiny slice of the container's own 3D array.
# The last axis uses a negative step; the captured output shows the two
# selected elements coming back in reverse order.
region = [slice(0, 1), slice(0, 1), slice(0, 2, -1)]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so both reported rates share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in Sample/s is carried over unchanged; its
# meaning (requests per sample?) is not documented in this notebook — confirm.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        num_requests / elapsed, (num_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.05262452, 0.99487847]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1021.222056602864, Sample/s = 170.2033031331613


In [16]:
# Benchmark: fetch a larger slab of the container's own 3D array
# (indices 12..13 on the first axis, everything on the other two).
region = [
    slice(12, 14, None),
    slice(None),
    slice(None),
]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so both reported rates share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in Sample/s is carried over unchanged; its
# meaning is not documented in this notebook — confirm.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        num_requests / elapsed, (num_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.73453096, 0.49581507, 0.49099952, 0.60325526, 0.98618851,
         0.22849824, 0.5369206 , 0.28975978, 0.70019963, 0.80884148,
         0.75349571, 0.41469711, 0.71519392, 0.42129945, 0.10149537],
        [0.26944786, 0.10163012, 0.86232499, 0.52206205, 0.51166273,
         0.67984511, 0.78207069, 0.03079485, 0.3524176 , 0.46469743,
         0.05586642, 0.13796977, 0.99987251, 0.0182884 , 0.87519502],
        [0.45098876, 0.76757477, 0.61886736, 0.58086595, 0.86339937,
         0.81208596, 0.00588362, 0.47605789, 0.64390782, 0.10896188,
         0.28704601, 0.17008067, 0.20378963, 0.05398073, 0.40485594],
        [0.24175825, 0.72900987, 0.63238041, 0.22752917, 0.92276958,
         0.36885422, 0.50844638, 0.08893148, 0.39682884, 0.68685677,
         0.75789392, 0.71933951, 0.25980483, 0.18086439, 0.26584329],
        [0.95620973, 0.51787195, 0.10877435, 0.01425939, 0.80520905,
         0.15861455, 0.8612252 , 0.9625342 , 0.54989099, 0.87276975,
         0.053

### Retrieving array slices of multiple sub-objects (Large subarray included)

In [17]:
# Benchmark: one request returning a slice of the container's own array plus a
# slice from each of four named sub-objects (including the large 5D arr4).
region = [slice(0, 1), slice(0, 2), slice(0, 2, -1)]

# (sub-object name, per-axis slice list) pairs; the slice list length matches
# the dimensionality of the corresponding array.
sub_obj_regions = [
    (
        "{}/arr5".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
    (
        "{}/arr4".format(name_id),
        [
            slice(0, 2, -1),
            slice(0, 1),
            slice(0, 1),
            slice(0, 1),
            slice(0, 2, -1),
        ],
    ),
    (
        "{}/arr3".format(name_id),
        [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
    ),
    (
        "{}/arr6".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so both reported rates share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the (4 / 6) factor in Sample/s is carried over unchanged; its
# derivation is not documented in this notebook — confirm.
print(
    "\n retrieving array slices of a container and its sub-objects, TPS={}, Sample/s={}".format(
        num_requests / elapsed, (num_requests / (4 / 6)) / elapsed
    )
)

{'array_slice': array([[[0.05262452, 0.99487847],
        [0.34973204, 0.80914095]]]),
 'sub_obj_slices': [{'array': array([[[[[1.75768742, 1.22097289],
          [0.78734324, 0.73297834]]]]]),
                     'id': 32126597793657261792652057521739239,
                     'name': '478702/arr5'},
                    {'array': array([[[[[1.25684066, 1.05792264]]]],



       [[[[1.30855066, 1.52121883]]]]]),
                     'id': 32126597793657483153580946331325927,
                     'name': '478702/arr4'},
                    {'array': array([[[0.86156386, 0.0232702 , 0.16234166, 0.15565602, 0.05262452],
        [0.99027278, 0.99246459, 0.40359868, 0.25062981, 0.34973204]]]),
                     'id': 32126597793670488108152915860182503,
                     'name': '478702/arr3'},
                    {'array': array([[[[[3.51537485, 2.44194578],
          [1.57468647, 1.46595668]]]]]),
                     'id': 32126597793656929751258726454842855,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [18]:
# Benchmark: one request that skips the container's own data (region=None) and
# returns six slices taken from the two GNN sub-object arrays.
gnn_arr1_name = "{}/gnn_arr1".format(name_id)
gnn_arr2_name = "{}/gnn_arr2".format(name_id)

# (sub-object name, per-axis slice list) pairs. The same sub-object may appear
# several times with different (or identical) regions.
sub_obj_regions = [
    (gnn_arr1_name, [slice(0, 20), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 10, -1), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 15, -1), slice(0, 2)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
]
num_requests = 1000

start = time.perf_counter()
for _ in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing so output formatting is not timed,
# and sample it once so both reported rates share the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Sample/s is reported as equal to TPS here, as in the original formula.
rate = num_requests / elapsed
print(
    "\n retrieving array slices of GNN sub-objects, TPS={}, Sample/s={}".format(
        rate, rate
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.83306967],
       [0.21245617],
       [0.64915295],
       [0.56476719],
       [0.23132219],
       [0.41442525],
       [0.67569316],
       [0.22597701],
       [0.41579258],
       [0.33910983],
       [0.44122641],
       [0.2803136 ],
       [0.11697678],
       [0.36531879],
       [0.28510001],
       [0.57190317],
       [0.50399514],
       [0.92617762],
       [0.74360922],
       [0.64265   ]]),
                     'id': 32126597793670746362569952088872423,
                     'name': '478702/gnn_arr1'},
                    {'array': array([[0.33910983],
       [0.41579258],
       [0.22597701],
       [0.67569316],
       [0.41442525],
       [0.23132219],
       [0.56476719],
       [0.64915295],
       [0.21245617],
       [0.83306967]]),
                     'id': 32126597793670746362569952088872423,
                     'name': '478702/gnn_arr1'},
                    {'array': array([[0.28510001, 0.184792

## Closing the Store

In [89]:
# Disabled: counterpart of the commented-out server launch near the top of the
# notebook. Only meaningful if `server_process` was created by that cell.
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")