# BULKI_Store Demo

## Starting Server

In [1]:
%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
Let's start


In [2]:
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [1]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


bkc.init()
dim_size = 15

print("5D array size = {} elements".format(dim_size**5))

# dim_size = 10
rd1 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)
rd2 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)

print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[0:1, 0:1, 0:1, 0:2, 0:])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[0:1, 0:1, 0:1, 0:2, 0:])

arr3 = np.random.rand(dim_size, dim_size, dim_size)
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[0:1, 0:2, 0:])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.33168551 0.38372746 0.38529249 0.5754967  0.24854266 0.78258965
     0.77356204 0.51292624 0.28064838 0.24478047 0.141662   0.49623714
     0.29594246 0.74721916 0.51458899]
    [0.76216071 0.06886609 0.33818505 0.41312753 0.69118031 0.93764693
     0.20441648 0.75845817 0.05743331 0.91470635 0.70862228 0.46495651
     0.15816719 0.17998776 0.36267449]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.52919906 0.45883964 0.95779896 0.01486246 0.77713847 0.22163869
     0.79370926 0.958674   0.56354816 0.9326263  0.99194317 0.65700987
     0.92162703 0.53668223 0.82818654]
    [0.31005936 0.415445   0.80921334 0.73405374 0.60993311 0.78179974
     0.79527659 0.50806788 0.89546378 0.48111974 0.27841573 0.68401424
     0.50705913 0.00539876 0.8206666 ]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.84387444 0.92677522 0.79622826 0.25320828 0.31133278 0.31094818
   0.

## Client-Side NDArray Arithmetic Operation

In [2]:
arr4 = bkc.polymorphic_add(rd1, rd2)

print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:")
print(arr4[0:1, 0:1, 0:1, 0:2, 0:])

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[0.86088457 0.8425671  1.34309145 0.59035915 1.02568113 1.00422835
     1.5672713  1.47160024 0.84419654 1.17740676 1.13360516 1.15324701
     1.2175695  1.28390139 1.34277552]
    [1.07222007 0.48431109 1.14739838 1.14718127 1.30111342 1.71944667
     0.99969307 1.26652604 0.95289709 1.39582609 0.987038   1.14897075
     0.66522632 0.18538653 1.1833411 ]]]]]


## Remote Array Slicing

In [3]:
arr5 = bkc.array_slicing(
    arr4, [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
)

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:")
print(arr5[0:1, 0:1, 0:1, 0:2, 0:])

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[1.17740676 1.47160024 1.00422835 0.59035915 0.8425671 ]
    [1.39582609 1.26652604 1.71944667 1.14718127 0.48431109]]]]]


## Remote Array Arithmetic Operation

In [4]:
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:")
print(arr6)

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[2.35481353 2.94320048 2.0084567  1.18071831 1.6851342 ]
    [2.79165217 2.53305209 3.43889335 2.29436254 0.96862219]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [5]:
# Create an 3D array

name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": f"container{name_id}",
        "type": "container",
        "keys": [
            "{}/arr6".format(name_id),
            "{}/arr5".format(name_id),
            "{}/arr4".format(name_id),
            "{}/arr3".format(name_id),
            "{}/gnn_arr1".format(name_id),
            "{}/gnn_arr2".format(name_id),
        ],
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=[
        {
            "name": "{}/arr6".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr5".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr4".format(name_id),
            "type": "array",
            "shape": arr4.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr3".format(name_id),
            "type": "array",
            "shape": arr3.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr1".format(name_id),
            "type": "array",
            "shape": gnn_arr1.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr2".format(name_id),
            "type": "array",
            "shape": gnn_arr2.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
    ],
    array_data_list=[arr6, arr5, arr4, arr3, gnn_arr1, gnn_arr2],
)
print(obj_ids)

[32241867340011452816671079505559450, 32241867340012135346201811053936538, 32241867340012172239689962768007066, 32241867340012209133178114482077594, 32241867340042812281596402923175834, 32241867340042922962060849475452826, 32241867340046427843434858585227162]


In [6]:
print(f"{name_id}")

175794


## Retrieve Metadata

### Retrieve a single metadata attributes from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [7]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32241867340011452816671079505559450, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'keys': ['175794/arr6', '175794/arr5', '175794/arr4', '175794/arr3', '175794/gnn_arr1', '175794/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'name': 'container175794'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 6282.792029790919 MetaPS: 6282.75438517653


In [8]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=f"container{name_id}",
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32241867340011452816671079505559450, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'keys': ['175794/arr6', '175794/arr5', '175794/arr4', '175794/arr3', '175794/gnn_arr1', '175794/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'name': 'container175794'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32241867340011452816671079505559450, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 7413.273286754816 MetaPS: 7413.220876407553


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

Slight drop on TPS but MEGA IMPROVEMENT for MetaPS

In [9]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE!!!
    )
pprint(result)
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'keys': ['175794/arr6',
                       '175794/arr5',
                       '175794/arr4',
                       '175794/arr3',
                       '175794/gnn_arr1',
                       '175794/gnn_arr2'],
              'name': 'container175794',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32241867340011452816671079505559450,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=4657.641520852929, MetaPS: 27945.756026491243 


### Retrieving single attributes from a single sub-object

In [10]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=obj_ids[1],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32241867340012135346201811053936538, 'metadata': {'name': '175794/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340012135346201811053936538, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32241867340012135346201811053936538, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32241867340012135346201811053936538, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32241867340012135346201811053936538, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32241867340012135346201811053936538, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=6358.358649712272, MetaPS: 6358.3200941096575


### Retrieving multiple attributes from single sub-objects

In [11]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retreiving SINGLE!!!
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'name': '175794/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32241867340012135346201811053936538,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=5857.02575703623, MetaPS: 35141.90917723287


### Retrieving multiple attributes from multiple sub-objects

In [12]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retreiving MULTIPLE for every sub-object!!!
    )

pprint(result)
print(
    "Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) * 3 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32241867340011452816671079505559450,
 'sub_obj_metadata': [{'metadata': {'name': '175794/arr4',
                                    'shape': [15, 15, 15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '175794/arr4',
                       'obj_id': 32241867340012209133178114482077594},
                      {'metadata': {'name': '175794/arr3',
                                    'shape': [15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '175794/arr3',
                       'obj_id': 32241867340042812281596402923175834},
                      {'metadata': {'name': '175794/a

### Retrieving different set of attributes from multiple sub-objects

In [13]:
# meta_keys=["name",
#             "type",
#             "shape",
#             "vcount",
#             "voffset",
#             "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys={
            "{}/arr5".format(name_id): ["name", "type", "voffset"],
            "{}/arr4".format(name_id): ["type", "vcount"],
        },  # retrieving different set of metadata for different sub-objects
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * 5 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32241867340011452816671079505559450,
 'sub_obj_metadata': [{'metadata': {'type': 'array', 'vcount': 100},
                       'name': '175794/arr4',
                       'obj_id': 32241867340012209133178114482077594},
                      {'metadata': {'name': '175794/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '175794/arr5',
                       'obj_id': 32241867340012172239689962768007066}]}

 Multiple Metadata retrieval of a single sub-object : TPS=5596.5617131656445, MetaPS: 27982.36051530779


## Retrieve multiple metadata attributes from multiple objects (with a list of object IDs)

### Retrieving array of a single object

In [16]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        1000 / (time.time() - start), (1000 / 6) / (time.time() - start)
    )
)

{'array_slice': array([[[0.92677522, 0.84387444]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=5749.433871224209, Sample/s = 958.2337244580912


In [17]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[
            slice(12, 14, None),
            slice(None),
            slice(None),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        1000 / (time.time() - start), (1000 / 6) / (time.time() - start)
    )
)

{'array_slice': array([[[0.14893291, 0.67803631, 0.05545661, 0.37738536, 0.57040274,
         0.62557655, 0.03332917, 0.60017098, 0.15297574, 0.77069315,
         0.78884436, 0.35573578, 0.12751065, 0.83462604, 0.97337488],
        [0.48697615, 0.52091613, 0.72884549, 0.29711955, 0.45601498,
         0.74599557, 0.44599237, 0.73372526, 0.02003314, 0.73791605,
         0.90717738, 0.93938249, 0.19557322, 0.94506392, 0.3742406 ],
        [0.39638403, 0.17191589, 0.68708895, 0.85688794, 0.79669507,
         0.12324473, 0.28676083, 0.24063706, 0.59012809, 0.46152916,
         0.09323674, 0.60850201, 0.37990989, 0.49518512, 0.12831398],
        [0.62193448, 0.23835224, 0.44219962, 0.044414  , 0.02397298,
         0.15914085, 0.13906844, 0.15073566, 0.01598167, 0.01442684,
         0.97381585, 0.50788165, 0.66935045, 0.83422141, 0.18119973],
        [0.16023414, 0.62195054, 0.4972597 , 0.51470713, 0.18111293,
         0.07151795, 0.4826289 , 0.42947462, 0.32325499, 0.8065441 ,
         0.863

### Retrieving array slices of multiple sub-objects (Large subarray included)

In [18]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 2), slice(0, 2, -1)],
        sub_obj_regions=[
            (
                "{}/arr5".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
            (
                "{}/arr4".format(name_id),
                [
                    slice(0, 2, -1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 2, -1),
                ],
            ),
            (
                "{}/arr3".format(name_id),
                [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
            ),
            (
                "{}/arr6".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000 / (4 / 6)) / (time.time() - start)
    )
)

{'array_slice': array([[[0.92677522, 0.84387444],
        [0.99168289, 0.73437325]]]),
 'sub_obj_slices': [{'array': array([[[[[1.47160024, 1.17740676],
          [1.26652604, 1.39582609]]]]]),
                     'id': 32241867340012172239689962768007066,
                     'name': '175794/arr5'},
                    {'array': array([[[[[1.37814842, 1.20389044]]]],



       [[[[0.8425671 , 0.86088457]]]]]),
                     'id': 32241867340012209133178114482077594,
                     'name': '175794/arr4'},
                    {'array': array([[[0.81278814, 0.72433482, 0.31094818, 0.25320828, 0.92677522],
        [0.87083857, 0.97587411, 0.04471095, 0.91263221, 0.99168289]]]),
                     'id': 32241867340042812281596402923175834,
                     'name': '175794/arr3'},
                    {'array': array([[[[[2.94320048, 2.35481353],
          [2.53305209, 2.79165217]]]]]),
                     'id': 32241867340012135346201811053936538,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [19]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=[
            (
                "{}/gnn_arr1".format(name_id),
                [slice(0, 20), slice(0, 1)],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [
                    slice(0, 10, -1),
                    slice(0, 1),
                ],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [
                    slice(0, 15, -1),
                    slice(0, 2),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000) / (time.time() - start)
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.56249349],
       [0.17460969],
       [0.4450655 ],
       [0.40921978],
       [0.57775482],
       [0.56689632],
       [0.44602338],
       [0.69690084],
       [0.80330829],
       [0.73855516],
       [0.98234141],
       [0.44343586],
       [0.65681836],
       [0.80249468],
       [0.84089686],
       [0.53751869],
       [0.11450231],
       [0.95098718],
       [0.13885783],
       [0.92918438]]),
                     'id': 32241867340042922962060849475452826,
                     'name': '175794/gnn_arr1'},
                    {'array': array([[0.73855516],
       [0.80330829],
       [0.69690084],
       [0.44602338],
       [0.56689632],
       [0.57775482],
       [0.40921978],
       [0.4450655 ],
       [0.17460969],
       [0.56249349]]),
                     'id': 32241867340042922962060849475452826,
                     'name': '175794/gnn_arr1'},
                    {'array': array([[0.84089686, 0.206746

In [20]:
bkc.close()

## Closing the Store

In [89]:
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")