# BULKI_Store Demo

## Starting Server

In [70]:

%env RUST_LOG=debug

%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
env: RUST_LOG=debug
Let's start


In [71]:
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [72]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


# Initialize the bkstore client before any other bkc call in this notebook.
bkc.init()

# Edge length shared by every axis of the demo arrays.
dim_size = 15
shape_5d = (dim_size,) * 5
shape_3d = (dim_size,) * 3

print("5D array size = {} elements".format(dim_size**5))

# Two random 5D operands, drawn in this order from NumPy's global RNG.
rd1 = np.random.rand(*shape_5d)
rd2 = np.random.rand(*shape_5d)

# Preview only the first two rows of the last two dimensions of each array.
print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[:1, :1, :1, :2, :])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[:1, :1, :1, :2, :])

# A smaller 3D array, used later in the notebook.
arr3 = np.random.rand(*shape_3d)
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[:1, :2, :])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.08663482 0.59558027 0.62960987 0.4729986  0.57928576 0.5306721
     0.11848833 0.08423938 0.70638524 0.88690634 0.0719992  0.37613026
     0.6058457  0.65754648 0.44527922]
    [0.47236678 0.29346513 0.68435253 0.83908487 0.79930203 0.89991119
     0.59928692 0.32923729 0.16767291 0.32729862 0.89142215 0.75340056
     0.19889376 0.37338368 0.04716   ]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.91809928 0.72458919 0.40881815 0.03364243 0.70669297 0.14787626
     0.03583577 0.64764197 0.98465139 0.72900086 0.491693   0.0896528
     0.02416408 0.94364375 0.0776397 ]
    [0.79195747 0.07572613 0.64079606 0.31680976 0.29157228 0.71658357
     0.05995136 0.566793   0.92811117 0.41916363 0.63608706 0.19718589
     0.09642889 0.41666164 0.06613984]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.23587215 0.62178663 0.06894842 0.97736116 0.75520399 0.08845965
   0.52

## Client-Side NDArray Arithmetic Operation

In [73]:
# Add the two 5D arrays through the client API and keep the sum as arr4.
arr4 = bkc.polymorphic_add(rd1, rd2)

# Same preview window as used for rd1/rd2: first two rows of the last 2 dims.
arr4_preview = arr4[:1, :1, :1, :2, :]

print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:")
print(arr4_preview)

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[1.0047341  1.32016946 1.03842801 0.50664102 1.28597873 0.67854836
     0.1543241  0.73188135 1.69103663 1.6159072  0.5636922  0.46578306
     0.63000977 1.60119023 0.52291892]
    [1.26432425 0.36919126 1.32514859 1.15589463 1.09087431 1.61649476
     0.65923829 0.89603029 1.09578408 0.74646225 1.52750921 0.95058645
     0.29532265 0.79004531 0.11329984]]]]]


## Remote Array Slicing

In [74]:
# One slice per dimension of arr4.
# NOTE(review): the last slice has start < stop with a negative step. NumPy
# would return an empty selection for that, but the recorded output (shape
# (1, 1, 1, 2, 5), values in reverse order) suggests bkstore walks the 0..10
# range backwards in steps of 2 -- confirm against the bkstore documentation.
arr5_region = [
    slice(0, 1),
    slice(0, 1),
    slice(0, 1),
    slice(0, 2),
    slice(0, 10, -2),
]
arr5 = bkc.array_slicing(arr4, arr5_region)

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:")
print(arr5[:1, :1, :1, :2, :])

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[1.6159072  0.73188135 0.67854836 0.50664102 1.32016946]
    [0.74646225 0.89603029 1.61649476 1.15589463 0.36919126]]]]]


## Remote Array Arithmetic Operation

In [75]:
# Run the times_two operation on the sliced array; the result is small enough
# to print in full.
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:", arr6, sep="\n")

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[3.23181439 1.46376271 1.35709672 1.01328205 2.64033893]
    [1.49292449 1.79206058 3.23298952 2.31178927 0.73838253]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [76]:
# Create a container object that carries `arr3` as its own data and attaches
# six NDArrays (arr6, arr5, arr4, arr3, gnn_arr1, gnn_arr2) as sub-objects,
# each with its own metadata record.

# Random 6-digit tag used as the name prefix of every sub-object. It is built
# from an integer so the tag can never contain a decimal point (slicing the
# string form of a float could yield e.g. "99999." for values below 100000).
name_id = "{:06d}".format(int(np.random.rand(1)[0] * 1000000))

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

# Sub-object short name -> array, in the order they are attached. Deriving the
# names, metadata and data lists from this single mapping keeps every metadata
# record paired with its own array (the hand-written arr5 record previously
# reported arr6.shape).
sub_arrays = {
    "arr6": arr6,
    "arr5": arr5,
    "arr4": arr4,
    "arr3": arr3,
    "gnn_arr1": gnn_arr1,
    "gnn_arr2": gnn_arr2,
}
sub_names = ["{}/{}".format(name_id, short_name) for short_name in sub_arrays]

array_meta_list = [
    {
        "name": sub_name,
        "type": "array",
        "shape": sub_array.shape,
        "vcount": 100,
        "voffset": 0,
        "vdim": 0,
    }
    for sub_name, sub_array in zip(sub_names, sub_arrays.values())
]

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        # "name" is listed once; a repeated key in a dict literal is silently
        # overwritten by the later entry.
        "name": "container",
        "type": "container",
        "keys": sub_names,
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=array_meta_list,
    array_data_list=list(sub_arrays.values()),
)
print(obj_ids)

[32120029897713095799432405078924581, 32120029897713206479896851631201573, 32120029897713446287569814150339877, 32120029897713593861522408121720101, 32120029897723370635881478479043877, 32120029897723536656578146159975717, 32120029897725658032146627053378853]


In [77]:
# Show the random tag that prefixes the sub-object names.
print(name_id)

198490


## Retrieve Metadata

### Retrieve a single metadata attribute from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [78]:
# Benchmark: fetch ONE metadata attribute per request from the container,
# cycling through `meta_keys`. "title" was never set on the container, so that
# request returns an empty metadata dict.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    # Print one sample response per key (first pass through meta_keys only).
    if i < len(meta_keys):
        print(result)
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120029897713095799432405078924581, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'keys': ['198490/arr6', '198490/arr5', '198490/arr4', '198490/arr3', '198490/gnn_arr1', '198490/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'name': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1192.3356059221296 MetaPS: 1192.33425011904


In [79]:
# Benchmark: same single-attribute retrieval, but the object is addressed by
# the string "container" (the value stored under its "name" key) instead of
# its numeric id.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id="container",
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    # Print one sample response per key (first pass through meta_keys only).
    if i < len(meta_keys):
        print(result)
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120029897713095799432405078924581, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'keys': ['198490/arr6', '198490/arr5', '198490/arr4', '198490/arr3', '198490/gnn_arr1', '198490/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'name': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713095799432405078924581, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1200.9745686153744 MetaPS: 1200.9718175766122


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

Batching causes a slight drop in TPS but a large improvement in MetaPS.

In [80]:
# Benchmark: fetch ALL listed attributes of the container in a single request.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Every request returns len(meta_keys) attributes.
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'keys': ['198490/arr6',
                       '198490/arr5',
                       '198490/arr4',
                       '198490/arr3',
                       '198490/gnn_arr1',
                       '198490/gnn_arr2'],
              'name': 'container',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32120029897713095799432405078924581,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=1115.1715349300703, MetaPS: 6691.014977651705 


### Retrieving a single attribute from a single sub-object

In [81]:
# Benchmark: fetch ONE attribute per request from a single sub-object
# (obj_ids[1]), cycling through `meta_keys`.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        # Passed exactly once: repeating a keyword argument is a SyntaxError.
        obj_id=obj_ids[1],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    # Print one sample response per key (first pass through meta_keys only).
    if i < len(meta_keys):
        print(result)
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'obj_id': 32120029897713206479896851631201573, 'metadata': {'name': '198490/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713206479896851631201573, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713206479896851631201573, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713206479896851631201573, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713206479896851631201573, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32120029897713206479896851631201573, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1373.1358919694724, MetaPS: 1373.133644279401


### Retrieving multiple attributes from a single sub-object

In [82]:
# Benchmark: fetch ALL listed attributes of one sub-object (obj_ids[1]) in a
# single request.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Every request returns len(meta_keys) attributes.
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '198490/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32120029897713206479896851631201573,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1294.7532645852923, MetaPS: 7768.50999516587


### Retrieving multiple attributes from multiple sub-objects

In [83]:
# Benchmark: one request against the container returns the listed attributes
# for every one of its sub-objects.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retrieving MULTIPLE keys for every sub-object
    )
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)

# Count the sub-objects actually returned instead of hard-coding 3; the
# container created earlier in this notebook has six sub-objects.
n_sub_objects = len(result["sub_obj_metadata"] or [])
print(
    "Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed,
        n_requests * len(meta_keys) * n_sub_objects / elapsed,
    )
)

{'metadata': {},
 'obj_id': 32120029897713095799432405078924581,
 'sub_obj_metadata': [{'metadata': {'name': '198490/arr4',
                                    'shape': [15, 15, 15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '198490/arr4',
                       'obj_id': 32120029897713593861522408121720101},
                      {'metadata': {'name': '198490/gnn_arr2',
                                    'shape': [1, 100000],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '198490/gnn_arr2',
                       'obj_id': 32120029897725658032146627053378853},
                      {'metadata': {'name': '1

### Retrieving different sets of attributes from multiple sub-objects

In [84]:
# Benchmark: one request against the container asks for a DIFFERENT set of
# attributes from each named sub-object.

# Sub-object name -> attributes wanted from it. Built once, outside the timed
# loop, because it does not change between requests.
sub_meta_keys = {
    "{}/arr5".format(name_id): ["name", "type", "voffset"],
    "{}/arr4".format(name_id): ["type", "vcount"],
}
# Attributes returned per request (3 + 2 = 5), derived from the request
# itself rather than hard-coded.
attrs_per_request = sum(len(keys) for keys in sub_meta_keys.values())
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=sub_meta_keys,
    )
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32120029897713095799432405078924581,
 'sub_obj_metadata': [{'metadata': {'name': '198490/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '198490/arr5',
                       'obj_id': 32120029897713446287569814150339877},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '198490/arr4',
                       'obj_id': 32120029897713593861522408121720101}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1226.0655385966338, MetaPS: 6130.320525000366


### Retrieving array of a single object

In [85]:
# Benchmark: repeatedly fetch a small slice of the container's own 3D array.
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    )
# Read the clock once so both rates are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in the Sample/s figure is kept from the original
# cell; its meaning is not evident from this notebook -- confirm or name it.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        n_requests / elapsed, (n_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.62178663, 0.23587215]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1178.3626273165903, Sample/s = 196.39355051743186


In [86]:
# Benchmark: repeatedly fetch a larger slab of the container's own 3D array
# (indices 12..13 of the first axis, everything along the other two axes).
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[
            slice(12, 14, None),
            slice(None),
            slice(None),
        ],
    )
# Read the clock once so both rates are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in the Sample/s figure is kept from the original
# cell; its meaning is not evident from this notebook -- confirm or name it.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        n_requests / elapsed, (n_requests / 6) / elapsed
    )
)

{'array_slice': array([[[4.11020637e-01, 2.18721105e-01, 3.69594400e-02, 9.59146544e-01,
         1.05358009e-01, 8.98254096e-02, 3.16815324e-01, 6.37150257e-01,
         2.68074970e-01, 7.94732376e-01, 6.41883772e-01, 9.49951724e-01,
         6.03958264e-01, 3.86807641e-04, 5.09058795e-01],
        [6.14056118e-01, 8.67337888e-01, 9.13021354e-01, 5.70167740e-01,
         7.38035429e-01, 1.11448307e-01, 7.73436528e-01, 8.79214469e-02,
         6.39623762e-01, 1.21359554e-01, 9.07317290e-01, 4.96837274e-01,
         1.18468512e-01, 5.70822556e-01, 7.36446395e-01],
        [1.45528000e-01, 6.37055115e-01, 1.37736467e-01, 6.43862174e-02,
         2.39138856e-01, 5.12645938e-01, 7.22607026e-01, 3.85778696e-01,
         6.40691252e-01, 6.32739595e-02, 1.00037648e-01, 1.64276675e-01,
         5.52927504e-01, 7.26349769e-04, 8.94497106e-01],
        [8.47386126e-01, 2.77525059e-01, 4.56280115e-01, 7.82925607e-01,
         5.22316034e-01, 2.58741240e-01, 4.34871298e-01, 6.81705701e-01,
       

### Retrieving array slices of multiple sub-objects (Large subarray included)

In [87]:
# Benchmark: one request returns a slice of the container's own array plus one
# slice from each of four named sub-objects (arr5, arr4, arr3, arr6).

# (sub-object name, per-dimension slices) pairs. Built once, outside the timed
# loop, because they do not change between requests.
sub_obj_regions = [
    (
        "{}/arr5".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
    (
        "{}/arr4".format(name_id),
        [slice(0, 2, -1), slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    ),
    (
        "{}/arr3".format(name_id),
        [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
    ),
    (
        "{}/arr6".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 2), slice(0, 2, -1)],
        sub_obj_regions=sub_obj_regions,
    )
# Read the clock once so both rates are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the 4 / 6 factor in the Sample/s figure is kept from the
# original cell; its meaning is not evident from this notebook -- confirm.
print(
    "\n retrieving array slices of multiple sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, (n_requests / (4 / 6)) / elapsed
    )
)

{'array_slice': array([[[0.62178663, 0.23587215],
        [0.90618947, 0.09865762]]]),
 'sub_obj_slices': [{'array': array([[[[[0.73188135, 1.6159072 ],
          [0.89603029, 0.74646225]]]]]),
                     'id': 32120029897713446287569814150339877,
                     'name': '198490/arr5'},
                    {'array': array([[[[[0.18758013, 0.32419016]]]],



       [[[[1.32016946, 1.0047341 ]]]]]),
                     'id': 32120029897713593861522408121720101,
                     'name': '198490/arr4'},
                    {'array': array([[[0.42087656, 0.24204213, 0.08845965, 0.97736116, 0.62178663],
        [0.92967152, 0.66895578, 0.22977314, 0.44410789, 0.90618947]]]),
                     'id': 32120029897723370635881478479043877,
                     'name': '198490/arr3'},
                    {'array': array([[[[[1.46376271, 3.23181439],
          [1.79206058, 1.49292449]]]]]),
                     'id': 32120029897713206479896851631201573,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [90]:
# Benchmark: one request fetches six slices from the two GNN sub-arrays and
# nothing from the container's own array (region=None).

gnn_arr1_name = "{}/gnn_arr1".format(name_id)
gnn_arr2_name = "{}/gnn_arr2".format(name_id)

# (sub-object name, per-dimension slices) pairs. Built once, outside the timed
# loop, because they do not change between requests. The three gnn_arr2
# entries are identical, exactly as in the original request.
sub_obj_regions = [
    (gnn_arr1_name, [slice(0, 20), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 10, -1), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 15, -1), slice(0, 2)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=sub_obj_regions,
    )
# Read the clock once so both rates are computed from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n retrieving array slices of multiple sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.02016286],
       [0.36533791],
       [0.42570238],
       [0.49772483],
       [0.06137669],
       [0.96256167],
       [0.38264972],
       [0.02212991],
       [0.82519936],
       [0.52947522],
       [0.28570145],
       [0.36036936],
       [0.46524358],
       [0.45217778],
       [0.57512953],
       [0.93579813],
       [0.48243052],
       [0.49472767],
       [0.60397132],
       [0.73038836]]),
                     'id': 32120029897723536656578146159975717,
                     'name': '198490/gnn_arr1'},
                    {'array': array([[0.52947522],
       [0.82519936],
       [0.02212991],
       [0.38264972],
       [0.96256167],
       [0.06137669],
       [0.49772483],
       [0.42570238],
       [0.36533791],
       [0.02016286]]),
                     'id': 32120029897723536656578146159975717,
                     'name': '198490/gnn_arr1'},
                    {'array': array([[0.57512953, 0.434942

## Closing the Store

In [89]:
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")