# BULKI_Store Demo

## Starting Server

In [93]:

%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
Let's start


In [2]:
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [102]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


bkc.init()
dim_size = 15

print("5D array size = {} elements".format(dim_size**5))

# dim_size = 10
rd1 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)
rd2 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)

print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[0:1, 0:1, 0:1, 0:2, 0:])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[0:1, 0:1, 0:1, 0:2, 0:])

arr3 = np.random.rand(dim_size, dim_size, dim_size)
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[0:1, 0:2, 0:])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.02121018 0.34371924 0.66384708 0.2509436  0.50725957 0.05965213
     0.80339222 0.52636032 0.15284226 0.60598629 0.49756299 0.57669296
     0.24820821 0.84380508 0.65887463]
    [0.21610796 0.4700512  0.64679471 0.23765731 0.74632617 0.7728649
     0.87550305 0.21909482 0.88943265 0.87354773 0.12846691 0.15252377
     0.52971696 0.03637056 0.1158859 ]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.01866858 0.38976415 0.34305569 0.5697682  0.94926718 0.18515047
     0.77275    0.06140011 0.47569409 0.36030891 0.55322774 0.55347914
     0.17540104 0.09666887 0.5075711 ]
    [0.09781131 0.61929597 0.94247784 0.44904642 0.95190808 0.15154083
     0.69775056 0.5352605  0.79407519 0.18885056 0.54015584 0.31037283
     0.08750778 0.17687794 0.85977445]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.44833339 0.30359799 0.09444056 0.57723046 0.95768152 0.61989608
   0.7

## Client-Side NDArray Arithmetic Operation

In [103]:
arr4 = bkc.polymorphic_add(rd1, rd2)

print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:")
print(arr4[0:1, 0:1, 0:1, 0:2, 0:])

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[0.03987875 0.73348338 1.00690278 0.8207118  1.45652675 0.2448026
     1.57614222 0.58776044 0.62853636 0.9662952  1.05079073 1.1301721
     0.42360925 0.94047395 1.16644573]
    [0.31391927 1.08934717 1.58927256 0.68670373 1.69823425 0.92440573
     1.57325361 0.75435532 1.68350785 1.06239829 0.66862276 0.4628966
     0.61722474 0.2132485  0.97566035]]]]]


## Remote Array Slicing

In [104]:
arr5 = bkc.array_slicing(
    arr4, [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
)

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:")
print(arr5[0:1, 0:1, 0:1, 0:2, 0:])

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[0.9662952  0.58776044 0.2448026  0.8207118  0.73348338]
    [1.06239829 0.75435532 0.92440573 0.68670373 1.08934717]]]]]


## Remote Array Arithmetic Operation

In [105]:
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:")
print(arr6)

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[1.93259039 1.17552088 0.48960519 1.6414236  1.46696677]
    [2.12479658 1.50871064 1.84881146 1.37340746 2.17869435]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [106]:
# Create an 3D array

name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": "{}".format(name_id),
        "type": "container",
        "keys": [
            "{}/arr6".format(name_id),
            "{}/arr5".format(name_id),
            "{}/arr4".format(name_id),
            "{}/arr3".format(name_id),
            "{}/gnn_arr1".format(name_id),
            "{}/gnn_arr2".format(name_id),
        ],
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=[
        {
            "name": "{}/arr6".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr5".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr4".format(name_id),
            "type": "array",
            "shape": arr4.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr3".format(name_id),
            "type": "array",
            "shape": arr3.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr1".format(name_id),
            "type": "array",
            "shape": gnn_arr1.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr2".format(name_id),
            "type": "array",
            "shape": gnn_arr2.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
    ],
    array_data_list=[arr6, arr5, arr4, arr3, gnn_arr1, gnn_arr2],
)
print(obj_ids)

[32119302105375792446674707095189545, 32119302105375995360859522195224617, 32119302105376290508764705843017769, 32119302105376604103413963200362537, 32119302105396895521895048002107433, 32119302105397135329568010521245737, 32119302105400621764197945921468457]


In [85]:
print(f"{name_id}")

978260


## Retrieve Metadata

### Retrieve a single metadata attributes from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [108]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

ValueError: Failed to get object metadata: Not Found: Object 6 not found

In [101]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id="{}".format(name_id),
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

TypeError: argument 'obj_id': 'str' object cannot be interpreted as an integer

### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

Slight drop on TPS but MEGA IMPROVEMENT for MetaPS

In [30]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE!!!
    )
pprint(result)
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'keys': ['287851/arr6',
                       '287851/arr5',
                       '287851/arr4',
                       '287851/arr3',
                       '287851/gnn_arr1',
                       '287851/gnn_arr2'],
              'name': '287851',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32118640016810009207249623172841341,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=999.4033560744261, MetaPS: 5996.407277229386 


### Retrieving single attributes from a single sub-object

In [10]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32111518807538522445086984615500166, 'metadata': {'name': '980579/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1393.3865776168705, MetaPS: 1393.3847260357024


### Retrieving multiple attributes from single sub-objects

In [11]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retreiving SINGLE!!!
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'name': '980579/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32111518807538522445086984615500166,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1227.9586564517904, MetaPS: 7367.741153510709


### Retrieving multiple attributes from multiple sub-objects

In [12]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retreiving MULTIPLE for every sub-object!!!
    )

pprint(result)
print(
    "Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) * 3 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32111518807537600107883294842952070,
 'sub_obj_metadata': [{'metadata': {'name': '980579/arr3',
                                    'shape': [15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '980579/arr3',
                       'obj_id': 32111518807549904086180476293749126},
                      {'metadata': {'name': '980579/arr6',
                                    'shape': [1, 1, 1, 2, 5],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '980579/arr6',
                       'obj_id': 32111518807538522445086984615500166},
                      {'metadata': {'name': '980579/gnn_ar

### Retrieving different set of attributes from multiple sub-objects

In [13]:
# meta_keys=["name",
#             "type",
#             "shape",
#             "vcount",
#             "voffset",
#             "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys={
            "{}/arr5".format(name_id): ["name", "type", "voffset"],
            "{}/arr4".format(name_id): ["type", "vcount"],
        },  # retrieving different set of metadata for different sub-objects
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * 5 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32111518807537600107883294842952070,
 'sub_obj_metadata': [{'metadata': {'name': '980579/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '980579/arr5',
                       'obj_id': 32111518807538762252759947134638470},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '980579/arr4',
                       'obj_id': 32111518807538909826712541106018694}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1190.0183029367263, MetaPS: 5950.064504011392


### Retrieving array of a single object

In [14]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        1000 / (time.time() - start), (1000 / 6) / (time.time() - start)
    )
)

{'array_slice': array([[[0.06581965, 0.14443375]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1127.922226874497, Sample/s = 187.98678504762702


### Retrieving array slices of multiple sub-objects (Large subarray included)

In [15]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 2), slice(0, 2, -1)],
        sub_obj_regions=[
            (
                "{}/arr5".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
            (
                "{}/arr4".format(name_id),
                [
                    slice(0, 2, -1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 2, -1),
                ],
            ),
            (
                "{}/arr3".format(name_id),
                [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
            ),
            (
                "{}/arr6".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000 / (4 / 6)) / (time.time() - start)
    )
)

{'array_slice': array([[[0.06581965, 0.14443375],
        [0.47286405, 0.54217128]]]),
 'sub_obj_slices': [{'array': array([[[[[0.99800681, 1.50862712],
          [0.72772797, 0.80912739]]]]]),
                     'id': 32111518807538762252759947134638470,
                     'name': '980579/arr5'},
                    {'array': array([[[[[0.71853013, 0.5677142 ]]]],



       [[[[0.85894364, 1.49865614]]]]]),
                     'id': 32111518807538909826712541106018694,
                     'name': '980579/arr4'},
                    {'array': array([[[0.60643314, 0.86787323, 0.11887073, 0.32200975, 0.06581965],
        [0.54241194, 0.61326484, 0.10442969, 0.08650344, 0.47286405]]]),
                     'id': 32111518807549904086180476293749126,
                     'name': '980579/arr3'},
                    {'array': array([[[[[1.99601363, 3.01725424],
          [1.45545594, 1.61825478]]]]]),
                     'id': 32111518807538522445086984615500166,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [19]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=[
            (
                "{}/gnn_arr1".format(name_id),
                [slice(0, 20), slice(0, 1)],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [
                    slice(0, 10, -1),
                    slice(0, 1),
                ],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [
                    slice(0, 15, -1),
                    slice(0, 2),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [
                    slice(0, 1),
                    slice(0, 20, -1),
                ],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000) / (time.time() - start)
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.57183413],
       [0.67904876],
       [0.0145189 ],
       [0.29555364],
       [0.87452062],
       [0.44289371],
       [0.80648223],
       [0.91476421],
       [0.61382033],
       [0.18692719],
       [0.47040631],
       [0.76709546],
       [0.74777212],
       [0.12612044],
       [0.60623725],
       [0.73786713],
       [0.20566671],
       [0.96760431],
       [0.71751814],
       [0.27630647]]),
                     'id': 32111518807550125447109365103335814,
                     'name': '980579/gnn_arr1'},
                    {'array': array([[0.18692719],
       [0.61382033],
       [0.91476421],
       [0.80648223],
       [0.44289371],
       [0.87452062],
       [0.29555364],
       [0.0145189 ],
       [0.67904876],
       [0.57183413]]),
                     'id': 32111518807550125447109365103335814,
                     'name': '980579/gnn_arr1'},
                    {'array': array([[0.60623725, 0.771744

## Closing the Store

In [17]:
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")