# BULKI_Store Demo

## Starting Server

In [1]:
# Print a banner marking the start of the demo.
print("Let's start")

Let's start


In [2]:
# Optional: start the BULKI_Store server from inside the notebook. The whole
# cell is commented out, so presumably the server is started externally.
# NOTE(review): the mpirun path below is machine-specific (Homebrew); adjust it
# before uncommenting. Popen's env= replaces the entire environment, so with
# env={'RUST_LOG': 'debug'} the child does not inherit PATH and friends.
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [22]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


# Initialise the client before any other bkc call.
# NOTE(review): presumably this connects to the running server -- confirm.
bkc.init()
dim_size = 15

print("5D array size = {} elements".format(dim_size**5))


def _preview_last_two_dims(label, array):
    """Print ``array``'s shape followed by a small window of its last two axes.

    The window keeps the first element of every leading axis, the first two
    rows of the second-to-last axis and the complete last axis.
    """
    window = (slice(0, 1),) * (array.ndim - 2) + (slice(0, 2), slice(0, None))
    print("{}.shape=".format(label), array.shape)
    print("{} last 2 dim preview:".format(label))
    print(array[window])


# Two random 5-D operands; they are drawn before anything is printed so the
# random stream is consumed in the order rd1, rd2, arr3.
rd1 = np.random.rand(*([dim_size] * 5))
rd2 = np.random.rand(*([dim_size] * 5))
_preview_last_two_dims("rd1", rd1)
_preview_last_two_dims("rd2", rd2)

# One random 3-D array, used later as the container's own data.
arr3 = np.random.rand(*([dim_size] * 3))
_preview_last_two_dims("arr3", arr3)

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.19421397 0.69789637 0.10966411 0.11385342 0.325672   0.72353511
     0.89836467 0.91740844 0.72086002 0.68429284 0.9840494  0.08726599
     0.86159451 0.05301107 0.88849715]
    [0.04807389 0.67209275 0.46624541 0.85273968 0.25990564 0.63480477
     0.4777737  0.36109853 0.9255911  0.97866321 0.44021564 0.42244463
     0.17412648 0.81287561 0.90278257]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.22743205 0.7502629  0.93754821 0.38908088 0.97306404 0.06111973
     0.17668346 0.10216342 0.51458001 0.422307   0.74601993 0.59682183
     0.8778275  0.7657386  0.35193335]
    [0.11514713 0.61462531 0.3142041  0.08678491 0.78210466 0.1935421
     0.2457816  0.33052    0.90374434 0.59914413 0.08134098 0.31124651
     0.37365959 0.68539548 0.85094749]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.49546214 0.01325358 0.73683745 0.85506341 0.61951003 0.43287758
   0.2

## Client-Side NDArray Arithmetic Operation

In [23]:
# Add rd1 and rd2 through the client's polymorphic_add helper.
arr4 = bkc.polymorphic_add(rd1, rd2)

# Shape first, then the same two-row window used for the operand previews.
print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:", arr4[0:1, 0:1, 0:1, 0:2, 0:], sep="\n")

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[0.42164602 1.44815927 1.04721231 0.5029343  1.29873604 0.78465484
     1.07504813 1.01957186 1.23544003 1.10659984 1.73006933 0.68408782
     1.73942201 0.81874968 1.24043049]
    [0.16322102 1.28671806 0.7804495  0.93952459 1.0420103  0.82834687
     0.7235553  0.69161853 1.82933544 1.57780734 0.52155662 0.73369114
     0.54778607 1.49827109 1.75373005]]]]]


## Remote Array Slicing

In [24]:
# Region to cut out of arr4, one slice per axis.
# NOTE(review): the recorded output for the last axis (elements 9, 7, 5, 3, 1)
# suggests bkstore reads slice(0, 10, -2) as "walk [0, 10) backwards in steps
# of 2", which differs from NumPy's negative-step semantics -- confirm.
arr5_region = [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
arr5 = bkc.array_slicing(arr4, arr5_region)

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:", arr5[0:1, 0:1, 0:1, 0:2, 0:], sep="\n")

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[1.10659984 1.01957186 0.78465484 0.5029343  1.44815927]
    [1.57780734 0.69161853 0.82834687 0.93952459 1.28671806]]]]]


## Remote Array Arithmetic Operation

In [25]:
# Run the times_two operation on the sliced array.
arr6 = bkc.times_two(arr5)

# arr6 is small enough to print in full.
print("arr6.shape=", arr6.shape)
print("arr6:", arr6, sep="\n")

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[2.21319968 2.03914371 1.56930968 1.0058686  2.89631853]
    [3.15561468 1.38323706 1.65669373 1.87904918 2.57343611]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [14]:
# Create a container object with six NDArray sub-objects attached.

# Short pseudo-random label used to namespace the container and its sub-objects.
name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

# Single source of truth for the sub-objects: (key suffix, array) pairs in the
# order they are attached. Names, per-array metadata and the data list are all
# derived from this list so they cannot drift apart (the hand-written version
# recorded arr6.shape for the arr5 entry).
sub_arrays = [
    ("arr6", arr6),
    ("arr5", arr5),
    ("arr4", arr4),
    ("arr3", arr3),
    ("gnn_arr1", gnn_arr1),
    ("gnn_arr2", gnn_arr2),
]
sub_names = ["{}/{}".format(name_id, suffix) for suffix, _ in sub_arrays]

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    # Attributes of the container itself.
    metadata={
        "name": "{}".format(name_id),
        "type": "container",
        "keys": sub_names,
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    # One metadata dict per attached array, in the same order as the data list.
    array_meta_list=[
        {
            "name": sub_name,
            "type": "array",
            "shape": array.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        }
        for sub_name, (_, array) in zip(sub_names, sub_arrays)
    ],
    array_data_list=[array for _, array in sub_arrays],
)
# NOTE(review): the recorded run printed seven IDs -- presumably the container
# first, followed by one ID per attached array; confirm against the client API.
print(obj_ids)

[32118621127974698121572547276833259, 32118621127974845695525141248213483, 32118621127975177736918472315109867, 32118621127975417544591434834248171, 32118621127984087514306082618474987, 32118621127984290428490897718510059, 32118621127986633164988263126532587]


In [27]:
# Create a second container that carries only metadata and its own data array
# (no array_meta_list / array_data_list, so no sub-object arrays are attached).
#
# The results are stored under their own names on purpose: the retrieval cells
# further down index obj_ids[1] and build sub-object names from name_id, so
# overwriting those variables with this one-element result would break them
# when the notebook is run top to bottom.
plain_name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

plain_obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": "{}".format(plain_name_id),
        "type": "container",
        "keys": [
            "{}/arr6".format(plain_name_id),
            "{}/arr5".format(plain_name_id),
            "{}/arr4".format(plain_name_id),
            "{}/arr3".format(plain_name_id),
            "{}/gnn_arr1".format(plain_name_id),
            "{}/gnn_arr2".format(plain_name_id),
        ],
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
)
print(plain_obj_ids)

[32118639584656936754091162049787809]


## Retrieve Metadata

### Retrieving a single metadata attribute from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every metadata attribute required its own request, the per-request overhead would dominate.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [20]:
# NOTE(review): "title" is not among the attributes set when the container was
# created -- presumably included to show the response for an unknown key.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
num_requests = 1000

# Responses to show afterwards; printing is kept out of the timed loop.
first_results = []

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[i % len(meta_keys)]],  # retrieving a SINGLE attribute
    )
    if i < len(meta_keys):
        first_results.append(result)
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

for shown in first_results:
    print(shown)

# One attribute per request, so MetaPS equals TPS here.
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        num_requests / elapsed, num_requests / elapsed
    )
)

{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {'name': '224591'}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
{'obj_id': 32118626635353000372514221056818744, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1239.7427882732497 MetaPS: 1239.7413225136254


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

TPS drops slightly, but MetaPS improves more than fivefold, because each request now returns several attributes.

In [21]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]
num_requests = 1000

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes in one request
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
# Every request asks for len(meta_keys) attributes.
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        num_requests / elapsed, num_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '224591'},
 'obj_id': 32118626635353000372514221056818744,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=1101.7876614504978, MetaPS: 6610.705130175272 


### Retrieving single attributes from a single sub-object

In [10]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

# Responses to show afterwards; printing is kept out of the timed loop.
first_results = []

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],  # second ID returned by create_objects (a sub-object)
        meta_keys=[meta_keys[i % len(meta_keys)]],  # retrieving a SINGLE attribute
    )
    if i < len(meta_keys):
        first_results.append(result)
# Read the clock once so TPS and MetaPS are computed from the same interval.
elapsed = time.perf_counter() - start

for shown in first_results:
    print(shown)

# One attribute per request, so MetaPS equals TPS here.
print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests / elapsed
    )
)

{'obj_id': 32111518807538522445086984615500166, 'metadata': {'name': '980579/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32111518807538522445086984615500166, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1393.3865776168705, MetaPS: 1393.3847260357024


### Retrieving multiple attributes from a single sub-object

In [11]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],  # second ID returned by create_objects (a sub-object)
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes in one request
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
# Every request asks for len(meta_keys) attributes.
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '980579/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32111518807538522445086984615500166,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1227.9586564517904, MetaPS: 7367.741153510709


### Retrieving multiple attributes from multiple sub-objects

In [12]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
num_requests = 1000

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retrieving MULTIPLE attributes for every sub-object
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)

# Use the number of sub-objects actually returned instead of a hard-coded
# count; the response carries None when the object has no sub-objects.
num_sub_objects = len(result["sub_obj_metadata"] or [])
print(
    "Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        num_requests / elapsed,
        num_requests * len(meta_keys) * num_sub_objects / elapsed,
    )
)

{'metadata': {},
 'obj_id': 32111518807537600107883294842952070,
 'sub_obj_metadata': [{'metadata': {'name': '980579/arr3',
                                    'shape': [15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '980579/arr3',
                       'obj_id': 32111518807549904086180476293749126},
                      {'metadata': {'name': '980579/arr6',
                                    'shape': [1, 1, 1, 2, 5],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '980579/arr6',
                       'obj_id': 32111518807538522445086984615500166},
                      {'metadata': {'name': '980579/gnn_ar

### Retrieving different sets of attributes from multiple sub-objects

In [13]:
# A different attribute list per sub-object, keyed by the sub-object's name.
sub_meta_keys = {
    "{}/arr5".format(name_id): ["name", "type", "voffset"],
    "{}/arr4".format(name_id): ["type", "vcount"],
}
# Attributes requested per call, derived from the request itself rather than
# hard-coded.
attrs_per_request = sum(len(keys) for keys in sub_meta_keys.values())
num_requests = 1000

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=sub_meta_keys,  # different set of metadata per sub-object
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n Different Metadata sets retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        num_requests / elapsed, num_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32111518807537600107883294842952070,
 'sub_obj_metadata': [{'metadata': {'name': '980579/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '980579/arr5',
                       'obj_id': 32111518807538762252759947134638470},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '980579/arr4',
                       'obj_id': 32111518807538909826712541106018694}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1190.0183029367263, MetaPS: 5950.064504011392


### Retrieving an array slice of a single object

In [14]:
num_requests = 1000
# One slice per axis of the container's own 3-D data array.
region = [slice(0, 1), slice(0, 1), slice(0, 2, -1)]

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in Sample/s is carried over unchanged; its
# meaning is not evident from this notebook -- confirm what a "sample" is.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        num_requests / elapsed, (num_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.06581965, 0.14443375]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1127.922226874497, Sample/s = 187.98678504762702


### Retrieving array slices of multiple sub-objects (Large subarray included)

In [15]:
num_requests = 1000

# Slice of the container's own 3-D array.
region = [slice(0, 1), slice(0, 2), slice(0, 2, -1)]
# (sub-object name, per-axis slices) pairs; built once, outside the timed loop.
sub_obj_regions = [
    (
        "{}/arr5".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
    (
        "{}/arr4".format(name_id),
        [slice(0, 2, -1), slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    ),
    (
        "{}/arr3".format(name_id),
        [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
    ),
    (
        "{}/arr6".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
]

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=region,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the Sample/s scaling (4 / 6) is carried over unchanged; its
# meaning is not evident from this notebook -- confirm what a "sample" is.
print(
    "\n retrieving array slices of multiple sub-objects, TPS={}, Sample/s={}".format(
        num_requests / elapsed, (num_requests / (4 / 6)) / elapsed
    )
)

{'array_slice': array([[[0.06581965, 0.14443375],
        [0.47286405, 0.54217128]]]),
 'sub_obj_slices': [{'array': array([[[[[0.99800681, 1.50862712],
          [0.72772797, 0.80912739]]]]]),
                     'id': 32111518807538762252759947134638470,
                     'name': '980579/arr5'},
                    {'array': array([[[[[0.71853013, 0.5677142 ]]]],



       [[[[0.85894364, 1.49865614]]]]]),
                     'id': 32111518807538909826712541106018694,
                     'name': '980579/arr4'},
                    {'array': array([[[0.60643314, 0.86787323, 0.11887073, 0.32200975, 0.06581965],
        [0.54241194, 0.61326484, 0.10442969, 0.08650344, 0.47286405]]]),
                     'id': 32111518807549904086180476293749126,
                     'name': '980579/arr3'},
                    {'array': array([[[[[1.99601363, 3.01725424],
          [1.45545594, 1.61825478]]]]]),
                     'id': 32111518807538522445086984615500166,
                     '

### Retrieving slices of multiple sub-objects (GNN arrays only)

In [19]:
num_requests = 1000

gnn_arr1_name = "{}/gnn_arr1".format(name_id)
gnn_arr2_name = "{}/gnn_arr2".format(name_id)
# (sub-object name, per-axis slices) pairs; built once, outside the timed loop.
# The same sub-object may be listed several times with different regions; the
# three gnn_arr2 entries are identical, as in the original request.
sub_obj_regions = [
    (gnn_arr1_name, [slice(0, 20), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 10, -1), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 15, -1), slice(0, 2)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
]

start = time.perf_counter()
for i in range(num_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,  # skip the container's own array; fetch sub-object slices only
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before printing and read it once, so both rates share the
# same interval and pprint is not counted as request time.
elapsed = time.perf_counter() - start

pprint(result)
print(
    "\n retrieving array slices of multiple sub-objects (GNN arrays only), TPS={}, Sample/s={}".format(
        num_requests / elapsed, num_requests / elapsed
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.57183413],
       [0.67904876],
       [0.0145189 ],
       [0.29555364],
       [0.87452062],
       [0.44289371],
       [0.80648223],
       [0.91476421],
       [0.61382033],
       [0.18692719],
       [0.47040631],
       [0.76709546],
       [0.74777212],
       [0.12612044],
       [0.60623725],
       [0.73786713],
       [0.20566671],
       [0.96760431],
       [0.71751814],
       [0.27630647]]),
                     'id': 32111518807550125447109365103335814,
                     'name': '980579/gnn_arr1'},
                    {'array': array([[0.18692719],
       [0.61382033],
       [0.91476421],
       [0.80648223],
       [0.44289371],
       [0.87452062],
       [0.29555364],
       [0.0145189 ],
       [0.67904876],
       [0.57183413]]),
                     'id': 32111518807550125447109365103335814,
                     'name': '980579/gnn_arr1'},
                    {'array': array([[0.60623725, 0.771744

## Closing the Store

In [17]:
# Counterpart of the commented-out server-launch cell near the top: uncomment
# only if `server_process` was created by that cell in this kernel session.
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")