# BULKI_Store Demo

## Starting Server

In [1]:
print("Let's start")

Let's start


In [2]:
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [3]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


bkc.init()
dim_size = 15

print("5D array size = {} elements".format(dim_size**5))

# dim_size = 10
rd1 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)
rd2 = np.random.rand(dim_size, dim_size, dim_size, dim_size, dim_size)

print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[0:1, 0:1, 0:1, 0:2, 0:])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[0:1, 0:1, 0:1, 0:2, 0:])

arr3 = np.random.rand(dim_size, dim_size, dim_size)
print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[0:1, 0:2, 0:])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[0.40459251 0.83067646 0.28614771 0.98900449 0.14954358 0.02994448
     0.47649712 0.49971808 0.41403076 0.83319263 0.44445731 0.13533823
     0.81790112 0.16490294 0.78862462]
    [0.71692633 0.91303549 0.64646808 0.24261111 0.31372621 0.51111374
     0.05274643 0.88009388 0.53418363 0.62297865 0.59518975 0.98695825
     0.72312317 0.84969319 0.21780238]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.53930558 0.67534664 0.17110583 0.1350512  0.02741573 0.92854687
     0.45273279 0.0190791  0.59197665 0.0286356  0.54248115 0.81663565
     0.12594329 0.78245297 0.18741576]
    [0.58148536 0.6030635  0.66852038 0.2061908  0.52767463 0.53432944
     0.21228742 0.82306601 0.28019941 0.33324645 0.74196399 0.28317311
     0.98027109 0.96559209 0.26398958]]]]]
arr3.shape= (15, 15, 15)
arr3 last 2 dim preview:
[[[0.54868844 0.15436277 0.03920907 0.85561122 0.93497556 0.36623018
   0.

## Client-Side NDArray Arithmetic Operation

In [4]:
arr4 = bkc.polymorphic_add(rd1, rd2)

print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:")
print(arr4[0:1, 0:1, 0:1, 0:2, 0:])

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[0.94389809 1.50602309 0.45725354 1.12405569 0.17695931 0.95849135
     0.92922991 0.51879718 1.00600741 0.86182824 0.98693846 0.95197389
     0.94384441 0.9473559  0.97604038]
    [1.29841169 1.51609899 1.31498846 0.44880191 0.84140084 1.04544318
     0.26503386 1.70315989 0.81438304 0.95622511 1.33715374 1.27013136
     1.70339426 1.81528528 0.48179196]]]]]


## Remote Array Slicing

In [5]:
arr5 = bkc.array_slicing(
    arr4, [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
)

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:")
print(arr5[0:1, 0:1, 0:1, 0:2, 0:])

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[0.86182824 0.51879718 0.95849135 1.12405569 1.50602309]
    [0.95622511 1.70315989 1.04544318 0.44880191 1.51609899]]]]]


## Remote Array Arithmetic Operation

In [6]:
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:")
print(arr6)

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[1.72365647 1.03759436 1.91698269 2.24811138 3.01204618]
    [1.91245021 3.40631977 2.09088636 0.89760382 3.03219799]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [7]:
# Create an 3D array

name_id = str(np.random.rand(1)[0] * 1000000)[0:6]

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": "{}".format(name_id),
        "type": "container",
        "keys": [
            "{}/arr6".format(name_id),
            "{}/arr5".format(name_id),
            "{}/arr4".format(name_id),
            "{}/arr3".format(name_id),
            "{}/gnn_arr1".format(name_id),
            "{}/gnn_arr2".format(name_id),
        ],
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=[
        {
            "name": "{}/arr6".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr5".format(name_id),
            "type": "array",
            "shape": arr6.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr4".format(name_id),
            "type": "array",
            "shape": arr4.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/arr3".format(name_id),
            "type": "array",
            "shape": arr3.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr1".format(name_id),
            "type": "array",
            "shape": gnn_arr1.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
        {
            "name": "{}/gnn_arr2".format(name_id),
            "type": "array",
            "shape": gnn_arr2.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        },
    ],
    array_data_list=[arr6, arr5, arr4, arr3, gnn_arr1, gnn_arr2],
)
print(obj_ids)

[32110665600477932833558964742916116, 32110665600478541576113401453086740, 32110665600478818277274511391328276, 32110665600478984297971179072260116, 32110665600488410584192848948103188, 32110665600488631945121737757689876, 32110665600491177595803913970780180]


## Retrieve Metadata

### Retrieve a single metadata attributes from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every single metadata retrieval requires a single request, that would be too much overhead.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [8]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32110665600477932833558964742916116, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32110665600477932833558964742916116, 'metadata': {'keys': ['984488/arr6', '984488/arr5', '984488/arr4', '984488/arr3', '984488/gnn_arr1', '984488/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32110665600477932833558964742916116, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32110665600477932833558964742916116, 'metadata': {'name': '984488'}, 'sub_obj_metadata': None}
{'obj_id': 32110665600477932833558964742916116, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32110665600477932833558964742916116, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1310.7343361568016 MetaPS: 1310.732697723009


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

Slight drop on TPS but MEGA IMPROVEMENT for MetaPS

In [9]:
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE!!!
    )
pprint(result)
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'keys': ['984488/arr6',
                       '984488/arr5',
                       '984488/arr4',
                       '984488/arr3',
                       '984488/gnn_arr1',
                       '984488/gnn_arr2'],
              'name': '984488',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32110665600477932833558964742916116,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=1218.4406368432765, MetaPS: 7310.635326134005 


### Retrieving single attributes from a single sub-object

In [10]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=[meta_keys[k]],  # retreiving SINGLE!!!
    )
    if i < len(meta_keys):
        print(result)

print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 / (time.time() - start)
    )
)

{'obj_id': 32110665600478541576113401453086740, 'metadata': {'name': '984488/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32110665600478541576113401453086740, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32110665600478541576113401453086740, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32110665600478541576113401453086740, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32110665600478541576113401453086740, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32110665600478541576113401453086740, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1257.237509243503, MetaPS: 1257.2356249692757


### Retrieving multiple attributes from single sub-objects

In [11]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retreiving SINGLE!!!
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) / (time.time() - start)
    )
)

{'metadata': {'name': '984488/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32110665600478541576113401453086740,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1292.1278947543533, MetaPS: 7752.755426674089


### Retrieving multiple attributes from multiple sub-objects

In [12]:
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retreiving MULTIPLE for every sub-object!!!
    )

pprint(result)
print(
    "Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * len(meta_keys) * 3 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32110665600477932833558964742916116,
 'sub_obj_metadata': [{'metadata': {'name': '984488/gnn_arr1',
                                    'shape': [100000, 2],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '984488/gnn_arr1',
                       'obj_id': 32110665600488631945121737757689876},
                      {'metadata': {'name': '984488/arr6',
                                    'shape': [1, 1, 1, 2, 5],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '984488/arr6',
                       'obj_id': 32110665600478541576113401453086740},
                      {'metadata': {'name': '984488

### Retrieving different set of attributes from multiple sub-objects

In [13]:
# meta_keys=["name",
#             "type",
#             "shape",
#             "vcount",
#             "voffset",
#             "vdim"]

start = time.time()
for i in range(1000):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys={
            "{}/arr5".format(name_id): ["name", "type", "voffset"],
            "{}/arr4".format(name_id): ["type", "vcount"],
        },  # retrieving different set of metadata for different sub-objects
    )

pprint(result)
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        1000 / (time.time() - start), 1000 * 5 / (time.time() - start)
    )
)

{'metadata': {},
 'obj_id': 32110665600477932833558964742916116,
 'sub_obj_metadata': [{'metadata': {'name': '984488/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '984488/arr5',
                       'obj_id': 32110665600478818277274511391328276},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '984488/arr4',
                       'obj_id': 32110665600478984297971179072260116}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1186.812676866555, MetaPS: 5934.056667972989


### Retrieving array of a single object

In [14]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        1000 / (time.time() - start), (1000 / 6) / (time.time() - start)
    )
)

{'array_slice': array([[[0.15436277, 0.54868844]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1101.132503842133, Sample/s = 183.52208397368884


### Retrieving array slices of multiple sub-objects (Large subarray included)

In [15]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 2), slice(0, 2, -1)],
        sub_obj_regions=[
            (
                "{}/arr5".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
            (
                "{}/arr4".format(name_id),
                [
                    slice(0, 2, -1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 1),
                    slice(0, 2, -1),
                ],
            ),
            (
                "{}/arr3".format(name_id),
                [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
            ),
            (
                "{}/arr6".format(name_id),
                [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000 / (4 / 6)) / (time.time() - start)
    )
)

{'array_slice': array([[[0.15436277, 0.54868844],
        [0.31656043, 0.47356675]]]),
 'sub_obj_slices': [{'array': array([[[[[0.51879718, 0.86182824],
          [1.70315989, 0.95622511]]]]]),
                     'id': 32110665600478818277274511391328276,
                     'name': '984488/arr5'},
                    {'array': array([[[[[1.42981419, 1.07258671]]]],



       [[[[1.50602309, 0.94389809]]]]]),
                     'id': 32110665600478984297971179072260116,
                     'name': '984488/arr4'},
                    {'array': array([[[0.82540016, 0.77201225, 0.36623018, 0.85561122, 0.15436277],
        [0.22685817, 0.62386712, 0.63168049, 0.48490492, 0.31656043]]]),
                     'id': 32110665600488410584192848948103188,
                     'name': '984488/arr3'},
                    {'array': array([[[[[1.03759436, 1.72365647],
          [3.40631977, 1.91245021]]]]]),
                     'id': 32110665600478541576113401453086740,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [16]:
start = time.time()
for i in range(1000):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=[
            (
                "{}/gnn_arr1".format(name_id),
                [slice(0, 20, -1), slice(0)],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [slice(0, 10, -1), slice(0)],
            ),
            (
                "{}/gnn_arr1".format(name_id),
                [slice(0, 15, -1), slice(0)],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [slice(0), slice(0, 20, -1)],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [slice(0), slice(0, 20, -1)],
            ),
            (
                "{}/gnn_arr2".format(name_id),
                [slice(0), slice(0, 20, -1)],
            ),
        ],
    )
pprint(result)
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s={}".format(
        1000 / (time.time() - start), (1000) / (time.time() - start)
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([], shape=(20, 0), dtype=float64),
                     'id': 32110665600488631945121737757689876,
                     'name': '984488/gnn_arr1'},
                    {'array': array([], shape=(10, 0), dtype=float64),
                     'id': 32110665600488631945121737757689876,
                     'name': '984488/gnn_arr1'},
                    {'array': array([], shape=(15, 0), dtype=float64),
                     'id': 32110665600488631945121737757689876,
                     'name': '984488/gnn_arr1'},
                    {'array': array([], shape=(0, 20), dtype=float64),
                     'id': 32110665600491177595803913970780180,
                     'name': '984488/gnn_arr2'},
                    {'array': array([], shape=(0, 20), dtype=float64),
                     'id': 32110665600491177595803913970780180,
                     'name': '984488/gnn_arr2'},
                    {'array': array([], shape=(0, 20), dty

## Closing the Store

In [17]:
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")