# BULKI_Store Demo

## Starting Server

In [8]:

# Set RUST_LOG=debug in the kernel's environment before the client library is imported.
# Presumably the Rust-backed BULKI_Store client reads it to enable debug logging (TODO confirm).
%env RUST_LOG=debug
print("Let's start")

env: RUST_LOG=debug
Let's start


In [9]:
# Optional: launch the server from the notebook. The whole cell is commented out,
# so nothing in this notebook starts a server; presumably one must already be
# running before the cells below are executed (TODO confirm).
# NOTE(review): if this is re-enabled, be aware that
#   - the mpirun path is hard-coded to a Homebrew install location;
#   - `env={...}` replaces the child's entire environment instead of extending it,
#     so PATH and other inherited variables are not passed on;
#   - stdout/stderr are piped but never read in this notebook.
# import subprocess

# # Start the server as a background process
# server_process = subprocess.Popen(
#     ['/opt/homebrew/bin/mpirun', '-np', '4', 'target/debug/bulkistore-server'],
#     env={'RUST_LOG': 'debug'},
#     stdout=subprocess.PIPE,
#     stderr=subprocess.PIPE
# )

# print("Server started with PID:", server_process.pid)

## Creating Arrays

In [10]:
import bkstore_client as bkc
import numpy as np
import time
from pprint import pprint


# Initialise the client library before any other bkc call in this notebook.
bkc.init()

# Edge length shared by every dimension of the demo arrays (e.g. 10 for a smaller run).
dim_size = 15
shape_5d = (dim_size,) * 5
shape_3d = (dim_size,) * 3

print("5D array size = {} elements".format(dim_size**5))

# Two random 5D operands and one random 3D array, drawn in this order.
rd1 = np.random.rand(*shape_5d)
rd2 = np.random.rand(*shape_5d)
arr3 = np.random.rand(*shape_3d)

# For each array, print its shape and the first two rows of the innermost 2D plane.
print("rd1.shape=", rd1.shape)
print("rd1 last 2 dim preview:")
print(rd1[:1, :1, :1, :2, :])

print("rd2.shape=", rd2.shape)
print("rd2 last 2 dim preview:")
print(rd2[:1, :1, :1, :2, :])

print("arr3.shape=", arr3.shape)
print("arr3 last 2 dim preview:")
print(arr3[:1, :2, :])

5D array size = 759375 elements
rd1.shape= (15, 15, 15, 15, 15)
rd1 last 2 dim preview:
[[[[[8.60659800e-05 9.73895135e-01 5.76562820e-01 9.21060877e-01
     6.76601485e-01 2.27258591e-01 4.32011075e-02 9.13933589e-01
     7.49988178e-01 4.78288274e-01 9.59193667e-01 1.49411865e-01
     3.68864370e-01 8.31510562e-01 6.67647388e-01]
    [8.91248301e-02 5.66547528e-01 2.94469272e-01 7.32746613e-01
     8.96317163e-01 6.39601731e-01 1.95419276e-01 1.92912133e-01
     1.29165706e-02 5.96497823e-01 8.72988232e-01 7.85182415e-01
     4.92715796e-01 6.23480819e-01 4.39089218e-01]]]]]
rd2.shape= (15, 15, 15, 15, 15)
rd2 last 2 dim preview:
[[[[[0.3321544  0.33543769 0.62572182 0.68339472 0.10839495 0.61176486
     0.90140437 0.3781704  0.68609746 0.35257004 0.22016598 0.88263596
     0.10707881 0.71562959 0.46376008]
    [0.03137295 0.7918015  0.65082691 0.06104403 0.73150472 0.64130607
     0.9907016  0.40456413 0.08444942 0.98090678 0.36138976 0.93001023
     0.24211556 0.55888293 0.45297701

## Client-Side NDArray Arithmetic Operation

In [11]:
# Add the two 5D arrays through the client library; the printed preview
# shows the result is the element-wise sum of rd1 and rd2.
arr4 = bkc.polymorphic_add(rd1, rd2)

# First two rows of the innermost 2D plane, same window as the rd1/rd2 previews.
arr4_preview = arr4[0:1, 0:1, 0:1, 0:2, 0:]

print("arr4.shape=", arr4.shape)
print("arr4 last 2 dim preview:", arr4_preview, sep="\n")

arr4.shape= (15, 15, 15, 15, 15)
arr4 last 2 dim preview:
[[[[[0.33224046 1.30933282 1.20228464 1.60445559 0.78499644 0.83902345
     0.94460548 1.29210399 1.43608564 0.83085831 1.17935965 1.03204783
     0.47594318 1.54714015 1.13140747]
    [0.12049778 1.35834903 0.94529618 0.79379064 1.62782188 1.2809078
     1.18612087 0.59747627 0.09736599 1.57740461 1.23437799 1.71519264
     0.73483136 1.18236375 0.89206623]]]]]


## Remote Array Slicing

In [12]:
# One slice per dimension of arr4.
# NOTE: judging by the printed result, a negative step walks the [start, stop)
# range backwards (indices 9, 7, 5, 3, 1 of the last axis here). Plain
# Python/NumPy slicing would select nothing for slice(0, 10, -2).
arr5_region = [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 10, -2)]
arr5 = bkc.array_slicing(arr4, arr5_region)

arr5_preview = arr5[0:1, 0:1, 0:1, 0:2, 0:]

print("arr5.shape=", arr5.shape)
print("arr5 last 2 dim preview:", arr5_preview, sep="\n")

arr5.shape= (1, 1, 1, 2, 5)
arr5 last 2 dim preview:
[[[[[0.83085831 1.29210399 0.83902345 1.60445559 1.30933282]
    [1.57740461 0.59747627 1.2809078  0.79379064 1.35834903]]]]]


## Remote Array Arithmetic Operation

In [13]:
# Double every element of the slice; the printed values are exactly 2 * arr5.
arr6 = bkc.times_two(arr5)

print("arr6.shape=", arr6.shape)
print("arr6:", arr6, sep="\n")

arr6.shape= (1, 1, 1, 2, 5)
arr6:
[[[[[1.66171663 2.58420797 1.6780469  3.20891119 2.61866565]
    [3.15480921 1.19495253 2.5618156  1.58758128 2.71669805]]]]]


## Creating Object with Multiple NDArrays

1. You can attach a series of metadata attributes directly to the object you are creating
2. You can attach a series of NDArrays directly to the object you are creating along with the metadata attributes of these NDArrays.
3. Each of these NDArrays will become a sub-object of the main object you are creating.

In [14]:
# Create one container object (carrying arr3 as its own data) with six NDArray
# sub-objects attached, each described by its own metadata dict.

# Random six-digit name so repeated runs do not reuse the same object name.
# Drawing an integer avoids names like "50000." that truncating a float string can produce.
name_id = "{:06d}".format(np.random.randint(0, 1000000))

gnn_arr1 = np.random.rand(100000, 2)
gnn_arr2 = np.random.rand(1, 100000)

# (short name, array) pairs in the order they are sent to the store.
# Names, metadata and data are all derived from this one table so that they
# cannot drift apart (the arr5 entry previously reported arr6's shape).
sub_arrays = [
    ("arr6", arr6),
    ("arr5", arr5),
    ("arr4", arr4),
    ("arr3", arr3),
    ("gnn_arr1", gnn_arr1),
    ("gnn_arr2", gnn_arr2),
]
sub_names = ["{}/{}".format(name_id, short_name) for short_name, _ in sub_arrays]

obj_ids = bkc.create_objects(
    obj_name_key="name",
    parent_id=None,
    metadata={
        "name": name_id,
        "type": "container",
        "keys": sub_names,
        "ranges": [(1, 100), (100, 200)],
        "part_num": 1,
        "part_size": 100,
    },
    data=arr3,
    array_meta_list=[
        {
            "name": sub_name,
            "type": "array",
            "shape": array.shape,
            "vcount": 100,
            "voffset": 0,
            "vdim": 0,
        }
        for sub_name, (_, array) in zip(sub_names, sub_arrays)
    ],
    array_data_list=[array for _, array in sub_arrays],
)
# The returned list holds the container's ID first, followed by one ID per sub-object
# (seven IDs for six sub-arrays in the recorded run).
print(obj_ids)

[32119348377435464046736803755310233, 32119348377435593173945324017138841, 32119348377435832981618286536277145, 32119348377435999002314954217208985, 32119348377447159282479552790903961, 32119348377447399090152515310042265, 32119348377449741826649880718064793]


In [15]:
# Show the generated object name; later cells use it to address the container by name.
print(name_id)

276475


## Retrieve Metadata

### Retrieving a single metadata attribute from a single object


In HydraGNN, we need to retrieve a series of metadata attributes from a container. 

If every metadata attribute required its own request, the overhead would be too high.

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.


In [16]:
# Benchmark: fetch ONE attribute per request from the container, cycling through the keys.
# "title" is not among the attributes set at creation, so that request returns empty metadata.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        # Show one sample response per key (first pass over the key list only).
        print(result)
# Measure once so that both reported rates come from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
tps = n_requests / elapsed
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32119348377435464046736803755310233, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'keys': ['276475/arr6', '276475/arr5', '276475/arr4', '276475/arr3', '276475/gnn_arr1', '276475/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'name': '276475'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1229.7797405393867 MetaPS: 1229.7768559517883


In [17]:
# Benchmark: same as the by-ID run, but the container is addressed by its name
# (name_id) instead of its numeric object ID.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size", "title"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_id=name_id,
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        # Show one sample response per key (first pass over the key list only).
        print(result)
# Measure once so that both reported rates come from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
tps = n_requests / elapsed
print(
    "Single Metadata retrieval of single object : TPS = {} MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32119348377435464046736803755310233, 'metadata': {'type': 'container'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'keys': ['276475/arr6', '276475/arr5', '276475/arr4', '276475/arr3', '276475/gnn_arr1', '276475/gnn_arr2']}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'ranges': [(1, 100), (100, 200)]}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'name': '276475'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'part_num': 1}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {'part_size': 100}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435464046736803755310233, 'metadata': {}, 'sub_obj_metadata': None}
Single Metadata retrieval of single object : TPS = 1315.2341301410434 MetaPS: 1315.2320680133282


### Retrieving multiple metadata attributes from a single object

Now we can combine the metadata retrieval of a single container into one single request. 

MetaPS is a new metric we designed to calculate the number of metadata attributes we can retrieve from one object within a second.

TPS drops slightly, but MetaPS improves substantially (roughly 6x in the run below), because each request now returns six attributes.

In [18]:
# Benchmark: fetch ALL listed attributes of the container in a single request.
meta_keys = ["type", "keys", "ranges", "name", "part_num", "part_size"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        meta_keys=meta_keys,  # retrieving MULTIPLE!!!
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Every request returns len(meta_keys) attributes, hence the MetaPS multiplier.
print(
    "\n Multiple Metadata retrieval of single object : TPS={}, MetaPS: {} ".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'keys': ['276475/arr6',
                       '276475/arr5',
                       '276475/arr4',
                       '276475/arr3',
                       '276475/gnn_arr1',
                       '276475/gnn_arr2'],
              'name': '276475',
              'part_num': 1,
              'part_size': 100,
              'ranges': [(1, 100), (100, 200)],
              'type': 'container'},
 'obj_id': 32119348377435464046736803755310233,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of single object : TPS=1160.3617720322732, MetaPS: 6962.155223451168 


### Retrieving a single attribute from a single sub-object

In [19]:
# Benchmark: fetch ONE attribute per request from the first sub-object (obj_ids[1]).
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    k = i % len(meta_keys)
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=[meta_keys[k]],  # retrieving a SINGLE attribute per request
    )
    if i < len(meta_keys):
        # Show one sample response per key (first pass over the key list only).
        print(result)
# Measure once so that both reported rates come from the same interval.
elapsed = time.perf_counter() - start

# One attribute per request, so MetaPS equals TPS here.
tps = n_requests / elapsed
print(
    "Single Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        tps, tps
    )
)

{'obj_id': 32119348377435593173945324017138841, 'metadata': {'name': '276475/arr6'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435593173945324017138841, 'metadata': {'type': 'array'}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435593173945324017138841, 'metadata': {'shape': [1, 1, 1, 2, 5]}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435593173945324017138841, 'metadata': {'vcount': 100}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435593173945324017138841, 'metadata': {'voffset': 0}, 'sub_obj_metadata': None}
{'obj_id': 32119348377435593173945324017138841, 'metadata': {'vdim': 0}, 'sub_obj_metadata': None}
Single Metadata retrieval of a single sub-object : TPS=1337.6353125435521, MetaPS: 1337.6331795730089


### Retrieving multiple attributes from a single sub-object

In [20]:
# Benchmark: fetch ALL listed attributes of the first sub-object (obj_ids[1]) per request.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[1],
        meta_keys=meta_keys,  # retrieving MULTIPLE attributes per request
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Every request returns len(meta_keys) attributes, hence the MetaPS multiplier.
print(
    "\n Multiple Metadata retrieval of a single sub-object : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * len(meta_keys) / elapsed
    )
)

{'metadata': {'name': '276475/arr6',
              'shape': [1, 1, 1, 2, 5],
              'type': 'array',
              'vcount': 100,
              'vdim': 0,
              'voffset': 0},
 'obj_id': 32119348377435593173945324017138841,
 'sub_obj_metadata': None}

 Multiple Metadata retrieval of a single sub-object : TPS=1227.8993406301297, MetaPS: 7367.376632318905


### Retrieving multiple attributes from multiple sub-objects

In [21]:
# Benchmark: one request to the container that returns the listed attributes
# for each of its sub-objects.
meta_keys = ["name", "type", "shape", "vcount", "voffset", "vdim"]
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=meta_keys,  # retrieving MULTIPLE for every sub-object!!!
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)

# Count the attributes actually returned per request instead of hard-coding the
# number of sub-objects (the previous "* 3" did not match the six sub-objects created).
attrs_per_request = sum(
    len(sub_obj["metadata"]) for sub_obj in result["sub_obj_metadata"]
)
print(
    "Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32119348377435464046736803755310233,
 'sub_obj_metadata': [{'metadata': {'name': '276475/arr3',
                                    'shape': [15, 15, 15],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '276475/arr3',
                       'obj_id': 32119348377447159282479552790903961},
                      {'metadata': {'name': '276475/arr6',
                                    'shape': [1, 1, 1, 2, 5],
                                    'type': 'array',
                                    'vcount': 100,
                                    'vdim': 0,
                                    'voffset': 0},
                       'name': '276475/arr6',
                       'obj_id': 32119348377435593173945324017138841},
                      {'metadata': {'name': '276475/arr5',

### Retrieving different sets of attributes from multiple sub-objects

In [22]:
# Benchmark: one request to the container that asks each named sub-object
# for its own, different set of attributes.
sub_meta_keys = {
    "{}/arr5".format(name_id): ["name", "type", "voffset"],
    "{}/arr4".format(name_id): ["type", "vcount"],
}
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_metadata(
        obj_ids[0],
        sub_meta_keys=sub_meta_keys,  # retrieving different set of metadata for different sub-objects
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)

# Attributes returned per request, counted from the response
# (3 + 2 = 5 for the key sets requested above).
attrs_per_request = sum(
    len(sub_obj["metadata"]) for sub_obj in result["sub_obj_metadata"]
)
print(
    "\n Multiple Metadata retrieval of multiple sub-objects : TPS={}, MetaPS: {}".format(
        n_requests / elapsed, n_requests * attrs_per_request / elapsed
    )
)

{'metadata': {},
 'obj_id': 32119348377435464046736803755310233,
 'sub_obj_metadata': [{'metadata': {'name': '276475/arr5',
                                    'type': 'array',
                                    'voffset': 0},
                       'name': '276475/arr5',
                       'obj_id': 32119348377435832981618286536277145},
                      {'metadata': {'type': 'array', 'vcount': 100},
                       'name': '276475/arr4',
                       'obj_id': 32119348377435999002314954217208985}]}

 Multiple Metadata retrieval of a single sub-object : TPS=1184.3237843207348, MetaPS: 5921.603873131561


### Retrieving array of a single object

In [23]:
# Benchmark: fetch a small slice of the container's own 3D array (arr3).
# In the recorded output, slice(0, 2, -1) returns the two selected elements in reverse order.
n_requests = 1000

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 1), slice(0, 2, -1)],
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the divisor 6 in Sample/s is carried over unchanged; its meaning
# is not evident from this notebook - confirm what one "sample" is.
print(
    "\n retrieving array slice of a single 3D array, TPS={}, Sample/s = {}".format(
        n_requests / elapsed, (n_requests / 6) / elapsed
    )
)

{'array_slice': array([[[0.48205712, 0.67145357]]]), 'sub_obj_slices': []}

 retrieving array slice of a single 3D array, TPS=1000.2439617092201, Sample/s = 166.70716792840616


### Retrieving array slices of multiple sub-objects (Large subarray included)

In [24]:
# Benchmark: one request that returns a slice of the container's own array plus
# one slice from each of four sub-objects (including the large 5D arr4).
n_requests = 1000

# (sub-object name, one slice per dimension) pairs.
sub_obj_regions = [
    (
        "{}/arr5".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
    (
        "{}/arr4".format(name_id),
        [
            slice(0, 2, -1),
            slice(0, 1),
            slice(0, 1),
            slice(0, 1),
            slice(0, 2, -1),
        ],
    ),
    (
        "{}/arr3".format(name_id),
        [slice(0, 1), slice(0, 2), slice(0, 10, -2)],
    ),
    (
        "{}/arr6".format(name_id),
        [slice(0, 1), slice(0, 1), slice(0, 1), slice(0, 2), slice(0, 2, -1)],
    ),
]

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=[slice(0, 1), slice(0, 2), slice(0, 2, -1)],
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# NOTE(review): the 4 / 6 factor in Sample/s is carried over unchanged; its
# meaning is not evident from this notebook - confirm what one "sample" is.
print(
    "\n retrieving array slices of multiple sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, (n_requests / (4 / 6)) / elapsed
    )
)

{'array_slice': array([[[0.48205712, 0.67145357],
        [0.83928621, 0.8233581 ]]]),
 'sub_obj_slices': [{'array': array([[[[[1.29210399, 0.83085831],
          [0.59747627, 1.57740461]]]]]),
                     'id': 32119348377435832981618286536277145,
                     'name': '276475/arr5'},
                    {'array': array([[[[[0.73377526, 1.54474945]]]],



       [[[[1.30933282, 0.33224046]]]]]),
                     'id': 32119348377435999002314954217208985,
                     'name': '276475/arr4'},
                    {'array': array([[[0.70220418, 0.42578952, 0.5314196 , 0.41273197, 0.48205712],
        [0.46147236, 0.18501475, 0.72301612, 0.88348578, 0.83928621]]]),
                     'id': 32119348377447159282479552790903961,
                     'name': '276475/arr3'},
                    {'array': array([[[[[2.58420797, 1.66171663],
          [1.19495253, 3.15480921]]]]]),
                     'id': 32119348377435593173945324017138841,
                     '

### Retrieving slices of multiple objects (only GNN arrays)

In [25]:
# Benchmark: one request that returns six slices taken only from the two GNN
# sub-arrays; no slice of the container's own array is requested (region=None).
n_requests = 1000

gnn_arr1_name = "{}/gnn_arr1".format(name_id)
gnn_arr2_name = "{}/gnn_arr2".format(name_id)

# (sub-object name, one slice per dimension) pairs. The same sub-object may be
# listed several times; the gnn_arr2 region is requested three times, as in the original.
sub_obj_regions = [
    (gnn_arr1_name, [slice(0, 20), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 10, -1), slice(0, 1)]),
    (gnn_arr1_name, [slice(0, 15, -1), slice(0, 2)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
    (gnn_arr2_name, [slice(0, 1), slice(0, 20, -1)]),
]

start = time.perf_counter()
for i in range(n_requests):
    result = bkc.get_object_data(
        obj_ids[0],
        region=None,
        sub_obj_regions=sub_obj_regions,
    )
# Stop the clock before pretty-printing so display time is not counted,
# and so that both rates are derived from the same interval.
elapsed = time.perf_counter() - start

pprint(result)
# Sample/s is reported with the same formula as TPS here, as in the original.
print(
    "\n retrieving array slices of multiple GNN sub-objects, TPS={}, Sample/s={}".format(
        n_requests / elapsed, n_requests / elapsed
    )
)

{'array_slice': None,
 'sub_obj_slices': [{'array': array([[0.4725509 ],
       [0.4009871 ],
       [0.28816418],
       [0.03304448],
       [0.96981313],
       [0.91304028],
       [0.2036412 ],
       [0.24010823],
       [0.60022992],
       [0.71936996],
       [0.98409307],
       [0.82907341],
       [0.75973639],
       [0.9982704 ],
       [0.49448435],
       [0.67091999],
       [0.31854262],
       [0.21357598],
       [0.71976736],
       [0.88784411]]),
                     'id': 32119348377447399090152515310042265,
                     'name': '276475/gnn_arr1'},
                    {'array': array([[0.71936996],
       [0.60022992],
       [0.24010823],
       [0.2036412 ],
       [0.91304028],
       [0.96981313],
       [0.03304448],
       [0.28816418],
       [0.4009871 ],
       [0.4725509 ]]),
                     'id': 32119348377447399090152515310042265,
                     'name': '276475/gnn_arr1'},
                    {'array': array([[0.49448435, 0.498407

## Closing the Store

In [26]:
# Only applicable when the server was launched from the (commented-out) Popen
# cell in "Starting Server"; `server_process` is not defined otherwise.
# server_process.terminate()
# server_process.wait()
# print("Server process terminated.")