In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
import functools
import json
import os
import ssl
import time

import fsspec
import irods_fsspec
from fsspec.registry import get_filesystem_class
from irods.session import iRODSSession
irods_fsspec.register()

In [3]:
import logging
# logging.basicConfig(level='DEBUG')

In [4]:
# Placeholder; replaced by the parsed environment file at the end of this cell.
_ENV = None
# TLS context handed to the iRODSSession constructors further down.
ssl_context = ssl.create_default_context(
    purpose=ssl.Purpose.SERVER_AUTH,
    cafile=None,
    capath=None,
    cadata=None,
)
# Use IRODS_ENVIRONMENT_FILE when it is set; otherwise fall back to the
# per-user default location.
env_file = os.environ.get('IRODS_ENVIRONMENT_FILE')
if env_file is None:
    env_file = os.path.expanduser('~/.irods/irods_environment.json')
# session = iRODSSession(irods_env_file=env_file, ssl_context=ssl_context)
with open(env_file, 'r') as f:
    _ENV = json.load(f)

In [5]:
# Scratch collection that holds every object the benchmarks create.
test_subcollection = '/iplant/home/{}/delete_me_benchmark_test'.format(_ENV["irods_user_name"])
with iRODSSession(irods_env_file=env_file, ssl_context=ssl_context) as session:
    # Only create the collection when it is missing, so the cell can be re-run.
    if session.collections.exists(test_subcollection):
        pass
    else:
        session.collections.create(test_subcollection)

In [8]:
@functools.lru_cache(None)
def measure_data_object_create_delete(n_iter):
    """Time creating and then unlinking ``n_iter`` small iRODS data objects.

    Objects are named ``test_<i>.txt`` inside ``test_subcollection``. The
    pre-cleanup, create and unlink phases each run in their own
    ``iRODSSession``.

    Results are memoised per ``n_iter`` by ``functools.lru_cache``, so a
    repeated call with the same value returns the earlier timings without
    running the benchmark again.

    Parameters
    ----------
    n_iter : int
        Number of objects to create and delete; must be at least 1.

    Returns
    -------
    tuple of (float, float)
        ``(create_seconds_per_object, unlink_seconds_per_object)``.

    Raises
    ------
    ValueError
        If ``n_iter`` is less than 1, because the per-object averages would
        be undefined.
    """
    if n_iter < 1:
        raise ValueError(f'n_iter must be a positive integer, got {n_iter!r}')

    # Build the object paths once; all three phases walk the same list.
    object_paths = [f'{test_subcollection}/test_{i}.txt' for i in range(n_iter)]

    # Pre-emptive cleanup of leftovers from incomplete benchmarks.
    with iRODSSession(irods_env_file=env_file, ssl_context=ssl_context) as session:
        for path in object_paths:
            if session.data_objects.exists(path):
                session.data_objects.unlink(path)

    with iRODSSession(irods_env_file=env_file, ssl_context=ssl_context) as session:
        start = time.perf_counter()
        for path in object_paths:
            test_file = session.data_objects.create(path)
            with test_file.open(mode='w') as f:
                f.write(b'foo!\n')
        # Stop the clock before the ``with`` block exits so that session
        # teardown is not billed to the create loop.
        create_elapsed = time.perf_counter() - start

    create_t_per_iter = create_elapsed / n_iter
    print(f'created {n_iter} in {create_elapsed} s -- {create_t_per_iter} s / iter')

    with iRODSSession(irods_env_file=env_file, ssl_context=ssl_context) as session:
        start = time.perf_counter()
        for path in object_paths:
            session.data_objects.unlink(path)
        # Same reasoning as above: measure only the unlink calls.
        unlink_elapsed = time.perf_counter() - start

    unlink_t_per_iter = unlink_elapsed / n_iter
    print(f'deleted {n_iter} in {unlink_elapsed} s -- {unlink_t_per_iter} s / iter')
    return create_t_per_iter, unlink_t_per_iter

In [11]:
# Single run with 20 objects; the returned tuple is (create, unlink) seconds per object.
measure_data_object_create_delete(20)

created 20 in 51.42954418599993 s -- 2.5714772092999967 s / iter
deleted 20 in 48.909453074999874 s -- 2.445472653749994 s / iter


(2.5714772092999967, 2.445472653749994)

In [None]:
# Batch sizes to sweep, and one result list per measured operation.
n_iter_vals = [1, 5, 10, 50, 100, 150]
create_t_per_iter_vals, unlink_t_per_iter_vals = [], []
for n_iter in n_iter_vals:
    # Echo the batch size so progress is visible while the sweep runs.
    print(n_iter)
    create_t_per_iter, unlink_t_per_iter = measure_data_object_create_delete(n_iter)
    create_t_per_iter_vals += [create_t_per_iter]
    unlink_t_per_iter_vals += [unlink_t_per_iter]

In [None]:
# Linear-scale view of per-object time against batch size.
plt.plot(n_iter_vals, create_t_per_iter_vals, label='Create iRODS data object')
plt.plot(n_iter_vals, unlink_t_per_iter_vals, label='Unlink iRODS data object')
plt.xlabel('N iterations')
plt.ylabel('sec / iteration')
# The label= arguments above are only displayed once a legend is drawn.
plt.legend()
plt.grid()

In [None]:
# Log-scale view of the same two curves with an S3 reference level.
plt.plot(n_iter_vals, create_t_per_iter_vals, label='Create iRODS data object')
plt.plot(n_iter_vals, unlink_t_per_iter_vals, label='Unlink iRODS data object')
# axhline does not take its colour from the plot colour cycle, so without an
# explicit colour it would be drawn in the same colour as the first curve.
# 'C3' matches the colour used for this reference in the combined figure.
# NOTE(review): 1/3500 presumably derives from S3's documented 3,500 write
# requests per second per prefix -- confirm the source of this number.
plt.axhline(1/3500, c='C3', ls=':', label='Upload AWS S3 object')
plt.xlabel('N iterations')
plt.ylabel('sec / iteration')
plt.yscale('log')
plt.legend()
plt.grid()

In [None]:
@functools.lru_cache(None)
def measure_data_object_get(n_iter):
    """Time fetching and reading ``n_iter`` small iRODS data objects.

    Objects are named ``test_<i>.txt`` inside ``test_subcollection``. Any
    that do not exist yet are created before the timer starts. The objects
    are not removed afterwards.

    The function opens its own ``iRODSSession`` instead of relying on a
    module-level ``session`` variable left over from another cell.

    Results are memoised per ``n_iter`` by ``functools.lru_cache``, so a
    repeated call with the same value returns the earlier timing without
    running the benchmark again.

    Parameters
    ----------
    n_iter : int
        Number of objects to read; must be at least 1.

    Returns
    -------
    float
        Seconds per object for the get-and-read loop.

    Raises
    ------
    ValueError
        If ``n_iter`` is less than 1, because the per-object average would
        be undefined.
    """
    if n_iter < 1:
        raise ValueError(f'n_iter must be a positive integer, got {n_iter!r}')

    object_paths = [f'{test_subcollection}/test_{i}.txt' for i in range(n_iter)]

    with iRODSSession(irods_env_file=env_file, ssl_context=ssl_context) as session:
        # Untimed setup: make sure every object to be read exists.
        for path in object_paths:
            if not session.data_objects.exists(path):
                test_file = session.data_objects.create(path)
                with test_file.open(mode='w') as f:
                    f.write(b'foo!\n')

        start = time.perf_counter()
        for path in object_paths:
            test_file = session.data_objects.get(path)
            with test_file.open(mode='r') as f:
                _ = f.read()
        # Stop the clock inside the ``with`` block so session teardown is
        # not counted.
        get_elapsed = time.perf_counter() - start

    get_t_per_iter = get_elapsed / n_iter
    print(f'got {n_iter} in {get_elapsed} s -- {get_t_per_iter} s / iter')
    return get_t_per_iter

In [None]:
# Run the read benchmark for each batch size in n_iter_vals.
get_t_per_iter_vals = list()
for n_iter in n_iter_vals:
    # Echo the batch size so progress is visible while the sweep runs.
    print(n_iter)
    get_t_per_iter = measure_data_object_get(n_iter)
    get_t_per_iter_vals += [get_t_per_iter]

In [None]:
# Combined log-scale figure: three measured iRODS curves plus two S3 reference levels.
for timings, curve_label in (
    (create_t_per_iter_vals, 'Create iRODS data object'),
    (unlink_t_per_iter_vals, 'Unlink iRODS data object'),
    (get_t_per_iter_vals, 'Get iRODS data object'),
):
    plt.plot(n_iter_vals, timings, label=curve_label)
# NOTE(review): 1/3500 and 1/5500 are hard-coded reference levels whose
# origin is not stated in this notebook -- confirm where they come from.
for level, colour, reference_label in (
    (1/3500, 'C3', 'Upload AWS S3 object'),
    (1/5500, 'C4', 'Retrieve AWS S3 object'),
):
    plt.axhline(level, c=colour, ls=':', label=reference_label)
plt.xlabel('N iterations')
plt.ylabel('sec / iteration')
plt.yscale('log')
plt.legend()
plt.grid()