In [1]:
import ujson, csv, json
import random
import time
from pathlib import Path
from tqdm import tqdm_notebook as tqdm

In [2]:
# Directory holding every benchmark fixture (JSON / CSV inputs and the
# D-REPR YAML description) used by the cells below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
BENCHMARK_DIR = Path("/workspace/drepr/volumes/benchmark_data")
# Fail fast with a readable message; is_dir() also rejects the case where the
# path exists but is a regular file, which would only surface later in open().
assert BENCHMARK_DIR.is_dir(), "benchmark data directory not found: %s" % BENCHMARK_DIR

#### Python: JSON and CSV read timings

In [3]:
# Benchmark: time how long each Python JSON parser needs to load the small
# company/employee fixture. ujson runs first, then the stdlib json module,
# and each result is printed as "[<parser>] take <seconds> seconds".
json_path = BENCHMARK_DIR / "hr_company_employee.small.json"
for label, parser in (("ujson", ujson), ("json", json)):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    with open(json_path, "r") as f:
        data = parser.load(f)
    end = time.perf_counter()
    print('[%s] take %.9f seconds' % (label, end - start))
# `data` ends up holding the stdlib-json result (the last parser in the loop).

[ujson] take 0.002174854 seconds
[json] take 0.003240824 seconds


In [16]:
# Benchmark: time reading every row of the employee CSV into a list with the
# stdlib csv module.
# perf_counter() is a monotonic, high-resolution clock meant for timing
# intervals; time.time() follows the wall clock and can jump.
start = time.perf_counter()

# newline="" hands raw line endings to the csv module, as its documentation
# requires; without it, newlines embedded in quoted fields can be mis-parsed.
with open(BENCHMARK_DIR / "hr_employee.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=',')
    rows = list(reader)

end = time.perf_counter()
print('take %.9f seconds' % (end - start))

take 1.415404081 seconds


#### Rust: testjson and D-REPR timings

In [4]:
# Shell cell: change into the testjson cargo project and run its release build
# on the small company/employee JSON fixture. IPython substitutes
# {BENCHMARK_DIR} with the Python variable's value before the shell sees the line.
# The recorded output shows the program printing a single "runtime" line.
!cd /workspace/learn-tools/testjson && cargo run --release -- {BENCHMARK_DIR}/hr_company_employee.small.json

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.07s
[0m[0m[1m[32m     Running[0m `target/release/testjson /workspace/drepr/volumes/benchmark_data/hr_company_employee.small.json`
>>> [D-REPR] runtime: 1.294509ms


In [5]:
# Shell cell: run the release build of the cargo project found from the
# kernel's current working directory, passing the D-REPR YAML description,
# the small JSON fixture bound to the resource id "default", and `test -f read_data`.
# NOTE(review): there is no `cd` here, so this depends on where the kernel was
# started; the recorded output shows the binary under
# /workspace/drepr/drepr/rdrepr/target/release — confirm before re-running.
# NOTE(review): `-f read_data` presumably restricts timing to the data-reading
# step (the output line is "[DREPR] read data") — confirm against the CLI.
!cargo run --release -- {BENCHMARK_DIR}/hr_company_employee.repr.yml default:{BENCHMARK_DIR}/hr_company_employee.small.json test -f read_data

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.12s
[0m[0m[1m[32m     Running[0m `/workspace/drepr/drepr/rdrepr/target/release/drepr /workspace/drepr/volumes/benchmark_data/hr_company_employee.repr.yml 'default:/workspace/drepr/volumes/benchmark_data/hr_company_employee.small.json' test -f read_data`
>>> [DREPR] read data: 1.874618ms


In [20]:
# Shell cell: same cargo invocation as the JSON read benchmark, but the
# "default" resource is the employee CSV (the file the Python csv cell reads).
# NOTE(review): no `cd`, so this relies on the kernel's working directory being
# inside the drepr cargo project.
# NOTE(review): the YAML passed here is hr_company_employee.repr.yml, the same
# description used for the JSON runs — confirm it is the intended one for CSV input.
!cargo run --release -- {BENCHMARK_DIR}/hr_company_employee.repr.yml default:{BENCHMARK_DIR}/hr_employee.csv test -f read_data

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.30s
[0m[0m[1m[32m     Running[0m `/workspace/drepr/drepr/rdrepr/target/release/drepr /workspace/drepr/volumes/benchmark_data/hr_company_employee.repr.yml 'default:/workspace/drepr/volumes/benchmark_data/hr_employee.csv' test -f read_data`
>>> [DREPR] read data: 641.813223ms


In [15]:
# Shell cell: run the drepr release build with `test -f preprocessing` on
# hr_company_employee.json. This is the file without the ".small" suffix, so
# its timing is not directly comparable with the small-fixture runs above.
# NOTE(review): no `cd`, so this relies on the kernel's working directory being
# inside the drepr cargo project.
# NOTE(review): `-f preprocessing` presumably selects the preprocessing stage
# for timing — confirm against the CLI.
!cargo run --release -- {BENCHMARK_DIR}/hr_company_employee.repr.yml default:{BENCHMARK_DIR}/hr_company_employee.json test -f preprocessing

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.16s
[0m[0m[1m[32m     Running[0m `/workspace/drepr/drepr/rdrepr/target/release/drepr /workspace/drepr/volumes/benchmark_data/hr_company_employee.repr.yml 'default:/workspace/drepr/volumes/benchmark_data/hr_company_employee.json' test -f preprocessing`
>>> runtime: 2.488597268s


In [11]:
# Display the parsed JSON document held in `data` (assigned by the JSON-loading
# benchmark cell, which reads hr_company_employee.small.json).
# NOTE(review): this echoes the entire structure; consider showing a slice
# such as the first few companies to keep the output short.
data

{'companies': [{'name': 'company-0',
   'address': '486 Joseph Ford Suite 910\nSouth Lindamouth, TN 19869',
   'phone': '8274489954',
   'employees': ['E000000618',
    'E000000222',
    'E000000786',
    'E000000880',
    'E000000056',
    'E000000616',
    'E000000910',
    'E000000390']},
  {'name': 'company-1',
   'address': '61218 David Mall Suite 587\nPort Claytonside, CT 71606',
   'phone': '696-431-4098x671',
   'employees': ['E000000725',
    'E000000230',
    'E000000722',
    'E000000867',
    'E000000604',
    'E000000012',
    'E000000662']},
  {'name': 'company-2',
   'address': '673 Miller Crescent Apt. 877\nEast Justinfort, DC 84063',
   'phone': '911.421.6319',
   'employees': ['E000000163', 'E000000237', 'E000000467', 'E000000704']},
  {'name': 'company-3',
   'address': '16709 Steven Well Suite 600\nGregoryville, ME 56651',
   'phone': '(242)752-1435x29710',
   'employees': ['E000000488',
    'E000000206',
    'E000000764',
    'E000000891',
    'E000000054',
    'E0