In [1]:
import shlex
import shutil
import time

In [2]:
# Byte-size units (binary multiples), all expressed in bytes.
B = 1
KB = B * 2 ** 10
MB = B * 2 ** 20
GB = B * 2 ** 30
TB = B * 2 ** 40

In [3]:
import matplotlib.pyplot as plt; plt.rcdefaults()
from subprocess import Popen, PIPE


#command line tool
def run_cmd(cmd, throw_exception = False):
    """Run ``cmd`` through the shell and return its standard output as text.

    Args:
        cmd: Command line handed to the shell as-is (``shell=True``), so shell
            features such as ``~`` expansion and redirections apply.
        throw_exception: When true, raise if the command wrote anything to
            stderr or exited with a non-zero status. When false (default),
            stderr and the exit status are ignored.

    Returns:
        The command's stdout decoded as UTF-8. Bytes that are not valid UTF-8
        are replaced rather than aborting, so one stray byte does not discard
        the whole output.

    Raises:
        Exception: Only when ``throw_exception`` is true; the message is the
            stderr text if there is any, otherwise it names the exit status.
    """
    p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    out_str = out.decode("utf-8", errors="replace")
    err_str = err.decode("utf-8", errors="replace")
    if throw_exception:
        if err_str != '':
            raise Exception(err_str)
        # A command can fail without printing to stderr; the exit status is
        # the only failure signal in that case.
        if p.returncode != 0:
            raise Exception("command exited with status {}: {}".format(p.returncode, cmd))
    return out_str

In [4]:
class LogParser():
    """Extract the headline result lines and their numbers from a benchmark log.

    Two lines are looked for, each taken from its first occurrence in the log:

    * the throughput line, recognised by containing both ``micros/op`` and
      ``ops/sec``;
    * the latency line, recognised by starting with ``Percentiles: P50``.

    Call ``parse_all()`` before ``dump()``, ``get_basic_res()`` or
    ``get_percentiles()``.
    """

    def __init__(self, log):
        """Store the raw log text and split it into lines."""
        self.log = log
        self.lines = log.split("\n")
        # Filled in by parse_all(); defined here so the attributes always exist.
        self.base_res_line = None
        self.delay_percentiles_line = None

    def parse_all(self):
        """Scan every line and remember the first match of each kind."""
        self.base_res_line = None
        self.delay_percentiles_line = None
        for line in self.lines:
            self.extract_basic_res_line(line)
            self.extract_delay_percentiles_line(line)

    def extract_basic_res_line(self, line):
        """Keep ``line`` as the throughput line if none has been found yet."""
        if self.base_res_line is None and "micros/op" in line and "ops/sec" in line:
            self.base_res_line = line

    def extract_delay_percentiles_line(self, line):
        """Keep ``line`` as the percentiles line if none has been found yet."""
        if self.delay_percentiles_line is None and line.startswith("Percentiles: P50"):
            self.delay_percentiles_line = line

    def dump(self):
        """Print the two extracted lines (``None`` for any that was not found)."""
        print(self.base_res_line)
        print(self.delay_percentiles_line)

    def get_basic_res(self):
        """Return ``{"micros/op", "ops/sec", "MB/s"}`` mapped to the first three
        numbers on the throughput line, in order of appearance."""
        return self.convert_to_dict(["micros/op", "ops/sec", "MB/s"], self.extract_float(self.base_res_line))

    def get_percentiles(self):
        """Return ``{"P50", "P75", "P99", "P99.9", "P99.99"}`` mapped to the first
        five numbers on the percentiles line, in order of appearance."""
        return self.convert_to_dict(["P50", "P75", "P99", "P99.9", "P99.99"],
                                    self.extract_float(self.delay_percentiles_line))

    def extract_float(self, line):
        """Return every space-separated token of ``line`` that parses as a float.

        Raises:
            AttributeError: if ``line`` is ``None`` (the line was never found).
        """
        # Parse the line that was passed in, not a fixed attribute, so the
        # throughput and percentile lines each yield their own numbers.
        return [float(token) for token in line.split(" ") if self.is_float(token)]

    def convert_to_dict(self, name_list, num_list):
        """Pair ``name_list[i]`` with ``num_list[i]`` for every name.

        Raises:
            IndexError: if ``num_list`` is shorter than ``name_list``.
        """
        return {name: num_list[i] for i, name in enumerate(name_list)}

    def is_float(self, s):
        """Return True when ``float(s)`` succeeds, False otherwise."""
        try:
            float(s)
        except (TypeError, ValueError, OverflowError):
            return False
        return True

In [5]:
import os
import random
import util

class Benchmark():
    """Run db_bench workloads against a private, randomly named test directory.

    Constructing an instance creates ``<root_dir>/test-<random>`` together with
    a ``performance-testing-log`` sub-directory. Every benchmark method then
    runs the ``db_bench`` executable against that directory and returns the
    text it wrote to stdout.

    Args:
        root_dir: Directory under which the test directory is created.
        db_bench: Command used to start db_bench; passed to the shell unquoted.
        key_size: Value for ``--key_size``.
        value_size: Value for ``--value_size``.
        data_size: Total data volume; divided by ``key_size + value_size`` to
            obtain the default ``--num``.
        other_params: Extra command-line text appended verbatim to every run.
        save_res: When true, each run's command and output are written to a
            file in the log directory.
        verbose: When true, the command and its output are printed.
    """

    def __init__(self, root_dir, db_bench, key_size, value_size, data_size, other_params="",
                 save_res=True, verbose=True):
        self.db_bench = db_bench
        self.key_size = key_size
        self.value_size = value_size
        self.data_size = data_size
        self.other_params = other_params
        self.save_res = save_res
        self.verbose = verbose
        self.test_dir, self.performance_testing_log_dir = self.setup_test_env(root_dir)
        self.num_keys = self.get_num_keys(self.data_size, self.key_size, self.value_size)

    def fillseq(self):
        """Run ``fillseq`` with ``--use_existing_db=0``."""
        return self.run_db_bench(benchmarks="fillseq", use_existing_db=False)

    def readseq(self):
        """Run ``readseq`` against the existing database."""
        return self.run_db_bench(benchmarks="readseq")

    def overwrite(self):
        """Run ``overwrite`` against the existing database."""
        return self.run_db_bench(benchmarks="overwrite")

    def readrandom(self):
        """Run ``readrandom`` against the existing database."""
        return self.run_db_bench(benchmarks="readrandom")

    def readwhilewriting(self):
        """Run ``readwhilewriting`` against the existing database."""
        return self.run_db_bench(benchmarks="readwhilewriting")

    def deleterandom(self):
        """Run ``deleterandom`` against the existing database."""
        return self.run_db_bench(benchmarks="deleterandom")

    def benchmark_all(self):
        """Run fillseq, readseq, overwrite, readrandom and readwhilewriting in
        that order. ``deleterandom`` is not part of this sequence."""
        self.fillseq()
        self.readseq()
        self.overwrite()
        self.readrandom()
        self.readwhilewriting()

    def get_num_keys(self, data_size, key_size, value_size):
        """Return how many key/value pairs of the given sizes fit in ``data_size``
        (truncated to an int)."""
        return int(data_size / (key_size + value_size))

    def _build_command(self, benchmarks, use_existing_db, num_keys):
        """Assemble the full db_bench shell command line for one run."""
        # Quote the database path so a test directory containing spaces or
        # other shell metacharacters reaches db_bench as a single argument.
        # For plain paths shlex.quote returns the text unchanged.
        db_path = shlex.quote(self.test_dir)
        const_params = "\
          --db={} \
          --histogram=1 \
          --num={} \
          --use_existing_db={} \
          --key_size={} \
          --value_size={}  \
          --block_size=4096 \
          --compression_type=snappy \
          --max_write_buffer_number=3 \
          --write_buffer_size=33554432 \
          --cache_size=104857600 \
          --statistics {}".format(db_path, num_keys, 1 if use_existing_db else 0, self.key_size, self.value_size,
                                  self.other_params)
        return "{} --benchmarks=\"{}\"  {} ".format(self.db_bench, benchmarks, const_params)

    def run_db_bench(self, benchmarks, use_existing_db=True, num_keys = -1):
        """Run db_bench with the given ``--benchmarks`` value and return its stdout.

        Args:
            benchmarks: Text placed inside ``--benchmarks="..."``.
            use_existing_db: Passed as ``--use_existing_db=1`` or ``0``.
            num_keys: Value for ``--num``; any value <= 0 means "derive it from
                data_size, key_size and value_size".
        """
        if num_keys <= 0:
            num_keys = self.get_num_keys(self.data_size, self.key_size, self.value_size)
        command = self._build_command(benchmarks, use_existing_db, num_keys)
        if self.verbose:
            print("command : {}".format(command))
        res = run_cmd(command, False)
        if self.save_res:
            self.dump_res_to_file(command, res, benchmarks)
        if self.verbose:
            print(res)
        return res

    def dump_res_to_file(self, command, res, benchmarks):
        """Write the command, a blank line and the output to
        ``<log dir>/<benchmarks>-performance-result``, replacing any old file."""
        file_name = "{}/{}-performance-result".format(self.performance_testing_log_dir, benchmarks)
        if self.verbose:
            print("Dump result to file : {}".format(file_name))
        # Context manager guarantees the handle is closed even if a write fails.
        with open(file_name, "w") as out_file:
            out_file.write("{}\n\n".format(command))
            out_file.write(res)

    def setup_test_env(self, root_dir):
        """Create a fresh test directory and its log sub-directory.

        Returns:
            ``(test_dir, performance_testing_log_dir)``.
        """
        test_dir = self.get_random_test_dir(root_dir)
        performance_testing_log_dir = "{}/performance-testing-log".format(test_dir)
        # Printed unconditionally; the verbose guard below is disabled.
        #if self.verbose:
        print("Setup a new test root. Test root dir {}".format(test_dir))
        self.create_new_dir(test_dir)
        self.create_new_dir(performance_testing_log_dir)
        return test_dir, performance_testing_log_dir

    def create_new_dir(self, dir_name):
        """Make ``dir_name`` an empty directory, removing whatever was there.

        Missing parent directories are created as well. The work is done with
        the standard library instead of shell strings, so paths with spaces
        are handled and failures surface as exceptions.

        Raises:
            OSError: if the old entry cannot be removed or the directory
                cannot be created.
        """
        if os.path.isdir(dir_name) and not os.path.islink(dir_name):
            shutil.rmtree(dir_name)
        elif os.path.lexists(dir_name):
            # A plain file or a symlink: drop the entry itself.
            os.remove(dir_name)
        os.makedirs(dir_name)

    def get_random_test_dir(self, root_dir):
        """Return ``<root_dir>/test-<n>`` with ``n`` drawn from 0..1000000000."""
        return "{}/test-{}".format(root_dir, random.randint(0, 1000000000))


ModuleNotFoundError: No module named 'util'

In [None]:
'''
Benchmark1 : Get/Put latencies for different key/value sizes and different modes (random/sequential/readwhilewrite)

key/value : [10B, 100B], [100B, 1KB], [1KB, 1MB]
'''

def print_log(log):
    """Parse ``log`` with LogParser and print the lines it extracted."""
    log_parser = LogParser(log)
    log_parser.parse_all()
    log_parser.dump()
    
def benchmark1(root, db_bench, data_size):
    """Time fillseq, readseq, overwrite, readrandom and deleterandom for three
    key/value size pairs, printing each run's summary lines and duration.

    Args:
        root: Directory under which each Benchmark creates its test directory.
        db_bench: Command used to start db_bench.
        data_size: Total data volume handed to every Benchmark.
    """
    print("Total Data Size : {}".format(data_size))

    run_began = time.time()
    size_pairs = ((10*B, 100*B), (100*B, 1*KB), (1*KB, 1*MB))
    for key_size, value_size in size_pairs:
        # A new Benchmark per pair, so each pair gets its own test directory.
        bench = Benchmark(root_dir=root, db_bench=db_bench,
                          key_size=key_size, value_size=value_size,
                          data_size=data_size, other_params="",
                          save_res=True, verbose=False)

        print("Key : {} Value : {} \n".format(key_size, value_size))
        steps = (bench.fillseq, bench.readseq, bench.overwrite,
                 bench.readrandom, bench.deleterandom)
        for step in steps:
            step_began = time.time()
            print_log(step())
            elapsed = time.time() - step_began
            print("time : {}".format(elapsed))
            print("\n")
        print("\n\n\n\n")

    total_elapsed = time.time() - run_began
    print("total time cost : {} seconds".format(total_elapsed))

In [None]:
def test_in_laptop(root=None, db_bench=None, data_size=None):
    """Run benchmark1 with the laptop configuration.

    Every argument is optional; calling with no arguments uses the original
    hard-coded values. Passing arguments lets the same entry point run on
    another machine without editing the function.

    Args:
        root: Directory under which test directories are created.
        db_bench: Command used to start db_bench.
        data_size: Total data volume in bytes.
    """
    if root is None:
        root = "/Users/fxie/Desktop/intern_project/samza-azure-storage/milestone2/db_data"
    if db_bench is None:
        db_bench = "~/Desktop/intern_project/rocksdb/db_bench"
    if data_size is None:
        data_size = 1 * GB
    benchmark1(root, db_bench, data_size)

In [None]:
# Start the benchmark run with the laptop configuration.
test_in_laptop()