## Level_base_log_data

In [32]:
import csv
import re
import os

log_dir = "./level_base_log_data"
csv_output_path = "./h1_base_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+)_base(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal",
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size", "max_bytes_for_level_base"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))
        max_bytes_for_level_base = int(match.group(4))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size, max_bytes_for_level_base] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [63]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h1_base_output.csv")

# workload, compaction_style 기준 정렬
df_sorted = df.sort_values(by=["workload", "compaction_style"])

# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_base,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
2,fillrandom,0,16384,536870912,1000000,16417000000,36052092660,8996302229,16417000000,1000000
5,fillrandom,0,4096,268435456,1000000,4128000000,4003270522,2301632341,4128000000,1000000
6,fillrandom,0,1024,67108864,1000000,1056000000,657101850,554147546,1056000000,1000000
7,fillrandom,0,1024,536870912,1000000,1056000000,588671433,554175641,1056000000,1000000
8,fillrandom,0,1024,268435456,1000000,1056000000,612860902,554235912,1056000000,1000000
9,fillrandom,0,16384,67108864,1000000,16417000000,27054478941,8996874388,16417000000,1000000
10,fillrandom,0,4096,536870912,1000000,4128000000,4390431402,2301548443,4128000000,1000000
13,fillrandom,0,16384,268435456,1000000,16417000000,29512061896,8997147451,16417000000,1000000
16,fillrandom,0,4096,67108864,1000000,4128000000,3625923549,2264390085,4128000000,1000000
0,overwrite,0,4096,67108864,1000000,4128000000,3624640394,2264460227,4128000000,1000000


In [64]:
import pandas as pd

df_waf = df.copy()

# compaction_style 매핑
style_map = {0: 'leveled', 1: 'universal', 2: 'fifo'}
df_waf['compaction_style'] = df['compaction_style'].map(style_map)

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['WAF'] = df.apply(
    lambda row: (row['rocksdb.flush.write.bytes'] + row['rocksdb.compact.write.bytes'] + row['rocksdb.bytes.written']) / row['rocksdb.bytes.written'],
    axis=1
)

# workload, compaction_style 기준 정렬
df_sorted = df_waf.sort_values(by=["workload", "compaction_style"])

# 결과 저장 (선택)
df.to_csv('modified_h1_base_output.csv', index=False)

In [65]:
# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_base,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal,WAF
2,fillrandom,leveled,16384,536870912,1000000,16417000000,36052092660,8996302229,16417000000,1000000,3.744009
5,fillrandom,leveled,4096,268435456,1000000,4128000000,4003270522,2301632341,4128000000,1000000,2.52735
6,fillrandom,leveled,1024,67108864,1000000,1056000000,657101850,554147546,1056000000,1000000,2.147016
7,fillrandom,leveled,1024,536870912,1000000,1056000000,588671433,554175641,1056000000,1000000,2.082242
8,fillrandom,leveled,1024,268435456,1000000,1056000000,612860902,554235912,1056000000,1000000,2.105205
9,fillrandom,leveled,16384,67108864,1000000,16417000000,27054478941,8996874388,16417000000,1000000,3.195977
10,fillrandom,leveled,4096,536870912,1000000,4128000000,4390431402,2301548443,4128000000,1000000,2.621119
13,fillrandom,leveled,16384,268435456,1000000,16417000000,29512061896,8997147451,16417000000,1000000,3.345691
16,fillrandom,leveled,4096,67108864,1000000,4128000000,3625923549,2264390085,4128000000,1000000,2.426917
0,overwrite,leveled,4096,67108864,1000000,4128000000,3624640394,2264460227,4128000000,1000000,2.426623


## Level_multiplier_log_data

In [42]:
import csv
import re
import os

log_dir = "./level_multiplier_log_data"
csv_output_path = "./h1_multiplier_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+)_multiplier(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal",
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size", "max_bytes_for_level_multiplier"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))
        max_bytes_for_level_multiplier = int(match.group(4))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size, max_bytes_for_level_multiplier] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [60]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h1_multiplier_output.csv")

# workload, compaction_style 기준 정렬
df_sorted = df.sort_values(by=["workload", "compaction_style"])

# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_multiplier,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
1,fillrandom,0,1024,10,1000000,1056000000,532065061,554231834,1056000000,1000000
2,fillrandom,0,16384,10,1000000,16417000000,26808615703,8996537425,16417000000,1000000
4,fillrandom,0,16384,20,1000000,16417000000,28859522575,8996966203,16417000000,1000000
5,fillrandom,0,16384,4,1000000,16417000000,26312224465,8997182632,16417000000,1000000
7,fillrandom,0,4096,10,1000000,4128000000,3668790542,2264613878,4128000000,1000000
11,fillrandom,0,1024,20,1000000,1056000000,573779355,554231337,1056000000,1000000
13,fillrandom,0,4096,4,1000000,4128000000,3961650233,2263893245,4128000000,1000000
14,fillrandom,0,4096,20,1000000,4128000000,3812224142,2264247445,4128000000,1000000
17,fillrandom,0,1024,4,1000000,1056000000,518053266,554174033,1056000000,1000000
0,overwrite,0,4096,20,1000000,4128000000,3876164853,2264551781,4128000000,1000000


In [61]:
import pandas as pd

df_waf = df.copy()

# compaction_style 매핑
style_map = {0: 'leveled', 1: 'universal', 2: 'fifo'}
df_waf['compaction_style'] = df['compaction_style'].map(style_map)

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['WAF'] = df.apply(
    lambda row: (row['rocksdb.flush.write.bytes'] + row['rocksdb.compact.write.bytes'] + row['rocksdb.bytes.written']) / row['rocksdb.bytes.written'],
    axis=1
)

# workload, compaction_style 기준 정렬
df_sorted = df_waf.sort_values(by=["workload", "compaction_style"])

# # 결과 저장 (선택)
df.to_csv('modified_h1_multiplier_output.csv', index=False)

In [62]:
# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_multiplier,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal,WAF
1,fillrandom,leveled,1024,10,1000000,1056000000,532065061,554231834,1056000000,1000000,2.02869
2,fillrandom,leveled,16384,10,1000000,16417000000,26808615703,8996537425,16417000000,1000000,3.18098
4,fillrandom,leveled,16384,20,1000000,16417000000,28859522575,8996966203,16417000000,1000000,3.305932
5,fillrandom,leveled,16384,4,1000000,16417000000,26312224465,8997182632,16417000000,1000000,3.150783
7,fillrandom,leveled,4096,10,1000000,4128000000,3668790542,2264613878,4128000000,1000000,2.437356
11,fillrandom,leveled,1024,20,1000000,1056000000,573779355,554231337,1056000000,1000000,2.068192
13,fillrandom,leveled,4096,4,1000000,4128000000,3961650233,2263893245,4128000000,1000000,2.508126
14,fillrandom,leveled,4096,20,1000000,4128000000,3812224142,2264247445,4128000000,1000000,2.472013
17,fillrandom,leveled,1024,4,1000000,1056000000,518053266,554174033,1056000000,1000000,2.015367
0,overwrite,leveled,4096,20,1000000,4128000000,3876164853,2264551781,4128000000,1000000,2.487577


## Universal Log

In [53]:
import csv
import re
import os

log_dir = "./universal_log_data"
csv_output_path = "./h1_universal_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal"
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [56]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h1_universal_output.csv")

# workload, compaction_style 기준 정렬
df_sorted = df.sort_values(by=["workload", "compaction_style"])

# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
1,fillrandom,1,4096,1000000,4128000000,3540818396,2301488797,4128000000,1000000
3,fillrandom,1,16384,1000000,16417000000,23330374011,8996719499,16417000000,1000000
4,fillrandom,1,1024,1000000,1056000000,590356007,554189559,1056000000,1000000
0,overwrite,1,1024,1000000,1056000000,589472628,554212973,1056000000,1000000
2,overwrite,1,16384,1000000,16417000000,20922566009,8996726917,16417000000,1000000
5,overwrite,1,4096,1000000,4128000000,3254281401,2263874391,4128000000,1000000


In [57]:
import pandas as pd

df_waf = df.copy()

# compaction_style 매핑
style_map = {0: 'leveled', 1: 'universal', 2: 'fifo'}
df_waf['compaction_style'] = df['compaction_style'].map(style_map)

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['WAF'] = df.apply(
    lambda row: (row['rocksdb.flush.write.bytes'] + row['rocksdb.compact.write.bytes'] + row['rocksdb.bytes.written']) / row['rocksdb.bytes.written'],
    axis=1
)

# workload, compaction_style 기준 정렬
df_sorted = df_waf.sort_values(by=["workload", "compaction_style"])

# 결과 저장 (선택)
df.to_csv('modified_h1_universal_output.csv', index=False)

In [58]:

# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,value_size,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal,WAF
1,fillrandom,universal,4096,1000000,4128000000,3540818396,2301488797,4128000000,1000000,2.415288
3,fillrandom,universal,16384,1000000,16417000000,23330374011,8996719499,16417000000,1000000,2.969123
4,fillrandom,universal,1024,1000000,1056000000,590356007,554189559,1056000000,1000000,2.08385
0,overwrite,universal,1024,1000000,1056000000,589472628,554212973,1056000000,1000000,2.083036
2,overwrite,universal,16384,1000000,16417000000,20922566009,8996726917,16417000000,1000000,2.822458
5,overwrite,universal,4096,1000000,4128000000,3254281401,2263874391,4128000000,1000000,2.336763
