## Level_base_log_data

In [26]:
import csv
import re
import os

log_dir = "./level_base_log_data"
csv_output_path = "./h1_base_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+)_base(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal"
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size", "max_bytes_for_level_base"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))
        max_bytes_for_level_base = int(match.group(4))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size, max_bytes_for_level_base] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [27]:
import pandas as pd

# CSV 파일을 읽어 DataFrame으로 변환
df = pd.read_csv(csv_output_path)
df_sorted = df.sort_values(by=["workload", "compaction_style"])
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_base,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
2,fillrandom,0,16384,536870912,1000000,16417000000,36052092660,8996302229,16417000000,1000000
5,fillrandom,0,4096,268435456,1000000,4128000000,4003270522,2301632341,4128000000,1000000
6,fillrandom,0,1024,67108864,1000000,1056000000,657101850,554147546,1056000000,1000000
7,fillrandom,0,1024,536870912,1000000,1056000000,588671433,554175641,1056000000,1000000
8,fillrandom,0,1024,268435456,1000000,1056000000,612860902,554235912,1056000000,1000000
9,fillrandom,0,16384,67108864,1000000,16417000000,27054478941,8996874388,16417000000,1000000
10,fillrandom,0,4096,536870912,1000000,4128000000,4390431402,2301548443,4128000000,1000000
13,fillrandom,0,16384,268435456,1000000,16417000000,29512061896,8997147451,16417000000,1000000
16,fillrandom,0,4096,67108864,1000000,4128000000,3625923549,2264390085,4128000000,1000000
0,overwrite,0,4096,67108864,1000000,4128000000,3624640394,2264460227,4128000000,1000000


## Level_multiplier_log_data

In [28]:
import csv
import re
import os

log_dir = "./level_multiplier_log_data"
csv_output_path = "./h1_multiplier_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+)_multiplier(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal"
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size", "max_bytes_for_level_multiplier"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))
        max_bytes_for_level_multiplier = int(match.group(4))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size, max_bytes_for_level_multiplier] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [29]:
import pandas as pd

# CSV 파일을 읽어 DataFrame으로 변환
df = pd.read_csv(csv_output_path)
df_sorted = df.sort_values(by=["workload", "compaction_style"])
df_sorted

Unnamed: 0,workload,compaction_style,value_size,max_bytes_for_level_multiplier,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
1,fillrandom,0,1024,10,,,,,,
2,fillrandom,0,16384,10,,,,,,
4,fillrandom,0,16384,20,100000.0,1641700000.0,1022179000.0,823271644.0,1641700000.0,100000.0
5,fillrandom,0,16384,4,,,,,,
7,fillrandom,0,4096,10,,,,,,
8,fillrandom,0,256,4,,,,,,
10,fillrandom,0,256,10,,,,,,
14,fillrandom,0,1024,20,100000.0,105600000.0,0.0,0.0,105600000.0,100000.0
15,fillrandom,0,256,20,100000.0,28800000.0,0.0,0.0,28800000.0,100000.0
17,fillrandom,0,4096,4,,,,,,


## Universal Log

In [30]:
import csv
import re
import os

log_dir = "./universal_log_data"
csv_output_path = "./h1_universal_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"h1_(\w+)_cs(\d+)_size(\d+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal"
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "value_size"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        value_size = int(match.group(3))

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        # None을 빈 문자열로 대체
        stat_values = [stats_dict[key] if stats_dict[key] is not None else "" for key in target_keys]
        row = [workload, compaction_style, value_size] + stat_values
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [31]:
import pandas as pd

# CSV 파일을 읽어 DataFrame으로 변환
df = pd.read_csv(csv_output_path)
df_sorted = df.sort_values(by=["workload", "compaction_style"])
df_sorted

Unnamed: 0,workload,compaction_style,value_size,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
1,fillrandom,1,256,100000,28800000,0,0,28800000,100000
2,fillrandom,1,4096,100000,412800000,75853118,172599151,412800000,100000
4,fillrandom,1,16384,100000,1641700000,1002606501,822745630,1641700000,100000
5,fillrandom,1,1024,100000,105600000,0,0,105600000,100000
0,overwrite,1,1024,100000,105600000,0,0,105600000,100000
3,overwrite,1,16384,100000,1641700000,1001344099,822835705,1641700000,100000
6,overwrite,1,4096,100000,412800000,85290093,173036190,412800000,100000
7,overwrite,1,256,100000,28800000,0,0,28800000,100000
