In [5]:
import csv
import re
import os

log_dir = "./log_data"
csv_output_path = "./h2_output.csv"

# 정규표현식을 이용해 파일 이름에서 정보 추출
filename_pattern = r"(\w+)_cs(\d+)_(\w+).log"

# 추출할 stat key 목록
target_keys = [
    "rocksdb.number.keys.written",
    "rocksdb.bytes.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.flush.write.bytes",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal"
]

# CSV 파일 열고 헤더 작성
with open(csv_output_path, mode="w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ["workload", "compaction_style", "config"] + target_keys
    writer.writerow(header)

    # 디렉토리 내 모든 .log 파일 처리
    for log_file_name in os.listdir(log_dir):
        if not log_file_name.endswith(".log"):
            continue

        match = re.match(filename_pattern, log_file_name)
        if not match:
            print(f"파일 이름 형식이 올바르지 않아 건너뜀: {log_file_name}")
            continue

        workload = match.group(1)
        compaction_style = int(match.group(2))
        configs = match.group(3)

        stats_dict = {key: None for key in target_keys}
        log_file_path = os.path.join(log_dir, log_file_name)

        with open(log_file_path, "r") as f:
            for line in f:
                line = line.strip()
                match_stat = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
                if match_stat:
                    key = match_stat.group(1)
                    value = int(match_stat.group(2))
                    if key in stats_dict:
                        stats_dict[key] = value

        row = [workload, compaction_style, configs] + [stats_dict[key] for key in target_keys]
        writer.writerow(row)

print("CSV 저장이 완료되었습니다.")


CSV 저장이 완료되었습니다.


In [12]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h2_output.csv")

# workload, compaction_style 기준 정렬
df_sorted = df.sort_values(by=["workload", "compaction_style"])

# 정렬된 데이터프레임 출력
df_sorted

Unnamed: 0,workload,compaction_style,config,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal
0,fillrandom,0,perf,100000,1641700000,141354496,823240788,0,0
2,fillrandom,0,stable,100000,1641700000,1063040346,823101149,1641700000,100000
4,fillrandom,1,stable,100000,1641700000,1002593258,822519262,1641700000,100000
8,fillrandom,1,perf,100000,1641700000,303681536,858545886,0,0
3,fillrandom,2,perf,100000,1641700000,0,822797396,0,0
13,fillrandom,2,stable,100000,1641700000,0,822837515,1641700000,100000
12,overwrite,0,stable,100000,1641700000,1085882756,822364925,1641700000,100000
17,overwrite,0,perf,100000,1641700000,168217600,822098004,0,0
5,overwrite,1,stable,100000,1641700000,1001390441,822601511,1641700000,100000
10,overwrite,1,perf,100000,1641700000,320281088,858279646,0,0


In [10]:
df['compaction_style'].value_counts()

compaction_style
0    6
2    6
1    6
Name: count, dtype: int64

In [None]:
import pandas as pd

df_waf = df.copy()

# compaction_style 매핑
style_map = {0: 'leveled', 1: 'universal', 2: 'fifo'}
df_waf['compaction_style'] = df['compaction_style'].map(style_map)

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['WAF'] = df.apply(
    lambda row: row['rocksdb.bytes.written'] / row['rocksdb.number.keys.written']
    axis=1
)

# workload, compaction_style 기준 정렬
df_sorted = df_waf.sort_values(by=["workload", "compaction_style"])

# 정렬된 데이터프레임 출력
df_sorted

# # 결과 저장 (선택)
# df.to_csv('modified_with_WAF.csv', index=False)

Unnamed: 0,workload,compaction_style,config,rocksdb.number.keys.written,rocksdb.bytes.written,rocksdb.compact.write.bytes,rocksdb.flush.write.bytes,rocksdb.wal.bytes,rocksdb.write.wal,WAF
3,fillrandom,fifo,perf,100000,1641700000,0,822797396,0,0,
13,fillrandom,fifo,stable,100000,1641700000,0,822837515,1641700000,100000,1.0
0,fillrandom,leveled,perf,100000,1641700000,141354496,823240788,0,0,
2,fillrandom,leveled,stable,100000,1641700000,1063040346,823101149,1641700000,100000,1.0
4,fillrandom,universal,stable,100000,1641700000,1002593258,822519262,1641700000,100000,1.0
8,fillrandom,universal,perf,100000,1641700000,303681536,858545886,0,0,
1,overwrite,fifo,stable,100000,1641700000,0,856930432,1641700000,100000,1.0
6,overwrite,fifo,perf,100000,1641700000,0,822333524,0,0,
12,overwrite,leveled,stable,100000,1641700000,1085882756,822364925,1641700000,100000,1.0
17,overwrite,leveled,perf,100000,1641700000,168217600,822098004,0,0,
