In [2]:
import csv
import re
import os

log_dir = "./log_data"
csv_output_path = "./h3_summary.csv"

filename_pattern = r"t(\d+)_hot_(\w+)_cold_(\w+)\.log"

# 추출할 stat key 목록
target_keys = [
    'rocksdb.bytes.written',
    'rocksdb.flush.write.bytes',
    'rocksdb.block.cache.hit',
    'rocksdb.block.cache.miss',
    'rocksdb.compaction.key.drop.new',
    'rocksdb.number.keys.write',
    'rocksdb.compact.read.bytes',
    'rocksdb.compact.write.bytes',
    "rocksdb.number.keys.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.cache.data.bytes.insert",
    "rocksdb.cache.bytes.read",
    "rocksdb.number.keys.read",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal",
    "rocksdb.memtable.hit",
    "rocksdb.memtable.miss",
    "rocksdb.compaction.key.drop.new",
    'rocksdb.bytes.read',
]

# 헤더 구성: 실험 정보 + 타겟 키 + 기타 수치
header = [
    'trial', 'hot_compaction', 'cold_compaction',
    'time(s)', 'hot_column_key', 'default_column_key'
] + target_keys

# 결과 리스트
rows = []

# 디렉토리 내 모든 .log 파일 처리
for log_file_name in os.listdir(log_dir):
    if not log_file_name.endswith('.log'):
        continue

    match = re.match(filename_pattern, log_file_name)
    if not match:
        print(f"Filename {log_file_name} does not match the expected pattern.")
        continue

    trial = match.group(1)
    hot_compaction = match.group(2)
    cold_compaction = match.group(3)

    # 값 저장용 dict 초기화
    stats_dict = {key: 0 for key in target_keys}
    time_sec = 0
    hot_col_keys = 0
    default_col_keys = 0

    log_file_path = os.path.join(log_dir, log_file_name)

    with open(log_file_path, "r") as f:
        for line in f:
            line = line.strip()

            # 총 소요시간
            time_match = re.match(r"총 소요시간: (\d+(?:\.\d+)?)초", line)
            if time_match:
                time_sec = float(time_match.group(1))
                continue


            # hot 컬럼
            hot_match = re.match(r"hot 컬럼에 저장된 키 수: (\d+)", line)
            if hot_match:
                hot_col_keys = int(hot_match.group(1))
                continue

            # default 컬럼
            default_match = re.match(r"default 컬럼에 저장된 키 수: (\d+)", line)
            if default_match:
                default_col_keys = int(default_match.group(1))
                continue

            # Stat 정보 추출
            stat_match = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
            if stat_match:
                key = stat_match.group(1)
                value = int(stat_match.group(2))
                if key in stats_dict:
                    stats_dict[key] = value

    # 한 줄 데이터 구성
    row = [
        trial, hot_compaction, cold_compaction,
        time_sec, hot_col_keys, default_col_keys
    ] + [stats_dict[key] for key in target_keys]
    rows.append(row)

# CSV 파일로 저장
with open(csv_output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    writer.writerows(rows)

print(f"CSV summary saved to {csv_output_path}")


CSV summary saved to ./h3_summary.csv


In [3]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h3_summary.csv")

df

Unnamed: 0,trial,hot_compaction,cold_compaction,time(s),hot_column_key,default_column_key,rocksdb.bytes.written,rocksdb.flush.write.bytes,rocksdb.block.cache.hit,rocksdb.block.cache.miss,...,rocksdb.compact.write.bytes.1,rocksdb.cache.data.bytes.insert,rocksdb.cache.bytes.read,rocksdb.number.keys.read,rocksdb.wal.bytes,rocksdb.write.wal,rocksdb.memtable.hit,rocksdb.memtable.miss,rocksdb.compaction.key.drop.new.1,rocksdb.bytes.read
0,1,universal,universal,215.646,88822,543645,16407028699,811590816,7283687,5877595,...,3768128779,0,0,1911178,16407028699,1000000,3778,1907400,194636,10362339328
1,2,universal,level,309.711,88818,543256,16407029052,811597848,8060048,10189216,...,7295480689,0,0,1911182,16407029052,1000000,3839,1907343,241541,10355900416
2,2,universal,universal,252.733,88592,543566,16407028784,811652440,7404430,6005913,...,3886721728,0,0,1911408,16407028784,1000000,3789,1907619,204662,10357276672
3,2,level,level,267.389,88948,543385,16407028895,811595301,8234987,9903499,...,7041300012,0,0,1911052,16407028895,1000000,3763,1907289,253703,10360143872
4,2,level,universal,273.964,89077,543107,16407028477,811540036,7387633,5977240,...,3873078173,0,0,1910923,16407028477,1000000,3792,1907131,207001,10357702656
5,3,level,level,303.68,88748,543769,16407028746,811622172,7548154,10073283,...,7209420559,0,0,1911252,16407028746,1000000,3778,1907474,252794,10363158528
6,1,level,universal,435.653,88794,543040,16407029090,811629101,7242636,6382662,...,4180097864,0,0,1911206,16407029090,1000000,3783,1907423,201501,10351968256
7,3,universal,universal,196.579,88893,543244,16407028558,811621772,7296345,5892069,...,3779265860,0,0,1911107,16407028558,1000000,3761,1907346,187574,10356932608
8,3,level,universal,254.691,88746,543708,16407028018,811725984,7208989,6086631,...,3938471118,0,0,1911254,16407028018,1000000,3806,1907448,190946,10362126336
9,1,universal,level,334.687,89125,542997,16407029204,808394069,8019272,9834256,...,6990263479,0,0,1910875,16407029204,1000000,7764,1903111,239783,10356686848


In [4]:
# case 분류 추가
def classify_case(row):
    hot = row.get('hot_compaction', '').lower()
    cold = row.get('cold_compaction', '').lower()

    if hot == 'level' and cold == 'level':
        return 0
    elif hot == 'universal' and cold == 'universal':
        return 1
    elif hot == 'level' and cold == 'universal':
        return 2
    elif hot == 'universal' and cold == 'level':
        return 3
    else:
        return -1  # 알 수 없는 조합 (예외 처리용)

df_waf = df.copy()

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['WAF'] = df.apply(
    lambda row: (row['rocksdb.flush.write.bytes'] + row['rocksdb.compact.write.bytes']) / row['rocksdb.bytes.written'],
    axis=1
)

# WAF 계산: wal.bytes가 0인 경우는 NaN 처리
df_waf['RAF'] = df.apply(
    lambda row: row['rocksdb.bytes.read'] / (row['rocksdb.number.keys.read'] * 16*1024),
    axis=1
)

df_waf['case'] = df.apply(classify_case, axis=1)

df_waf.to_csv('modified_h3_summary.csv', index=False)

In [5]:
df_waf

Unnamed: 0,trial,hot_compaction,cold_compaction,time(s),hot_column_key,default_column_key,rocksdb.bytes.written,rocksdb.flush.write.bytes,rocksdb.block.cache.hit,rocksdb.block.cache.miss,...,rocksdb.number.keys.read,rocksdb.wal.bytes,rocksdb.write.wal,rocksdb.memtable.hit,rocksdb.memtable.miss,rocksdb.compaction.key.drop.new.1,rocksdb.bytes.read,WAF,RAF,case
0,1,universal,universal,215.646,88822,543645,16407028699,811590816,7283687,5877595,...,1911178,16407028699,1000000,3778,1907400,194636,10362339328,0.279132,0.33093,1
1,2,universal,level,309.711,88818,543256,16407029052,811597848,8060048,10189216,...,1911182,16407029052,1000000,3839,1907343,241541,10355900416,0.494122,0.330724,3
2,2,universal,universal,252.733,88592,543566,16407028784,811652440,7404430,6005913,...,1911408,16407028784,1000000,3789,1907619,204662,10357276672,0.286363,0.330729,1
3,2,level,level,267.389,88948,543385,16407028895,811595301,8234987,9903499,...,1911052,16407028895,1000000,3763,1907289,253703,10360143872,0.47863,0.330882,0
4,2,level,universal,273.964,89077,543107,16407028477,811540036,7387633,5977240,...,1910923,16407028477,1000000,3792,1907131,207001,10357702656,0.285525,0.330827,2
5,3,level,level,303.68,88748,543769,16407028746,811622172,7548154,10073283,...,1911252,16407028746,1000000,3778,1907474,252794,10363158528,0.488878,0.330944,0
6,1,level,universal,435.653,88794,543040,16407029090,811629101,7242636,6382662,...,1911206,16407029090,1000000,3783,1907423,201501,10351968256,0.304243,0.330594,2
7,3,universal,universal,196.579,88893,543244,16407028558,811621772,7296345,5892069,...,1911107,16407028558,1000000,3761,1907346,187574,10356932608,0.279812,0.33077,1
8,3,level,universal,254.691,88746,543708,16407028018,811725984,7208989,6086631,...,1911254,16407028018,1000000,3806,1907448,190946,10362126336,0.289522,0.33091,2
9,1,universal,level,334.687,89125,542997,16407029204,808394069,8019272,9834256,...,1910875,16407029204,1000000,7764,1903111,239783,10356686848,0.475324,0.330802,3
