In [2]:
import csv
import re
import os

log_dir = "./log_data_2"
csv_output_path = "./h3_summary_2.csv"

filename_pattern = r"(\d+)_(\w+)_hot_(\w+)_cold_(\w+)\.log"

# 추출할 stat key 목록
target_keys = [
    'rocksdb.bytes.written',
    'rocksdb.flush.write.bytes',
    'rocksdb.block.cache.hit',
    'rocksdb.block.cache.miss',
    'rocksdb.compaction.key.drop.new',
    'rocksdb.number.keys.write',
    'rocksdb.compact.read.bytes',
    'rocksdb.compact.write.bytes',
    "rocksdb.number.keys.written",
    "rocksdb.compact.write.bytes",
    "rocksdb.cache.data.bytes.insert",
    "rocksdb.cache.bytes.read",
    "rocksdb.number.keys.read",
    "rocksdb.wal.bytes",
    "rocksdb.write.wal",
    "rocksdb.memtable.hit",
    "rocksdb.memtable.miss",
    "rocksdb.compaction.key.drop.new",
    'rocksdb.bytes.read',
]

# 헤더 구성: 실험 정보 + 타겟 키 + 기타 수치
header = [
    'trial', 'work', 'hot_compaction', 'cold_compaction',
    'time(s)', 'hot_column_key', 'default_column_key'
] + target_keys

# 결과 리스트
rows = []

# 디렉토리 내 모든 .log 파일 처리
for log_file_name in os.listdir(log_dir):
    if not log_file_name.endswith('.log'):
        continue

    match = re.match(filename_pattern, log_file_name)
    if not match:
        print(f"Filename {log_file_name} does not match the expected pattern.")
        continue

    trial = match.group(1)
    work = match.group(2)
    hot_compaction = match.group(3)
    cold_compaction = match.group(4)

    # 값 저장용 dict 초기화
    stats_dict = {key: 0 for key in target_keys}
    time_sec = 0
    hot_col_keys = 0
    default_col_keys = 0

    log_file_path = os.path.join(log_dir, log_file_name)

    with open(log_file_path, "r") as f:
        for line in f:
            line = line.strip()

            # 총 소요시간
            time_match = re.match(r"총 소요시간: (\d+(?:\.\d+)?)초", line)
            if time_match:
                time_sec = float(time_match.group(1))
                continue


            # hot 컬럼
            hot_match = re.match(r"hot 컬럼에 저장된 키 수: (\d+)", line)
            if hot_match:
                hot_col_keys = int(hot_match.group(1))
                continue

            # default 컬럼
            default_match = re.match(r"default 컬럼에 저장된 키 수: (\d+)", line)
            if default_match:
                default_col_keys = int(default_match.group(1))
                continue

            # Stat 정보 추출
            stat_match = re.match(r"(rocksdb\.[\w\.]+) COUNT\s*:\s*(\d+)", line)
            if stat_match:
                key = stat_match.group(1)
                value = int(stat_match.group(2))
                if key in stats_dict:
                    stats_dict[key] = value

    # 한 줄 데이터 구성
    row = [
        trial, work, hot_compaction, cold_compaction,
        time_sec, hot_col_keys, default_col_keys
    ] + [stats_dict[key] for key in target_keys]
    rows.append(row)

# CSV 파일로 저장
with open(csv_output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    writer.writerows(rows)

print(f"CSV summary saved to {csv_output_path}")


CSV summary saved to ./h3_summary_2.csv


In [5]:
import pandas as pd

# CSV 파일 읽기
df = pd.read_csv("./h3_summary_2.csv")

df

Unnamed: 0,trial,work,hot_compaction,cold_compaction,time(s),hot_column_key,default_column_key,rocksdb.bytes.written,rocksdb.flush.write.bytes,rocksdb.block.cache.hit,...,rocksdb.compact.write.bytes.1,rocksdb.cache.data.bytes.insert,rocksdb.cache.bytes.read,rocksdb.number.keys.read,rocksdb.wal.bytes,rocksdb.write.wal,rocksdb.memtable.hit,rocksdb.memtable.miss,rocksdb.compaction.key.drop.new.1,rocksdb.bytes.read
0,3,read,universal,universal,205.176,0,0,0,0,1321139,...,3070759,0,0,1000000,0,0,0,1000000,0,10347282432
1,3,read,level,universal,325.054,0,0,0,0,729748,...,6365182,0,0,1000000,0,0,0,1000000,0,10342875136
2,1,write,universal,level,376.56,88674,543638,16407028307,812032160,6267022,...,3025443077,0,0,1911326,16407028307,1000000,3751,1907575,198808,10359799808
3,1,write,universal,universal,369.122,88723,543583,16407028576,811904197,3557411,...,1728629039,0,0,1911277,16407028576,1000000,3737,1907540,221224,10359701504
4,1,read,universal,level,221.967,0,0,0,0,1152497,...,17890583,0,0,1000000,0,0,0,1000000,363,10339631104
5,1,read,universal,universal,272.256,0,0,0,0,1301229,...,3058824,0,0,1000000,0,0,0,1000000,0,10329849856
6,2,read,level,level,338.783,0,0,0,0,1274655,...,3082009,0,0,1000000,0,0,0,1000000,0,10336485376
7,3,write,level,universal,328.092,89166,543019,16407028915,812017703,7738345,...,1778731487,0,0,1910834,16407028915,1000000,3732,1907102,276887,10357719040
8,1,write,level,level,89.41,88655,543998,16407028810,812049935,3797924,...,2933129521,0,0,1911345,16407028810,1000000,3746,1907599,225627,10365386752
9,2,read,level,universal,236.639,0,0,0,0,1334103,...,3052094,0,0,1000000,0,0,0,1000000,0,10349887488


In [8]:
import pandas as pd

# case 분류 함수
def classify_case(row):
    hot = row.get('hot_compaction', '').lower()
    cold = row.get('cold_compaction', '').lower()

    if hot == 'level' and cold == 'level':
        return 0
    elif hot == 'universal' and cold == 'universal':
        return 1
    elif hot == 'level' and cold == 'universal':
        return 2
    elif hot == 'universal' and cold == 'level':
        return 3
    else:
        return -1  # 알 수 없는 조합 (예외 처리용)

df_waf = df.copy()

# WAF 계산 (0으로 나눌 경우 방지 및 NaN은 0으로 처리)
df_waf['WAF'] = df.apply(
    lambda row: (row.get('rocksdb.flush.write.bytes', 0) + row.get('rocksdb.compact.write.bytes', 0)) / row['rocksdb.bytes.written']
    if row['rocksdb.bytes.written'] != 0 else 0,
    axis=1
)

# RAF 계산 (0으로 나눌 경우 방지 및 NaN은 0으로 처리)
df_waf['RAF'] = df.apply(
    lambda row: row.get('rocksdb.bytes.read', 0) / row['rocksdb.number.keys.read']
    if row['rocksdb.number.keys.read'] != 0 else 0,
    axis=1
)

# case 분류
df_waf['case'] = df.apply(classify_case, axis=1)

# NaN 값이 있을 경우 최종적으로 0으로 채움
df_waf.fillna(0, inplace=True)

# 저장
df_waf.to_csv('modified_h3_summary_2.csv', index=False)

In [9]:
df_waf

Unnamed: 0,trial,work,hot_compaction,cold_compaction,time(s),hot_column_key,default_column_key,rocksdb.bytes.written,rocksdb.flush.write.bytes,rocksdb.block.cache.hit,...,rocksdb.number.keys.read,rocksdb.wal.bytes,rocksdb.write.wal,rocksdb.memtable.hit,rocksdb.memtable.miss,rocksdb.compaction.key.drop.new.1,rocksdb.bytes.read,WAF,RAF,case
0,3,read,universal,universal,205.176,0,0,0,0,1321139,...,1000000,0,0,0,1000000,0,10347282432,0.0,10347.282432,1
1,3,read,level,universal,325.054,0,0,0,0,729748,...,1000000,0,0,0,1000000,0,10342875136,0.0,10342.875136,2
2,1,write,universal,level,376.56,88674,543638,16407028307,812032160,6267022,...,1911326,16407028307,1000000,3751,1907575,198808,10359799808,0.233892,5420.216022,3
3,1,write,universal,universal,369.122,88723,543583,16407028576,811904197,3557411,...,1911277,16407028576,1000000,3737,1907540,221224,10359701504,0.154844,5420.303548,1
4,1,read,universal,level,221.967,0,0,0,0,1152497,...,1000000,0,0,0,1000000,363,10339631104,0.0,10339.631104,3
5,1,read,universal,universal,272.256,0,0,0,0,1301229,...,1000000,0,0,0,1000000,0,10329849856,0.0,10329.849856,1
6,2,read,level,level,338.783,0,0,0,0,1274655,...,1000000,0,0,0,1000000,0,10336485376,0.0,10336.485376,0
7,3,write,level,universal,328.092,89166,543019,16407028915,812017703,7738345,...,1910834,16407028915,1000000,3732,1907102,276887,10357719040,0.157905,5420.522683,2
8,1,write,level,level,89.41,88655,543998,16407028810,812049935,3797924,...,1911345,16407028810,1000000,3746,1907599,225627,10365386752,0.228267,5423.085185,0
9,2,read,level,universal,236.639,0,0,0,0,1334103,...,1000000,0,0,0,1000000,0,10349887488,0.0,10349.887488,2
