In [1]:
import csv
import re
import os
from collections import defaultdict

log_dir = "./log_data_2"
csv_output_path = "./h5_summary_default.csv"

# 변경된 파일명 패턴 (preclude, preserve, temp 제거)
filename_pattern = r"zipf_alpha(?P<alpha>[0-9.]+)_run(?P<trial>\d+)\.log"

target_keys = [
    'rocksdb.flush.write.bytes',
    'rocksdb.compact.write.bytes',
    'rocksdb.bytes.written',
    'rocksdb.compaction.total.time.cpu_micros',
    'rocksdb.bytes.compressed.from',
    'rocksdb.bytes.compressed.to',
    'rocksdb.wal.bytes',
    'rocksdb.stall.micros',
    'rocksdb.memtable.payload.bytes.at.flush',
    'rocksdb.memtable.garbage.bytes.at.flush',
    'rocksdb.last.level.read.bytes',
    'rocksdb.last.level.read.count',
    'rocksdb.non.last.level.read.bytes',
    'rocksdb.non.last.level.read.count',
]

latency_fields = [
    'write.P50', 'write.P95', 'write.P99', 'write.P100', 'write.COUNT', 'write.SUM', 'write.AVG',
]

sst_level_fields = []
for i in range(7):
    sst_level_fields.append(f"L{i}_total")
    for temp_type in ["Hot", "Warm", "Cold", "Unknown"]:
        sst_level_fields.append(f"L{i}_{temp_type}")

# CSV 헤더에서 preclude, preserve, temp 제거
header = ['trial', 'alpha', 'time(s)', 'keys', 'db_size_bytes'] + sst_level_fields + target_keys + latency_fields
rows = []

for log_file_name in os.listdir(log_dir):
    if not log_file_name.endswith('.log'):
        continue

    match = re.match(filename_pattern, log_file_name)
    if not match:
        print(f"⚠️ Filename {log_file_name} does not match the expected pattern.")
        continue

    trial = int(match.group("trial"))
    alpha = float(match.group("alpha"))

    log_path = os.path.join(log_dir, log_file_name)

    stats = {key: 0 for key in target_keys}
    latency = {key: 0 for key in latency_fields}
    sst_files = defaultdict(lambda: {"total": 0, "Hot": 0, "Warm": 0, "Cold": 0, "Unknown": 0})
    time_sec = 0
    keys = 0
    db_size_bytes = 0
    current_level = None
    in_sst_block = False

    with open(log_path, "r") as f:
        for line in f:
            line = line.strip()

            if (m := re.match(r"⏱️ Total write time: ([\d\.]+) seconds", line)):
                time_sec = float(m.group(1))
            elif (m := re.match(r"\[💾 Writing (\d+) keys\.\.\.\]", line)):
                keys = int(m.group(1))
            elif (m := re.match(r"DB 디렉토리 사용량: (\d+) bytes", line)):
                db_size_bytes = int(m.group(1))

            elif line.startswith("===== Level별 SST 파일 개수 및 온도별 분포 ====="):
                in_sst_block = True
                continue
            elif line.startswith("==============================================="):
                in_sst_block = False
                current_level = None
                continue

            elif in_sst_block:
                if (m := re.match(r"Level (\d+) ?: ?총 (\d+)개 파일", line)):
                    current_level = int(m.group(1))
                    total_files = int(m.group(2))
                    sst_files[current_level]['total'] = total_files
                elif (m := re.match(r"(Hot|Warm|Cold|Unknown) ?: ?(\d+)개", line)):
                    temp_type, count = m.group(1), int(m.group(2))
                    if current_level is not None:
                        sst_files[current_level][temp_type] = count

            elif (m := re.match(r"(rocksdb\.[\w\.]+)\s+COUNT\s*:\s*(\d+)", line)):
                key, value = m.group(1), int(m.group(2))
                if key in stats:
                    stats[key] = value

            elif (m := re.match(
                r"rocksdb\.db\.write\.micros\s+P50\s*:\s*([\d\.]+)\s+P95\s*:\s*([\d\.]+)\s+P99\s*:\s*([\d\.]+)\s+P100\s*:\s*([\d\.]+)\s+COUNT\s*:\s*(\d+)\s+SUM\s*:\s*(\d+)", line)):
                p50, p95, p99, p100, count, total = map(float, m.groups())
                latency.update({
                    'write.P50': p50, 'write.P95': p95, 'write.P99': p99, 'write.P100': p100,
                    'write.COUNT': int(count), 'write.SUM': int(total),
                    'write.AVG': total / count if count > 0 else 0
                })

    sst_row = []
    for i in range(7):
        level_info = sst_files[i]
        sst_row.append(level_info['total'])
        for temp_type in ['Hot', 'Warm', 'Cold', 'Unknown']:
            sst_row.append(level_info[temp_type])

    # preclude, preserve, temp 제거된 row
    row = [trial, alpha, time_sec, keys, db_size_bytes] + sst_row + [stats[key] for key in target_keys] + [latency[key] for key in latency_fields]
    rows.append(row)

# CSV 저장
with open(csv_output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    writer.writerows(rows)

print(f"✅ CSV summary saved to {csv_output_path}")


✅ CSV summary saved to ./h5_summary_default.csv


In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)  # 모든 열 표시
pd.set_option('display.max_rows', None)  # 모든 행 표시

df = pd.read_csv("./h5_summary_default.csv")
df

Unnamed: 0,trial,alpha,time(s),keys,db_size_bytes,L0_total,L0_Hot,L0_Warm,L0_Cold,L0_Unknown,L1_total,L1_Hot,L1_Warm,L1_Cold,L1_Unknown,L2_total,L2_Hot,L2_Warm,L2_Cold,L2_Unknown,L3_total,L3_Hot,L3_Warm,L3_Cold,L3_Unknown,L4_total,L4_Hot,L4_Warm,L4_Cold,L4_Unknown,L5_total,L5_Hot,L5_Warm,L5_Cold,L5_Unknown,L6_total,L6_Hot,L6_Warm,L6_Cold,L6_Unknown,rocksdb.flush.write.bytes,rocksdb.compact.write.bytes,rocksdb.bytes.written,rocksdb.compaction.total.time.cpu_micros,rocksdb.bytes.compressed.from,rocksdb.bytes.compressed.to,rocksdb.wal.bytes,rocksdb.stall.micros,rocksdb.memtable.payload.bytes.at.flush,rocksdb.memtable.garbage.bytes.at.flush,rocksdb.last.level.read.bytes,rocksdb.last.level.read.count,rocksdb.non.last.level.read.bytes,rocksdb.non.last.level.read.count,write.P50,write.P95,write.P99,write.P100,write.COUNT,write.SUM,write.AVG
0,1,0.9,62.8231,1000000,452293617,12,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,3,0,0,0,3,620594428,1585483563,16409090136,26505851,44228574491,2195644244,16409090136,45013295,16272153227,3881301358,216004746,260191,1683227186,2048328,9.29637,16.019313,31.153197,1971828.0,1000000,60609571,60.609571
1,3,0.5,82.8549,1000000,687778469,5,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2,0,0,0,2,6,0,0,0,6,810501503,2421952748,16410542657,38024830,64779009110,3217028745,16410542657,63748157,16273594301,105924778,383935270,461846,2310963754,2809714,9.79655,19.185383,68.926683,1595627.0,1000000,80448075,80.448075
2,1,1.2,70.4995,1000000,230172439,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,235695397,736644501,16407152794,12770746,19513332226,968029417,16407152794,57248309,16270230905,11561905505,63751709,77193,820591514,1000438,9.151512,15.413698,27.790873,1730760.0,1000000,68417352,68.417352
3,2,0.5,95.0609,1000000,761809206,3,0,0,0,3,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,2,0,0,0,2,4,0,0,0,4,5,0,0,0,5,810501512,2467662967,16410542657,38059544,65684677918,3262647902,16410542657,78229025,16273594301,105924778,303423400,365224,2373995700,2883642,9.658615,18.327709,61.143819,1286169.0,1000000,92646914,92.646914
4,2,1.2,70.5817,1000000,220121999,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,235695397,732986005,16407152794,12743043,19418864646,963429378,16407152794,57196161,16270230905,11561905505,63605951,76859,822617968,1003089,9.264944,18.023523,30.83594,2640776.0,1000000,68658802,68.658802
5,1,0.5,87.0693,1000000,722741947,3,0,0,0,3,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,2,0,0,0,2,6,0,0,0,6,810501512,2384950749,16410542657,36730416,64008630509,3179069768,16410542657,68872540,16273594301,105924778,357923377,430518,2263348124,2750634,9.736886,18.817998,51.97464,1005184.0,1000000,84712583,84.712583
6,2,0.9,78.6097,1000000,478637823,3,0,0,0,3,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,2,0,0,0,2,3,0,0,0,3,620594431,1733509905,16409090136,27762773,47191582294,2343089623,16409090136,62266391,16272153227,3881301358,183792971,221236,1838148203,2236025,10.108699,17.836624,28.327427,1685483.0,1000000,76420553,76.420553
7,3,0.9,98.4636,1000000,435052995,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,4,0,0,0,4,620594443,1842127262,16409090136,30211310,49358714899,2450287656,16409090136,84060783,16272153227,3881301358,285559623,344761,1884619605,2294167,9.782735,19.109286,38.479117,2021122.0,1000000,96301741,96.301741
8,3,1.2,72.6774,1000000,224617174,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,235445262,730710793,16407152794,12744695,19364569044,960720441,16407152794,59477842,16270230905,11566890708,65127533,78774,814585806,993142,9.308498,16.019366,28.960082,1610221.0,1000000,70745528,70.745528


In [11]:
import pandas as pd

# CSV 로드
df = pd.read_csv("./h5_summary_default.csv")

# === 1. 주요 지표 계산 === #

# WAF (Write Amplification Factor)
df['WAF'] = df.apply(
    lambda row: (
        (row.get('rocksdb.wal.bytes', 0) +
         row.get('rocksdb.flush.write.bytes', 0) +
         row.get('rocksdb.compact.write.bytes', 0))
        / row['rocksdb.bytes.written']
        if row['rocksdb.bytes.written'] != 0 else 0
    ),
    axis=1
)

# Throughput (Bytes Written / Time)
df['throughput'] = df.apply(
    lambda row: (
        row['rocksdb.bytes.written'] / row['time(s)']
        if (row.get('rocksdb.bytes.written', 0) > 0 and row.get('time(s)', 0) > 0)
        else 0
    ),
    axis=1
)

# Stall Time
df['stall_flush_total(us)'] = df.apply(
    lambda row: row.get('rocksdb.stall.micros', 0),
    axis=1
)

# last level read ratio
df['last_level_byte_ratio'] = df.apply(
    lambda row: (
        row.get('rocksdb.last.level.read.bytes', 0) /
        (row.get('rocksdb.last.level.read.bytes', 0) + row.get('rocksdb.non.last.level.read.bytes', 0))
        if (row.get('rocksdb.last.level.read.bytes', 0) + row.get('rocksdb.non.last.level.read.bytes', 0)) > 0
        else 0
    ),
    axis=1
)

df['last_level_count_ratio'] = df.apply(
    lambda row: (
        row.get('rocksdb.last.level.read.count', 0) /
        (row.get('rocksdb.last.level.read.count', 0) + row.get('rocksdb.non.last.level.read.count', 0))
        if (row.get('rocksdb.last.level.read.count', 0) + row.get('rocksdb.non.last.level.read.count', 0)) > 0
        else 0
    ),
    axis=1
)

# === 2. 분석 데이터 생성 === #

analysis_rows = []

for idx, row in df.iterrows():
    total_time = row['time(s)']
    compaction_time = row.get('rocksdb.compaction.total.time.cpu_micros', 0) / 1e6
    compaction_ratio = compaction_time / total_time if total_time > 0 else 0

    stall_micros = row.get('rocksdb.stall.micros', 0)
    stall_flush_total = stall_micros / 1e6

    compressed_from = row.get('rocksdb.bytes.compressed.from', 0)
    compressed_to = row.get('rocksdb.bytes.compressed.to', 0)
    flush_bytes = row.get('rocksdb.flush.write.bytes', 0)
    compact_bytes = row.get('rocksdb.compact.write.bytes', 0)
    latency = row.get('write.AVG', 0)

    analysis_rows.append({
        "trial": row["trial"],
        "alpha": row["alpha"],
        "time(s)": total_time,
        "keys": row["keys"],
        "db_size_bytes": row["db_size_bytes"],

        "L0_total": row["L0_total"],
        "L0_Hot": row["L0_Hot"],
        "L0_Warm": row["L0_Warm"],
        "L0_Cold": row["L0_Cold"],
        "L0_Unknown": row["L0_Unknown"],

        "L1_total": row["L1_total"],
        "L1_Hot": row["L1_Hot"],
        "L1_Warm": row["L1_Warm"],
        "L1_Cold": row["L1_Cold"],
        "L1_Unknown": row["L1_Unknown"],

        "L2_total": row["L2_total"],
        "L2_Hot": row["L2_Hot"],
        "L2_Warm": row["L2_Warm"],
        "L2_Cold": row["L2_Cold"],
        "L2_Unknown": row["L2_Unknown"],

        "L3_total": row["L3_total"],
        "L3_Hot": row["L3_Hot"],
        "L3_Warm": row["L3_Warm"],
        "L3_Cold": row["L3_Cold"],
        "L3_Unknown": row["L3_Unknown"],

        "L4_total": row["L4_total"],
        "L4_Hot": row["L4_Hot"],
        "L4_Warm": row["L4_Warm"],
        "L4_Cold": row["L4_Cold"],
        "L4_Unknown": row["L4_Unknown"],

        "L5_total": row["L5_total"],
        "L5_Hot": row["L5_Hot"],
        "L5_Warm": row["L5_Warm"],
        "L5_Cold": row["L5_Cold"],
        "L5_Unknown": row["L5_Unknown"],

        "L6_total": row["L6_total"],
        "L6_Hot": row["L6_Hot"],
        "L6_Warm": row["L6_Warm"],
        "L6_Cold": row["L6_Cold"],
        "L6_Unknown": row["L6_Unknown"],

        "WAF": row["WAF"],
        "throughput": row["throughput"],
        "last_level_byte_ratio": row["last_level_byte_ratio"],
        "last_level_count_ratio": row["last_level_count_ratio"],
        "compaction_time(s)": compaction_time,
        "compaction_ratio": round(compaction_ratio, 4),
        "stall_flush_total(s)": stall_flush_total,
        "Latency": latency,
        "compressed_from": compressed_from,
        "compressed_to": compressed_to,
        "flush_bytes": flush_bytes,
        "compact_bytes": compact_bytes,
    })

# 분석 결과 DataFrame 생성
df_analysis = pd.DataFrame(analysis_rows)

# === 3. alpha -> case 매핑 === #

def map_alpha_to_case(alpha):
    if alpha == 0.5:
        return 1
    elif alpha == 0.9:
        return 2
    elif alpha == 1.2:
        return 3
    else:
        return "unknown"

df_analysis["case"] = df_analysis["alpha"].apply(map_alpha_to_case)

# case 컬럼을 맨 앞으로 이동 (선택 사항)
cols = df_analysis.columns.tolist()
cols.insert(0, cols.pop(cols.index("case")))
df_analysis = df_analysis[cols]

# 결과 저장
df_analysis.to_csv("modified_h5_default_summary_with_case.csv", index=False)
print("✅ CSV 저장 완료: modified_h5_default_summary_with_case.csv")

✅ CSV 저장 완료: modified_h5_default_summary_with_case.csv


In [12]:
df_analysis

Unnamed: 0,case,trial,alpha,time(s),keys,db_size_bytes,L0_total,L0_Hot,L0_Warm,L0_Cold,L0_Unknown,L1_total,L1_Hot,L1_Warm,L1_Cold,L1_Unknown,L2_total,L2_Hot,L2_Warm,L2_Cold,L2_Unknown,L3_total,L3_Hot,L3_Warm,L3_Cold,L3_Unknown,L4_total,L4_Hot,L4_Warm,L4_Cold,L4_Unknown,L5_total,L5_Hot,L5_Warm,L5_Cold,L5_Unknown,L6_total,L6_Hot,L6_Warm,L6_Cold,L6_Unknown,WAF,throughput,last_level_byte_ratio,last_level_count_ratio,compaction_time(s),compaction_ratio,stall_flush_total(s),Latency,compressed_from,compressed_to,flush_bytes,compact_bytes
0,2,1.0,0.9,62.8231,1000000.0,452293617.0,12.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,3.0,1.134442,261195200.0,0.113733,0.112709,26.505851,0.4219,45.013295,60.609571,44228570000.0,2195644000.0,620594428.0,1585484000.0
1,1,3.0,0.5,82.8549,1000000.0,687778469.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,2.0,6.0,0.0,0.0,0.0,6.0,1.196974,198063600.0,0.142467,0.14117,38.02483,0.4589,63.748157,80.448075,64779010000.0,3217029000.0,810501503.0,2421953000.0
2,3,1.0,1.2,70.4995,1000000.0,230172439.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.059263,232727200.0,0.072089,0.071632,12.770746,0.1811,57.248309,68.417352,19513330000.0,968029400.0,235695397.0,736644500.0
3,1,2.0,0.5,95.0609,1000000.0,761809206.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,2.0,4.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,5.0,1.19976,172631900.0,0.113327,0.112416,38.059544,0.4004,78.229025,92.646914,65684680000.0,3262648000.0,810501512.0,2467663000.0
4,3,2.0,1.2,70.5817,1000000.0,220121999.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.05904,232456200.0,0.071772,0.071169,12.743043,0.1805,57.196161,68.658802,19418860000.0,963429400.0,235695397.0,732986000.0
5,1,1.0,0.5,87.0693,1000000.0,722741947.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,2.0,6.0,0.0,0.0,0.0,6.0,1.194719,188476800.0,0.136546,0.135334,36.730416,0.4219,68.87254,84.712583,64008630000.0,3179070000.0,810501512.0,2384951000.0
6,2,2.0,0.9,78.6097,1000000.0,478637823.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,3.0,1.143463,208741300.0,0.090899,0.090034,27.762773,0.3532,62.266391,76.420553,47191580000.0,2343090000.0,620594431.0,1733510000.0
7,2,3.0,0.9,98.4636,1000000.0,435052995.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0,0.0,4.0,1.150083,166651300.0,0.131583,0.130644,30.21131,0.3068,84.060783,96.301741,49358710000.0,2450288000.0,620594443.0,1842127000.0
8,3,3.0,1.2,72.6774,1000000.0,224617174.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.058886,225753200.0,0.074033,0.073489,12.744695,0.1754,59.477842,70.745528,19364570000.0,960720400.0,235445262.0,730710800.0
