In [1]:
# %% ultra-clean A/B pipeline: prep -> run (subprocess) -> collect -> plot
from __future__ import annotations

import os, sys, re, json, time, subprocess, textwrap
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import requests
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------
# Project paths
# -----------------------
# The repository root is taken to be two levels above the current working
# directory (adjust if needed).
PROJECT_ROOT = Path.cwd().parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data", "fuzzy")
RESULTS_DIR = PROJECT_ROOT.joinpath("results", "fuzzy")

# Create both working directories up front so later steps can write into them.
for p in (DATA_DIR, RESULTS_DIR):
    p.mkdir(parents=True, exist_ok=True)

# -----------------------
# Step 1: Prep (URL + SF)
# -----------------------
def _filename_from_url(url: str) -> str:
    return Path(url.split("?")[0]).name

def _download(url: str, base_dir: Path) -> Path:
    """Fetch ``url`` into ``base_dir/<stem>/<filename>`` and return that path.

    Resolution order:
      1. If the target file already exists it is reused as a cache hit.
      2. If a file with the same name sits directly in ``base_dir`` (the
         legacy layout) its contents are copied to the target; the legacy
         file itself is left in place.
      3. Otherwise the URL is downloaded with a 60 second timeout.

    New content is first written to a sibling ``<filename>.part`` file and
    only renamed onto the target once complete, so an interrupted copy or
    download can never be mistaken for a valid cached file on the next run.

    Args:
        url: Source URL; its last path component becomes the file name.
        base_dir: Directory under which the per-dataset folder is created.

    Returns:
        Path of the cached/copied/downloaded file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    filename = _filename_from_url(url)
    dst_dir = base_dir / Path(filename).stem
    dst_dir.mkdir(parents=True, exist_ok=True)
    out = dst_dir / filename
    if out.exists():
        print(f"[download] Using cached: {out}")
        return out

    tmp = out.with_name(out.name + ".part")
    try:
        legacy = base_dir / filename
        # is_file() rather than exists(): for a suffix-less file name the
        # legacy path equals dst_dir, which is a directory that was just created.
        if legacy.is_file():
            tmp.write_bytes(legacy.read_bytes())
            tmp.replace(out)
            print(f"[download] Moved legacy file -> {out}")
            return out

        print(f"[download] Fetch {url}")
        # Stream the body in chunks instead of buffering it all in memory.
        with requests.get(url, timeout=60, stream=True) as r:
            r.raise_for_status()
            with open(tmp, "wb") as fh:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    if chunk:
                        fh.write(chunk)
        tmp.replace(out)
    except BaseException:
        # Never leave a partial staging file behind.
        tmp.unlink(missing_ok=True)
        raise
    print(f"[download] Saved {out}")
    return out

def prepare_dataset(url: str, sf: int) -> Dict[str, Any]:
    """Download a dataset and build every artifact the miners consume.

    Steps:
      1. Download (or reuse) the original file under ``DATA_DIR``.
      2. Replicate it ``sf`` times into ``<stem>_SF<sf>_floating<suffix>``
         via ``scripts.replicate_file.replicate_file`` (skipped if present).
      3. Call ``scripts.fixedpoint_normalize.normalize_file`` when any of the
         fixed parquet, floating parquet or quant-mult text file is missing.

    Args:
        url: Dataset URL.
        sf: Scale factor; coerced to an int and clamped to at least 1.

    Returns:
        Dict with keys ``dataset_name``, ``original``, ``floating_text``,
        ``floating_parquet``, ``fixed_parquet`` (paths as strings) and
        ``quant_mult`` (int read from the quant-mult text file).

    Raises:
        FileNotFoundError: if an expected artifact is still missing after
            ``normalize_file`` has run.
    """
    # Make the repo importable from *this* process before touching scripts.*
    root = str(PROJECT_ROOT)
    if root not in sys.path:
        sys.path.insert(0, root)

    original = _download(url, DATA_DIR)
    dataset_name = original.parent.name

    # --- 1) SF-concatenated floating text -------------------------------
    from scripts.replicate_file import replicate_file

    sf = max(1, int(sf))
    floating_text = original.with_name(f"{original.stem}_SF{sf}_floating{original.suffix}")
    if not floating_text.exists():
        replicate_file(str(original), sf, str(floating_text))
        print(f"[prep] Made: {floating_text.name}")
    else:
        print(f"[prep] Using existing: {floating_text.name}")

    # --- 2) fixed parquet + quant_mult + floating parquet ---------------
    from scripts.fixedpoint_normalize import normalize_file

    base = floating_text.stem.replace("_floating", "")
    floating_parquet = floating_text.with_suffix(".parquet")
    fixed_parquet = floating_text.with_name(f"{base}_fixed.parquet")
    quant_file = floating_text.with_name(f"{base}_quant_mult.txt")

    # Order matters below: missing files are reported fixed, floating, quant.
    required = (fixed_parquet, floating_parquet, quant_file)
    if not all(artifact.exists() for artifact in required):
        print("[prep] Running normalize_file to get parquet(s) & quant_mult …")
        # write_fixed_text=False keeps things parquet-only for speed
        normalize_file(str(floating_text), write_fixed_text=False)

    for artifact in required:
        if not artifact.exists():
            raise FileNotFoundError(f"Missing: {artifact}")

    return {
        "dataset_name": dataset_name,
        "original": str(original),
        "floating_text": str(floating_text),
        "floating_parquet": str(floating_parquet),
        "fixed_parquet": str(fixed_parquet),
        "quant_mult": int(quant_file.read_text().strip()),
    }

# -------------------------------------------------
# Step 2: Run miners in separate processes (clean)
# -------------------------------------------------
def _run_subprocess(args: List[str], log_path: Path, err_path: Path, cwd: Path, extra_env: Optional[Dict[str, str]]=None) -> int:
    """Run ``args`` as a child process, capturing stdout/stderr into files.

    Args:
        args: Full argv of the command to execute.
        log_path: File that receives the child's stdout (overwritten).
        err_path: File that receives the child's stderr (overwritten).
        cwd: Working directory for the child.
        extra_env: Optional extra environment variables; values are
            stringified and override the inherited environment.

    Returns:
        The child's exit code. A non-zero code is returned, not raised.
    """
    env = os.environ.copy()
    # Ensure the child sees the repo. Only append the inherited PYTHONPATH
    # when it is non-empty: a dangling separator would add an empty entry,
    # which the interpreter treats as the current directory.
    inherited = env.get("PYTHONPATH", "")
    search_path = [str(PROJECT_ROOT)]
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    if extra_env:
        env.update({k: str(v) for k, v in extra_env.items()})
    print("[run] ", " ".join(args))
    with open(log_path, "w") as out, open(err_path, "w") as err:
        proc = subprocess.run(args, cwd=cwd, env=env, stdout=out, stderr=err, text=True)
    print(f"[run] exit={proc.returncode}  log={log_path.name}  err={err_path.name}")
    return proc.returncode

def run_cuffi_cli(fixed_parquet: str, quant_mult: int, sup_int: int, out_dir: Path,
                  allocator="rmm_managed", gds="off", pinned=True, managed_prefetch=True) -> Dict[str, Any]:
    """Launch ``src.algorithms.fuzzy.cuFFIMiner`` in a subprocess.

    cuFFIMiner expects pre-scaled parquet (item, prob:uint32, txn_id).
    NOTE (your toggle): gds='off' => force cuFile => GDS ON. gds='on' => POSIX => GDS OFF.

    Args:
        fixed_parquet: Path of the fixed-point parquet input.
        quant_mult: Quantization multiplier, passed as an int argument.
        sup_int: Integer support threshold, passed as an int argument.
        out_dir: Directory for the patterns file and the .out/.err logs
            (created if missing).
        allocator: Value for ``--allocator``.
        gds: Value for ``--gds``.
        pinned: Append ``--pinned`` when true.
        managed_prefetch: Append ``--managed-prefetch`` when true.

    Returns:
        Dict with ``rc`` (exit code) and the ``patterns``, ``stdout`` and
        ``stderr`` file paths as strings.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    pin_tag = "pin" if pinned else "nopin"
    patterns_path = out_dir / f"patterns_cuffi_{allocator}_{gds}_{pin_tag}_sup{sup_int}.txt"
    log_path, err_path = (out_dir / f"cuffi_sup{sup_int}{ext}" for ext in (".out", ".err"))

    # Boolean switches are only present on the command line when enabled.
    switches = [
        flag
        for flag, enabled in (("--pinned", pinned), ("--managed-prefetch", managed_prefetch))
        if enabled
    ]
    cmd = [
        sys.executable, "-m", "src.algorithms.fuzzy.cuFFIMiner",
        str(fixed_parquet),
        str(int(sup_int)),
        str(int(quant_mult)),
        "-o", str(patterns_path),
        "--allocator", allocator,
        "--gds", gds,
        *switches,
    ]

    exit_code = _run_subprocess(cmd, log_path, err_path, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(log_path),
        "stderr": str(err_path),
    }

def run_naive_cli(floating_text_or_parquet: str, quant_mult: int, sup_int: int, out_dir: Path) -> Dict[str, Any]:
    """Launch ``src.algorithms.fuzzy.naiveFFIMiner`` in a subprocess.

    naiveFFIMiner enforces: KVIKIO_COMPAT_MODE=ON (POSIX path; GDS OFF) + device-only pool.
    It expects float min_support and accepts --quant-mult to lock scaling.

    The float support handed to the miner is ``sup_int / quant_mult`` (with
    the divisor clamped to at least 1), formatted with 12 significant digits.

    Args:
        floating_text_or_parquet: Path of the floating-point input.
        quant_mult: Quantization multiplier, forwarded via ``--quant-mult``.
        sup_int: Integer support threshold; also used in output file names.
        out_dir: Directory for the patterns file and the .out/.err logs
            (created if missing).

    Returns:
        Dict with ``rc`` (exit code) and the ``patterns``, ``stdout`` and
        ``stderr`` file paths as strings.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    divisor = max(1, int(quant_mult))
    sup_float = sup_int / divisor

    patterns_path = out_dir / f"patterns_naive_sup{sup_int}.txt"
    log_path, err_path = (out_dir / f"naive_sup{sup_int}{ext}" for ext in (".out", ".err"))

    cmd = [sys.executable, "-m", "src.algorithms.fuzzy.naiveFFIMiner"]
    cmd += [str(floating_text_or_parquet), f"{sup_float:.12g}"]
    cmd += ["--quant-mult", str(int(quant_mult))]
    cmd += ["-o", str(patterns_path)]

    exit_code = _run_subprocess(cmd, log_path, err_path, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(log_path),
        "stderr": str(err_path),
    }

# ---------------------------------------
# Step 3: Parse logs -> metrics -> plots
# ---------------------------------------
# Metric name -> case-insensitive regex whose first group captures the number
# printed by the miners for that metric.
_METRIC_PATTERNS = {
    name: re.compile(pattern, re.I)
    for name, pattern in (
        ("exec_time", r"Execution Time:\s*([0-9.]+)\s*seconds"),
        ("cpu_mem_mb", r"Peak CPU Memory Usage:\s*([0-9.]+)\s*MB"),
        ("gpu_mem_mb", r"Peak GPU \(driver\) Used:\s*([0-9.]+)\s*MB"),
        ("pool_used_mb", r"Peak Pool Used:\s*([0-9.]+)\s*MB"),
        ("pool_total_mb", r"Peak Pool Total:\s*([0-9.]+)\s*MB"),
        ("rmm_peak_mb", r"RMM Statistics Peak:\s*([0-9.]+)\s*MB"),
        ("patterns_found", r"Patterns Found:\s*([0-9]+)"),
    )
}


def parse_metrics_from_log(log_path: Path) -> Dict[str, Optional[float]]:
    """Extract every known metric from a miner log file.

    Args:
        log_path: Log file to scan; undecodable bytes are ignored.

    Returns:
        One entry per key of ``_METRIC_PATTERNS``: the first match converted
        to ``float``, or ``None`` when the log does not contain that metric.
    """
    content = Path(log_path).read_text(errors="ignore")
    hits = ((name, pattern.search(content)) for name, pattern in _METRIC_PATTERNS.items())
    return {name: (float(hit.group(1)) if hit else None) for name, hit in hits}

def collect_results(dataset_name: str, sf: int, quant_mult: int, supports: List[int], out_dir: Path) -> pd.DataFrame:
    """Gather metrics from the per-support miner logs into one DataFrame.

    For each support value, ``cuffi_sup<sup>.out`` and then
    ``naive_sup<sup>.out`` are looked up in ``out_dir``; every log that
    exists contributes one row (run metadata plus the parsed metrics).

    Args:
        dataset_name: Value stored in the ``dataset`` column.
        sf: Value stored in the ``sf`` column.
        quant_mult: Value stored in the ``quant_mult`` column.
        supports: Support thresholds whose logs should be collected.
        out_dir: Directory containing the ``.out`` logs.

    Returns:
        DataFrame with one row per existing log. A ``gpu_mem_bytes`` column
        (MB * 1024**2) is added when at least one ``gpu_mem_mb`` value is
        present.
    """
    # (log file prefix, algorithm label) in the order rows are emitted.
    miners = (("cuffi", "cuFFIMiner"), ("naive", "naiveFFIMiner"))

    rows: List[Dict[str, Any]] = []
    for sup in supports:
        for prefix, algorithm in miners:
            log_file = out_dir / f"{prefix}_sup{sup}.out"
            if not log_file.exists():
                continue
            record: Dict[str, Any] = {
                "dataset": dataset_name,
                "sf": sf,
                "algorithm": algorithm,
                "support_quant_int": sup,
                "quant_mult": quant_mult,
            }
            record.update(parse_metrics_from_log(log_file))
            rows.append(record)

    df = pd.DataFrame(rows)
    # Derive a byte-valued column only when there is GPU data to convert.
    has_gpu_values = "gpu_mem_mb" in df.columns and df["gpu_mem_mb"].notna().any()
    if has_gpu_values:
        df["gpu_mem_bytes"] = df["gpu_mem_mb"] * (1024**2)
    return df

# ---- plotting (PDF, LaTeX-friendly) ----
# Font embedding type for the PDF/PS backends.
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["ps.fonttype"] = 42
plt.rcParams["figure.dpi"] = 150
# Text sizes shared by every figure produced below.
plt.rcParams["font.size"] = 11
plt.rcParams["axes.titlesize"] = 12
plt.rcParams["axes.labelsize"] = 11
plt.rcParams["legend.fontsize"] = 9

# Human-readable axis/title text for each plottable metric column.
_LABELS = dict(
    exec_time="Execution Time (s)",
    cpu_mem_mb="Peak CPU Memory (MB)",
    gpu_mem_mb="Peak GPU (driver) Used (MB)",
    patterns_found="Patterns Found",
)

def _plot_metric(df: pd.DataFrame, metric: str, out_dir: Path, dataset_name: str):
    if metric not in df.columns:
        print(f"[plot] Skip missing metric: {metric}")
        return
    fig, ax = plt.subplots(figsize=(5.0, 3.0))
    for algo, sub in df.groupby("algorithm", sort=False):
        sub = sub.sort_values("support_quant_int")
        ax.plot(sub["support_quant_int"].values, sub[metric].values, marker="o", label=algo)
    ax.set_xlabel("Support Threshold (quantized int)")
    ax.set_ylabel(_LABELS.get(metric, metric))
    ax.set_title(f"{dataset_name} — {_LABELS.get(metric, metric)}")
    ax.grid(alpha=0.25, linestyle=":")
    ax.legend(loc="best")
    fig.tight_layout()
    out_dir.mkdir(parents=True, exist_ok=True)
    pdf = out_dir / f"{dataset_name}_{metric}.pdf"
    fig.savefig(pdf, format="pdf"); plt.close(fig)
    print(f"[plot] wrote {pdf}")

def plot_all(metrics_df: pd.DataFrame, dataset_name: str, figs_dir: Path, metrics: Optional[List[str]]=None):
    """Render one PDF per metric via ``_plot_metric``.

    Args:
        metrics_df: Metrics table to plot from.
        dataset_name: Forwarded to each plot (title and file name).
        figs_dir: Directory that receives the figures.
        metrics: Metric columns to plot; when omitted or empty, the default
            set (exec_time, cpu_mem_mb, gpu_mem_mb, patterns_found) is used.
    """
    if metrics:
        selected = metrics
    else:
        selected = ["exec_time", "cpu_mem_mb", "gpu_mem_mb", "patterns_found"]
    for metric_name in selected:
        _plot_metric(metrics_df, metric_name, figs_dir, dataset_name)
    print("[plot] done.")

# ----------------------------------------
# Orchestrator (one-liner for your runs)
# ----------------------------------------
def run_pipeline(
    dataset_url: str,
    sf: int,
    supports_quant_int: List[int],
    *,
    # cuFFI toggles
    cuffi_allocator: str = "rmm_managed",
    cuffi_gds: str = "off",      # 'off' => cuFile => GDS ON ; 'on' => POSIX => GDS OFF
    cuffi_pinned: bool = False,
    cuffi_prefetch: bool = True,
    force: bool = False,
) -> pd.DataFrame:
    """Run the full A/B benchmark for one dataset and scale factor.

    1) Prep artifacts (URL + SF)
    2) Run cuFFIMiner (GDS+UVM) AND naiveFFIMiner (no-GDS + device-only) in subprocesses
    3) Parse logs -> CSV -> plots

    Args:
        dataset_url: URL of the dataset to download/prepare.
        sf: Scale factor; coerced to an int and clamped to at least 1 so the
            results directory name matches the prepared artifact names.
        supports_quant_int: Integer support thresholds to run.
        cuffi_allocator: Forwarded to cuFFIMiner as ``--allocator``.
        cuffi_gds: Forwarded to cuFFIMiner as ``--gds``.
        cuffi_pinned: Adds ``--pinned`` to the cuFFIMiner command when true.
        cuffi_prefetch: Adds ``--managed-prefetch`` when true.
        force: Re-run a miner even if its ``.out`` log already exists.

    Returns:
        The collected metrics DataFrame (also written to
        ``metrics_SF<sf>.csv`` in the results directory).
    """
    # Same normalisation prepare_dataset applies, so "SF<sf>" agrees everywhere.
    sf = max(1, int(sf))

    prep = prepare_dataset(dataset_url, sf)
    dataset = prep["dataset_name"]
    quant_mult = prep["quant_mult"]
    ds_dir = RESULTS_DIR / dataset / f"SF{sf}"
    # The miners write their logs directly into ds_dir. The logs/ directory
    # is still created so the on-disk layout is unchanged.
    (ds_dir / "logs").mkdir(parents=True, exist_ok=True)

    # Run all supports
    for sup in supports_quant_int:
        # Look for existing logs where run_*_cli actually writes them (ds_dir),
        # otherwise the skip branch can never trigger.
        cuffi_out = ds_dir / f"cuffi_sup{sup}.out"
        naive_out = ds_dir / f"naive_sup{sup}.out"

        if force or not cuffi_out.exists():
            res = run_cuffi_cli(
                fixed_parquet=prep["fixed_parquet"],
                quant_mult=quant_mult,
                sup_int=sup,
                out_dir=ds_dir,
                allocator=cuffi_allocator,
                gds=cuffi_gds,
                pinned=cuffi_pinned,
                managed_prefetch=cuffi_prefetch,
            )
            if res["rc"] != 0:
                # Surface failures: a failed run otherwise shows up only as
                # an all-empty metrics row.
                print(f"[warn] cuFFI sup={sup} failed with exit code {res['rc']} (see {res['stderr']})")
        else:
            print(f"[skip] cuFFI sup={sup} (log exists, use force=True to re-run)")

        if force or not naive_out.exists():
            res = run_naive_cli(
                floating_text_or_parquet=prep["floating_parquet"],
                quant_mult=quant_mult,
                sup_int=sup,
                out_dir=ds_dir,
            )
            if res["rc"] != 0:
                print(f"[warn] naive sup={sup} failed with exit code {res['rc']} (see {res['stderr']})")
        else:
            print(f"[skip] naive sup={sup} (log exists, use force=True to re-run)")

    # Collect -> CSV
    df = collect_results(dataset, sf, quant_mult, supports_quant_int, ds_dir)
    metrics_csv = ds_dir / f"metrics_SF{sf}.csv"
    df.to_csv(metrics_csv, index=False)
    print(f"[metrics] saved {metrics_csv}")

    # Plot
    plot_all(df, dataset, ds_dir / "figures")
    return df


In [2]:
# Fuzzy_retail benchmark: scale factor 50, supports given as quantized
# integer thresholds. force=True re-runs both miners for every support.
retail = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_retail.csv"
retail_sup = [25_000, 50_000, 60_000, 70_000, 80_000, 90_000, 100_000]

run_pipeline(retail, sf=50, supports_quant_int=retail_sup, force=True)

[download] Using cached: /home/tarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail.csv
[prep] Using existing: Fuzzy_retail_SF50_floating.csv
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF50_fixed.parquet 25000 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_retail/SF50/patterns_cuffi_rmm_managed_off_nopin_sup25000.txt --allocator rmm_managed --gds off --managed-prefetch
[run] exit=0  log=cuffi_sup25000.out  err=cuffi_sup25000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.naiveFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF50_floating.parquet 2500 --quant-mult 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_retail/SF50/patterns_naive_sup25000.txt
[run] exit=1  log=naive_sup25000.out  err=naive_sup25000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/tarun/cuda_

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,pool_used_mb,pool_total_mb,rmm_peak_mb,patterns_found,gpu_mem_bytes
0,Fuzzy_retail,50,cuFFIMiner,25000,10,24.3605,738.2,6002.69,0.0,0.0,4228.78,9986.0,6294277000.0
1,Fuzzy_retail,50,naiveFFIMiner,25000,10,,,,,,,,
2,Fuzzy_retail,50,cuFFIMiner,50000,10,8.1902,735.2,5660.69,0.0,0.0,3657.45,3530.0,5935664000.0
3,Fuzzy_retail,50,naiveFFIMiner,50000,10,8.259,3328.83,6010.69,0.0,0.0,,3530.0,6302665000.0
4,Fuzzy_retail,50,cuFFIMiner,60000,10,6.2595,725.67,5382.69,0.0,0.0,3507.07,2678.0,5644160000.0
5,Fuzzy_retail,50,naiveFFIMiner,60000,10,6.3707,3295.09,6024.69,0.0,0.0,,2678.0,6317345000.0
6,Fuzzy_retail,50,cuFFIMiner,70000,10,5.0255,737.2,5312.69,0.0,0.0,3377.11,2073.0,5570759000.0
7,Fuzzy_retail,50,naiveFFIMiner,70000,10,5.1445,3293.86,6026.69,0.0,0.0,,2073.0,6319442000.0
8,Fuzzy_retail,50,cuFFIMiner,80000,10,4.3309,734.79,5058.69,0.0,0.0,3285.93,1695.0,5304421000.0
9,Fuzzy_retail,50,naiveFFIMiner,80000,10,4.4785,3298.75,6006.69,0.0,0.0,,1695.0,6298471000.0


In [3]:
# Fuzzy_connect benchmark: scale factor 25, supports given as quantized
# integer thresholds. force=True re-runs both miners for every support.
connect = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_connect.csv"
connect_sup = [8_000_000, 8_500_000, 9_000_000, 9_500_000, 10_000_000]

run_pipeline(connect, sf=25, supports_quant_int=connect_sup, force=True)

[download] Using cached: /home/tarun/cuda_pami/data/fuzzy/Fuzzy_connect/Fuzzy_connect.csv
[prep] Using existing: Fuzzy_connect_SF25_floating.csv
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_connect/Fuzzy_connect_SF25_fixed.parquet 8000000 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_connect/SF25/patterns_cuffi_rmm_managed_off_nopin_sup8000000.txt --allocator rmm_managed --gds off --managed-prefetch
[run] exit=0  log=cuffi_sup8000000.out  err=cuffi_sup8000000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.naiveFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_connect/Fuzzy_connect_SF25_floating.parquet 800000 --quant-mult 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_connect/SF25/patterns_naive_sup8000000.txt
[run] exit=1  log=naive_sup8000000.out  err=naive_sup8000000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuF

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,pool_used_mb,pool_total_mb,rmm_peak_mb,patterns_found,gpu_mem_bytes
0,Fuzzy_connect,25,cuFFIMiner,8000000,10,14.4815,723.72,5868.69,0.0,0.0,4984.99,1062.0,6153767000.0
1,Fuzzy_connect,25,naiveFFIMiner,8000000,10,,,,,,,,
2,Fuzzy_connect,25,cuFFIMiner,8500000,10,9.3207,726.79,5526.69,0.0,0.0,4380.99,689.0,5795154000.0
3,Fuzzy_connect,25,naiveFFIMiner,8500000,10,9.8458,4664.95,6016.69,0.0,0.0,,689.0,6308957000.0
4,Fuzzy_connect,25,cuFFIMiner,9000000,10,6.3639,724.88,5540.69,0.0,0.0,4380.99,457.0,5809835000.0
5,Fuzzy_connect,25,naiveFFIMiner,9000000,10,6.9055,4657.63,6022.69,0.0,0.0,,457.0,6315248000.0
6,Fuzzy_connect,25,cuFFIMiner,9500000,10,4.4729,723.84,5426.69,0.0,0.0,4331.47,303.0,5690297000.0
7,Fuzzy_connect,25,naiveFFIMiner,9500000,10,5.0755,4660.43,6008.69,0.0,0.0,,303.0,6300568000.0
8,Fuzzy_connect,25,cuFFIMiner,10000000,10,3.4289,733.86,5504.69,0.0,0.0,4279.97,211.0,5772086000.0
9,Fuzzy_connect,25,naiveFFIMiner,10000000,10,3.9879,4664.88,6014.69,0.0,0.0,,211.0,6306860000.0


In [4]:
# Fuzzy_kosarak benchmark: scale factor 10, supports given as quantized
# integer thresholds. force=True re-runs both miners for every support.
kosarak = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_kosarak.csv"
kosarak_sup = [250_000, 300_000, 350_000, 400_000, 450_000, 500_000, 550_000, 600_000, 650_000, 700_000]

run_pipeline(kosarak, sf=10, supports_quant_int=kosarak_sup, force=True)


[download] Using cached: /home/tarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak.csv
[prep] Using existing: Fuzzy_kosarak_SF10_floating.csv
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF10_fixed.parquet 250000 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_kosarak/SF10/patterns_cuffi_rmm_managed_off_nopin_sup250000.txt --allocator rmm_managed --gds off --managed-prefetch
[run] exit=0  log=cuffi_sup250000.out  err=cuffi_sup250000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.naiveFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF10_floating.parquet 25000 --quant-mult 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_kosarak/SF10/patterns_naive_sup250000.txt
[run] exit=1  log=naive_sup250000.out  err=naive_sup250000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner 

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,pool_used_mb,pool_total_mb,rmm_peak_mb,patterns_found,gpu_mem_bytes
0,Fuzzy_kosarak,10,cuFFIMiner,250000,10,10.8575,733.59,6940.69,0.0,0.0,5655.52,1974.0,7277841000.0
1,Fuzzy_kosarak,10,naiveFFIMiner,250000,10,,,,,,,,
2,Fuzzy_kosarak,10,cuFFIMiner,300000,10,8.0456,735.09,6902.69,0.0,0.0,5520.66,1403.0,7237995000.0
3,Fuzzy_kosarak,10,naiveFFIMiner,300000,10,,,,,,,,
4,Fuzzy_kosarak,10,cuFFIMiner,350000,10,6.3677,730.06,6720.69,0.0,0.0,5385.24,1048.0,7047154000.0
5,Fuzzy_kosarak,10,naiveFFIMiner,350000,10,,,,,,,,
6,Fuzzy_kosarak,10,cuFFIMiner,400000,10,5.3734,734.15,6802.69,0.0,0.0,5267.26,823.0,7133137000.0
7,Fuzzy_kosarak,10,naiveFFIMiner,400000,10,,,,,,,,
8,Fuzzy_kosarak,10,cuFFIMiner,450000,10,4.6076,739.66,6488.69,0.0,0.0,5176.9,655.0,6803885000.0
9,Fuzzy_kosarak,10,naiveFFIMiner,450000,10,5.2888,5179.2,7388.69,0.0,0.0,,655.0,7747603000.0


In [2]:
# Fuzzy_pumsb benchmark: scale factor 25, supports given as quantized integer
# thresholds (listed in descending order). force=True re-runs both miners.
pumsb = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_pumsb.csv"
pumsb_sup = [6_000_000, 5_500_000, 5_000_000, 4_500_000, 4_000_000]

run_pipeline(pumsb, sf=25, supports_quant_int=pumsb_sup, force=True)

[download] Using cached: /home/tarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb.csv
[prep] Using existing: Fuzzy_pumsb_SF25_floating.csv
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF25_fixed.parquet 6000000 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_pumsb/SF25/patterns_cuffi_rmm_managed_off_nopin_sup6000000.txt --allocator rmm_managed --gds off --managed-prefetch
[run] exit=0  log=cuffi_sup6000000.out  err=cuffi_sup6000000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.naiveFFIMiner /home/tarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF25_floating.parquet 600000 --quant-mult 10 -o /home/tarun/cuda_pami/results/fuzzy/Fuzzy_pumsb/SF25/patterns_naive_sup6000000.txt
[run] exit=0  log=naive_sup6000000.out  err=naive_sup6000000.err
[run]  /home/tarun/miniforge3/envs/rapids-25.08/bin/python -m src.algorithms.fuzzy.cuFFIMiner /home/taru

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,pool_used_mb,pool_total_mb,rmm_peak_mb,patterns_found,gpu_mem_bytes
0,Fuzzy_pumsb,25,cuFFIMiner,6000000,10,7.0347,725.72,6517.06,0.0,0.0,5342.79,928.0,6833633000.0
1,Fuzzy_pumsb,25,naiveFFIMiner,6000000,10,7.7815,5711.48,7388.56,0.0,0.0,,928.0,7747467000.0
2,Fuzzy_pumsb,25,cuFFIMiner,5500000,10,12.6421,727.01,6324.56,0.0,0.0,5438.75,1744.0,6631782000.0
3,Fuzzy_pumsb,25,naiveFFIMiner,5500000,10,13.5672,5726.76,7388.56,0.0,0.0,,1744.0,7747467000.0
4,Fuzzy_pumsb,25,cuFFIMiner,5000000,10,24.7675,735.79,6556.56,0.0,0.0,5665.62,3384.0,6875051000.0
5,Fuzzy_pumsb,25,naiveFFIMiner,5000000,10,25.5514,5710.29,7390.56,0.0,0.0,,3384.0,7749564000.0
6,Fuzzy_pumsb,25,cuFFIMiner,4500000,10,53.0698,725.6,6696.56,0.0,0.0,5889.4,6983.0,7021852000.0
7,Fuzzy_pumsb,25,naiveFFIMiner,4500000,10,53.911,5727.79,7392.56,0.0,0.0,,6983.0,7751661000.0
8,Fuzzy_pumsb,25,cuFFIMiner,4000000,10,121.7131,747.18,7390.56,0.0,0.0,6084.39,15240.0,7749564000.0
9,Fuzzy_pumsb,25,naiveFFIMiner,4000000,10,122.2995,5720.69,7391.38,0.0,0.0,,15240.0,7750424000.0
