In [None]:
# pipeline: prep -> run (subprocess) -> collect -> plot
from __future__ import annotations

import os, sys, re, json, time, subprocess, textwrap
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import requests
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------
# Project paths
# -----------------------
# The project root is taken to be two directory levels above the current
# working directory; the data and results trees live beneath it.
PROJECT_ROOT = Path.cwd().parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data", "fuzzy")
RESULTS_DIR = PROJECT_ROOT.joinpath("results", "fuzzy")

# Create both trees up front so later steps can write into them directly.
for p in (DATA_DIR, RESULTS_DIR):
    p.mkdir(parents=True, exist_ok=True)

# -----------------------
# Step 1: Prep (URL + SF)
# -----------------------
def _filename_from_url(url: str) -> str:
    """Return the last path component of ``url``.

    Both the query string and the fragment are discarded, so
    ``https://host/dir/data.csv?v=2#top`` yields ``data.csv``.

    Args:
        url: URL (or plain path) that names a file.

    Returns:
        The final path segment, or an empty string when the URL has no path.
    """
    # Function-scope import: urllib is not part of the file's import block.
    from urllib.parse import urlsplit

    # urlsplit separates path, query and fragment; only the path names the file.
    return Path(urlsplit(url).path).name

def _download(url: str, base_dir: Path) -> Path:
    """Return the local copy of ``url``, downloading it only when needed.

    The file is stored at ``base_dir/<stem>/<filename>``. Lookup order:

    1. that target path already exists -> reuse it;
    2. a legacy copy at ``base_dir/<filename>`` exists -> copy it into place;
    3. otherwise fetch the URL over HTTP.

    New content is first written to a sibling ``*.part`` file and then renamed,
    so an interrupted copy or download never leaves a truncated file that a
    later call would mistake for a valid cache entry.

    Args:
        url: Source URL of the dataset file.
        base_dir: Directory under which the per-dataset folder is created.

    Returns:
        Path of the cached or freshly written file.

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.HTTPError: If the server answers with an error status.
    """
    filename = _filename_from_url(url)
    if not filename:
        # Without a name, the target would collapse onto base_dir itself.
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    dst_dir = base_dir / Path(filename).stem
    dst_dir.mkdir(parents=True, exist_ok=True)
    out = dst_dir / filename
    if out.exists():
        print(f"[download] Using cached: {out}")
        return out

    tmp = out.with_name(out.name + ".part")
    try:
        legacy = base_dir / filename
        # is_file(): for an extension-less name, base_dir/<filename> is the
        # directory created above and must not be treated as a legacy copy.
        if legacy.is_file():
            tmp.write_bytes(legacy.read_bytes())
            os.replace(tmp, out)
            # The legacy file is left in place, so this is a copy, not a move.
            print(f"[download] Copied legacy file -> {out}")
            return out

        print(f"[download] Fetch {url}")
        # Stream to disk in chunks instead of holding the whole body in memory.
        with requests.get(url, timeout=60, stream=True) as r:
            r.raise_for_status()
            with open(tmp, "wb") as fh:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    fh.write(chunk)
        os.replace(tmp, out)
    finally:
        # After a successful rename the temp file is gone; this only cleans up
        # leftovers from a failed copy or download.
        if tmp.exists():
            tmp.unlink()
    print(f"[download] Saved {out}")
    return out

def prepare_dataset(url: str, sf: int) -> Dict[str, Any]:
    """Download a dataset and derive every input file the miners need.

    Steps, in order: make the project importable, download (or reuse) the
    original file, replicate it ``sf`` times, build the fixed-point text file,
    and convert both text files to parquet. The project helpers are imported
    lazily, after ``PROJECT_ROOT`` has been put on ``sys.path``.

    Args:
        url: Source URL of the dataset.
        sf: Replication (scale) factor; values below 1 are clamped to 1.

    Returns:
        Dict with the keys ``dataset_name``, ``original``, ``floating_text``,
        ``floating_parquet``, ``fixed_text``, ``fixed_parquet`` (all paths as
        strings) and ``quant_mult`` (int parsed from the fixed parquet name).
    """
    root_entry = str(PROJECT_ROOT)
    if root_entry not in sys.path:
        sys.path.insert(0, root_entry)

    source_file = _download(url, DATA_DIR)
    # The download helper stores each dataset in a folder named after it.
    dataset_name = source_file.parent.name

    # 1) replicated floating-point text file
    from scripts.fuzzy.replicate_file import replicate_file
    sf = max(1, int(sf))
    replicated_name = f"{source_file.stem}_SF{sf}{source_file.suffix}"
    floating_text = source_file.with_name(replicated_name)
    if not floating_text.exists():
        replicate_file(str(source_file), sf, str(floating_text))
        print(f"[prep] Made: {floating_text.name}")
    else:
        print(f"[prep] Using existing: {floating_text.name}")

    # 2) fixed-point text file
    # NOTE(review): process_file presumably returns the path it wrote - confirm.
    from scripts.fuzzy.fixedpoint_normalize import process_file
    fixed_text = process_file(str(floating_text))

    # 3) parquet versions of both text files (fixed first, then floating)
    from scripts.fuzzy.convert_to_parquet import convert_text_to_parquet
    fixed_parquet = convert_text_to_parquet(fixed_text)
    floating_parquet = convert_text_to_parquet(floating_text)

    # The multiplier is whatever follows the last "_fixed_" in the parquet stem.
    multiplier_token = Path(fixed_parquet).stem.split("_fixed_")[-1]
    quant_mult = int(multiplier_token)

    summary: Dict[str, Any] = {}
    summary["dataset_name"] = dataset_name
    summary["original"] = str(source_file)
    summary["floating_text"] = str(floating_text)
    summary["floating_parquet"] = str(floating_parquet)
    summary["fixed_text"] = str(fixed_text)
    summary["fixed_parquet"] = str(fixed_parquet)
    summary["quant_mult"] = quant_mult
    return summary

# -------------------------------------------------
# Step 2: Run miners in separate processes
# -------------------------------------------------
def _run_subprocess(args: List[str], log_path: Path, err_path: Path, cwd: Path, extra_env: Optional[Dict[str, str]]=None) -> int:
    """Run ``args`` as a child process, capturing its output in two files.

    Args:
        args: Command line, one list element per argument.
        log_path: File that receives the child's stdout (overwritten).
        err_path: File that receives the child's stderr (overwritten).
        cwd: Working directory for the child.
        extra_env: Optional extra environment variables; values are converted
            to ``str`` and override inherited ones.

    Returns:
        The child's exit code. A non-zero code is returned, not raised.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    err_path.parent.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    # Prepend the project root so ``python -m src...`` resolves in the child.
    # Skip the inherited value when it is empty: joining it unconditionally
    # would leave a trailing separator, i.e. an empty PYTHONPATH entry.
    search_path = [str(PROJECT_ROOT)]
    inherited = env.get("PYTHONPATH", "")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    if extra_env:
        env.update({k: str(v) for k, v in extra_env.items()})
    print("[run] ", " ".join(args))
    with open(log_path, "w") as out, open(err_path, "w") as err:
        proc = subprocess.run(args, cwd=cwd, env=env, stdout=out, stderr=err, text=True)
    print(f"[run] exit={proc.returncode}  log={log_path}  err={err_path}")
    return proc.returncode

def run_cuffi_cli(fixed_parquet, quant_mult, sup_int, out_dir, allocator="rmm_managed", gds="off", pinned=True, managed_prefetch=True) -> Dict[str, Any]:
    """Launch cuFFIMiner as ``python -m src.algorithms.fuzzy.cuFFIMiner``.

    Args:
        fixed_parquet: Path of the fixed-point parquet input.
        quant_mult: Quantisation multiplier, passed to the CLI as an integer.
        sup_int: Integer support threshold, passed to the CLI as an integer.
        out_dir: Result directory (must offer ``mkdir`` and ``/``); logs go
            into its ``logs`` sub-directory.
        allocator: Value for the ``--allocator`` option.
        gds: Value for the ``--gds`` option.
        pinned: Only selects the ``pin``/``nopin`` tag in the patterns file name.
        managed_prefetch: Not used by this function.

    Returns:
        Dict with ``rc`` (exit code) and the ``patterns``, ``stdout`` and
        ``stderr`` file paths as strings.
    """
    # NOTE(review): ``pinned`` and ``managed_prefetch`` never reach the command
    # line - confirm whether the miner CLI has matching options.
    out_dir.mkdir(parents=True, exist_ok=True)

    pin_tag = "pin" if pinned else "nopin"
    patterns_path = out_dir / f"patterns_cuffi_{allocator}_{gds}_{pin_tag}_sup{sup_int}.txt"
    log_base = out_dir / "logs"
    stdout_file = log_base / f"cuffi_sup{sup_int}.out"
    stderr_file = log_base / f"cuffi_sup{sup_int}.err"

    command = [sys.executable, "-m", "src.algorithms.fuzzy.cuFFIMiner"]
    command += [str(fixed_parquet), str(int(sup_int)), str(int(quant_mult))]
    command += ["-o", str(patterns_path), "--allocator", allocator, "--gds", gds]

    exit_code = _run_subprocess(command, stdout_file, stderr_file, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(stdout_file),
        "stderr": str(stderr_file),
    }

def run_naive_cli(floating_text_or_parquet, quant_mult, sup_int, out_dir) -> Dict[str, Any]:
    """Launch naiveFFIMiner on the floating-point input.

    The integer threshold is converted back to a float by dividing it by
    ``quant_mult`` (treated as at least 1) and is passed to the CLI formatted
    with ``.12g``. File names keep the integer threshold.

    Args:
        floating_text_or_parquet: Path of the floating-point input file.
        quant_mult: Quantisation multiplier used to rescale ``sup_int``.
        sup_int: Integer support threshold.
        out_dir: Result directory; logs go into its ``logs`` sub-directory.

    Returns:
        Dict with ``rc``, the ``patterns``/``stdout``/``stderr`` paths as
        strings, and ``algorithm_tag`` set to ``"naiveFFIMiner_floating"``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    divisor = max(1, int(quant_mult))
    sup_float = sup_int / divisor

    tag = f"naive_floating_sup{sup_int}"
    log_base = out_dir / "logs"
    patterns_path = out_dir / f"patterns_{tag}.txt"
    stdout_file = log_base / f"{tag}.out"
    stderr_file = log_base / f"{tag}.err"

    command = [sys.executable, "-m", "src.algorithms.fuzzy.naiveFFIMiner"]
    command.append(str(floating_text_or_parquet))
    command.append(f"{sup_float:.12g}")
    command.extend(["-o", str(patterns_path)])

    exit_code = _run_subprocess(command, stdout_file, stderr_file, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(stdout_file),
        "stderr": str(stderr_file),
        "algorithm_tag": "naiveFFIMiner_floating",
    }

def run_naive_fixed_cli(fixed_parquet, quant_mult, sup_int, out_dir) -> Dict[str, Any]:
    """Launch naiveFFIMiner on the fixed-point parquet input.

    The threshold handed to the CLI is ``sup_int / max(1, int(quant_mult))``
    formatted with ``.12g``. File names keep the integer threshold.

    Args:
        fixed_parquet: Path of the fixed-point parquet input.
        quant_mult: Divisor applied to ``sup_int`` (treated as at least 1).
        sup_int: Integer support threshold.
        out_dir: Result directory; logs go into its ``logs`` sub-directory.

    Returns:
        Dict with ``rc``, the ``patterns``/``stdout``/``stderr`` paths as
        strings, and ``algorithm_tag`` set to ``"naiveFFIMiner_fixed"``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    divisor = max(1, int(quant_mult))
    sup_float = sup_int / divisor

    tag = f"naive_fixed_sup{sup_int}"
    log_base = out_dir / "logs"
    patterns_path = out_dir / f"patterns_{tag}.txt"
    stdout_file = log_base / f"{tag}.out"
    stderr_file = log_base / f"{tag}.err"

    command = [sys.executable, "-m", "src.algorithms.fuzzy.naiveFFIMiner"]
    command.append(str(fixed_parquet))
    command.append(f"{sup_float:.12g}")
    command.extend(["-o", str(patterns_path)])

    exit_code = _run_subprocess(command, stdout_file, stderr_file, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(stdout_file),
        "stderr": str(stderr_file),
        "algorithm_tag": "naiveFFIMiner_fixed",
    }

def run_ffiminer_cli(fixed_text, sup_int, out_dir) -> Dict[str, Any]:
    """Launch ffiminer as ``python -m src.algorithms.fuzzy.ffiminer``.

    Args:
        fixed_text: Path of the fixed-point text input.
        sup_int: Integer support threshold, passed to the CLI as an integer.
        out_dir: Result directory; logs go into its ``logs`` sub-directory.

    Returns:
        Dict with ``rc``, the ``patterns``/``stdout``/``stderr`` paths as
        strings, and ``algorithm_tag`` set to ``"ffiMiner"``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    tag = f"ffiminer_sup{sup_int}"
    log_base = out_dir / "logs"
    patterns_path = out_dir / f"patterns_{tag}.txt"
    stdout_file = log_base / f"{tag}.out"
    stderr_file = log_base / f"{tag}.err"

    command = [sys.executable, "-m", "src.algorithms.fuzzy.ffiminer"]
    command.extend([str(fixed_text), str(int(sup_int))])
    command.extend(["-o", str(patterns_path)])

    exit_code = _run_subprocess(command, stdout_file, stderr_file, cwd=PROJECT_ROOT)
    return {
        "rc": exit_code,
        "patterns": str(patterns_path),
        "stdout": str(stdout_file),
        "stderr": str(stderr_file),
        "algorithm_tag": "ffiMiner",
    }

# ---------------------------------------
# Step 3: Parse logs -> metrics -> plots
# ---------------------------------------
# One well-formed decimal number: digits with an optional fraction (or a bare
# fraction such as ".5"), optionally followed by an exponent. Unlike a loose
# "[0-9.]+", this can never capture text that float() rejects (".", "1.2.3"),
# and it accepts scientific notation such as "1.5e-05".
_NUMBER = r"((?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)"

# Metric name -> pattern whose first group captures the metric's value.
_METRIC_PATTERNS = {
    "exec_time":      re.compile(r"Execution Time:\s*" + _NUMBER + r"\s*seconds", re.I),
    "cpu_mem_mb":     re.compile(r"(?:Peak\s+)?CPU Memory(?: Usage)?:\s*" + _NUMBER + r"\s*MB", re.I),
    "gpu_mem_mb":     re.compile(r"Peak GPU \(driver\) Used:\s*" + _NUMBER + r"\s*MB", re.I),
    "patterns_found": re.compile(r"(?:Total\s+)?Patterns Found:\s*([0-9]+)", re.I),
}

def parse_metrics_from_log(log_path: Path) -> Dict[str, Optional[float]]:
    """Extract the known metrics from a miner's stdout log.

    Args:
        log_path: Log file to read; undecodable bytes are ignored.

    Returns:
        Dict with one entry per key of ``_METRIC_PATTERNS``. Each value is the
        first match in the log converted to ``float``, or ``None`` when the
        log has no well-formed value for that metric.
    """
    text = Path(log_path).read_text(errors="ignore")
    out: Dict[str, Optional[float]] = {}
    for k, rgx in _METRIC_PATTERNS.items():
        m = rgx.search(text)
        out[k] = float(m.group(1)) if m else None
    return out

def collect_results(dataset_name: str, sf: int, quant_mult: int, supports: List[int], ds_dir: Path) -> pd.DataFrame:
    """Gather the parsed metrics of every available miner log into one table.

    For each support threshold and each miner, the log is looked up in
    ``ds_dir/logs``. Two legacy locations are also accepted: a cuFFIMiner log
    directly in ``ds_dir``, and a floating naive log named
    ``naive_sup<sup>.out``. Missing logs simply produce no row.

    Args:
        dataset_name: Value written to the ``dataset`` column.
        sf: Value written to the ``sf`` column.
        quant_mult: Value written to the ``quant_mult`` column.
        supports: Support thresholds to look up, in output order.
        ds_dir: Result directory of this dataset / scale factor.

    Returns:
        DataFrame with one row per log found (no columns when none exists).
    """
    logs_dir = ds_dir / "logs"

    # (internal algorithm tag, log file name template)
    log_templates = (
        ("cuFFIMiner",             "cuffi_sup{sup}.out"),
        ("naiveFFIMiner_floating", "naive_floating_sup{sup}.out"),
        ("naiveFFIMiner_fixed",    "naive_fixed_sup{sup}.out"),
        ("ffiMiner",               "ffiminer_sup{sup}.out"),
    )

    records: List[Dict[str, Any]] = []
    for sup in supports:
        for algo_name, template in log_templates:
            file_name = template.format(sup=sup)
            log_path = logs_dir / file_name

            print(f"[collect] Checking for {algo_name} log: {log_path}")

            # Legacy fallbacks, consulted only when the current name is absent.
            if not log_path.exists():
                if algo_name == "cuFFIMiner":
                    fallback = ds_dir / file_name
                elif algo_name == "naiveFFIMiner_floating":
                    fallback = logs_dir / f"naive_sup{sup}.out"
                else:
                    fallback = None
                if fallback is not None and fallback.exists():
                    log_path = fallback

            if not log_path.exists():
                continue

            parsed = parse_metrics_from_log(log_path)
            # Turn the internal suffixes into the labels used in the plots.
            display_name = algo_name.replace("_floating", " (floating)")
            display_name = display_name.replace("_fixed", " (fixed)")

            record: Dict[str, Any] = {
                "dataset": dataset_name,
                "sf": sf,
                "algorithm": display_name,
                "support_quant_int": sup,
                "quant_mult": quant_mult,
            }
            record.update(parsed)
            records.append(record)

    return pd.DataFrame(records)

# ---- plotting (PDF, LaTeX-friendly) ----
# Global matplotlib defaults for the figures written by this notebook.
plt.rcParams.update(
    {
        # font type 42 for both the PDF and PS backends
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "figure.dpi": 150,
        # text sizes
        "font.size": 11,
        "axes.titlesize": 12,
        "axes.labelsize": 11,
        "legend.fontsize": 9,
    }
)

# Metric column name -> human-readable axis/title label.
_LABELS = dict(
    exec_time="Execution Time (s)",
    cpu_mem_mb="Peak CPU Memory (MB)",
    patterns_found="Patterns Found",
)

def _plot_metric(df: pd.DataFrame, metric: str, out_dir: Path, dataset_name: str):
    """Plot one metric against the support threshold, one line per algorithm.

    Nothing is drawn when ``metric`` is not a column of ``df`` or has no
    non-null values. Otherwise the figure is saved as
    ``out_dir/<dataset_name>_<metric>.pdf``.

    Args:
        df: Metrics table with ``algorithm`` and ``support_quant_int`` columns.
        metric: Name of the column to plot on the y axis.
        out_dir: Directory for the PDF (created when missing).
        dataset_name: Used in the title and in the file name.
    """
    if metric not in df.columns: return
    dfm = df.dropna(subset=[metric])
    if dfm.empty: return
    fig, ax = plt.subplots(figsize=(5.0, 5.0))

    algo_order = ["cuFFIMiner", "naiveFFIMiner (floating)", "naiveFFIMiner (fixed)", "ffiMiner"]
    colors = {"cuFFIMiner": "C0", "naiveFFIMiner (floating)": "C1", "naiveFFIMiner (fixed)": "C2", "ffiMiner": "C3"}
    markers = {"cuFFIMiner": "o", "naiveFFIMiner (floating)": "o", "naiveFFIMiner (fixed)": "x", "ffiMiner": "s"}

    for algo in algo_order:
        sub = dfm[dfm["algorithm"] == algo]
        if not sub.empty:
            sub = sub.sort_values("support_quant_int")
            ax.plot(sub["support_quant_int"].values, sub[metric].values,
                    marker=markers.get(algo, "."), label=algo, color=colors.get(algo))

    ax.set_xlabel("Support Threshold")
    # A pure log axis cannot display values <= 0 (e.g. zero patterns found), so
    # those points would vanish; fall back to symlog, which is linear near zero.
    if (dfm[metric] > 0).all():
        ax.set_yscale("log", base=10)
    else:
        ax.set_yscale("symlog", base=10, linthresh=1.0)
    label = _LABELS.get(metric, metric)
    ax.set_ylabel(label)
    # Em dash written as a real character (the title previously held a
    # mis-decoded UTF-8 byte sequence).
    ax.set_title(f"{dataset_name} — {label}")
    ax.grid(alpha=0.25, linestyle=":")
    ax.legend(loc="best")
    fig.tight_layout()
    out_dir.mkdir(parents=True, exist_ok=True)
    pdf = out_dir / f"{dataset_name}_{metric}.pdf"
    fig.savefig(pdf, format="pdf"); plt.close(fig)
    print(f"[plot] wrote {pdf}")

def plot_all(metrics_df: pd.DataFrame, dataset_name: str, figs_dir: Path, metrics: Optional[List[str]]=None):
    """Write one PDF plot per requested metric.

    Args:
        metrics_df: Metrics table handed to ``_plot_metric``.
        dataset_name: Dataset label used in titles and file names.
        figs_dir: Output directory for the figures.
        metrics: Metric columns to plot; when omitted or empty, the execution
            time, CPU memory and pattern count are plotted.
    """
    selected = metrics if metrics else ["exec_time", "cpu_mem_mb", "patterns_found"]
    for metric_name in selected:
        _plot_metric(metrics_df, metric_name, figs_dir, dataset_name)
    print("[plot] done.")

# ----------------------------------------
# Orchestrator
# ----------------------------------------
def run_pipeline(
    dataset_url: str, sf: int, supports_quant_int: List[int], *,
    cuffi_allocator: str = "rmm_device", cuffi_gds: str = "off",
    cuffi_pinned: bool = False, cuffi_prefetch: bool = True, force: bool = False,
) -> pd.DataFrame:
    """Prepare a dataset, run every miner, then collect and plot the metrics.

    For each support threshold, cuFFIMiner and both naiveFFIMiner variants are
    run; ffiMiner is run only for the single largest threshold. A miner is
    skipped when its stdout log already exists, unless ``force`` is set. The
    metrics are saved to ``metrics_SF<sf>.csv`` in the result directory and
    plotted into its ``figures`` sub-directory.

    Args:
        dataset_url: URL of the dataset to download.
        sf: Scale factor used for preparation and for the result directory name.
        supports_quant_int: Integer support thresholds to evaluate.
        cuffi_allocator: Forwarded to ``run_cuffi_cli`` as ``allocator``.
        cuffi_gds: Forwarded to ``run_cuffi_cli`` as ``gds``.
        cuffi_pinned: Forwarded to ``run_cuffi_cli`` as ``pinned``.
        cuffi_prefetch: Forwarded to ``run_cuffi_cli`` as ``managed_prefetch``.
        force: Re-run miners even when their log already exists.

    Returns:
        The metrics DataFrame produced by ``collect_results``.
    """
    prep = prepare_dataset(dataset_url, sf)
    print(prep)

    dataset = prep["dataset_name"]
    quant_mult = prep["quant_mult"]
    ds_dir = RESULTS_DIR / dataset / f"SF{sf}"
    logs_dir = ds_dir / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    def _warn_if_failed(label: str, sup: int, result: Dict[str, Any]) -> None:
        # A failed miner still leaves its log behind, so the next call would
        # skip it; surface the failure instead of ignoring the exit code.
        rc = result["rc"]
        if rc != 0:
            print(f"[warn] {label} sup={sup} exited with rc={rc}; see {result['stderr']} "
                  f"(its log now exists - re-run with force=True to retry)")

    # ffiMiner is only run for the single largest threshold.
    cpu_supports = set(sorted(supports_quant_int, reverse=True)[:1])

    for sup in supports_quant_int:
        # 1. cuFFIMiner (every threshold)
        cuffi_log = logs_dir / f"cuffi_sup{sup}.out"
        if (not cuffi_log.exists()) or force:
            res = run_cuffi_cli(prep["fixed_parquet"], quant_mult, sup, ds_dir,
                                cuffi_allocator, cuffi_gds, cuffi_pinned, cuffi_prefetch)
            _warn_if_failed("cuFFI", sup, res)
        else:
            print(f"[skip] cuFFI sup={sup} (log exists)")

        # 2. naiveFFIMiner (floating); a legacy "naive_sup<sup>.out" log also counts
        naive_flt_log = logs_dir / f"naive_floating_sup{sup}.out"
        legacy_flt_log = logs_dir / f"naive_sup{sup}.out"
        if (not naive_flt_log.exists() and not legacy_flt_log.exists()) or force:
            res = run_naive_cli(prep["floating_parquet"], quant_mult, sup, ds_dir)
            _warn_if_failed("naive (floating)", sup, res)
        else:
            print(f"[skip] naive (floating) sup={sup} (log exists)")

        # 3. naiveFFIMiner (fixed): quant_mult=1 passes the threshold unscaled
        naive_fix_log = logs_dir / f"naive_fixed_sup{sup}.out"
        if (not naive_fix_log.exists()) or force:
            res = run_naive_fixed_cli(prep["fixed_parquet"], 1, sup, ds_dir)
            _warn_if_failed("naive (fixed)", sup, res)
        else:
            print(f"[skip] naive (fixed) sup={sup} (log exists)")

        # 4. ffiMiner (CPU, fixed text) - largest threshold only
        if sup in cpu_supports:
            ffi_log = logs_dir / f"ffiminer_sup{sup}.out"
            if (not ffi_log.exists()) or force:
                print(f"[info] Running ffiMiner for sup={sup}...")
                res = run_ffiminer_cli(prep["fixed_text"], sup, ds_dir)
                _warn_if_failed("ffiMiner", sup, res)
            else:
                print(f"[skip] ffiMiner sup={sup} (log exists)")

    # Collect & Plot
    df = collect_results(dataset, sf, quant_mult, supports_quant_int, ds_dir)
    metrics_csv = ds_dir / f"metrics_SF{sf}.csv"
    df.to_csv(metrics_csv, index=False)
    print(f"[metrics] saved {metrics_csv}")

    plot_all(df, dataset, ds_dir / "figures")
    return df

In [2]:
# Benchmark Fuzzy_retail: once at SF=1, then at SF=10 with the thresholds
# multiplied by the same factor.
retail = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_retail.csv"
# retail_sup = [25_000, 50_000, 60_000, 70_000, 80_000, 90_000, 100_000]
# Integer support thresholds; run_pipeline runs ffiMiner only for the largest one.
retail_sup = [80,90,100,110,1000]

# force=False: a miner is skipped when its log file already exists.
run_pipeline(retail, sf=1, supports_quant_int=retail_sup, force=False)

sf = 10
# Scale every threshold by the scale factor before the second run.
retail_sup = [x * sf for x in retail_sup]
run_pipeline(retail, sf=sf, supports_quant_int=retail_sup, force=False)

[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail.csv
[prep] Using existing: Fuzzy_retail_SF1.csv
Processing Fuzzy_retail_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1_fixed_10.csv...
[convert] method=cudf rows=1256571 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1_fixed_10.parquet
[convert] method=cudf rows=1256571 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1.parquet
{'dataset_name': 'Fuzzy_retail', 'original': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail.csv', 'floating_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1.csv', 'floating_parquet': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1.parquet', 'fixed_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1_fixed_10.csv', '

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_retail,10,cuFFIMiner,800,10,2.5784,2409.14,41092.56,136556.0
1,Fuzzy_retail,10,naiveFFIMiner (floating),800,10,2.1689,2409.14,41064.56,41169.0
2,Fuzzy_retail,10,naiveFFIMiner (fixed),800,10,2.8053,2458.25,41064.56,136556.0
3,Fuzzy_retail,10,cuFFIMiner,900,10,2.5631,2409.14,41092.56,114415.0
4,Fuzzy_retail,10,naiveFFIMiner (floating),900,10,2.177,2409.14,41064.56,35185.0
5,Fuzzy_retail,10,naiveFFIMiner (fixed),900,10,2.6963,2409.14,41064.56,114415.0
6,Fuzzy_retail,10,cuFFIMiner,1000,10,2.4556,2409.14,41092.56,98151.0
7,Fuzzy_retail,10,naiveFFIMiner (floating),1000,10,2.1097,2432.1,41064.56,30615.0
8,Fuzzy_retail,10,naiveFFIMiner (fixed),1000,10,2.6807,2442.9,41064.56,98151.0
9,Fuzzy_retail,10,cuFFIMiner,1100,10,2.2112,2409.14,41092.56,85504.0


In [3]:
# Benchmark Fuzzy_kosarak: once at SF=1, then at SF=10 with the thresholds
# multiplied by the same factor.
kosarak = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_kosarak.csv"
# Integer support thresholds; run_pipeline runs ffiMiner only for the largest one.
kosarak_sup = [4000,4500,5000,5500,100000]

# force=False: a miner is skipped when its log file already exists.
run_pipeline(kosarak, sf=1, supports_quant_int=kosarak_sup, force=False)

sf = 10
# Scale every threshold by the scale factor before the second run.
kosarak_sup = [x * sf for x in kosarak_sup]
run_pipeline(kosarak, sf=sf, supports_quant_int=kosarak_sup, force=False)


[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak.csv
[prep] Using existing: Fuzzy_kosarak_SF1.csv
Processing Fuzzy_kosarak_SF1.csv...


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.csv...
[convert] method=cudf rows=0 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.parquet
[convert] method=cudf rows=11041780 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1.parquet
{'dataset_name': 'Fuzzy_kosarak', 'original': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak.csv', 'floating_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1.csv', 'floating_parquet': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1.parquet', 'fixed_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.csv', 'fixed_parquet': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.parquet', 'quant_mult': 10}
[skip] cuFFI sup=4000 (log exists)
[sk

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_kosarak,10,cuFFIMiner,40000,10,13.2102,14088.52,41098.56,175185.0
1,Fuzzy_kosarak,10,naiveFFIMiner (floating),40000,10,8.6202,14289.19,41074.56,44885.0
2,Fuzzy_kosarak,10,naiveFFIMiner (fixed),40000,10,18.2972,14323.35,41074.56,175185.0
3,Fuzzy_kosarak,10,cuFFIMiner,45000,10,10.6116,14088.52,41098.56,97848.0
4,Fuzzy_kosarak,10,naiveFFIMiner (floating),45000,10,8.7254,14817.03,41074.56,28590.0
5,Fuzzy_kosarak,10,naiveFFIMiner (fixed),45000,10,15.3646,14203.06,41074.56,97848.0
6,Fuzzy_kosarak,10,cuFFIMiner,50000,10,8.5808,14088.52,41098.56,64760.0
7,Fuzzy_kosarak,10,naiveFFIMiner (floating),50000,10,7.7058,14088.52,41074.56,20393.0
8,Fuzzy_kosarak,10,naiveFFIMiner (fixed),50000,10,13.3272,14266.2,41074.56,64760.0
9,Fuzzy_kosarak,10,cuFFIMiner,55000,10,7.404,14088.52,41098.56,47495.0


In [None]:
# Benchmark Fuzzy_pumsb: once at SF=1, then at SF=10 with the thresholds
# multiplied by the same factor.
pumsb = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_pumsb.csv"
# Integer support thresholds; run_pipeline runs ffiMiner only for the largest one.
pumsb_sup = [100000,110000,120000,130000,280000]

# force=False: a miner is skipped when its log file already exists.
run_pipeline(pumsb, sf=1, supports_quant_int=pumsb_sup, force=False)

sf = 10
# Scale every threshold by the scale factor before the second run.
pumsb_sup = [x * sf for x in pumsb_sup]
run_pipeline(pumsb, sf=sf, supports_quant_int=pumsb_sup, force=False)

[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb.csv
[prep] Using existing: Fuzzy_pumsb_SF1.csv
Processing Fuzzy_pumsb_SF1.csv...


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.csv...
[convert] method=cudf rows=4913349 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.parquet
[convert] method=cudf rows=4913349 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1.parquet
{'dataset_name': 'Fuzzy_pumsb', 'original': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb.csv', 'floating_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1.csv', 'floating_parquet': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1.parquet', 'fixed_text': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.csv', 'fixed_parquet': '/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.parquet', 'quant_mult': 10}
[skip] cuFFI sup=100000 (log exists)
[skip] naive (floating) sup=10

In [None]:
# Benchmark Fuzzy_T10I4D100K: once at SF=1, then at SF=100 with the thresholds
# multiplied by the same factor.
t10 = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_T10I4D100K.csv"
# Integer support thresholds; run_pipeline runs ffiMiner only for the largest one.
t10_sup = [3000,3500,4000,4500,5000]

# force=False: a miner is skipped when its log file already exists.
run_pipeline(t10, sf=1, supports_quant_int=t10_sup, force=False)

sf = 100

# Scale every threshold by the scale factor before the second run.
t10_sup = [x * sf for x in t10_sup]
run_pipeline(t10, sf=sf, supports_quant_int=t10_sup, force=False)


[download] Fetch https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_T10I4D100K.csv
[download] Saved /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K.csv
[replicate] Wrote: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1.csv (SF=1)
[prep] Made: Fuzzy_T10I4D100K_SF1.csv
Processing Fuzzy_T10I4D100K_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1_fixed_10.csv...
Completed: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1_fixed_10.csv
[convert] method=cudf rows=1091692 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1_fixed_10.parquet
[convert] method=cudf rows=1091692 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1.parquet
{'dataset_name': 'Fuzzy_T10I4D100K', 'original': '/export/home1/ltarun/cuda_pami/data/fuzzy/F

Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_T10I4D100K,100,cuFFIMiner,300000,10,2.2295,14640.06,41068.56,2325.0
1,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),300000,10,6.15,14640.06,41044.56,2045.0
2,Fuzzy_T10I4D100K,100,naiveFFIMiner (fixed),300000,10,6.6838,14640.06,41044.56,2325.0
3,Fuzzy_T10I4D100K,100,cuFFIMiner,350000,10,2.2593,14640.06,41068.56,1557.0
4,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),350000,10,6.0322,14640.06,41044.56,1407.0
5,Fuzzy_T10I4D100K,100,naiveFFIMiner (fixed),350000,10,6.7029,14640.06,41044.56,1557.0
6,Fuzzy_T10I4D100K,100,cuFFIMiner,400000,10,2.1924,14640.06,41068.56,1154.0
7,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),400000,10,6.1527,14640.06,41044.56,1069.0
8,Fuzzy_T10I4D100K,100,naiveFFIMiner (fixed),400000,10,6.1526,14640.06,41044.56,1154.0
9,Fuzzy_T10I4D100K,100,cuFFIMiner,450000,10,2.0946,14640.06,41068.56,922.0
