In [8]:
#!/usr/bin/env python3
# pipeline: prep -> run (subprocess) -> collect -> plot (jpg+pgf) -> latex
from __future__ import annotations

import os, sys, re, json, time, subprocess, textwrap
import math
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple, Set

import requests
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------
# Project paths
# -----------------------
PROJECT_ROOT = Path(os.getcwd()).parent.parent
DATA_DIR     = PROJECT_ROOT / "data" / "fuzzy"
RESULTS_DIR  = PROJECT_ROOT / "results" / "fuzzy"
for p in (DATA_DIR, RESULTS_DIR): p.mkdir(parents=True, exist_ok=True)


class DatasetPreparer:
    """
    Handles all dataset preparation: downloading, replicating, and converting.
    """
    def __init__(self, project_root: Path, data_dir: Path):
        self.project_root = project_root
        self.data_dir = data_dir
        if str(self.project_root) not in sys.path:
            sys.path.insert(0, str(self.project_root))

    def _filename_from_url(self, url: str) -> str:
        return Path(url.split("?")[0]).name

    def _download(self, url: str) -> Path:
        filename = self._filename_from_url(url)
        name_root = Path(filename).stem
        dst_dir = self.data_dir / name_root
        dst_dir.mkdir(parents=True, exist_ok=True)
        out = dst_dir / filename
        if out.exists():
            print(f"[download] Using cached: {out}")
            return out
        legacy = self.data_dir / filename
        if legacy.exists():
            out.write_bytes(legacy.read_bytes())
            print(f"[download] Moved legacy file -> {out}")
            return out
        print(f"[download] Fetch {url}")
        r = requests.get(url, timeout=60); r.raise_for_status()
        out.write_bytes(r.content)
        print(f"[download] Saved {out}")
        return out

    def prepare(self, url: str, sf: int) -> Tuple[Dict[str, Path], int, str]:
        """
        Runs the full dataset preparation pipeline.
        Returns (paths_dict, quant_mult, dataset_name)
        """
        original = self._download(url)
        dataset_name = original.parent.name
        sf = max(1, int(sf))

        # 1) replicate floating text
        from scripts.fuzzy.replicate_file import replicate_file
        floating_text = original.with_name(f"{original.stem}_SF{sf}{original.suffix}")
        if floating_text.exists():
            print(f"[prep] Using existing: {floating_text.name}")
        else:
            replicate_file(str(original), sf, str(floating_text))
            print(f"[prep] Made: {floating_text.name}")

        # 2) make fixed text + quant_mult
        from scripts.fuzzy.fixedpoint_normalize import process_file
        fixed_text = process_file(str(floating_text))

        # 3) convert both to parquet
        from scripts.fuzzy.convert_to_parquet import convert_text_to_parquet
        fixed_parquet = convert_text_to_parquet(fixed_text)
        floating_parquet = convert_text_to_parquet(floating_text)

        quant_mult = int(Path(fixed_parquet).stem.split("_fixed_")[-1])
        
        paths = {
            "original": original,
            "floating_text": floating_text,
            "floating_parquet": floating_parquet,
            "fixed_text": fixed_text,
            "fixed_parquet": fixed_parquet,
        }
        print(f"[prep] Dataset '{dataset_name}' (SF={sf}) prepared.")
        return paths, quant_mult, dataset_name


class ExperimentRunner:
    """
    Runs all mining subprocesses for a given prepared dataset.
    """
    def __init__(self, project_root: Path, ds_dir: Path, 
                 dataset_paths: Dict[str, Path], quant_mult: int):
        self.project_root = project_root
        self.ds_dir = ds_dir
        self.logs_dir = self.ds_dir / "logs"
        self.paths = dataset_paths
        self.quant_mult = quant_mult
        
        self.logs_dir.mkdir(parents=True, exist_ok=True)
        
    def _run_subprocess(self, args: List[str], log_path: Path, err_path: Path, 
                        extra_env: Optional[Dict[str, str]] = None) -> int:
        env = os.environ.copy()
        env["PYTHONPATH"] = str(self.project_root) + os.pathsep + env.get("PYTHONPATH", "")
        if extra_env:
            env.update({k: str(v) for k, v in extra_env.items()})
            
        print("[run] ", " ".join(args))
        with open(log_path, "w") as out, open(err_path, "w") as err:
            proc = subprocess.run(args, cwd=self.project_root, env=env, 
                                  stdout=out, stderr=err, text=True)
        print(f"[run] exit={proc.returncode}  log={log_path}  err={err_path}")
        return proc.returncode

    def run_cuffi(self, sup_int: int, params: Dict[str, Any]):
        allocator = params.get("allocator", "rmm_managed")
        gds = params.get("gds", "off")
        
        patterns_path = self.ds_dir / f"patterns_cuffi_{allocator}_{gds}_sup{sup_int}.txt"
        log_path = self.logs_dir / f"cuffi_sup{sup_int}.out"
        err_path = self.logs_dir / f"cuffi_sup{sup_int}.err"

        cmd = [
            sys.executable, "-m", "src.algorithms.fuzzy.cuFFIMiner",
            str(self.paths["fixed_parquet"]), str(int(sup_int)), str(int(self.quant_mult)),
            "-o", str(patterns_path), "--allocator", allocator, "--gds", gds,
        ]
        self._run_subprocess(cmd, log_path, err_path)

    def run_naive_floating(self, sup_int: int):
        sup_float = sup_int / max(1, int(self.quant_mult))
        patterns_path = self.ds_dir / f"patterns_naive_floating_sup{sup_int}.txt"
        log_path = self.logs_dir / f"naive_floating_sup{sup_int}.out"
        err_path = self.logs_dir / f"naive_floating_sup{sup_int}.err"

        cmd = [
            sys.executable, "-m", "src.algorithms.fuzzy.naiveFFIMiner",
            str(self.paths["floating_parquet"]), f"{sup_float:.12g}", "-o", str(patterns_path),
        ]
        self._run_subprocess(cmd, log_path, err_path)

    def run_naive_fixed(self, sup_int: int):
        sup_float = sup_int / max(1, int(self.quant_mult))
        patterns_path = self.ds_dir / f"patterns_naive_fixed_sup{sup_int}.txt"
        log_path = self.logs_dir / f"naive_fixed_sup{sup_int}.out"
        err_path = self.logs_dir / f"naive_fixed_sup{sup_int}.err"
        
        cmd = [
            sys.executable, "-m", "src.algorithms.fuzzy.naiveFFIMiner",
            str(self.paths["fixed_parquet"]), f"{sup_float:.12g}", "-o", str(patterns_path),
        ]
        self._run_subprocess(cmd, log_path, err_path)

    def run_ffiminer(self, sup_int: int):
        patterns_path = self.ds_dir / f"patterns_ffiminer_sup{sup_int}.txt"
        log_path = self.logs_dir / f"ffiminer_sup{sup_int}.out"
        err_path = self.logs_dir / f"ffiminer_sup{sup_int}.err"

        cmd = [
            sys.executable, "-m", "src.algorithms.fuzzy.ffiminer",
            str(self.paths["fixed_text"]), str(int(sup_int)), "-o", str(patterns_path),
        ]
        self._run_subprocess(cmd, log_path, err_path)

    def run_all_experiments(self, supports: List[int], 
                            cuffi_params: Dict[str, Any], force: bool = False):
        """Orchestrates running all miners for all specified supports."""
        
        cpu_supports: Set[int] = set(sorted(supports, reverse=True)[:1])

        for sup in supports:
            # 1. cuFFIMiner
            cuffi_log = self.logs_dir / f"cuffi_sup{sup}.out"
            if (not cuffi_log.exists()) or force:
                self.run_cuffi(sup, cuffi_params)
            else:
                print(f"[skip] cuFFI sup={sup} (log exists)")

            # 2. naiveFFIMiner (floating)
            naive_flt_log = self.logs_dir / f"naive_floating_sup{sup}.out"
            legacy_naive_log = self.logs_dir / f"naive_sup{sup}.out"
            if (not naive_flt_log.exists() and not legacy_naive_log.exists()) or force:
                    self.run_naive_floating(sup)
            else:
                print(f"[skip] naive (floating) sup={sup} (log exists)")

            # 3. naiveFFIMiner (fixed)
            naive_fix_log = self.logs_dir / f"naive_fixed_sup{sup}.out"
            if (not naive_fix_log.exists()) or force:
                self.run_naive_fixed(sup, 1)
            else:
                print(f"[skip] naive (fixed) sup={sup} (log exists)")

            # 4. CPU Miners (Top 1 largest support only)
            if sup in cpu_supports:
                ffi_log = self.logs_dir / f"ffiminer_sup{sup}.out"
                if (not ffi_log.exists()) or force:
                    print(f"[info] Running ffiMiner for sup={sup}...")
                    self.run_ffiminer(sup)
                else:
                    print(f"[skip] ffiMiner sup={sup} (log exists)")
        
        print("[run] All experiments complete.")


class ResultsCollector:
    """
    Parses all log files, collects metrics, and saves to a CSV.
    """
    _METRIC_PATTERNS = {
        "exec_time":      re.compile(r"Execution Time:\s*([0-9.]+)\s*seconds", re.I),
        "cpu_mem_mb":     re.compile(r"(?:Peak\s+)?CPU Memory(?: Usage)?:\s*([0-9.]+)\s*MB", re.I),
        "gpu_mem_mb":     re.compile(r"Peak GPU \(driver\) Used:\s*([0_9.]+)\s*MB", re.I),
        "patterns_found": re.compile(r"^(?:Total\s+)?Patterns Found:\s*(\d+)$", re.I | re.M)
    }
    
    _RUN_CONFIGS = [
        ("cuFFIMiner",             f"cuffi_sup{{sup}}.out"),
        ("naiveFFIMiner_floating", f"naive_floating_sup{{sup}}.out"),
        # ("naiveFFIMiner_fixed",    f"naive_fixed_sup{{sup}}.out"),
        ("ffiMiner",               f"ffiminer_sup{{sup}}.out"),
    ]

    def __init__(self, dataset_name: str, sf: int, quant_mult: int, 
                 supports: List[int], ds_dir: Path):
        self.dataset_name = dataset_name
        self.sf = sf
        self.quant_mult = quant_mult
        self.supports = supports
        self.ds_dir = ds_dir
        self.logs_dir = self.ds_dir / "logs"

    def _parse_metrics_from_log(self, log_path: Path) -> Dict[str, Optional[float]]:
        text = log_path.read_text(errors="ignore")
        out: Dict[str, Optional[float]] = {}
        for k, rgx in self._METRIC_PATTERNS.items():
            m = rgx.search(text)
            # Handle potential underscores in numbers
            out[k] = float(m.group(1).replace('_', '')) if m else None
        return out

    def collect(self) -> pd.DataFrame:
        """Collects all metrics from all logs into a single DataFrame."""
        rows: List[Dict[str, Any]] = []

        for sup in self.supports:
            for algo_name, log_template in self._RUN_CONFIGS:
                log_filename = log_template.format(sup=sup)
                log_path = self.logs_dir / log_filename

                if not log_path.exists() and algo_name == "cuFFIMiner":
                    alt = self.ds_dir / log_filename
                    if alt.exists(): log_path = alt
                if not log_path.exists() and algo_name == "naiveFFIMiner_floating":
                    old = self.logs_dir / f"naive_sup{sup}.out"
                    if old.exists(): log_path = old
            
                if log_path.exists():
                    print(f"[collect] Parsing {log_path.name}")
                    m = self._parse_metrics_from_log(log_path)
                    final_algo_name = algo_name.replace("_floating", " (floating)").replace("_fixed", " (fixed)")
                    rows.append({
                        "dataset": self.dataset_name, "sf": self.sf, 
                        "algorithm": final_algo_name,
                        "support_quant_int": sup, "quant_mult": self.quant_mult,
                        **m
                    })
                else:
                    print(f"[collect] Skipping {algo_name} (sup={sup}): log not found.")

        return pd.DataFrame(rows)

    def collect_and_save(self) -> pd.DataFrame:
        """Collects metrics, saves to CSV, and returns the DataFrame."""
        df = self.collect()
        if df.empty:
            print("[collect] No data collected, DataFrame is empty.")
            return df
            
        metrics_csv = self.ds_dir / f"metrics_SF{self.sf}.csv"
        df.to_csv(metrics_csv, index=False)
        print(f"[collect] Metrics saved to {metrics_csv}")
        return df


class PlotGenerator:
    """
    Generates static image plots (JPG) using Matplotlib.
    """
    _LABELS = {
        "exec_time": "Execution Time (s)",
        "patterns_found": "Patterns Found",
    }
    
    _ALGO_ORDER = ["cuFFIMiner", "naiveFFIMiner (floating)", "naiveFFIMiner (fixed)", "ffiMiner"]
    _COLORS = {"cuFFIMiner": "C0", "naiveFFIMiner (floating)": "C1", "naiveFFIMiner (fixed)": "C2", "ffiMiner": "C3"}
    _MARKERS = {"cuFFIMiner": "o", "naiveFFIMiner (floating)": "o", "naiveFFIMiner (fixed)": "x", "ffiMiner": "s"}

    def __init__(self, df: pd.DataFrame, dataset_name: str, sf: int, ds_dir: Path):
        self.df = df
        self.dataset_name = dataset_name
        self.sf = sf
        self.jpg_figs_dir = ds_dir / "figures_jpg"
        self.jpg_figs_dir.mkdir(parents=True, exist_ok=True)
        
        # Configure Matplotlib for small, high-DPI plots
        plt.rcParams.update({
            "backend": "agg", # Use non-interactive backend for scripts
            "pdf.fonttype": 42,
            "ps.fonttype": 42,
            "figure.dpi": 500,
            "font.size": 8,
            "axes.titlesize": 8,
            "axes.labelsize": 7,
            "xtick.labelsize": 7,
            "ytick.labelsize": 7,
            "legend.fontsize": 6, # Small legend
        })

    @property
    def pretty_names(self):
        # Use pretty names for plot legend
        return {
            "cuFFIMiner": "cuFFPM",
            "naiveFFIMiner (floating)": "Naive",
            "naiveFFIMiner (fixed)": "Naive (Fixed)",
            "ffiMiner": "FFI-M"
        }

    def _save_jpg_plot(self, metric: str):
        """Generates and saves a single JPG plot WITHOUT a legend."""
        if metric not in self.df.columns: return
        dfm = self.df.dropna(subset=[metric])
        if dfm.empty: return

        fig, ax = plt.subplots(figsize=(1, 1)) # Small figure size
        
        for algo in self._ALGO_ORDER:
            sub = dfm[dfm["algorithm"] == algo]
            if not sub.empty:
                sub = sub.sort_values("support_quant_int")
                ax.plot(sub["support_quant_int"].values, sub[metric].values, 
                        marker=self._MARKERS.get(algo, "."), 
                        color=self._COLORS.get(algo),
                        markersize=3, 
                        linewidth=1)
                        
        ax.set_xlabel("Support Threshold")
        ax.set_ylabel(self._LABELS.get(metric, metric))
        
        ax.set_xscale("log", base=10)
        ax.set_yscale("log", base=10)
        
        ax.grid(alpha=0.25, linestyle=":")
        fig.tight_layout(pad=0.1)
        
        jpg_path = self.jpg_figs_dir / f"{self.dataset_name}_{metric}_SF{self.sf}.jpg"
        fig.savefig(jpg_path, format="jpg", dpi=500, bbox_inches='tight')
        plt.close(fig)
        print(f"[plot] wrote JPG: {jpg_path}")

    def generate_jpg_legend(self):
        """Generates a standalone JPG image for the legend."""
        
        legend_handles = []
        legend_labels = []

        for algo in self._ALGO_ORDER:
            line = plt.Line2D([], [], color=self._COLORS.get(algo), 
                              marker=self._MARKERS.get(algo, '.'), 
                              markersize=3, linewidth=1, linestyle='None')
            legend_handles.append(line)
            legend_labels.append(self.pretty_names.get(algo, algo))

        fig_legend = plt.figure(figsize=(4.0, 0.5))
        ax_legend = fig_legend.add_subplot(111)

        legend = ax_legend.legend(legend_handles, legend_labels, 
                                  loc='center', ncol=len(legend_labels),
                                  frameon=False, fontsize=6)
        
        ax_legend.set_axis_off()
        fig_legend.tight_layout(pad=0.1)

        legend_path = self.jpg_figs_dir / f"legend_SF{self.sf}.jpg"
        fig_legend.savefig(legend_path, format="jpg", dpi=500, bbox_inches='tight')
        plt.close(fig_legend)
        print(f"[plot] wrote JPG legend: {legend_path}")

    def generate_all_jpg_plots(self, metrics: Optional[List[str]] = None):
        """Generates .jpg files for all specified metrics and a separate legend."""
        ms = metrics or ["exec_time", "patterns_found"]
        for m in ms:
            self._save_jpg_plot(m)
        self.generate_jpg_legend()
        print("[plot] JPG plot generation done.")


class MatplotlibPGFGenerator:
    """
    Generates static PGF plots using Matplotlib's PGF backend.
    """
    _LABELS = {
        "exec_time": "Execution Time (s)",
        "patterns_found": "Patterns Found",
    }
    
    _ALGO_ORDER = ["cuFFIMiner", "naiveFFIMiner (floating)", "naiveFFIMiner (fixed)", "ffiMiner"]
    _COLORS = {"cuFFIMiner": "C0", "naiveFFIMiner (floating)": "C1", "naiveFFIMiner (fixed)": "C2", "ffiMiner": "C3"}
    _MARKERS = {"cuFFIMiner": "o", "naiveFFIMiner (floating)": "o", "naiveFFIMiner (fixed)": "x", "ffiMiner": "s"}

    def __init__(self, df: pd.DataFrame, dataset_name: str, sf: int, ds_dir: Path):
        self.df = df
        self.dataset_name = dataset_name
        self.sf = sf
        self.pgf_figs_dir = ds_dir / "figures_pgf"
        self.pgf_figs_dir.mkdir(parents=True, exist_ok=True)
        
        # Configure Matplotlib for PGF backend
        plt.rcParams.update({
            "backend": "pgf",
            "font.family": "serif", # Use document font
            "font.size": 8,
            "axes.labelsize": 7,
            "xtick.labelsize": 7,
            "ytick.labelsize": 7,
            "legend.fontsize": 6,
            "pgf.texsystem": "pdflatex",
            "pgf.preamble": (
                r"\usepackage[utf8x]{inputenc}"
                r"\usepackage[T1]{fontenc}"
                r"\usepackage{amssymb}" # For markers
            ),
        })

    @property
    def pretty_names(self):
        return {
            "cuFFIMiner": "cuFFPM",
            "naiveFFIMiner (floating)": "Naive",
            "naiveFFIMiner (fixed)": "Naive (Fixed)",
            "ffiMiner": "FFI-M"
        }

    def _save_pgf_plot(self, metric: str):
        """Generates and saves a single PGF plot WITHOUT a legend."""
        if metric not in self.df.columns: return
        dfm = self.df.dropna(subset=[metric])
        if dfm.empty: return

        fig, ax = plt.subplots(figsize=(2, 2)) 
        
        for algo in self._ALGO_ORDER:
            sub = dfm[dfm["algorithm"] == algo]
            if not sub.empty:
                sub = sub.sort_values("support_quant_int")
                ax.plot(sub["support_quant_int"].values, sub[metric].values, 
                        marker=self._MARKERS.get(algo, "."), 
                        color=self._COLORS.get(algo),
                        markersize=3, 
                        linewidth=1)
                        
        ax.set_xlabel("Support Threshold")
        ax.set_ylabel(self._LABELS.get(metric, metric))
        
        ax.set_xscale("log", base=10)
        ax.set_yscale("log", base=10)
        
        ax.grid(alpha=0.25, linestyle=":")
        fig.tight_layout(pad=0.05)
        
        pgf_path = self.pgf_figs_dir / f"{self.dataset_name}_{metric}_SF{self.sf}.pgf"
        fig.savefig(pgf_path, format="pgf", bbox_inches='tight', pad_inches=0.05)
        plt.close(fig)
        print(f"[plot] wrote PGF: {pgf_path}")

    def generate_pgf_legend(self):
        """Generates a standalone PGF image for the legend."""
        
        legend_handles = []
        legend_labels = []

        for algo in self._ALGO_ORDER:
            line = plt.Line2D([], [], color=self._COLORS.get(algo), 
                              marker=self._MARKERS.get(algo, '.'), 
                              markersize=3, linewidth=1, linestyle='None')
            legend_handles.append(line)
            legend_labels.append(self.pretty_names.get(algo, algo))

        fig_legend = plt.figure(figsize=(4.0, 0.5))
        ax_legend = fig_legend.add_subplot(111)

        legend = ax_legend.legend(legend_handles, legend_labels, 
                                  loc='center', ncol=len(legend_labels),
                                  frameon=False, fontsize=6)
        
        ax_legend.set_axis_off()
        fig_legend.tight_layout(pad=0)

        legend_path = self.pgf_figs_dir / f"legend_SF{self.sf}.pgf"
        fig_legend.savefig(legend_path, format="pgf", bbox_inches='tight', pad_inches=0)
        plt.close(fig_legend)
        print(f"[plot] wrote PGF legend: {legend_path}")

    def generate_all_pgf_plots(self, metrics: Optional[List[str]] = None):
        """Generates .pgf files for all metrics and a separate legend."""
        ms = metrics or ["exec_time", "patterns_found"]
        for m in ms:
            self._save_pgf_plot(m)
        self.generate_pgf_legend()
        print("[plot] PGF plot generation done.")


class LatexGenerator:
    """
    Generates LaTeX files: tables and figure wrappers.
    """
    def __init__(self, df: pd.DataFrame, dataset_name: str, sf: int, ds_dir: Path):
        self.df = df
        self.dataset_name = dataset_name
        self.sf = sf
        self.ds_dir = ds_dir
        self.pgf_figs_dir = self.ds_dir / "figures_pgf" 

    def _format_sci_latex(self, n: float) -> str:
        """Helper for formatting table numbers."""
        if n == 0: return "$0$"
        if not pd.notna(n): return "---"
        
        exponent = int(math.floor(math.log10(abs(n))))
        mantissa = n / (10**exponent)
        
        if mantissa == 1.0:
            return f"$1 \\times 10^{{{exponent}}}$"
        else:
            return f"${mantissa:.1f} \\times 10^{{{exponent}}}$"

    def generate_latex_tables(self):
        """Generates a .tex file containing the results subtable."""
        
        table_algo_order = ["cuFFIMiner", "naiveFFIMiner (floating)", "ffiMiner"]
        table_pretty_names = {
            "cuFFIMiner": "cuFFPM",
            "naiveFFIMiner (floating)": "Naive",
            "ffiMiner": "FFI-M"
        }
        
        available_algos = [a for a in table_algo_order if a in self.df['algorithm'].unique()]
        num_algos = len(available_algos)

        if num_algos == 0:
            print("[latex] No algorithms found in data. Skipping table generation.")
            return
            
        output = [f"% --- LaTeX Table for {self.dataset_name} (SF={self.sf}) ---"]
        
        try:
            df_time = self.df.pivot(index="support_quant_int", columns="algorithm", values="exec_time")
            df_time = df_time[available_algos].rename(columns=table_pretty_names)
            df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
            df_time.columns = pd.MultiIndex.from_product([['Time (s)'], df_time.columns])

            df_patterns = self.df.pivot(index="support_quant_int", columns="algorithm", values="patterns_found")
            df_patterns = df_patterns[available_algos].rename(columns=table_pretty_names)
            df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")
            df_patterns.columns = pd.MultiIndex.from_product([['Patterns'], df_patterns.columns])

            df_combined = pd.concat([df_time, df_patterns], axis=1).sort_index(ascending=False)
            
            df_combined = df_combined.reset_index()
            df_combined['support_quant_int'] = df_combined['support_quant_int'].apply(self._format_sci_latex)
            df_combined = df_combined.set_index('support_quant_int')
            df_combined.index.name = "MinSup" 

            col_format = f"l{'r' * num_algos}{'r' * num_algos}"
            latex_tabular = df_combined.to_latex(
                index=True, header=True, na_rep="---", column_format=col_format,
                multicolumn=True, escape=False
            )

            col_start_time = 2
            col_end_time = col_start_time + num_algos - 1
            col_start_patterns = col_end_time + 1
            col_end_patterns = col_start_patterns + num_algos - 1
            cmid_rule = f"\\cmidrule(lr){{{col_start_time}-{col_end_time}}} \\cmidrule(lr){{{col_start_patterns}-{col_end_patterns}}}"

            lines = latex_tabular.split('\n')
            lines.insert(3, cmid_rule)
            latex_tabular_with_cmid = "\n".join(lines)

            subtable_caption = f"{self.dataset_name} (SF={self.sf})"
            subtable_label = f"tab:{self.dataset_name}_sf{self.sf}"
            final_latex = f"""\\begin{{subtable}}{{\\linewidth}}
\\centering
\\caption{{{subtable_caption}}}
\\label{{{subtable_label}}}
{latex_tabular_with_cmid}
\\end{{subtable}}%"""
            output.append(final_latex)
            
        except Exception as e:
            output.append(f"% Could not generate combined table for {self.dataset_name}: {e}")

        output.append(f"% --- End LaTeX Table ---")
        
        latex_path = self.ds_dir / f"tables_SF{self.sf}.tex"
        latex_path.write_text("\n".join(output))
        print(f"[latex] Generated LaTeX table: {latex_path}")

    def generate_latex_figures(self):
        """Generates a .tex file that wraps the .pgf plots in a 'figure' env."""
        output = [
            f"% --- LaTeX Figures for {self.dataset_name} (SF={self.sf}) ---",
            f"% Requires: \\usepackage{{pgf}}, \\usepackage{{subcaption}}",
        ]

        time_pgf = self.pgf_figs_dir / f"{self.dataset_name}_exec_time_SF{self.sf}.pgf"
        patterns_pgf = self.pgf_figs_dir / f"{self.dataset_name}_patterns_found_SF{self.sf}.pgf"
        legend_pgf = self.pgf_figs_dir / f"legend_SF{self.sf}.pgf"

        time_pgf_path_latex = f"{self.pgf_figs_dir.name}/{time_pgf.name}"
        patterns_pgf_path_latex = f"{self.pgf_figs_dir.name}/{patterns_pgf.name}"
        legend_pgf_path_latex = f"{self.pgf_figs_dir.name}/{legend_pgf.name}"

        has_time_plot = time_pgf.exists()
        has_patterns_plot = patterns_pgf.exists()

        if not has_time_plot and not has_patterns_plot:
            print(f"[latex] No .pgf plots found in {self.pgf_figs_dir}. Skipping figure wrapper.")
            return

        figure_caption = f"Performance and pattern count for {self.dataset_name} (SF={self.sf})."
        figure_label = f"fig:{self.dataset_name}_sf{self.sf}"
        subfig_width = "0.49\\linewidth"

        latex_content = f"""\\begin{{figure}}[tbh!]
    \\centering
    % Input the standalone PGF legend
    \\input{{{legend_pgf_path_latex}}}
    \\caption{{{figure_caption}}}
    \\label{{{figure_label}}}
"""

        if has_time_plot:
            latex_content += f"""
    \\begin{{subfigure}}[b]{{{subfig_width}}}
        \\centering
        % Input the PGF file directly
        \\input{{{time_pgf_path_latex}}}
        \\caption{{Execution Time}}
        \\label{{{figure_label}_time}}
    \\end{{subfigure}}"""
        
        if has_time_plot and has_patterns_plot:
            latex_content += "\n    \\hfill % Separator\n"

        if has_patterns_plot:
            latex_content += f"""
    \\begin{{subfigure}}[b]{{{subfig_width}}}
        \\centering
        % Input the PGF file directly
        \\input{{{patterns_pgf_path_latex}}}
        \\caption{{Patterns Found}}
        \\label{{{figure_label}_patterns}}
    \\end{{subfigure}}"""

        latex_content += "\n\\end{figure}"
        output.append(latex_content)
        output.append(f"% --- End LaTeX Figures ---")
        
        latex_path = self.ds_dir / f"figures_SF{self.sf}.tex"
        latex_path.write_text("\n".join(output))
        print(f"[latex] Generated LaTeX figures wrapper: {latex_path}")

    def generate_all(self):
        """Generates all LaTeX files (tables and wrappers)."""
        if self.df.empty:
            print("[latex] DataFrame is empty, skipping all file generation.")
            return
            
        print(f"[latex] Generating all LaTeX outputs for {self.dataset_name} (SF={self.sf})...")
        self.generate_latex_tables()
        self.generate_latex_figures()
        print(f"[latex] All LaTeX outputs generated in {self.ds_dir}")


class Orchestrator:
    """
    Wires all pipeline components together and runs the full process.
    """
    def __init__(self, config: Dict[str, Any], project_root: Path, 
                 data_dir: Path, results_dir: Path):
        self.config = config
        self.project_root = project_root
        self.data_dir = data_dir
        self.results_dir = results_dir
        
        self.url = config["url"]
        self.sf = config["sf"]
        self.supports = config["supports"]
        self.cuffi_params = config.get("cuffi_params", {})
        self.force_run = config.get("force_run", False)

    def run(self):
        """Executes the full prep -> run -> collect -> plot (jpg+pgf) -> latex pipeline."""
        print(f"--- üöÄ Starting Pipeline for {self.url} (SF={self.sf}) ---")
        
        # 1. Prep
        print("--- 1. Preparing Dataset ---")
        preparer = DatasetPreparer(self.project_root, self.data_dir)
        paths, quant_mult, dataset_name = preparer.prepare(self.url, self.sf)
        
        ds_dir = self.results_dir / dataset_name / f"SF{self.sf}"
        ds_dir.mkdir(parents=True, exist_ok=True)

        # 2. Run
        print("--- 2. Running Experiments ---")
        runner = ExperimentRunner(self.project_root, ds_dir, paths, quant_mult)
        runner.run_all_experiments(self.supports, self.cuffi_params, self.force_run)

        # 3. Collect
        print("--- 3. Collecting Results ---")
        collector = ResultsCollector(dataset_name, self.sf, quant_mult, self.supports, ds_dir)
        df = collector.collect_and_save()

        if df.empty:
            print("--- ‚ö†Ô∏è Pipeline Halting: No results were collected. ---")
            return None

        # 4. Generate JPG Plots
        print("--- 4. Generating JPG Plots (using Matplotlib) ---")
        plot_gen = PlotGenerator(df, dataset_name, self.sf, ds_dir)
        plot_gen.generate_all_jpg_plots()

        # 5. Generate PGF Plots
        print("--- 5. Generating PGF Plots (using Matplotlib) ---")
        # This will reconfigure plt.rcParams for PGF
        pgf_plot_gen = MatplotlibPGFGenerator(df, dataset_name, self.sf, ds_dir)
        pgf_plot_gen.generate_all_pgf_plots()

        # 6. Generate LaTeX Outputs (Tables and Figure Wrappers)
        print("--- 6. Generating LaTeX Outputs ---")
        latex_gen = LatexGenerator(df, dataset_name, self.sf, ds_dir)
        latex_gen.generate_all()

        print(f"--- ‚úÖ Pipeline Complete for {dataset_name} (SF={self.sf}) ---")
        return df


# ----------------------------------------
# High-level functional wrapper
# ----------------------------------------
def run_pipeline(
    dataset_url: str, 
    sf: int, 
    supports_quant_int: List[int], *,
    cuffi_params: Optional[Dict[str, Any]] = None,
    force: bool = False
) -> pd.DataFrame | None:
    """
    High-level functional wrapper to run the entire pipeline.
    """
    
    if cuffi_params is None:
        cuffi_params = {
            "allocator": "rmm_device",
            "gds": "off",
        }

    pipeline_config = {
        "url": dataset_url,
        "sf": sf,
        "supports": supports_quant_int,
        "cuffi_params": cuffi_params,
        "force_run": force
    }

    print(f"--- üöÄ Starting Pipeline for {dataset_url} (SF={sf}) ---")
    print(f"Supports: {supports_quant_int}")

    try:
        orchestrator = Orchestrator(
            config=pipeline_config,
            project_root=PROJECT_ROOT,
            data_dir=DATA_DIR,
            results_dir=RESULTS_DIR
        )
        results_df = orchestrator.run()
        
        if results_df is not None:
            print(f"--- ‚úÖ Pipeline Complete for {dataset_url} (SF={sf}) ---")
        else:
            print(f"--- ‚ö†Ô∏è Pipeline Finished (no results) for {dataset_url} (SF={sf}) ---")
            
        return results_df

    except ImportError as e:
        print(f"\n[ERROR] Failed to import a script: {e}", file=sys.stderr)
        print("Please ensure your PYTHONPATH is correct and all dependencies are installed.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred during SF={sf} run: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)



In [9]:
retail = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_retail.csv"
retail_sup = [80,90,100,110,1000]

print(run_pipeline(retail, sf=1, supports_quant_int=retail_sup, force=False))

sf = 10
retail_sup = [x * sf for x in retail_sup]
run_pipeline(retail, sf=sf, supports_quant_int=retail_sup, force=False)

--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_retail.csv (SF=1) ---
Supports: [80, 90, 100, 110, 1000]
--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_retail.csv (SF=1) ---
--- 1. Preparing Dataset ---
[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail.csv
[prep] Using existing: Fuzzy_retail_SF1.csv
Processing Fuzzy_retail_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1_fixed_10.csv...
[convert] method=cudf rows=1256571 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1_fixed_10.parquet
[convert] method=cudf rows=1256571 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF1.parquet
[prep] Dataset 'Fuzzy_retail' (SF=1) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=80 (log exists)
[skip] naive (float

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF10_fixed_10.csv...
[convert] method=cudf rows=12565710 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF10_fixed_10.parquet
[convert] method=cudf rows=12565710 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_retail/Fuzzy_retail_SF10.parquet
[prep] Dataset 'Fuzzy_retail' (SF=10) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=800 (log exists)
[skip] naive (floating) sup=800 (log exists)
[skip] naive (fixed) sup=800 (log exists)
[skip] cuFFI sup=900 (log exists)
[skip] naive (floating) sup=900 (log exists)
[skip] naive (fixed) sup=900 (log exists)
[skip] cuFFI sup=1000 (log exists)
[skip] naive (floating) sup=1000 (log exists)
[skip] naive (fixed) sup=1000 (log exists)
[skip] cuFFI sup=1100 (log exists)
[skip] naive (floating) sup=1100 (log exists)
[skip] naive (fixed) sup=1100 (log exists)
[skip] cuFFI sup=10000 (

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_retail,10,cuFFIMiner,800,10,2.5784,2409.14,,136556.0
1,Fuzzy_retail,10,naiveFFIMiner (floating),800,10,2.1689,2409.14,,41169.0
2,Fuzzy_retail,10,cuFFIMiner,900,10,2.5631,2409.14,,114415.0
3,Fuzzy_retail,10,naiveFFIMiner (floating),900,10,2.177,2409.14,,35185.0
4,Fuzzy_retail,10,cuFFIMiner,1000,10,2.4556,2409.14,,98151.0
5,Fuzzy_retail,10,naiveFFIMiner (floating),1000,10,2.1097,2432.1,,30615.0
6,Fuzzy_retail,10,cuFFIMiner,1100,10,2.2112,2409.14,,85504.0
7,Fuzzy_retail,10,naiveFFIMiner (floating),1100,10,1.954,2409.14,,27117.0
8,Fuzzy_retail,10,cuFFIMiner,10000,10,1.2986,2409.14,,3530.0
9,Fuzzy_retail,10,naiveFFIMiner (floating),10000,10,1.1328,2409.14,,1905.0


In [10]:
kosarak = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_kosarak.csv"
kosarak_sup = [4000,4500,5000,5500,100000]

run_pipeline(kosarak, sf=1, supports_quant_int=kosarak_sup, force=False)

sf = 10
kosarak_sup = [x * sf for x in kosarak_sup]
run_pipeline(kosarak, sf=sf, supports_quant_int=kosarak_sup, force=False)

--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_kosarak.csv (SF=1) ---
Supports: [4000, 4500, 5000, 5500, 100000]
--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_kosarak.csv (SF=1) ---
--- 1. Preparing Dataset ---
[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak.csv
[prep] Using existing: Fuzzy_kosarak_SF1.csv
Processing Fuzzy_kosarak_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.csv...
[convert] method=cudf rows=0 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1_fixed_10.parquet
[convert] method=cudf rows=11041780 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF1.parquet
[prep] Dataset 'Fuzzy_kosarak' (SF=1) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=4000 (log exists)
[

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF10_fixed_10.csv...
[convert] method=cudf rows=110417800 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF10_fixed_10.parquet
[convert] method=cudf rows=110417800 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_kosarak/Fuzzy_kosarak_SF10.parquet
[prep] Dataset 'Fuzzy_kosarak' (SF=10) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=40000 (log exists)
[skip] naive (floating) sup=40000 (log exists)
[skip] naive (fixed) sup=40000 (log exists)
[skip] cuFFI sup=45000 (log exists)
[skip] naive (floating) sup=45000 (log exists)
[skip] naive (fixed) sup=45000 (log exists)
[skip] cuFFI sup=50000 (log exists)
[skip] naive (floating) sup=50000 (log exists)
[skip] naive (fixed) sup=50000 (log exists)
[skip] cuFFI sup=55000 (log exists)
[skip] naive (floating) sup=55000 (log exists)
[skip] naive (fixed) sup=55000 (log exist

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_kosarak,10,cuFFIMiner,40000,10,13.2102,14088.52,,175185.0
1,Fuzzy_kosarak,10,naiveFFIMiner (floating),40000,10,8.6202,14289.19,,44885.0
2,Fuzzy_kosarak,10,cuFFIMiner,45000,10,10.6116,14088.52,,97848.0
3,Fuzzy_kosarak,10,naiveFFIMiner (floating),45000,10,8.7254,14817.03,,28590.0
4,Fuzzy_kosarak,10,cuFFIMiner,50000,10,8.5808,14088.52,,64760.0
5,Fuzzy_kosarak,10,naiveFFIMiner (floating),50000,10,7.7058,14088.52,,20393.0
6,Fuzzy_kosarak,10,cuFFIMiner,55000,10,7.404,14088.52,,47495.0
7,Fuzzy_kosarak,10,naiveFFIMiner (floating),55000,10,7.623,14088.52,,15718.0
8,Fuzzy_kosarak,10,cuFFIMiner,1000000,10,1.5531,14088.52,,191.0
9,Fuzzy_kosarak,10,naiveFFIMiner (floating),1000000,10,4.2766,14088.52,,105.0


In [11]:
pumsb = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_pumsb.csv"
pumsb_sup = [100000,110000,120000,130000,280000]

run_pipeline(pumsb, sf=1, supports_quant_int=pumsb_sup, force=False)

sf = 10
pumsb_sup = [x * sf for x in pumsb_sup]
run_pipeline(pumsb, sf=sf, supports_quant_int=pumsb_sup, force=False)

--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_pumsb.csv (SF=1) ---
Supports: [100000, 110000, 120000, 130000, 280000]
--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_pumsb.csv (SF=1) ---
--- 1. Preparing Dataset ---
[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb.csv
[prep] Using existing: Fuzzy_pumsb_SF1.csv
Processing Fuzzy_pumsb_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.csv...
[convert] method=cudf rows=4913349 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1_fixed_10.parquet
[convert] method=cudf rows=4913349 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF1.parquet
[prep] Dataset 'Fuzzy_pumsb' (SF=1) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=100000 (log exists)
[skip] naive

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF10_fixed_10.csv...
[convert] method=cudf rows=49133490 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF10_fixed_10.parquet
[convert] method=cudf rows=49133490 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_pumsb/Fuzzy_pumsb_SF10.parquet
[prep] Dataset 'Fuzzy_pumsb' (SF=10) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=1000000 (log exists)
[skip] naive (floating) sup=1000000 (log exists)
[skip] naive (fixed) sup=1000000 (log exists)
[skip] cuFFI sup=1100000 (log exists)
[skip] naive (floating) sup=1100000 (log exists)
[skip] naive (fixed) sup=1100000 (log exists)
[skip] cuFFI sup=1200000 (log exists)
[skip] naive (floating) sup=1200000 (log exists)
[skip] naive (fixed) sup=1200000 (log exists)
[skip] cuFFI sup=1300000 (log exists)
[skip] naive (floating) sup=1300000 (log exists)
[skip] naive (fixed) sup=1300000 (l

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_pumsb,10,cuFFIMiner,1000000,10,100.7956,5672.14,,283443.0
1,Fuzzy_pumsb,10,naiveFFIMiner (floating),1000000,10,14.7923,6109.31,,40214.0
2,Fuzzy_pumsb,10,cuFFIMiner,1100000,10,55.8643,5672.14,,159707.0
3,Fuzzy_pumsb,10,naiveFFIMiner (floating),1100000,10,9.4286,6090.95,,24829.0
4,Fuzzy_pumsb,10,cuFFIMiner,1200000,10,32.6661,5672.14,,94084.0
5,Fuzzy_pumsb,10,naiveFFIMiner (floating),1200000,10,6.6761,6077.79,,15721.0
6,Fuzzy_pumsb,10,cuFFIMiner,1300000,10,20.071,5672.14,,57209.0
7,Fuzzy_pumsb,10,naiveFFIMiner (floating),1300000,10,5.1354,6058.49,,10208.0
8,Fuzzy_pumsb,10,cuFFIMiner,2800000,10,1.3068,5672.14,,282.0
9,Fuzzy_pumsb,10,naiveFFIMiner (floating),2800000,10,1.848,5715.3,,99.0


In [12]:
t10 = "https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_T10I4D100K.csv"
t10_sup = [3000,3500,4000,4500,5000]

run_pipeline(t10, sf=1, supports_quant_int=t10_sup, force=False)

sf = 100

t10_sup = [x * sf for x in t10_sup]
run_pipeline(t10, sf=sf, supports_quant_int=t10_sup, force=False)

--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_T10I4D100K.csv (SF=1) ---
Supports: [3000, 3500, 4000, 4500, 5000]
--- üöÄ Starting Pipeline for https://u-aizu.ac.jp/~udayrage/datasets/fuzzyDatabases/Fuzzy_T10I4D100K.csv (SF=1) ---
--- 1. Preparing Dataset ---
[download] Using cached: /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K.csv
[prep] Using existing: Fuzzy_T10I4D100K_SF1.csv
Processing Fuzzy_T10I4D100K_SF1.csv...
Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1_fixed_10.csv...
[convert] method=cudf rows=1091692 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1_fixed_10.parquet
[convert] method=cudf rows=1091692 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF1.parquet
[prep] Dataset 'Fuzzy_T10I4D100K' (SF=1) prepared.
--- 2. Running Experiment

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Scaling factor determined: 10 (10^1)
Writing fixed point file to /export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF100_fixed_10.csv...
[convert] method=cudf rows=109169200 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF100_fixed_10.parquet
[convert] method=cudf rows=109169200 file=/export/home1/ltarun/cuda_pami/data/fuzzy/Fuzzy_T10I4D100K/Fuzzy_T10I4D100K_SF100.parquet
[prep] Dataset 'Fuzzy_T10I4D100K' (SF=100) prepared.
--- 2. Running Experiments ---
[skip] cuFFI sup=300000 (log exists)
[skip] naive (floating) sup=300000 (log exists)
[skip] naive (fixed) sup=300000 (log exists)
[skip] cuFFI sup=350000 (log exists)
[skip] naive (floating) sup=350000 (log exists)
[skip] naive (fixed) sup=350000 (log exists)
[skip] cuFFI sup=400000 (log exists)
[skip] naive (floating) sup=400000 (log exists)
[skip] naive (fixed) sup=400000 (log exists)
[skip] cuFFI sup=450000 (log exists)
[skip] naive (floating) sup=450000 (log exists)
[skip

  df_time = df_time.applymap(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
  df_patterns = df_patterns.applymap(lambda x: f"{x:,.0f}" if pd.notna(x) else "---")


Unnamed: 0,dataset,sf,algorithm,support_quant_int,quant_mult,exec_time,cpu_mem_mb,gpu_mem_mb,patterns_found
0,Fuzzy_T10I4D100K,100,cuFFIMiner,300000,10,2.2295,14640.06,,2325.0
1,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),300000,10,6.15,14640.06,,2045.0
2,Fuzzy_T10I4D100K,100,cuFFIMiner,350000,10,2.2593,14640.06,,1557.0
3,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),350000,10,6.0322,14640.06,,1407.0
4,Fuzzy_T10I4D100K,100,cuFFIMiner,400000,10,2.1924,14640.06,,1154.0
5,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),400000,10,6.1527,14640.06,,1069.0
6,Fuzzy_T10I4D100K,100,cuFFIMiner,450000,10,2.0946,14640.06,,922.0
7,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),450000,10,6.3009,14640.06,,866.0
8,Fuzzy_T10I4D100K,100,cuFFIMiner,500000,10,2.2103,14640.06,,787.0
9,Fuzzy_T10I4D100K,100,naiveFFIMiner (floating),500000,10,6.0697,14640.06,,733.0
