Notebook Example: axisfuzzy.analysis Pipeline Guide

Preparation

In the first cell of the notebook, we import the required libraries and components, register our example tools, and prepare the sample data and fuzzifier that the examples below rely on.

In [None]:
# Cell 1: Setup and Imports
import pandas as pd
import numpy as np
from typing import List, Dict

# Import core components from our analysis module
from axisfuzzy.analysis.pipeline import FuzzyPipeline
from axisfuzzy.analysis.registry.base import register_tool
from axisfuzzy.analysis.dataframe import FuzzyDataFrame
from axisfuzzy.fuzzifier import Fuzzifier

# Ensure the accessor is registered
import axisfuzzy.analysis.accessor

# --- Register a suite of tools for our examples ---

@register_tool(inputs="CrispTable", outputs="FuzzyTable")
def fuzzify_tool(data: pd.DataFrame, fuzzifier: Fuzzifier) -> FuzzyDataFrame:
    """Build a FuzzyDataFrame from a crisp pandas DataFrame.

    Args:
        data: The crisp table to convert.
        fuzzifier: The fuzzifier forwarded to ``FuzzyDataFrame.from_pandas``.

    Returns:
        The result of ``FuzzyDataFrame.from_pandas(data, fuzzifier=fuzzifier)``.
    """
    # Announce the step so the execution order shows up in the notebook output.
    print("--- Executing: fuzzify_tool ---")
    fuzzy_table = FuzzyDataFrame.from_pandas(data, fuzzifier=fuzzifier)
    return fuzzy_table

@register_tool(inputs="FuzzyTable", outputs="WeightVector")
def uniform_weight_tool(matrix: FuzzyDataFrame) -> np.ndarray:
    """Return equal weights, one per column of ``matrix``.

    Args:
        matrix: Table whose ``shape[1]`` determines how many weights are made.

    Returns:
        A 1-D array of length ``matrix.shape[1]`` in which every entry equals
        ``1 / matrix.shape[1]``.
    """
    print("--- Executing: uniform_weight_tool ---")
    n_columns = matrix.shape[1]
    equal_share = 1 / n_columns
    return np.full(n_columns, equal_share)

@register_tool(inputs="FuzzyTable", outputs="WeightVector")
def constant_weight_tool(matrix: FuzzyDataFrame, const_weights: list) -> np.ndarray:
    """Wrap a caller-supplied list of weights in a NumPy array.

    Args:
        matrix: Accepted as the tool's declared ``FuzzyTable`` input; its
            contents are not read by this mock.
        const_weights: The weights to hand back.

    Returns:
        ``const_weights`` converted with ``np.array``.
    """
    print(f"--- Executing: constant_weight_tool with weights {const_weights} ---")
    weight_vector = np.array(const_weights)
    return weight_vector

@register_tool(
    inputs={"matrix": "FuzzyTable", "weights": "WeightVector"},
    outputs="ScoreVector"
)
def aggregate_tool(matrix: FuzzyDataFrame, weights: np.ndarray) -> np.ndarray:
    """Mock aggregation: weighted sum over a crisp stand-in for each cell.

    Args:
        matrix: Table iterated column by column via ``matrix.columns`` and
            ``matrix[col]``.
        weights: Vector multiplied against the crisp matrix with ``np.dot``.

    Returns:
        ``np.dot(crisp_matrix, weights)``, where ``crisp_matrix`` holds one
        crisp value per cell of ``matrix``.
    """
    print("--- Executing: aggregate_tool ---")

    def _crisp_value(cell):
        # Demo-only simplification: use the cell's ``mean()`` when it has one,
        # otherwise fall back to the placeholder 0.5.
        return cell.mean() if hasattr(cell, 'mean') else 0.5

    per_column = []
    for col in matrix.columns:
        per_column.append([_crisp_value(cell) for cell in matrix[col]])

    # ``per_column`` has one inner list per column, so transpose it to put the
    # columns on the second axis before applying the weights.
    crisp_matrix = np.array(per_column).T
    return np.dot(crisp_matrix, weights)

@register_tool(
    inputs="ScoreVector",
    outputs={"ranking": "RankingResult", "top_performer": "str"}
)
def decision_tool(scores: np.ndarray, alternative_names: list) -> dict:
    """Mock decision step that produces two named outputs.

    Args:
        scores: One score per alternative; higher scores rank first.
        alternative_names: Names looked up by position in ``scores``.

    Returns:
        A dict with ``"ranking"`` (the names ordered from highest to lowest
        score) and ``"top_performer"`` (the first name in that ranking).
    """
    print("--- Executing: decision_tool ---")
    # argsort yields ascending order; reverse it so the best score comes first.
    descending_order = np.argsort(scores)[::-1]
    ranking = [alternative_names[position] for position in descending_order]
    best = ranking[0]
    return {"ranking": ranking, "top_performer": best}

# --- Prepare initial data ---
# Rows are the alternatives being compared; columns are the criteria.
crisp_df = pd.DataFrame(
    data={
        'cost': [0.5, 0.7, 0.3],
        'safety': [0.2, 0.9, 0.4],
        'comfort': [0.8, 0.3, 0.1],
    },
    index=['CarA', 'CarB', 'CarC'],
)

# This single fuzzifier instance is the one the example cells below pass around.
fuzzifier = Fuzzifier(
    mtype='qrofn',
    mf='GaussianMF',
    mf_params=[{"sigma": 0.2, "c": 0.5}],
)

print("✅ Setup complete. All tools are registered and data is ready.")

Example 1: Simple linear Pipeline (executed via Accessor)

This is the most basic use case, demonstrating a linear workflow starting from a pd.DataFrame, running through the .fuzzy accessor, and directly obtaining the final result.

In [None]:
# Cell 2: Example 1 - Simple Linear Pipeline via Accessor
print("### Example 1: Simple Linear Pipeline ###\n")

# Step 1 -- describe the workflow: input -> fuzzify_tool -> uniform_weight_tool.
p_linear = FuzzyPipeline()

# With only one input node, the name chosen here is arbitrary.
crisp_input = p_linear.input("my_data", contract="CrispTable")
fuzz_table = p_linear.tool("fuzzify_tool")(data=crisp_input, fuzzifier=fuzzifier)
weights = p_linear.tool("uniform_weight_tool")(matrix=fuzz_table)

# Header, pipeline description and separator, one per line.
print("Pipeline defined:", p_linear, "-" * 20, sep="\n")

# Step 2 -- run through the `.fuzzy` accessor.
# `crisp_df` is injected automatically into the pipeline's single input node.
final_weights = crisp_df.fuzzy.run(p_linear)

# Step 3 -- inspect what came back.
print("\n--- Execution Finished ---")
print(f"Type of final result: {type(final_weights)}")
print(f"Final weights: {final_weights}")

# The pipeline has one unambiguous output, so `run` hands it back directly
# rather than wrapped in a container.
assert isinstance(final_weights, np.ndarray)


Example 2: Complex Nonlinear DAG (Multi-Input and Multi-Output Tools)

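This example demonstrates the framework's ability to handle branching and merging: the fuzzified table feeds both the weighting step and the aggregation step, and the aggregation step merges the two results. It also shows how a Pipeline whose terminal tool declares multiple outputs returns them as a dictionary.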

In [None]:
# Cell 3: Example 2 - Non-linear DAG with Multiple Outputs
import json

print("### Example 2: Non-linear DAG ###\n")

# Step 1 -- describe the graph.
p_dag = FuzzyPipeline()

crisp_input = p_dag.input("init_data", contract="CrispTable")

# Branch A: fuzzify the crisp table.
fuzz_table = p_dag.tool("fuzzify_tool")(data=crisp_input, fuzzifier=fuzzifier)

# Branch B: fixed weights (the mock tool still takes fuzz_table as its input).
const_weights = p_dag.tool("constant_weight_tool")(
    matrix=fuzz_table,
    const_weights=[0.6, 0.1, 0.3],
)

# Merge: the aggregation step consumes the outputs of both branches.
scores = p_dag.tool("aggregate_tool")(matrix=fuzz_table, weights=const_weights)

# Last step, a multi-output tool.
# Note: this is the single terminal node of the graph.
decision_outputs = p_dag.tool("decision_tool")(
    scores=scores,
    alternative_names=list(crisp_df.index),
)

# Header, pipeline description and separator, one per line.
print("Pipeline defined:", p_dag, "-" * 20, sep="\n")

# Step 2 -- run the pipeline.
# The terminal `decision_tool` returns a dictionary, so that dictionary is the
# final result.
final_decision = crisp_df.fuzzy.run(p_dag)

# Step 3 -- inspect what came back.
print("\n--- Execution Finished ---")
print(f"Type of final result: {type(final_decision)}")
print("Final decision results:")
print(json.dumps(final_decision, indent=2))

assert isinstance(final_decision, dict)
assert "ranking" in final_decision


Example 3: Starting directly from a FuzzyDataFrame

This example demonstrates the flexibility of the framework, allowing users to skip the fuzzification step and start the analysis directly from existing fuzzy data.

In [None]:
# Cell 4: Example 3 - Starting with a FuzzyDataFrame
print("### Example 3: Starting with a FuzzyDataFrame ###\n")

# Step 1 -- fuzzify up front, outside of any pipeline.
fuzzy_df = FuzzyDataFrame.from_pandas(crisp_df, fuzzifier=fuzzifier)
print("Created a FuzzyDataFrame to start with:", fuzzy_df, "-" * 20, sep="\n")

# Step 2 -- a pipeline whose input contract is FuzzyTable.
p_from_fuzzy = FuzzyPipeline()
# The input name is left out because there is only one input.
fuzzy_input = p_from_fuzzy.input(contract="FuzzyTable")
weights = p_from_fuzzy.tool("uniform_weight_tool")(matrix=fuzzy_input)

print("Pipeline defined:", p_from_fuzzy, "-" * 20, sep="\n")

# Step 3 -- call p.run() directly with the FuzzyDataFrame.
# The accessor is not used here because we are not starting from a crisp df.
final_weights = p_from_fuzzy.run(fuzzy_df)

# Step 4 -- inspect what came back.
print("\n--- Execution Finished ---")
print(f"Type of final result: {type(final_weights)}")
print(f"Final weights: {final_weights}")

# Three columns, so each uniform weight is one third.
assert np.allclose(final_weights, [1 / 3] * 3)

Example 4: Returning Intermediate Results

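This example demonstrates how to use return_intermediate=True to debug and inspect the output of each step in the Pipeline. With this flag, the run call returns a pair: the final output and a dictionary of intermediate states.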

In [None]:
# Cell 5: Example 4 - Returning Intermediate Results
print("### Example 4: Returning Intermediate Results ###\n")

# Build a fresh pipeline with the same two steps as Example 1 (this time the
# input is left unnamed); the Example 1 object itself is not reused.
p_linear = FuzzyPipeline()
crisp_input = p_linear.input(contract="CrispTable")
fuzz_table = p_linear.tool("fuzzify_tool")(data=crisp_input, fuzzifier=fuzzifier)
weights = p_linear.tool("uniform_weight_tool")(matrix=fuzz_table)

# With return_intermediate=True the call yields a pair: the final output and
# the intermediate states.
final_output, intermediate_states = crisp_df.fuzzy.run(p_linear, return_intermediate=True)

print("--- Execution Finished ---\n")

print("Final Output:")
print(final_output)
print("\n" + "="*40 + "\n")

print("Intermediate States (a dictionary mapping step_id to its result):")
# Walk the states to see what each step produced.
for step_id, result in intermediate_states.items():
    # Resolve the tool name for this step_id for nicer display. Ids with no
    # matching entry in `p_linear.steps` are labelled "input_node".
    tool_name = next(
        (step_info['tool'] for step_info in p_linear.steps if step_info['id'] == step_id),
        "input_node",
    )

    print(f"\n--- State after step '{tool_name}' (id: {step_id[:8]}) ---")
    # Every result type is printed the same way, so no type dispatch is needed.
    print(result)