# WranglesPY – Map Wrangle Guide

This notebook showcases the `map` wrangle, which automatically maps input column names to a user-defined list of target names using semantic similarity.

- Parameters:
  - `targets` (list[str], required): desired target column names.
  - `input` (str|int|list, optional): columns to consider; defaults to all columns.
  - `threshold` (float, default 0.6): minimum similarity to accept a mapping.
  - `drop_unmapped` (bool, default False): drop columns that don’t map.
  - `case_sensitive` (bool, default False): when true, case differences reduce similarity.

- Behavior:
  - Greedy unique assignment: each target is used at most once.
  - Skips identity pairs: avoids trivial self-mapping.
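  - Conflict handling: if the DataFrame already has a column named like a target that another column is being mapped to, the existing column is dropped before renaming, so the output never contains duplicate column names.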

In [None]:
# Import Dependencies and Setup
import pandas as pd
import wrangles

# Optional: report library versions so a run of this guide can be reproduced.
# A local checkout of wrangles may not define __version__, hence the "local" fallback.
wrangles_version = getattr(wrangles, "__version__", "local")
print("pandas:", pd.__version__)
print("wrangles:", wrangles_version)

# Cloud-backed wrangles may need credentials supplied through environment variables.
# Keep real values out of the notebook; the lines below only show the variable names:
# import os
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["WRANGLES_USER"] = "..."
# os.environ["WRANGLES_PASSWORD"] = "..."

In [None]:
# Create Sample DataFrames

# Basic example: a single product row with three loosely named columns
df_basic = pd.DataFrame(
    [{"Product Name": "Widget", "Unit Price": 9.99, "Qty": 5}]
)

# Subset mapping example: an independent copy, so each demo gets its own frame
df_subset = df_basic.copy()

def _make_alpha_beta_df():
    return pd.DataFrame({"Alpha": ["X"], "Beta": ["Y"]})

df_threshold = _make_alpha_beta_df()

# Conflicts: two inputs whose names are similar to each other
df_conflict = pd.DataFrame([{"Price": 10, "Unit Price": 9.5}])

# Drop unmapped example: "FooBar" has no counterpart among the targets used later
df_drop = pd.DataFrame([{"Product Name": "X", "FooBar": "Z"}])

# Existing target example: the frame already contains a column named "price"
df_existing_target = pd.DataFrame([{"price": 1.0, "Unit Price": 2.0}])

# Case-sensitivity examples: same data, one frame per demo
df_case_ins = pd.DataFrame([{"SKU": "X", "Name": "Widget"}])

df_case_sens = df_case_ins.copy()

In [None]:
# Basic Column Mapping to Targets
# No `input` is listed, so every column of df_basic is a candidate for the targets.
recipe = """
wrangles:
  - map:
      targets:
        - name
        - price
        - quantity
"""

result = wrangles.recipe.run(recipe, dataframe=df_basic)

# Show the resulting headers, then check that all three were renamed in order
mapped_columns = list(result.columns)
print(mapped_columns)
assert mapped_columns == ["name", "price", "quantity"]
result.head()

In [None]:
# Map a Subset of Inputs
# Only the columns listed under `input` are candidates for mapping. "Unit Price"
# is not listed, so the assertion below expects it to pass through unchanged.
recipe = """
wrangles:
  - map:
      input:
        - Product Name
        - Qty
      targets:
        - name
        - quantity
"""

result = wrangles.recipe.run(recipe, dataframe=df_subset)
print(result.columns.tolist())
# df_subset has exactly three columns (there is no "Notes" column), so there is
# a single valid outcome to check.
assert result.columns.tolist() == ["name", "Unit Price", "quantity"]
result.head()

In [None]:
# Configure Similarity Threshold
# A threshold of 0.99 accepts only near-exact matches; "Alpha"/"Beta" are expected
# to fall short of it against "gamma"/"delta", leaving the frame's columns untouched.
recipe = """
wrangles:
  - map:
      targets:
        - gamma
        - delta
      threshold: 0.99
"""

result = wrangles.recipe.run(recipe, dataframe=df_threshold)

# Nothing should have been renamed
columns_after = list(result.columns)
print(columns_after)
assert columns_after == ["Alpha", "Beta"]
result.head()

In [None]:
# Resolve Conflicts Between Similar Inputs
# "Price" and "Unit Price" resemble both targets; each target may be used only once.
recipe = """
wrangles:
  - map:
      targets:
        - price
        - unit price
"""

result = wrangles.recipe.run(recipe, dataframe=df_conflict)

# Column order is not checked here, only that both targets are present exactly once
columns_after = result.columns.tolist()
print(columns_after)
expected_columns = ["price", "unit price"]
assert sorted(columns_after) == sorted(expected_columns)
result.head()

In [None]:
# Drop Unmapped Columns
# With drop_unmapped enabled, columns that do not map to a target are removed;
# the assertion expects only the renamed "name" column to remain.
recipe = """
wrangles:
  - map:
      targets:
        - name
      drop_unmapped: true
"""

result = wrangles.recipe.run(recipe, dataframe=df_drop)

remaining_columns = list(result.columns)
print(remaining_columns)
assert remaining_columns == ["name"]
result.head()

In [None]:
# Existing Target Column in the Input (No Duplicates)
# df_existing_target already has a "price" column next to "Unit Price".
# The output is expected to contain a single "price" column.
# NOTE(review): the guide's Behavior notes say a conflicting existing target column
# is dropped before renaming - confirm which column's values end up under "price".
recipe = """
wrangles:
  - map:
      targets:
        - price
"""

result = wrangles.recipe.run(recipe, dataframe=df_existing_target)

columns_after = list(result.columns)
print(columns_after)
assert columns_after == ["price"]
result.head()

In [None]:
# Case-Insensitive Mapping
# case_sensitive is spelled out as false here to contrast with the case-sensitive
# example; "SKU"/"Name" are expected to map to their lowercase targets.
recipe = """
wrangles:
  - map:
      targets:
        - sku
        - name
      case_sensitive: false
"""

result = wrangles.recipe.run(recipe, dataframe=df_case_ins)

mapped_columns = list(result.columns)
print(mapped_columns)
assert mapped_columns == ["sku", "name"]
result.head()

In [None]:
# Case-Sensitive Mapping
# With case_sensitive enabled, case differences reduce similarity; combined with
# the 0.9 threshold, "SKU"/"Name" are expected to stay unmapped.
recipe = """
wrangles:
  - map:
      targets:
        - sku
        - name
      case_sensitive: true
      threshold: 0.9
"""

result = wrangles.recipe.run(recipe, dataframe=df_case_sens)

# The original headers should survive unchanged
columns_after = list(result.columns)
print(columns_after)
assert columns_after == ["SKU", "Name"]
result.head()

In [None]:
# Validate and Inspect Mapping Results

# Example: re-run the basic three-target mapping and validate its output
recipe = """
wrangles:
  - map:
      targets:
        - name
        - price
        - quantity
"""
res = wrangles.recipe.run(recipe, dataframe=df_basic)
print("Columns:", res.columns.tolist())
assert res.columns.tolist() == ["name", "price", "quantity"]
# End the cell on the frame itself so Jupyter renders it as a table, like the
# other cells in this guide, rather than printing its plain-text repr.
res.head()

In [None]:
# Error Handling for Invalid Parameters
# Each check catches only ValueError, so any other exception type still surfaces.
# The `else` branches report when an invalid recipe was accepted without error,
# so the cell never finishes silently.

# Invalid targets type
try:
    bad_recipe = """
wrangles:
  - map:
      targets: 123
"""
    wrangles.recipe.run(bad_recipe, dataframe=df_basic)
except ValueError as e:
    print("Caught ValueError (invalid targets type):", e)
else:
    print("No ValueError raised (invalid targets type): the recipe was accepted")

# Invalid threshold
try:
    bad_recipe_2 = """
wrangles:
  - map:
      targets:
        - a
      threshold: -0.1
"""
    wrangles.recipe.run(bad_recipe_2, dataframe=df_basic)
except ValueError as e:
    print("Caught ValueError (invalid threshold):", e)
else:
    print("No ValueError raised (invalid threshold): the recipe was accepted")

In [None]:
# Tips and Best Practices
# The heading and each tip are emitted on separate lines by a single print call.
tips = (
    "- Start with threshold ~0.6; raise if mappings look too loose.",
    "- Set case_sensitive=True if your columns differ only by case.",
    "- For faster runs, provide an explicit input list to limit comparisons.",
    "- Resolve conflicts by cleaning existing target columns before mapping.",
    "- Validate results by inspecting df.columns and a small sample of rows.",
)
print("Tips:", *tips, sep="\n")