In [1]:
import polars as pl

# A named Series built from a plain Python list of floats.
sales_series = pl.Series(
    name="sales",
    values=[150.0, 300.0, 250.0],
)
sales_series

sales
f64
150.0
300.0
250.0


## Cast a Python list to a Polars Enum Series

In [2]:
# Category labels for each dimension of the combinations grid.
reals_labels = [
    "real_negatives",
    "real_positives",
    "real_competing",
    "real_censored",
]
censoring_assumption_labels = ["excluded", "adjusted"]
competing_assumption_labels = [
    "excluded",
    "adjusted_as_negative",
    "adjusted_as_censored",
]

# One Enum dtype per dimension, with categories taken from the label lists.
reals_enum = pl.Enum(reals_labels)
censoring_assumption_enum = pl.Enum(censoring_assumption_labels)
competing_assumption_enum = pl.Enum(competing_assumption_labels)

# Single-column frames that hold every category of the matching Enum.
df_reals = pl.DataFrame(
    [pl.Series("reals", reals_labels, dtype=reals_enum)]
)
df_censoring_assumption = pl.DataFrame(
    [
        pl.Series(
            "censoring_assumption",
            censoring_assumption_labels,
            dtype=censoring_assumption_enum,
        )
    ]
)
df_competing_assumption = pl.DataFrame(
    [
        pl.Series(
            "competing_assumption",
            competing_assumption_labels,
            dtype=competing_assumption_enum,
        )
    ]
)

# Two chained cross joins enumerate every combination (4 x 2 x 3 = 24 rows).
aj_data_combinations = df_reals.join(
    df_censoring_assumption, how="cross"
).join(
    df_competing_assumption, how="cross"
)

print(aj_data_combinations)

shape: (24, 3)
┌────────────────┬──────────────────────┬──────────────────────┐
│ reals          ┆ censoring_assumption ┆ competing_assumption │
│ ---            ┆ ---                  ┆ ---                  │
│ enum           ┆ enum                 ┆ enum                 │
╞════════════════╪══════════════════════╪══════════════════════╡
│ real_negatives ┆ excluded             ┆ excluded             │
│ real_negatives ┆ excluded             ┆ adjusted_as_negative │
│ real_negatives ┆ excluded             ┆ adjusted_as_censored │
│ real_negatives ┆ adjusted             ┆ excluded             │
│ real_negatives ┆ adjusted             ┆ adjusted_as_negative │
│ …              ┆ …                    ┆ …                    │
│ real_censored  ┆ excluded             ┆ adjusted_as_negative │
│ real_censored  ┆ excluded             ┆ adjusted_as_censored │
│ real_censored  ┆ adjusted             ┆ excluded             │
│ real_censored  ┆ adjusted             ┆ adjusted_as_negative │
│ real_cen

## Map numeric codes to Enum values

In [3]:
# Lookup from integer code to label:
# 0 -> real_negatives, 1 -> real_positives, 2 -> real_competing.
reals_map = dict(
    enumerate(["real_negatives", "real_positives", "real_competing"])
)

# Toy frame of raw integer codes to translate.
df = pl.DataFrame({"reals_codes": [0, 0, 1, 2, 1]})

df


# Translate each integer code through reals_map and store the result as a
# new Enum-typed "reals" column next to the original codes.
df.with_columns(
    reals=pl.col("reals_codes").replace_strict(
        reals_map,
        return_dtype=reals_enum,
    )
)

reals_codes,reals
i64,enum
0,"""real_negatives"""
0,"""real_negatives"""
1,"""real_positives"""
2,"""real_competing"""
1,"""real_positives"""


## Working with `np.digitize()`

In [3]:
import polars as pl
import numpy as np

# Bin probabilities into equal-width intervals on [0, 1] with np.digitize and
# attach each row's bin index and a readable interval label.

# Input DataFrame
df = pl.DataFrame({
    "prob": [0.0, 0.02, 0.15, 0.33, 0.78, 0.99, 1.0]
})

# Step size (bin width); it has to split [0, 1] into a whole number of bins
by = 0.1
if not 0 < by <= 1:
    raise ValueError(f"by must be in (0, 1], got {by!r}")

n_bins = int(round(1 / by))
if abs(n_bins * by - 1.0) > 1e-9:
    raise ValueError(f"by={by!r} does not divide [0, 1] into equal bins")

# Generate breaks: [0.0, 0.1, ..., 1.0]
# The edges are computed as k / n_bins rather than np.arange(0, 1 + by, by):
# a float step yields edges such as 3 * 0.1 == 0.30000000000000004, which
# pushes a probability of exactly 0.3 into [0.20, 0.30), and it can also
# change the number of edges. Integer division gives correctly rounded edges
# and always exactly n_bins + 1 of them.
breaks = np.arange(n_bins + 1) / n_bins
last_bin_index = len(breaks) - 2  # last valid bin is [0.9, 1.0]

# Reject values outside [0, 1]; the comparison is written so NaN fails too.
# Without this check a negative value would index breaks[-1] and silently get
# a wrong label, and a value above 1 would fail later with an IndexError.
probs = df["prob"].to_numpy()
if not ((probs >= 0.0) & (probs <= 1.0)).all():
    raise ValueError("prob values must all lie in [0, 1]")

# Assign bin indices (left-inclusive, right-exclusive)
bin_indices = np.digitize(probs, bins=breaks, right=False) - 1

# Force 1.0 into the last bin (digitize places it one past the final edge)
bin_indices = np.where(probs == 1.0, last_bin_index, bin_indices)

# Construct bounds and labels
lower_bounds = breaks[bin_indices]
upper_bounds = breaks[bin_indices + 1]

include_upper_bounds = bin_indices == last_bin_index  # only include upper bound in last bin

# One pass over the rows: "]" closes the last bin, ")" closes every other bin.
labels = [
    f"[{lo:.2f}, {hi:.2f}{']' if closed else ')'}"
    for lo, hi, closed in zip(lower_bounds, upper_bounds, include_upper_bounds)
]

# Attach to DataFrame if desired
df = df.with_columns([
    pl.Series("bin_index", bin_indices),
    pl.Series("bin_label", labels)
])

print(df)


shape: (7, 3)
┌──────┬───────────┬──────────────┐
│ prob ┆ bin_index ┆ bin_label    │
│ ---  ┆ ---       ┆ ---          │
│ f64  ┆ i64       ┆ str          │
╞══════╪═══════════╪══════════════╡
│ 0.0  ┆ 0         ┆ [0.00, 0.10) │
│ 0.02 ┆ 0         ┆ [0.00, 0.10) │
│ 0.15 ┆ 1         ┆ [0.10, 0.20) │
│ 0.33 ┆ 3         ┆ [0.30, 0.40) │
│ 0.78 ┆ 7         ┆ [0.70, 0.80) │
│ 0.99 ┆ 9         ┆ [0.90, 1.00] │
│ 1.0  ┆ 9         ┆ [0.90, 1.00] │
└──────┴───────────┴──────────────┘
