# Constraint Filter

The criteria can be adjusted to suit the task's requirements.

In [None]:
import pandas as pd
import numpy as np

# Check physical rules
# This function should be adjusted according to the real situation
def rule_violation(df: pd.DataFrame) -> pd.Series:
    """Flag the rows of *df* that break the physical rules.

    Every rule below is currently commented out, so no row is flagged.
    Enable or adjust the individual rules to fit the real situation.

    Returns:
        A boolean Series aligned with ``df.index``; ``True`` marks a row
        that breaks at least one enabled rule.
    """
    # Start from "nothing violated"; each enabled rule ORs its own mask in.
    violations = pd.Series(data=False, index=df.index)

    # Rule templates (all disabled):
    # violations |= df['burden'] > df['spacing']        # Normally S/B > 1
    # violations |= df['powder_factor'] < 0.55          # Depends on the task's requirement
    # violations |= df['vibration_ppv'] > 250           # Risk of free surface erosion
    # violations |= df['charge_weight'] > 85            # Based on the local policy
    # violations |= df['fragmentation_index'].between(5, 25, inclusive='neither')
    # NOTE(review): as written, the last template flags values strictly
    # inside (5, 25); negate it with `~` if that interval is the valid range.

    return violations

# Unify and process
def postprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* with capped and integer-valued columns.

    The input frame is left untouched: the result is built with
    ``DataFrame.assign`` so the caller's data is never mutated (column
    assignment on a sampled or sliced frame can also trigger pandas'
    SettingWithCopyWarning).

    Args:
        df: Frame containing the columns ``vibration_ppv``,
            ``charge_weight`` and ``num_cartridges``.

    Returns:
        A new DataFrame with the same columns and index in which
        ``vibration_ppv`` is capped at 250, ``charge_weight`` is capped at
        85, and ``num_cartridges`` is rounded and cast to ``int``.

    Raises:
        KeyError: If one of the three columns is missing.
    """
    # NOTE(review): astype(int) cannot represent NaN, so missing values in
    # 'num_cartridges' will make this call fail - confirm inputs are complete.
    return df.assign(
        vibration_ppv=df['vibration_ppv'].clip(upper=250),
        charge_weight=df['charge_weight'].clip(upper=85),
        num_cartridges=df['num_cartridges'].round().astype(int),
    )

# Main function
def apply_constraints(fake_df: pd.DataFrame, target_n: int) -> pd.DataFrame:
    """Resample *fake_df* until ``target_n`` rule-abiding rows are collected.

    Each attempt draws twice the number of rows still needed (with
    replacement), post-processes them with ``postprocess`` and keeps the
    rows that ``rule_violation`` does not flag. At most 5 attempts are made.
    The attempt number is used as the sampling seed, so repeated calls with
    the same input give the same result.

    Args:
        fake_df: Pool of candidate rows to sample from.
        target_n: Number of valid rows wanted.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding at most ``target_n``
        valid rows. It holds fewer rows when the attempts run out (a notice
        is printed) and is empty when ``target_n <= 0``. In the empty case
        no sampling takes place, so the rows are not post-processed and the
        columns keep the dtypes of ``fake_df``.

    Note:
        An empty ``fake_df`` with ``target_n > 0`` is not handled here; the
        error raised by ``DataFrame.sample`` propagates to the caller.
    """
    max_attempts = 5
    kept = []
    drawn = 0       # rows sampled so far, over all attempts
    valid = 0       # rows that passed the filter so far
    need = target_n
    attempts = 0
    while need > 0 and attempts < max_attempts:
        # Oversample by 2x so one pass usually suffices when most rows pass.
        batch = fake_df.sample(need * 2, replace=True, random_state=attempts)
        batch = postprocess(batch)
        batch_valid = batch[~rule_violation(batch)]
        kept.append(batch_valid)
        drawn += len(batch)
        valid += len(batch_valid)
        need = target_n - valid
        attempts += 1

    if kept:
        final = pd.concat(kept).iloc[:target_n]
    else:
        # Nothing was requested: pd.concat([]) would raise, so return an
        # empty frame that still carries the input's columns.
        final = fake_df.iloc[:0]

    # Share of sampled rows rejected by the rules, measured against the
    # number of rows actually drawn (not target_n * attempts).
    discard_rate = 1 - valid / drawn if drawn else 0.0
    print(f"Discard rate ≈ {discard_rate:.2%}")
    if discard_rate > 0.5:
        print("Half of the data do not pass the filter.")
    if len(final) < target_n:
        print(
            f"Only {len(final)} of {target_n} rows passed the filter "
            f"after {attempts} attempts."
        )
    return final.reset_index(drop=True)