Skip to content

Commit

Permalink
fix: remove the duplicated cardinality threshold under categorical and text settings
Browse files Browse the repository at this point in the history
  • Loading branch information
ricardodcpereira authored and aquemy committed Oct 10, 2023
1 parent f4886a2 commit 80a1024
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
7 changes: 3 additions & 4 deletions src/ydata_profiling/config.py
Expand Up @@ -50,17 +50,16 @@ class TextVars(BaseModel):
words: bool = True
characters: bool = True
redact: bool = False
# if text has more than threshold categories, it's not a category
categorical_threshold: int = 50
# if text has more than threshold % distinct values, it's not a category
percentage_cat_threshold: float = 0.5


class CatVars(BaseModel):
length: bool = True
characters: bool = True
words: bool = True
# if var has more than threshold categories, it's a text var
cardinality_threshold: int = 50
# if var has more than threshold % distinct values, it's a text var
percentage_cat_threshold: float = 0.5
imbalance_threshold: float = 0.5
n_obs: int = 5
# Set to zero to disable
Expand Down
4 changes: 2 additions & 2 deletions src/ydata_profiling/model/typeset_relations.py
Expand Up @@ -73,8 +73,8 @@ def string_is_category(series: pd.Series, state: dict, k: Settings) -> bool:
- (distinct values / count of all values) is less than threshold
- is not bool"""
n_unique = series.nunique()
unique_threshold = k.vars.text.percentage_cat_threshold
threshold = k.vars.text.categorical_threshold
unique_threshold = k.vars.cat.percentage_cat_threshold
threshold = k.vars.cat.cardinality_threshold
return (
1 <= n_unique <= threshold
and (
Expand Down

0 comments on commit 80a1024

Please sign in to comment.