Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [v0.2.3] - 2025-08-26

- Better support for merging schemas
- Small code improvements

## [v0.2.2] - 2025-08-25

Expand Down
35 changes: 16 additions & 19 deletions vecorel_cli/conversion/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,27 +243,24 @@ def read_data(self, paths, **kwargs):
return pd.concat(gdfs)

def filter_rows(self, gdf):
if len(self.column_filters) > 0:
self.info("Applying filters")
for key, fn in self.column_filters.items():
if key in gdf.columns:
result = fn(gdf[key])
if len(self.column_filters) == 0:
return gdf

self.info("Applying filters")
for key, fn in self.column_filters.items():
if key in gdf.columns:
result = fn(gdf[key])
if isinstance(result, tuple):
# If the result is a tuple, the second value is a flag to potentially invert the mask
if isinstance(result, tuple):
if result[1]:
# Invert mask
mask = ~result[0]
else:
# Use mask as is
mask = result[0]
else:
# Just got a mask, proceed
mask = result

# Filter columns based on the mask
gdf = gdf[mask]
mask = ~result[0] if result[1] else result[0]
else:
self.warning(f"Column '{key}' not found in dataset, skipping filter")
# Just got a mask, proceed
mask = result

# Filter columns based on the mask
gdf = gdf[mask]
else:
self.warning(f"Column '{key}' not found in dataset, skipping filter")
return gdf

def get_title(self):
Expand Down
3 changes: 2 additions & 1 deletion vecorel_cli/converters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import importlib
import os
from typing import Sequence

import click
import pandas as pd
Expand Down Expand Up @@ -111,7 +112,7 @@ def list_ids(self) -> list:
ids = [f[:-3] for f in files if self.is_converter(f)]
return ids

def list_all(self, keys=["short_name", "license"]) -> dict:
def list_all(self, keys: Sequence[str] = ("short_name", "license")) -> dict:
converters = {}
for id in self.list_ids():
obj = {}
Expand Down
Loading