Skip to content

Commit

Permalink
Fix factorizing
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed May 12, 2023
1 parent bbdaa0c commit a0740eb
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions flox/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def factorize_(
# this is important in shared-memory parallelism with dask
# TODO: figure out how to avoid this
idx = flat.copy()
- found_groups.append(np.array(expect))
+ found_groups.append(np.array(expect, like=flat))
# TODO: fix by using masked integers
idx[idx > expect[-1]] = -1

Expand All @@ -537,7 +537,7 @@ def factorize_(
right = expect.closed_right
idx = np.digitize(
flat,
- bins=bins.view(np.intp) if bins.dtype.kind == "M" else bins,
+ bins=np.array(bins.view(np.intp) if bins.dtype.kind == "M" else bins, like=flat),
right=right,
)
idx -= 1
Expand All @@ -560,9 +560,13 @@ def factorize_(
idx = sorter[(idx,)]
idx[mask] = -1
else:
- idx, groups = pd.factorize(flat, sort=sort)
+ if isinstance(flat, np.ndarray):
+     idx, groups = pd.factorize(flat, sort=sort)
+ else:
+     assert sort
+     groups, idx = np.unique(flat, return_inverse=True)

- found_groups.append(np.array(groups))
+ found_groups.append(groups)
factorized.append(idx.reshape(groupvar.shape))

grp_shape = tuple(len(grp) for grp in found_groups)
Expand Down

0 comments on commit a0740eb

Please sign in to comment.