Skip to content

Commit

Permalink
Remove use of .view when casting timestamp to int (#72)
Browse files (browse the repository at this point in the history)
Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com>
  • Loading branch information
avsolatorio committed Apr 24, 2024
1 parent 4d14472 commit 04ceddd
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions src/realtabformer/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,19 @@ def process_datetime_data(
# Make sure that we don't convert the NaT
# to some integer.
series = series.copy()
series.loc[series.notnull()] = (series[series.notnull()].view(int) / 1e9).astype(
int
)
series = series.fillna(pd.NA)

# Track null values (NaT)
null_idx = series.isnull()

# Convert to the numerical representation
# of the datetime (UNIX timestamp)
series = (series.astype(int) / 1e9)

# Fill NA
series.loc[null_idx] = pd.NA

# Cast as integer type
series = series.astype("Int64")

# Take the mean value to re-align the data.
# This will help reduce the scale of the numeric
Expand Down

0 comments on commit 04ceddd

Please sign in to comment.