Skip to content

Commit

Permalink
handle large magnitude negative floats consistently
Browse files Browse the repository at this point in the history
  • Loading branch information
jamie256 committed Apr 7, 2022
1 parent 14238c6 commit cc12a3d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
7 changes: 5 additions & 2 deletions src/whylogs/core/summaryconverters.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def single_quantile_from_sketch(sketch: kll_floats_sketch, quantile: float):
def _calculate_bins(end: float, start: float, n: int, avg_per_bucket: float, max_buckets: int):
# Include the max value in the right-most bin
end += abs(end) * 1e-7
abs_end = abs(end)
abs_start = abs(start)
max_magnitude = max(abs_end, abs_start)

# The kll_floats_sketch uses 32-bit floats, so we check precision against np.float32
float_mantissa_bits = np.finfo(np.float32).nmant
Expand All @@ -133,7 +136,7 @@ def _calculate_bins(end: float, start: float, n: int, avg_per_bucket: float, max
width = (end - start) / n_buckets

# check for precision of width with respect to float_mantissa_bits and bin width:
bits_in_max = math.floor(math.log2(end))
bits_in_max = math.floor(math.log2(max_magnitude))
width_bits = math.floor(math.log2((end - start) / n_buckets))
logger.debug(f"bits_in_max is: {bits_in_max}")
logger.debug(f"width_bits is: {width_bits}")
Expand All @@ -143,7 +146,7 @@ def _calculate_bins(end: float, start: float, n: int, avg_per_bucket: float, max
logger.info(f"Width must be larger than {bits_in_width} bits.")
new_buckets = math.floor((end - start) / math.pow(2, bits_in_width))
logger.warn(f"Avoiding bin edge collisions by resizing to {new_buckets} buckets")
n_buckets = new_buckets
n_buckets = max(new_buckets, 1)
width = (end - start) / n_buckets

# Calculate histograms from the Probability Mass Function
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/app/test_session.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from logging import getLogger

import pandas as pd
import pytest

from whylogs.app.config import SessionConfig
Expand Down Expand Up @@ -78,6 +79,19 @@ def test_session_profile_small(df_single):
assert len(flat_summary) == 1


def test_session_profile_negative_ints():
    """Profile one column of large-magnitude negative integers.

    Builds a 2000-row frame whose values sit around -2.2e10, logs it through
    a session created from a default config, and checks that the flat
    summary holds exactly one entry. Each intermediate result is written to
    the debug log.
    """
    frame = pd.DataFrame(range(-22335544310, -22335542310), columns=["negative"])

    config = SessionConfig("default-project", "default-pipeline", [], False)
    logged_profile = session_from_config(config).log_dataframe(frame)
    TEST_LOGGER.debug(f"logged df: {frame.shape} ")
    TEST_LOGGER.debug(f"logged profile: {logged_profile} ")

    full_summary = logged_profile.flat_summary()
    TEST_LOGGER.debug(f"logged summary: {full_summary} ")

    per_column = full_summary["summary"]
    TEST_LOGGER.debug(f"logged flat_summary: {per_column} ")

    # A single input column should yield a single summary entry.
    assert len(per_column) == 1


def test_session_profile_two_column(df_two_int_col):
TEST_LOGGER.debug(f"About to log {df_two_int_col.describe()} with columns {df_two_int_col.columns}")
session = session_from_config(SessionConfig("default-project", "default-pipeline", [], False))
Expand Down

0 comments on commit cc12a3d

Please sign in to comment.