Skip to content

Commit

Permalink
Merge 3cc77d1 into d5f9c08
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Jan 27, 2021
2 parents d5f9c08 + 3cc77d1 commit d589157
Show file tree
Hide file tree
Showing 30 changed files with 821 additions and 61 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.13-dev8
current_version = 0.2.0-dev0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def setup(app):
# built documents.
#
# The short X.Y version.
version = "0.1.13-dev8"
version = "0.2.0-dev0"
# The full version, including alpha/beta/rc tags.
release = "" # Is set by calling `setup.py docs`

Expand Down
5 changes: 4 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ nodeenv==1.5.0
numpy==1.19.4
oauthlib==3.1.0
packaging==20.7
pandas==1.1.4
pandas==1.2.1
parso==0.7.1
pathspec==0.8.1
pexpect==4.8.0
Expand Down Expand Up @@ -116,3 +116,6 @@ websocket-client==0.57.0
Werkzeug==1.0.1
whylabs-datasketches==2.0.0b7
zipp==3.4.0
puremagic == 1.10
xlrd==2.0.1
openpyxl==3.0.6
7 changes: 5 additions & 2 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jmespath==0.10.0
marshmallow==3.9.1
matplotlib==3.3.3
numpy==1.19.4
pandas==1.1.4
pandas<=1.2.1
protobuf==4.0.0rc2
python-dateutil==2.8.1
pytz==2020.4
Expand All @@ -24,4 +24,7 @@ scikit-learn==0.24.0
Pillow==8.1.0
moto==1.3.16
pytest-cov>=2.11.1
mlflow==1.13.1
mlflow==1.13.1
puremagic ==1.10
xlrd==2.0.1
openpyxl==3.0.6
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fsspec==0.8.4
jmespath==0.10.0
marshmallow==3.9.1
numpy==1.19.4
pandas==1.1.4
pandas<=1.2.1
protobuf==3.14.0
python-dateutil==2.8.1
pytz==2020.4
Expand All @@ -16,3 +16,6 @@ s3transfer==0.3.3
six==1.15.0
urllib3==1.26.2
whylabs-datasketches==2.0.0b7
xlrd==2.0.1
openpyxl==3.0.6
puremagic == 1.10
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[metadata]
name = whylogs
version = 0.1.13-dev8
version = 0.2.0-dev0
description = Profile and monitor your ML data pipeline end-to-end
author = WhyLabs.ai
author-email = support@whylabs.ai
Expand Down
2 changes: 1 addition & 1 deletion src/whylogs/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""WhyLabs version number."""

__version__ = "0.1.13-dev8"
__version__ = "0.2.0-dev0"
57 changes: 40 additions & 17 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import json
from pathlib import Path
from typing import List, Optional, Dict, Union, Callable, AnyStr
from tqdm import tqdm

import pandas as pd
from typing.io import IO

from whylogs.app.writers import Writer
from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES

from whylogs.core import DatasetProfile, TrackImage, METADATA_DEFAULT_ATTRIBUTES, TrackBB
from whylogs.io import LocalDataset
TIME_ROTATION_VALUES = ["s", "m", "h", "d"]

# TODO upgrade to Classes
Expand Down Expand Up @@ -99,6 +100,15 @@ def profile(self, ) -> DatasetProfile:
"""
return self._profiles[-1]["full_profile"]

def tracking_checks(self):
    """
    Run the checks shared by every ``log_*`` entry point.

    When the logger is active, rotate the time window if
    ``should_rotate()`` asks for it.

    :return: ``False`` when the logger is not active (callers should skip
        tracking), ``True`` otherwise.
    """
    if self._active:
        # Rotation is only considered for an active logger.
        if self.should_rotate():
            self._rotate_time()
        return True
    return False

@property
def segmented_profiles(self, ) -> Dict[str, DatasetProfile]:
"""
Expand Down Expand Up @@ -287,12 +297,9 @@ def log(
:param value: value of as single feature. Cannot be specified if 'features' is specified
"""
if not self._active:
if not self.tracking_checks():
return None

if self.should_rotate():
self._rotate_time()

if features is None and feature_name is None:
return

Expand Down Expand Up @@ -341,10 +348,8 @@ def log_image(self,
:param feature_transforms: a list of callables to transform the input into metrics
:type image: Union[str, PIL.image]
"""
if not self._active:
return
if self.should_rotate():
self._rotate_time()
if not self.tracking_checks():
return None

if isinstance(image, str):
track_image = TrackImage(image, feature_transforms=feature_transforms,
Expand All @@ -355,6 +360,30 @@ def log_image(self,

track_image(self._profiles[-1]["full_profile"])

def log_local_dataset(self, root_dir, folder_feature_name="folder_feature"):
    """
    Log every item of the ``LocalDataset`` built from ``root_dir``.

    For each item the file format, the segment value (logged under
    ``folder_feature_name``) and the accompanying ``magic_data`` features
    are logged first; the payload is then dispatched on its type:
    dataframes go to ``log_dataframe``, dicts/lists to ``log_annotation``
    and PIL images to ``log_image``.

    :param root_dir: passed to ``LocalDataset`` to build the dataset
    :param folder_feature_name: feature name used for the segment value
    :raises NotImplementedError: if the payload type is none of the above
    """
    from PIL.Image import Image as ImageType

    dataset = LocalDataset(root_dir)
    for position in tqdm(range(len(dataset))):
        sample, segment_value = dataset[position]
        (data, magic_data), fmt = sample

        # Per-item metadata is logged regardless of the payload type.
        self.log(feature_name="file_format", value=fmt)
        self.log(feature_name=folder_feature_name, value=segment_value)
        self.log(features=magic_data)

        if isinstance(data, pd.DataFrame):
            self.log_dataframe(data)
            continue
        if isinstance(data, (Dict, list)):
            self.log_annotation(annotation_data=data)
            continue
        if isinstance(data, ImageType):
            self.log_image(data)
            continue

        raise NotImplementedError(
            "File format not supported {}, format:{}".format(type(data), fmt))

def log_annotation(self, annotation_data):
    """
    Track bounding-box annotation metrics on the current full profile.

    Does nothing when ``tracking_checks()`` reports that tracking should
    be skipped.

    :param annotation_data: annotation object handed to ``TrackBB`` as ``obj``
    :return: always ``None``
    """
    if self.tracking_checks():
        bounding_box_tracker = TrackBB(obj=annotation_data)
        bounding_box_tracker(self._profiles[-1]["full_profile"])
    return None

def log_csv(self,
filepath_or_buffer: Union[str, Path, IO[AnyStr]],
segments: Optional[Union[List[Segment], List[str]]] = None,
Expand All @@ -369,10 +398,6 @@ def log_csv(self,
:param profile_full_dataset: when segmenting dataset, an option to keep the full unsegmented profile of the
dataset.
"""
if not self._active:
return
if self.should_rotate():
self._rotate_time()

self.profile_full_dataset = profile_full_dataset
if segments is not None:
Expand All @@ -392,10 +417,8 @@ def log_dataframe(self, df,
:param segments: specify the tag key value pairs for segments
:param df: the Pandas dataframe to log
"""
if not self._active:
if not self.tracking_checks():
return None
if self.should_rotate():
self._rotate_time()

# segment check in case segments are just keys
self.profile_full_dataset = profile_full_dataset
Expand Down
3 changes: 3 additions & 0 deletions src/whylogs/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from .columnprofile import ColumnProfile
from .datasetprofile import DatasetProfile
from .image_profiling import TrackImage, _METADATA_DEFAULT_ATTRIBUTES as METADATA_DEFAULT_ATTRIBUTES
from .annotation_profiling import TrackBB, BB_ATTRIBUTES

# Public API of this package. Python only honours the lowercase ``__all__``
# name, and its entries must be strings naming module attributes; the
# previous upper-case list of objects had no effect on star-imports.
__all__ = [
    "ColumnProfile",
    "DatasetProfile",
    "TrackImage",
    "METADATA_DEFAULT_ATTRIBUTES",
    "TrackBB",
    "BB_ATTRIBUTES",
]
158 changes: 158 additions & 0 deletions src/whylogs/core/annotation_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from typing import Callable, Optional, Dict, List

import pandas as pd
import numpy as np

from whylogs.io.file_loader import file_loader


class Rectangle:

    """
    Helper class to compute minimal bounding box intersections and/or IoU,
    plus minimal stats properties of a bounding box.

    The box is given as two corners, ``[[x1, y1], [x2, y2]]``.

    Attributes:
        area (float): ``|x2 - x1| * |y2 - y1|``
        aspect_ratio (float): ``width / height``, or 0.0 when height is not
            positive
        boundingBox: the corner pair exactly as passed in
        centroid (list): ``[x1 + width / 2, y1 + height / 2]``
        confidence: value passed in by the caller (``None`` by default)
        height (float): ``|y2 - y1|``
        labels: value passed in by the caller (``None`` by default)
        width (float): ``|x2 - x1|``
    """

    # TODO: replace with shapely functions and methods
    # or move to cpp/cpython
    def __init__(self, boundingBox, confidence=None, labels=None):
        self.boundingBox = boundingBox
        self._x1 = boundingBox[0][0]
        self._x2 = boundingBox[1][0]
        self._y1 = boundingBox[0][1]
        # Second corner's y coordinate; this previously read index [1][0]
        # (the x coordinate), which corrupted height, area and everything
        # derived from them.
        self._y2 = boundingBox[1][1]
        self.confidence = confidence
        self.labels = labels
        self.width = abs(self.x2 - self.x1)
        self.height = abs(self.y2 - self.y1)
        self.area = self.width * self.height
        self.aspect_ratio = self.width / self.height if self.height > 0 else 0.0
        self.centroid = [self.x1 + self.width/2, self.y1 + self.height/2]

    @property
    def x1(self):
        """x coordinate of the first corner."""
        return self._x1

    @property
    def x2(self):
        """x coordinate of the second corner."""
        return self._x2

    @property
    def y1(self):
        """y coordinate of the first corner."""
        return self._y1

    @property
    def y2(self):
        """y coordinate of the second corner."""
        return self._y2

    def intersection(self, Rectangle_2):
        """
        Return the overlap area between this rectangle and ``Rectangle_2``.

        Returns 0.0 when the overlap region is empty.
        """
        x_left = max(self.x1, Rectangle_2.x1)
        y_top = max(self.y1, Rectangle_2.y1)
        x_right = min(self.x2, Rectangle_2.x2)
        y_bottom = min(self.y2, Rectangle_2.y2)
        if x_right < x_left or y_bottom < y_top:
            return 0.0
        intersection_area = (x_right - x_left) * (y_bottom - y_top)

        return intersection_area

    def iou(self, Rectangle_2):
        """
        Return intersection-over-union with ``Rectangle_2``.

        Returns 0.0 when either rectangle has a non-positive area.
        """
        intersection_area = self.intersection(Rectangle_2)
        if Rectangle_2.area <= 0 or self.area <= 0:
            return 0.0
        return intersection_area / \
            (self.area+Rectangle_2.area - intersection_area)


# Names of the bounding-box / annotation metrics produced by this module.
BB_ATTRIBUTES = (
    "annotation_count",
    "annotation_density",
    "area_coverage",
    "bb_width",
    "bb_height",
    "bb_area",
    "bb_aspect_ratio",
    "confidence",
    "dist_to_center",
)


class TrackBB:
    """
    Compute bounding-box annotation metrics and track them on profiles.

    The annotation data is an iterable of dict-like entries. Entries without
    an ``"annotation"`` key are ignored. For the others the code reads
    ``annotation["size"]["height"]``, ``annotation["size"]["width"]`` and
    ``annotation["object"]``, a collection whose items may carry a
    ``"bndbox"`` mapping with ``xmin``/``ymin``/``xmax``/``ymax`` and an
    optional ``"confidence"``.

    Attributes:
        obj: the annotation data (loaded from ``filepath`` or passed in)
        fmt: file format reported by ``file_loader``; ``None`` when the
            instance was built from ``obj``
        per_image_stats (list): one metrics dict per annotated image
        all_bboxes (list): one metrics dict per bounding box
    """

    def __init__(self, filepath: str = None,
                 obj: Dict = None,
                 feature_transforms: Optional[List[Callable]] = None,
                 feature_names: str = ""
                 ):
        """
        :param filepath: path of an annotation file, loaded with ``file_loader``
        :param obj: already-loaded annotation data, used when ``filepath``
            is not given
        :param feature_transforms: accepted but not used by this class
        :param feature_names: accepted but not used by this class
        :raises ValueError: if neither ``filepath`` nor ``obj`` is given
        """
        if filepath is None and obj is None:
            raise ValueError("Need filepath or object data")

        # Define ``fmt`` on every construction path so that reading it never
        # raises AttributeError for instances built from ``obj``.
        self.fmt = None
        if filepath is not None:
            (self.obj, magic_data), self.fmt = file_loader(filepath)
        else:
            self.obj = obj
        self.per_image_stats = []
        self.all_bboxes = []
        self.calculate_metrics()

    def calculate_metrics(self,):
        """
        Fill ``per_image_stats`` and ``all_bboxes`` from ``self.obj``.

        Each annotated image gets ``annotation_count``,
        ``annotation_density`` (count divided by image area) and
        ``area_coverage`` (box/image overlap as a fraction of image area,
        averaged over ``annotation_count``). Each bounding box gets its
        confidence, width, height, area, aspect ratio and the Euclidean
        distance from its centroid to the image centre.
        """
        for obj in self.obj:

            annotation_metrics = {}
            annotations = obj.get("annotation", None)
            if annotations is None:
                continue
            img_height_pixel = annotations["size"]["height"]
            img_width_pixel = annotations["size"]["width"]
            img_rect = Rectangle([[0, 0], [img_width_pixel, img_height_pixel]])
            annotation_metrics["annotation_count"] = len(
                annotations["object"])
            annotation_metrics["annotation_density"] = \
                annotation_metrics["annotation_count"] / img_rect.area

            # Get individual bbox metrics
            annotation_metrics["area_coverage"] = 0

            for bb_obj in filter(lambda x: "bndbox" in x,
                                 annotations["object"]):
                bounding_box_metric = {}
                bndbox = bb_obj["bndbox"]

                # Annotations without a confidence score are recorded as
                # None instead of raising KeyError.
                rect1 = Rectangle([[bndbox["xmin"], bndbox["ymin"]],
                                   [bndbox["xmax"], bndbox["ymax"]]],
                                  confidence=bb_obj.get("confidence"))

                bounding_box_metric["confidence"] = rect1.confidence

                bounding_box_metric["bb_width"] = rect1.width
                bounding_box_metric["bb_height"] = rect1.height
                bounding_box_metric["bb_area"] = rect1.area

                bounding_box_metric["bb_aspect_ratio"] = rect1.aspect_ratio

                bounding_box_metric["dist_to_center"] = np.linalg.norm([
                    rect1.centroid[0] - (img_width_pixel / 2.0),
                    rect1.centroid[1] - (img_height_pixel / 2.0)], ord=2)

                annotation_metrics["area_coverage"] += rect1.intersection(
                    img_rect) / (img_rect.area*annotation_metrics["annotation_count"])

                self.all_bboxes.append(bounding_box_metric)

            # Send object to metrics
            self.per_image_stats.append(annotation_metrics)

    def __call__(self, profiles):
        """
        Track the computed metrics on one profile or a list of profiles.

        Both the per-image and the per-bounding-box metrics are converted to
        dataframes and passed to each profile's ``track_dataframe``.

        :param profiles: a single profile or a list of profiles
        """
        if not isinstance(profiles, list):
            profiles = [profiles]

        per_image_dataframe = pd.DataFrame(self.per_image_stats)
        bounding_boxes = pd.DataFrame(self.all_bboxes)
        for each_profile in profiles:

            each_profile.track_dataframe(per_image_dataframe)
            each_profile.track_dataframe(bounding_boxes)
2 changes: 1 addition & 1 deletion src/whylogs/core/image_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
except ImportError as e:
ImageType = None
logger.debug(str(e))
logger.debug("Unable to load PIL; install pillow for image support")
logger.debug("Unable to load PIL; install Pillow for image support")

DEFAULT_IMAGE_FEATURES = [Hue(), Saturation(), Brightness()]

Expand Down
6 changes: 5 additions & 1 deletion src/whylogs/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
_file_plugins = [


from .local_dataset import LocalDataset

__ALL__ = [
LocalDataset
]
Loading

0 comments on commit d589157

Please sign in to comment.