Skip to content

Commit

Permalink
πŸ› fix local dataset logger
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Jan 27, 2021
1 parent 2140a66 commit 3cc77d1
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
17 changes: 10 additions & 7 deletions src/whylogs/app/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
from pathlib import Path
from typing import List, Optional, Dict, Union, Callable, AnyStr
from tqdm import tqdm

import pandas as pd
from typing.io import IO
Expand Down Expand Up @@ -359,21 +360,23 @@ def log_image(self,

track_image(self._profiles[-1]["full_profile"])

def log_local_dataset(self, root_dir):
def log_local_dataset(self, root_dir, folder_feature_name="folder_feature"):
from PIL.Image import Image as ImageType
dst = LocalDataset(root_dir, file_loader=lambda x: x)
for idx in range(len(dst)):
(data, magic_data), fmt = dst[idx]
self.log(feature_name="file_format", data=fmt)
self.log(feature=magic_data)
dst = LocalDataset(root_dir)
for idx in tqdm(range(len(dst))):
((data, magic_data), fmt), segment_value = dst[idx]
self.log(feature_name="file_format", value=fmt)
self.log(feature_name=folder_feature_name, value=segment_value)
self.log(features=magic_data)
if isinstance(data, pd.DataFrame):
self.log_dataframe(data)
elif isinstance(data, Dict) or isinstance(data, list):
self.log_annotation(annotation_data=data)
elif isinstance(data, ImageType):
self.log_image(data)
else:
raise NotImplementedError("File format not supported")
raise NotImplementedError(
"File format not supported {}, format:{}".format(type(data), fmt))

def log_annotation(self, annotation_data):
if not self.tracking_checks():
Expand Down
4 changes: 3 additions & 1 deletion src/whylogs/io/file_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def valid_file(fname: str):
Returns:
bool
"""
if os.path.isdir(fname):
return False
extension = os.path.splitext(fname)[1]
return extension in EXTENSIONS

Expand Down Expand Up @@ -62,7 +64,7 @@ def extension_file(path: str):
return file_extension_given, magicdata
else:
magicdata = {
"byte_match": format_file[0].byte_match,
"byte_match": "{}".format(format_file[0].byte_match),
"mime_type": format_file[0].mime_type,
"name": format_file[0].name,
}
Expand Down
1 change: 1 addition & 0 deletions src/whylogs/io/local_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def _init_dataset(self, ) -> List[Tuple[str, int]]:
self.items = []
for folder_feature_value in sorted(self.folder_feature_dict.keys()):
print(folder_feature_value)

folder_index = self.folder_feature_dict[folder_feature_value]
folder_feature_value = os.path.join(
self.root_folder, folder_feature_value)
Expand Down

0 comments on commit 3cc77d1

Please sign in to comment.