From 3cc77d189de6f9273c6e172e229e69cf2a3e0183 Mon Sep 17 00:00:00 2001 From: "Leandro G. Almeida" Date: Tue, 26 Jan 2021 18:36:57 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix=20local=20dataset=20logger?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/whylogs/app/logger.py | 17 ++++++++++------- src/whylogs/io/file_loader.py | 4 +++- src/whylogs/io/local_dataset.py | 1 + 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/whylogs/app/logger.py b/src/whylogs/app/logger.py index e4bcea47a2..f7a417ca97 100644 --- a/src/whylogs/app/logger.py +++ b/src/whylogs/app/logger.py @@ -6,6 +6,7 @@ import json from pathlib import Path from typing import List, Optional, Dict, Union, Callable, AnyStr +from tqdm import tqdm import pandas as pd from typing.io import IO @@ -359,13 +360,14 @@ def log_image(self, track_image(self._profiles[-1]["full_profile"]) - def log_local_dataset(self, root_dir): + def log_local_dataset(self, root_dir, folder_feature_name="folder_feature"): from PIL.Image import Image as ImageType - dst = LocalDataset(root_dir, file_loader=lambda x: x) - for idx in range(len(dst)): - (data, magic_data), fmt = dst[idx] - self.log(feature_name="file_format", data=fmt) - self.log(feature=magic_data) + dst = LocalDataset(root_dir) + for idx in tqdm(range(len(dst))): + ((data, magic_data), fmt), segment_value = dst[idx] + self.log(feature_name="file_format", value=fmt) + self.log(feature_name=folder_feature_name, value=segment_value) + self.log(features=magic_data) if isinstance(data, pd.DataFrame): self.log_dataframe(data) elif isinstance(data, Dict) or isinstance(data, list): @@ -373,7 +375,8 @@ def log_local_dataset(self, root_dir): elif isinstance(data, ImageType): self.log_image(data) else: - raise NotImplementedError("File format not supported") + raise NotImplementedError( + "File format not supported {}, format:{}".format(type(data), fmt)) def log_annotation(self, annotation_data): if not self.tracking_checks(): diff --git a/src/whylogs/io/file_loader.py b/src/whylogs/io/file_loader.py index aca1dd889f..2db5b9fc71 100644 --- a/src/whylogs/io/file_loader.py +++ b/src/whylogs/io/file_loader.py @@ -26,6 +26,8 @@ def valid_file(fname: str): Returns: bool """ + if os.path.isdir(fname): + return False extension = os.path.splitext(fname)[1] return extension in EXTENSIONS @@ -62,7 +64,7 @@ def extension_file(path: str): return file_extension_given, magicdata else: magicdata = { - "byte_match": format_file[0].byte_match, + "byte_match": "{}".format(format_file[0].byte_match), "mime_type": format_file[0].mime_type, "name": format_file[0].name, } diff --git a/src/whylogs/io/local_dataset.py b/src/whylogs/io/local_dataset.py index 989a4d663b..01451d8585 100644 --- a/src/whylogs/io/local_dataset.py +++ b/src/whylogs/io/local_dataset.py @@ -60,6 +60,7 @@ def _init_dataset(self, ) -> List[Tuple[str, int]]: self.items = [] for folder_feature_value in sorted(self.folder_feature_dict.keys()): print(folder_feature_value) + folder_index = self.folder_feature_dict[folder_feature_value] folder_feature_value = os.path.join( self.root_folder, folder_feature_value)