Skip to content

Commit

Permalink
Merge pull request #140 from whylabs/remove_s3fs_refs
Browse files Browse the repository at this point in the history
Remove s3fs refs and use smart open instead
  • Loading branch information
lalmei committed Jan 31, 2021
2 parents b526c6c + a228783 commit 8f68166
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 24 deletions.
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ requests==2.25.0
requests-oauthlib==1.3.0
requests-toolbelt==0.9.1
rfc3986==1.4.0
s3fs==0.4.2
s3transfer==0.3.3
setuptools-black==0.1.5
six==1.15.0
Expand All @@ -116,3 +115,4 @@ websocket-client==0.57.0
Werkzeug==1.0.1
whylabs-datasketches==2.0.0b7
zipp==3.4.0
smart-open==4.1.2
8 changes: 4 additions & 4 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
boto3==1.16.28
botocore==1.19.28
boto3==1.16.63
botocore==1.19.63
certifi==2020.11.8
click==8.0.0a1
fsspec==0.8.4
Expand All @@ -12,7 +12,6 @@ protobuf==4.0.0rc2
python-dateutil==2.8.1
pytz==2020.4
PyYAML==5.3.1
s3fs==0.4.2
s3transfer==0.3.3
six==1.15.0
urllib3==1.26.2
Expand All @@ -24,4 +23,5 @@ scikit-learn==0.24.0
Pillow==8.1.0
moto==1.3.16
pytest-cov>=2.11.1
mlflow==1.13.1
mlflow==1.13.1
smart-open==4.1.2
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
boto3==1.16.28
botocore==1.19.28
boto3==1.16.63
botocore==1.19.63
certifi==2020.11.8
click==8.0.0a1
fsspec==0.8.4
Expand All @@ -11,8 +11,8 @@ protobuf==3.14.0
python-dateutil==2.8.1
pytz==2020.4
PyYAML==5.3.1
s3fs==0.4.2
s3transfer==0.3.3
six==1.15.0
urllib3==1.26.2
whylabs-datasketches==2.0.0b7
smart-open==4.1.2
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ install_requires =
whylabs-datasketches>=2.0.0b7
boto3>=1.14.1
botocore>=1.17.44
smart-open==4.1.2
# very important: s3fs pulls in aiobotocore, which locks boto3
s3fs==0.4.2
setup_requires =
pytest-runner
setuptools
Expand Down
16 changes: 8 additions & 8 deletions src/whylogs/app/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from string import Template
from typing import List

import s3fs
from smart_open import open

from google.protobuf.message import Message

from whylogs.app.output_formats import OutputFormat
Expand Down Expand Up @@ -286,7 +287,6 @@ def __init__(
filename_template: str = None,
):
super().__init__(output_path, formats, path_template, filename_template)
self.fs = s3fs.S3FileSystem(anon=False)

def write(self, profile: DatasetProfile, rotation_suffix: str = None):
"""
Expand Down Expand Up @@ -317,7 +317,7 @@ def _write_json(self, profile: DatasetProfile):
)

summary = profile.to_summary()
with self.fs.open(output_file, "wt") as f:
with open(output_file, "wt") as f:
f.write(message_to_json(summary))

def _write_flat(self, profile: DatasetProfile, indent: int = 4):
Expand All @@ -337,7 +337,7 @@ def _write_flat(self, profile: DatasetProfile, indent: int = 4):
self.output_path, self.path_suffix(profile), "flat_table"
)
summary_df = get_dataset_frame(summary)
with self.fs.open(
with open(
os.path.join(flat_table_path, self.file_name(
profile, ".csv")), "wt"
) as f:
Expand All @@ -348,7 +348,7 @@ def _write_flat(self, profile: DatasetProfile, indent: int = 4):
frequent_numbers_path = os.path.join(
self.output_path, self.path_suffix(profile), "freq_numbers"
)
with self.fs.open(
with open(
os.path.join(frequent_numbers_path, json_flat_file), "wt"
) as f:
hist = flatten_dataset_histograms(summary)
Expand All @@ -357,7 +357,7 @@ def _write_flat(self, profile: DatasetProfile, indent: int = 4):
frequent_strings_path = os.path.join(
self.output_path, self.path_suffix(profile), "frequent_strings"
)
with self.fs.open(
with open(
os.path.join(frequent_strings_path, json_flat_file), "wt"
) as f:
frequent_strings = flatten_dataset_frequent_strings(summary)
Expand All @@ -367,7 +367,7 @@ def _write_flat(self, profile: DatasetProfile, indent: int = 4):
self.output_path, self.path_suffix(profile), "histogram"
)

with self.fs.open(os.path.join(histogram_path, json_flat_file), "wt") as f:
with open(os.path.join(histogram_path, json_flat_file), "wt") as f:
histogram = flatten_dataset_histograms(summary)
json.dump(histogram, f, indent=indent)

Expand All @@ -380,7 +380,7 @@ def _write_protobuf(self, profile: DatasetProfile):

protobuf: Message = profile.to_protobuf()

with self.fs.open(
with open(
os.path.join(path, self.file_name(profile, ".bin")), "wb"
) as f:
f.write(protobuf.SerializeToString())
Expand Down
21 changes: 14 additions & 7 deletions tests/unit/app/test_writers.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@


import os
from whylogs.app.session import session_from_config
from whylogs.app.config import load_config
import boto3

import pytest
import boto3
from moto.s3.responses import DEFAULT_REGION_NAME
from moto import mock_s3
import pytest

from whylogs.app import WriterConfig
from whylogs.app.session import session_from_config
from whylogs.app.config import load_config
from whylogs.app.writers import writer_from_config

BUCKET = "mocked_bucket"
MY_PREFIX = "mock_folder"
# @pytest.fixture(autouse=True)

object_keys = ["dataset_test_s3/dataset_summary/flat_table/dataset_summary.csv",
"dataset_test_s3/dataset_summary/freq_numbers/dataset_summary.json",
Expand Down Expand Up @@ -70,3 +70,10 @@ def test_s3_writer(df_lending_club, moto_boto, s3_all_config_path):

for idx, each_objc in enumerate(objects["Contents"]):
assert each_objc["Key"] == object_keys[idx]


def test_non_valid_type(tmpdir):
    """Check that a writer config with a non-valid type is rejected.

    Builds a ``WriterConfig`` whose ``type`` is ``"blob"`` and expects
    ``writer_from_config`` to raise ``ValueError`` for it.

    Args:
        tmpdir: pytest-provided temporary directory, used as the output path.
    """
    config = WriterConfig(type="blob", formats=["json"], output_path=tmpdir)
    with pytest.raises(ValueError):
        # Only the raised error matters; the return value is never used,
        # so it is not bound to a local name.
        writer_from_config(config)

0 comments on commit 8f68166

Please sign in to comment.