Skip to content

Commit

Permalink
wrote functionality and added tests for delete_artifacts (mlflow#3958)
Browse files Browse the repository at this point in the history
Signed-off-by: sklingel <stefan.klingelschmitt@gmail.com>
Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
  • Loading branch information
sklingel authored and harupy committed Jun 7, 2021
1 parent e193479 commit 01a8c7f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
13 changes: 12 additions & 1 deletion mlflow/store/artifact/s3_artifact_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,15 @@ def _download_file(self, remote_file_path, local_path):
s3_client.download_file(bucket, s3_full_path, local_path)

def delete_artifacts(self, artifact_path=None):
    """Delete the S3 objects stored under this repository's artifact location.

    :param artifact_path: Optional path, relative to ``self.artifact_uri``, that
                          narrows the deletion. When falsy, the key prefix parsed
                          from ``self.artifact_uri`` itself is used, i.e. every
                          object listed under the repository root is deleted.
    """
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)

    s3_client = self._get_s3_client()
    # A single list call returns at most 1000 keys, so walk every page;
    # otherwise large artifact trees would only be partially deleted.
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=dest_path):
        # "Contents" is absent from the response when nothing matches the prefix.
        for to_delete_obj in page.get("Contents", []):
            file_path = to_delete_obj.get("Key")
            # Check the listed key against the requested prefix before deleting it
            # (presumably raises on a mismatch -- helper is defined elsewhere).
            self._verify_listed_object_contains_artifact_path_prefix(
                listed_object_path=file_path, artifact_path=dest_path
            )
            s3_client.delete_object(Bucket=bucket, Key=file_path)
29 changes: 29 additions & 0 deletions tests/store/artifact/test_s3_artifact_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,32 @@ def test_get_s3_file_upload_extra_args_invalid_json():

with pytest.raises(ValueError):
S3ArtifactRepository.get_s3_file_upload_extra_args()


def test_delete_artifacts(s3_artifact_root, tmpdir):
    """Log a small directory tree to S3, then check delete_artifacts() empties it."""
    # Local layout: subdir/{a.txt, b.tar.gz, nested/c.csv}
    local_dir = str(tmpdir.mkdir("subdir"))
    local_nested_dir = os.path.join(local_dir, "nested")
    os.makedirs(local_nested_dir)

    text_file = os.path.join(local_dir, "a.txt")
    archive_file = os.path.join(local_dir, "b.tar.gz")
    csv_file = os.path.join(local_nested_dir, "c.csv")

    with open(text_file, "w") as handle:
        handle.write("A")
    # The archive wraps a.txt, so it has to be created after that file exists.
    with tarfile.open(archive_file, "w:gz") as archive:
        archive.add(text_file)
    with open(csv_file, "w") as handle:
        handle.write("col1,col2\n1,3\n2,4\n")

    repo_uri = posixpath.join(s3_artifact_root, "some/path")
    repo = get_artifact_repository(repo_uri)
    repo.log_artifacts(local_dir)

    # confirm that artifacts are present
    logged_names = [entry.path for entry in repo.list_artifacts()]
    for expected_name in ("a.txt", "b.tar.gz", "nested"):
        assert expected_name in logged_names

    # With no artifact_path, everything under the repository root should go.
    repo.delete_artifacts()
    remaining = repo.list_artifacts()
    assert not remaining

0 comments on commit 01a8c7f

Please sign in to comment.