Skip to content

Commit

Permalink
fix(sdk): remove duplicate generate_id functions, replace shortuuid w…
Browse files Browse the repository at this point in the history
…ith secrets (#4676)

fix(sdk): remove duplicate generate_id functions, replace shortuuid with secrets
  • Loading branch information
moredatarequired committed Jan 4, 2023
1 parent d3e1e9e commit 4f272d3
Show file tree
Hide file tree
Showing 30 changed files with 113 additions and 112 deletions.
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Click>=7.0,!=8.0.0 # click 8.0.0 is broken
GitPython>=1.0.0
requests>=2.0.0,<3
shortuuid>=0.5.0
psutil>=5.0.0
sentry-sdk>=1.0.0
docker-pycreds>=0.4.0
Expand Down
3 changes: 2 additions & 1 deletion tests/standalone_tests/artifact_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from filecmp import dircmp

import wandb
from wandb.sdk.lib import runid

# These should have bucket versioning enabled
GCS_BUCKET = "gs://wandb-experiments"
S3_BUCKET = "s3://kubeml"
PREFIX = wandb.util.generate_id()
PREFIX = runid.generate_id()
GCS_NAME = f"gcs-artifact-{PREFIX}"
S3_NAME = f"s3-artifact-{PREFIX}"
GCS_REMOTE = f"{GCS_BUCKET}/artifact-versions/{PREFIX}"
Expand Down
3 changes: 2 additions & 1 deletion tests/standalone_tests/resuming_and_reinit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import time

import wandb
from wandb.sdk.lib import runid


def main(args):
run_id = wandb.util.generate_id()
run_id = runid.generate_id()
try:
wandb.init(project="resuming", resume="must", id=run_id)
except wandb.Error:
Expand Down
4 changes: 2 additions & 2 deletions tests/unit_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from wandb.sdk.internal.handler import HandleManager
from wandb.sdk.internal.sender import SendManager
from wandb.sdk.internal.settings_static import SettingsStatic
from wandb.sdk.lib import filesystem
from wandb.sdk.lib import filesystem, runid
from wandb.sdk.lib.git import GitRepo
from wandb.sdk.lib.mailbox import Mailbox

Expand Down Expand Up @@ -413,7 +413,7 @@ def mock_run_fn(use_magic_mock=False, **kwargs: Any) -> "wandb.sdk.wandb_run.Run
kwargs_settings = kwargs.pop("settings", dict())
kwargs_settings = {
**{
"run_id": wandb.util.generate_id(),
"run_id": runid.generate_id(),
},
**kwargs_settings,
}
Expand Down
13 changes: 13 additions & 0 deletions tests/unit_tests/lib/test_runid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from wandb.sdk.lib import runid


def test_generate_id_is_base36():
    """Check that generated IDs use exactly the lowercase base-36 alphabet."""
    # Assuming roughly uniform, independent draws, the chance that a
    # 1000-character ID misses at least one of the 36 symbols is about
    # 36 * (35 / 36) ** 1000, i.e. roughly 2e-11, so requiring every symbol
    # to appear is effectively deterministic.
    length = 1000
    base36_alphabet = set("0123456789abcdefghijklmnopqrstuvwxyz")
    generated = runid.generate_id(length)
    assert len(generated) == length
    assert set(generated) == base36_alphabet


def test_generate_id_default_8_chars():
    """Check that generate_id() called without arguments returns 8 characters."""
    default_id = runid.generate_id()
    assert len(default_id) == 8
2 changes: 1 addition & 1 deletion tests/unit_tests/test_offline_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
from wandb.cli import cli
from wandb.util import generate_id
from wandb.sdk.lib.runid import generate_id


@pytest.mark.flaky
Expand Down
3 changes: 2 additions & 1 deletion tests/unit_tests/test_public_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import wandb.apis.public
import wandb.util
from wandb import Api
from wandb.sdk.lib import runid

from .test_wandb_sweep import (
SWEEP_CONFIG_BAYES,
Expand Down Expand Up @@ -161,7 +162,7 @@ def test_run_from_tensorboard(runner, relay_server, user, api, copy_asset):
with relay_server() as relay, runner.isolated_filesystem():
tb_file_name = "events.out.tfevents.1585769947.cvp"
copy_asset(tb_file_name)
run_id = wandb.util.generate_id()
run_id = runid.generate_id()
api.sync_tensorboard(".", project="test", run_id=run_id)
uploaded_files = relay.context.get_run_uploaded_files(run_id)
assert uploaded_files[0].endswith(tb_file_name)
Expand Down
3 changes: 2 additions & 1 deletion tests/unit_tests/test_redir_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import wandb.util
from click.testing import CliRunner
from wandb.cli import cli
from wandb.sdk.lib import runid

console_modes = ["wrap"]
if os.name != "nt":
Expand Down Expand Up @@ -93,7 +94,7 @@ def test_very_long_output(wandb_init, capfd, console, numpy):
settings={
"console": console,
"mode": "offline",
"run_id": wandb.util.generate_id(),
"run_id": runid.generate_id(),
}
)
run_dir, run_id = run.dir, run.id
Expand Down
4 changes: 2 additions & 2 deletions tests/unit_tests_old/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from wandb.sdk.internal.handler import HandleManager
from wandb.sdk.internal.internal_api import Api as InternalApi
from wandb.sdk.internal.sender import SendManager
from wandb.sdk.lib import filesystem
from wandb.sdk.lib import filesystem, runid
from wandb.sdk.lib.git import GitRepo
from wandb.sdk.lib.mailbox import Mailbox
from wandb.sdk.lib.module import unset_globals
Expand Down Expand Up @@ -249,7 +249,7 @@ def test_settings(test_dir, mocker, live_mock_server):
host="test",
project="test",
root_dir=test_dir,
run_id=wandb.util.generate_id(),
run_id=runid.generate_id(),
save_code=False,
)
settings._set_run_start_time()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit_tests_old/tests_launch/test_launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import wandb
import wandb.sdk.launch._project_spec as _project_spec
import wandb.sdk.launch.launch as launch
import wandb.util as util
import yaml
from wandb.apis import PublicApi
from wandb.errors import LaunchError
Expand All @@ -21,6 +20,7 @@
PROJECT_DOCKER_ARGS,
PROJECT_SYNCHRONOUS,
)
from wandb.sdk.lib import runid

from tests.unit_tests_old.utils import fixture_open, notebook_path

Expand Down Expand Up @@ -832,7 +832,7 @@ def test_launch_full_build_new_image(
api = wandb.sdk.internal.internal_api.Api(
default_settings=test_settings, load_settings=False
)
random_id = util.generate_id()
random_id = runid.generate_id()
run = launch.run(
api=api,
uri="https://wandb.ai/mock_server_entity/test/runs/1",
Expand Down
6 changes: 3 additions & 3 deletions wandb/apis/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from wandb.sdk.data_types._dtypes import InvalidType, Type, TypeRegistry
from wandb.sdk.interface import artifacts
from wandb.sdk.launch.utils import LAUNCH_DEFAULT_PROJECT, _fetch_git_repo, apply_patch
from wandb.sdk.lib import filesystem, ipython, retry
from wandb.sdk.lib import filesystem, ipython, retry, runid
from wandb.sdk.lib.hashutil import b64_to_hex_id, hex_to_b64_id, md5_file_b64

if TYPE_CHECKING:
Expand Down Expand Up @@ -485,7 +485,7 @@ def sync_tensorboard(self, root_dir, run_id=None, project=None, entity=None):
"""Sync a local directory containing tfevent files to wandb"""
from wandb.sync import SyncManager # noqa: F401 TODO: circular import madness

run_id = run_id or util.generate_id()
run_id = run_id or runid.generate_id()
project = project or self.settings.get("project") or "uncategorized"
entity = entity or self.default_entity
# TODO: pipe through log_path to inform the user how to debug
Expand Down Expand Up @@ -1752,7 +1752,7 @@ def name(self, new_name):
@classmethod
def create(cls, api, run_id=None, project=None, entity=None):
"""Create a run for the given project"""
run_id = run_id or util.generate_id()
run_id = run_id or runid.generate_id()
project = project or api.settings.get("project") or "uncategorized"
mutation = gql(
"""
Expand Down
17 changes: 8 additions & 9 deletions wandb/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from .sdk.data_types.plotly import Plotly
from .sdk.data_types.saved_model import _SavedModel
from .sdk.data_types.video import Video
from .sdk.lib import runid

# Note: we are importing everything from the sdk/data_types to maintain a namespace for now.
# Once we fully type this file and move it all into sdk, then we will need to clean up the
Expand Down Expand Up @@ -521,7 +522,7 @@ def bind_to_run(self, *args, **kwargs):
# this code path will be ultimately removed. The 10k limit warning confuses
# users given that we publicly say 200k is the limit.
data = self._to_table_json(warn=False)
tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".table.json")
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".table.json")
data = _numpy_arrays_to_lists(data)
with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
util.json_dump_safer(data, fp)
Expand Down Expand Up @@ -655,9 +656,7 @@ def to_json(self, run_or_artifact):
"numpy",
required="Serializing numpy requires numpy to be installed",
)
file_name = "{}_{}.npz".format(
str(col_name), str(util.generate_id())
)
file_name = f"{str(col_name)}_{runid.generate_id()}.npz"
npz_file_name = os.path.join(MEDIA_TMP.name, file_name)
np.savez_compressed(
npz_file_name,
Expand Down Expand Up @@ -1063,7 +1062,7 @@ def __init__(self, data_or_path, sample_rate=None, caption=None):
required='Raw audio requires the soundfile package. To get it, run "pip install soundfile"',
)

tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".wav")
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".wav")
soundfile.write(tmp_path, data_or_path, sample_rate)
self._duration = len(data_or_path) / float(sample_rate)

Expand Down Expand Up @@ -1338,7 +1337,7 @@ def __init__(self, data_or_path):
if "references" in b_json["roots"]:
b_json["roots"]["references"].sort(key=lambda x: x["id"])

tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".bokeh.json")
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".bokeh.json")
with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
util.json_dump_safer(b_json, fp)
self._set_file(tmp_path, is_tmp=True, extension=".bokeh.json")
Expand Down Expand Up @@ -1419,7 +1418,7 @@ def _to_graph_json(self, run=None):

def bind_to_run(self, *args, **kwargs):
data = self._to_graph_json()
tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".graph.json")
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".graph.json")
data = _numpy_arrays_to_lists(data)
with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
util.json_dump_safer(data, fp)
Expand Down Expand Up @@ -1985,7 +1984,7 @@ def from_obj(cls, py_obj):
def to_json(self, artifact=None):
res = super().to_json(artifact)
if artifact is not None:
table_name = f"media/tables/t_{util.generate_id()}"
table_name = f"media/tables/t_{runid.generate_id()}"
entry = artifact.add(self.params["table"], table_name)
res["params"]["table"] = entry.path
else:
Expand Down Expand Up @@ -2045,7 +2044,7 @@ def from_obj(cls, py_obj):
def to_json(self, artifact=None):
res = super().to_json(artifact)
if artifact is not None:
table_name = f"media/tables/t_{util.generate_id()}"
table_name = f"media/tables/t_{runid.generate_id()}"
entry = artifact.add(self.params["table"], table_name)
res["params"]["table"] = entry.path
else:
Expand Down
5 changes: 2 additions & 3 deletions wandb/filesync/step_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import threading
from typing import TYPE_CHECKING, NamedTuple, Optional, Union, cast

from wandb import util
from wandb.filesync import dir_watcher, step_upload
from wandb.sdk.lib import filesystem
from wandb.sdk.lib import filesystem, runid

if TYPE_CHECKING:
import tempfile
Expand Down Expand Up @@ -73,7 +72,7 @@ def _thread_body(self) -> None:
if req.copy:
path = os.path.join(
self._tempdir.name,
f"{util.generate_id()}-{req.save_name}",
f"{runid.generate_id()}-{req.save_name}",
)
filesystem.mkdir_exists_ok(os.path.dirname(path))
try:
Expand Down
3 changes: 2 additions & 1 deletion wandb/sdk/data_types/base_types/json_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING, Type, Union

from wandb import util
from wandb.sdk.lib import runid

from .._private import MEDIA_TMP
from .media import Media
Expand Down Expand Up @@ -31,7 +32,7 @@ def __init__(self, val: dict) -> None:
self._val = val

ext = "." + self.type_name() + ".json"
tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ext)
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ext)
with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
util.json_dump_uncompressed(self._val, fp)
self._set_file(tmp_path, is_tmp=True, extension=ext)
Expand Down
71 changes: 29 additions & 42 deletions wandb/sdk/data_types/helper_types/image_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import wandb
from wandb import util
from wandb.sdk.lib import runid

from .._private import MEDIA_TMP
from ..base_types.media import Media
Expand Down Expand Up @@ -53,30 +54,21 @@ class ImageMask(Media):
ground_truth_mask[:25, 25:] = 2
ground_truth_mask[25:, 25:] = 3
class_labels = {
0: "person",
1: "tree",
2: "car",
3: "road"
}
masked_image = wandb.Image(image, masks={
"predictions": {
"mask_data": predicted_mask,
"class_labels": class_labels
class_labels = {0: "person", 1: "tree", 2: "car", 3: "road"}
masked_image = wandb.Image(
image,
masks={
"predictions": {"mask_data": predicted_mask, "class_labels": class_labels},
"ground_truth": {"mask_data": ground_truth_mask, "class_labels": class_labels},
},
"ground_truth": {
"mask_data": ground_truth_mask,
"class_labels": class_labels
}
})
wandb.log({"img_with_masks" : masked_image})
)
wandb.log({"img_with_masks": masked_image})
```
### Log a masked image inside a Table
<!--yeadoc-test:log-image-mask-table-->
```python
import numpy as np
import wandb
Expand All @@ -95,30 +87,25 @@ class ImageMask(Media):
ground_truth_mask[:25, 25:] = 2
ground_truth_mask[25:, 25:] = 3
class_labels = {
0: "person",
1: "tree",
2: "car",
3: "road"
}
class_set = wandb.Classes([
{"name" : "person", "id" : 0},
{"name" : "tree", "id" : 1},
{"name" : "car", "id" : 2},
{"name" : "road", "id" : 3}
])
masked_image = wandb.Image(image, masks={
"predictions": {
"mask_data": predicted_mask,
"class_labels": class_labels
class_labels = {0: "person", 1: "tree", 2: "car", 3: "road"}
class_set = wandb.Classes(
[
{"name": "person", "id": 0},
{"name": "tree", "id": 1},
{"name": "car", "id": 2},
{"name": "road", "id": 3},
]
)
masked_image = wandb.Image(
image,
masks={
"predictions": {"mask_data": predicted_mask, "class_labels": class_labels},
"ground_truth": {"mask_data": ground_truth_mask, "class_labels": class_labels},
},
"ground_truth": {
"mask_data": ground_truth_mask,
"class_labels": class_labels
}
}, classes=class_set)
classes=class_set,
)
table = wandb.Table(columns=["image"])
table.add_data(masked_image)
Expand Down Expand Up @@ -160,7 +147,7 @@ def __init__(self, val: dict, key: str) -> None:
self._key = key

ext = "." + self.type_name() + ".png"
tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ext)
tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ext)

pil_image = util.get_module(
"PIL.Image",
Expand Down

0 comments on commit 4f272d3

Please sign in to comment.