fix(sdk): remove duplicate generate_id functions, replace shortuuid with secrets (#4676)

fix(sdk): remove duplicate generate_id functions, replace shortuuid with secrets
wandb · Jan 4, 2023 · 4f272d3 · 4f272d3
1 parent d3e1e9e
commit 4f272d3
Show file tree

Hide file tree

Showing 30 changed files with 113 additions and 112 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,6 @@
 Click>=7.0,!=8.0.0  # click 8.0.0 is broken
 GitPython>=1.0.0
 requests>=2.0.0,<3
-shortuuid>=0.5.0
 psutil>=5.0.0
 sentry-sdk>=1.0.0
 docker-pycreds>=0.4.0

diff --git a/tests/standalone_tests/artifact_references.py b/tests/standalone_tests/artifact_references.py
@@ -4,11 +4,12 @@
 from filecmp import dircmp
 
 import wandb
+from wandb.sdk.lib import runid
 
 # These should have bucket versioning enabled
 GCS_BUCKET = "gs://wandb-experiments"
 S3_BUCKET = "s3://kubeml"
-PREFIX = wandb.util.generate_id()
+PREFIX = runid.generate_id()
 GCS_NAME = f"gcs-artifact-{PREFIX}"
 S3_NAME = f"s3-artifact-{PREFIX}"
 GCS_REMOTE = f"{GCS_BUCKET}/artifact-versions/{PREFIX}"

diff --git a/tests/standalone_tests/resuming_and_reinit.py b/tests/standalone_tests/resuming_and_reinit.py
@@ -2,10 +2,11 @@
 import time
 
 import wandb
+from wandb.sdk.lib import runid
 
 
 def main(args):
-    run_id = wandb.util.generate_id()
+    run_id = runid.generate_id()
     try:
         wandb.init(project="resuming", resume="must", id=run_id)
     except wandb.Error:

diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
@@ -48,7 +48,7 @@
 from wandb.sdk.internal.handler import HandleManager
 from wandb.sdk.internal.sender import SendManager
 from wandb.sdk.internal.settings_static import SettingsStatic
-from wandb.sdk.lib import filesystem
+from wandb.sdk.lib import filesystem, runid
 from wandb.sdk.lib.git import GitRepo
 from wandb.sdk.lib.mailbox import Mailbox
 
@@ -413,7 +413,7 @@ def mock_run_fn(use_magic_mock=False, **kwargs: Any) -> "wandb.sdk.wandb_run.Run
         kwargs_settings = kwargs.pop("settings", dict())
         kwargs_settings = {
             **{
-                "run_id": wandb.util.generate_id(),
+                "run_id": runid.generate_id(),
             },
             **kwargs_settings,
         }

diff --git a/tests/unit_tests/lib/test_runid.py b/tests/unit_tests/lib/test_runid.py
@@ -0,0 +1,13 @@
+from wandb.sdk.lib import runid
+
+
+def test_generate_id_is_base36():
+    # Given reasonable randomness assumptions, generating a 1000-digit string should
+    # hit all 36 characters at least once >99.99999999% of the time.
+    new_id = runid.generate_id(1000)
+    assert len(new_id) == 1000
+    assert set(new_id) == set("0123456789abcdefghijklmnopqrstuvwxyz")
+
+
+def test_generate_id_default_8_chars():
+    assert len(runid.generate_id()) == 8
diff --git a/tests/unit_tests/test_offline_sync.py b/tests/unit_tests/test_offline_sync.py
@@ -2,7 +2,7 @@
 
 import pytest
 from wandb.cli import cli
-from wandb.util import generate_id
+from wandb.sdk.lib.runid import generate_id
 
 
 @pytest.mark.flaky

diff --git a/tests/unit_tests/test_public_api.py b/tests/unit_tests/test_public_api.py
@@ -8,6 +8,7 @@
 import wandb.apis.public
 import wandb.util
 from wandb import Api
+from wandb.sdk.lib import runid
 
 from .test_wandb_sweep import (
     SWEEP_CONFIG_BAYES,
@@ -161,7 +162,7 @@ def test_run_from_tensorboard(runner, relay_server, user, api, copy_asset):
     with relay_server() as relay, runner.isolated_filesystem():
         tb_file_name = "events.out.tfevents.1585769947.cvp"
         copy_asset(tb_file_name)
-        run_id = wandb.util.generate_id()
+        run_id = runid.generate_id()
         api.sync_tensorboard(".", project="test", run_id=run_id)
         uploaded_files = relay.context.get_run_uploaded_files(run_id)
         assert uploaded_files[0].endswith(tb_file_name)

diff --git a/tests/unit_tests/test_redir_full.py b/tests/unit_tests/test_redir_full.py
@@ -10,6 +10,7 @@
 import wandb.util
 from click.testing import CliRunner
 from wandb.cli import cli
+from wandb.sdk.lib import runid
 
 console_modes = ["wrap"]
 if os.name != "nt":
@@ -93,7 +94,7 @@ def test_very_long_output(wandb_init, capfd, console, numpy):
                 settings={
                     "console": console,
                     "mode": "offline",
-                    "run_id": wandb.util.generate_id(),
+                    "run_id": runid.generate_id(),
                 }
             )
             run_dir, run_id = run.dir, run.id

diff --git a/tests/unit_tests_old/conftest.py b/tests/unit_tests_old/conftest.py
@@ -31,7 +31,7 @@
 from wandb.sdk.internal.handler import HandleManager
 from wandb.sdk.internal.internal_api import Api as InternalApi
 from wandb.sdk.internal.sender import SendManager
-from wandb.sdk.lib import filesystem
+from wandb.sdk.lib import filesystem, runid
 from wandb.sdk.lib.git import GitRepo
 from wandb.sdk.lib.mailbox import Mailbox
 from wandb.sdk.lib.module import unset_globals
@@ -249,7 +249,7 @@ def test_settings(test_dir, mocker, live_mock_server):
         host="test",
         project="test",
         root_dir=test_dir,
-        run_id=wandb.util.generate_id(),
+        run_id=runid.generate_id(),
         save_code=False,
     )
     settings._set_run_start_time()

diff --git a/tests/unit_tests_old/tests_launch/test_launch.py b/tests/unit_tests_old/tests_launch/test_launch.py
@@ -9,7 +9,6 @@
 import wandb
 import wandb.sdk.launch._project_spec as _project_spec
 import wandb.sdk.launch.launch as launch
-import wandb.util as util
 import yaml
 from wandb.apis import PublicApi
 from wandb.errors import LaunchError
@@ -21,6 +20,7 @@
     PROJECT_DOCKER_ARGS,
     PROJECT_SYNCHRONOUS,
 )
+from wandb.sdk.lib import runid
 
 from tests.unit_tests_old.utils import fixture_open, notebook_path
 
@@ -832,7 +832,7 @@ def test_launch_full_build_new_image(
     api = wandb.sdk.internal.internal_api.Api(
         default_settings=test_settings, load_settings=False
     )
-    random_id = util.generate_id()
+    random_id = runid.generate_id()
     run = launch.run(
         api=api,
         uri="https://wandb.ai/mock_server_entity/test/runs/1",

diff --git a/wandb/apis/public.py b/wandb/apis/public.py
@@ -52,7 +52,7 @@
 from wandb.sdk.data_types._dtypes import InvalidType, Type, TypeRegistry
 from wandb.sdk.interface import artifacts
 from wandb.sdk.launch.utils import LAUNCH_DEFAULT_PROJECT, _fetch_git_repo, apply_patch
-from wandb.sdk.lib import filesystem, ipython, retry
+from wandb.sdk.lib import filesystem, ipython, retry, runid
 from wandb.sdk.lib.hashutil import b64_to_hex_id, hex_to_b64_id, md5_file_b64
 
 if TYPE_CHECKING:
@@ -485,7 +485,7 @@ def sync_tensorboard(self, root_dir, run_id=None, project=None, entity=None):
         """Sync a local directory containing tfevent files to wandb"""
         from wandb.sync import SyncManager  # noqa: F401  TODO: circular import madness
 
-        run_id = run_id or util.generate_id()
+        run_id = run_id or runid.generate_id()
         project = project or self.settings.get("project") or "uncategorized"
         entity = entity or self.default_entity
         # TODO: pipe through log_path to inform the user how to debug
@@ -1752,7 +1752,7 @@ def name(self, new_name):
     @classmethod
     def create(cls, api, run_id=None, project=None, entity=None):
         """Create a run for the given project"""
-        run_id = run_id or util.generate_id()
+        run_id = run_id or runid.generate_id()
         project = project or api.settings.get("project") or "uncategorized"
         mutation = gql(
             """

diff --git a/wandb/data_types.py b/wandb/data_types.py
@@ -47,6 +47,7 @@
 from .sdk.data_types.plotly import Plotly
 from .sdk.data_types.saved_model import _SavedModel
 from .sdk.data_types.video import Video
+from .sdk.lib import runid
 
 # Note: we are importing everything from the sdk/data_types to maintain a namespace for now.
 # Once we fully type this file and move it all into sdk, then we will need to clean up the
@@ -521,7 +522,7 @@ def bind_to_run(self, *args, **kwargs):
         # this code path will be ultimately removed. The 10k limit warning confuses
         # users given that we publicly say 200k is the limit.
         data = self._to_table_json(warn=False)
-        tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".table.json")
+        tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".table.json")
         data = _numpy_arrays_to_lists(data)
         with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
             util.json_dump_safer(data, fp)
@@ -655,9 +656,7 @@ def to_json(self, run_or_artifact):
                         "numpy",
                         required="Serializing numpy requires numpy to be installed",
                     )
-                    file_name = "{}_{}.npz".format(
-                        str(col_name), str(util.generate_id())
-                    )
+                    file_name = f"{str(col_name)}_{runid.generate_id()}.npz"
                     npz_file_name = os.path.join(MEDIA_TMP.name, file_name)
                     np.savez_compressed(
                         npz_file_name,
@@ -1063,7 +1062,7 @@ def __init__(self, data_or_path, sample_rate=None, caption=None):
                 required='Raw audio requires the soundfile package. To get it, run "pip install soundfile"',
             )
 
-            tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".wav")
+            tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".wav")
             soundfile.write(tmp_path, data_or_path, sample_rate)
             self._duration = len(data_or_path) / float(sample_rate)
 
@@ -1338,7 +1337,7 @@ def __init__(self, data_or_path):
             if "references" in b_json["roots"]:
                 b_json["roots"]["references"].sort(key=lambda x: x["id"])
 
-            tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".bokeh.json")
+            tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".bokeh.json")
             with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
                 util.json_dump_safer(b_json, fp)
             self._set_file(tmp_path, is_tmp=True, extension=".bokeh.json")
@@ -1419,7 +1418,7 @@ def _to_graph_json(self, run=None):
 
     def bind_to_run(self, *args, **kwargs):
         data = self._to_graph_json()
-        tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ".graph.json")
+        tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".graph.json")
         data = _numpy_arrays_to_lists(data)
         with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
             util.json_dump_safer(data, fp)
@@ -1985,7 +1984,7 @@ def from_obj(cls, py_obj):
     def to_json(self, artifact=None):
         res = super().to_json(artifact)
         if artifact is not None:
-            table_name = f"media/tables/t_{util.generate_id()}"
+            table_name = f"media/tables/t_{runid.generate_id()}"
             entry = artifact.add(self.params["table"], table_name)
             res["params"]["table"] = entry.path
         else:
@@ -2045,7 +2044,7 @@ def from_obj(cls, py_obj):
     def to_json(self, artifact=None):
         res = super().to_json(artifact)
         if artifact is not None:
-            table_name = f"media/tables/t_{util.generate_id()}"
+            table_name = f"media/tables/t_{runid.generate_id()}"
             entry = artifact.add(self.params["table"], table_name)
             res["params"]["table"] = entry.path
         else:

diff --git a/wandb/filesync/step_checksum.py b/wandb/filesync/step_checksum.py
@@ -6,9 +6,8 @@
 import threading
 from typing import TYPE_CHECKING, NamedTuple, Optional, Union, cast
 
-from wandb import util
 from wandb.filesync import dir_watcher, step_upload
-from wandb.sdk.lib import filesystem
+from wandb.sdk.lib import filesystem, runid
 
 if TYPE_CHECKING:
     import tempfile
@@ -73,7 +72,7 @@ def _thread_body(self) -> None:
                 if req.copy:
                     path = os.path.join(
                         self._tempdir.name,
-                        f"{util.generate_id()}-{req.save_name}",
+                        f"{runid.generate_id()}-{req.save_name}",
                     )
                     filesystem.mkdir_exists_ok(os.path.dirname(path))
                     try:

diff --git a/wandb/sdk/data_types/base_types/json_metadata.py b/wandb/sdk/data_types/base_types/json_metadata.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Type, Union
 
 from wandb import util
+from wandb.sdk.lib import runid
 
 from .._private import MEDIA_TMP
 from .media import Media
@@ -31,7 +32,7 @@ def __init__(self, val: dict) -> None:
         self._val = val
 
         ext = "." + self.type_name() + ".json"
-        tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ext)
+        tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ext)
         with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
             util.json_dump_uncompressed(self._val, fp)
         self._set_file(tmp_path, is_tmp=True, extension=ext)

diff --git a/wandb/sdk/data_types/helper_types/image_mask.py b/wandb/sdk/data_types/helper_types/image_mask.py
@@ -4,6 +4,7 @@
 
 import wandb
 from wandb import util
+from wandb.sdk.lib import runid
 
 from .._private import MEDIA_TMP
 from ..base_types.media import Media
@@ -53,30 +54,21 @@ class ImageMask(Media):
         ground_truth_mask[:25, 25:] = 2
         ground_truth_mask[25:, 25:] = 3
 
-        class_labels = {
-            0: "person",
-            1: "tree",
-            2: "car",
-            3: "road"
-        }
-
-        masked_image = wandb.Image(image, masks={
-            "predictions": {
-                "mask_data": predicted_mask,
-                "class_labels": class_labels
+        class_labels = {0: "person", 1: "tree", 2: "car", 3: "road"}
+
+        masked_image = wandb.Image(
+            image,
+            masks={
+                "predictions": {"mask_data": predicted_mask, "class_labels": class_labels},
+                "ground_truth": {"mask_data": ground_truth_mask, "class_labels": class_labels},
             },
-            "ground_truth": {
-                "mask_data": ground_truth_mask,
-                "class_labels": class_labels
-            }
-        })
-        wandb.log({"img_with_masks" : masked_image})
+        )
+        wandb.log({"img_with_masks": masked_image})
         ```
 
         ### Log a masked image inside a Table
         <!--yeadoc-test:log-image-mask-table-->
         ```python
-
         import numpy as np
         import wandb
 
@@ -95,30 +87,25 @@ class ImageMask(Media):
         ground_truth_mask[:25, 25:] = 2
         ground_truth_mask[25:, 25:] = 3
 
-        class_labels = {
-            0: "person",
-            1: "tree",
-            2: "car",
-            3: "road"
-        }
-
-        class_set = wandb.Classes([
-            {"name" : "person", "id" : 0},
-            {"name" : "tree", "id" : 1},
-            {"name" : "car", "id" : 2},
-            {"name" : "road", "id" : 3}
-        ])
-
-        masked_image = wandb.Image(image, masks={
-            "predictions": {
-                "mask_data": predicted_mask,
-                "class_labels": class_labels
+        class_labels = {0: "person", 1: "tree", 2: "car", 3: "road"}
+
+        class_set = wandb.Classes(
+            [
+                {"name": "person", "id": 0},
+                {"name": "tree", "id": 1},
+                {"name": "car", "id": 2},
+                {"name": "road", "id": 3},
+            ]
+        )
+
+        masked_image = wandb.Image(
+            image,
+            masks={
+                "predictions": {"mask_data": predicted_mask, "class_labels": class_labels},
+                "ground_truth": {"mask_data": ground_truth_mask, "class_labels": class_labels},
             },
-            "ground_truth": {
-                "mask_data": ground_truth_mask,
-                "class_labels": class_labels
-            }
-        }, classes=class_set)
+            classes=class_set,
+        )
 
         table = wandb.Table(columns=["image"])
         table.add_data(masked_image)
@@ -160,7 +147,7 @@ def __init__(self, val: dict, key: str) -> None:
             self._key = key
 
             ext = "." + self.type_name() + ".png"
-            tmp_path = os.path.join(MEDIA_TMP.name, util.generate_id() + ext)
+            tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ext)
 
             pil_image = util.get_module(
                 "PIL.Image",