Skip to content

Commit

Permalink
[WB-3722] diagnose/fix unittest flakiness (#1685)
Browse files Browse the repository at this point in the history
Added circleci-tool to help trigger workflows (down to individual tests) to find flaky tests
Add --flake-finder to help find flaky tests
Fix sender.py fixture to properly shut down BackendSender (fixes some flakiness)
Add debug info when live_mock_server fails to start
Give live_mock_server longer to start (might have been causing flake - time will tell)
Added junit.xml so circleci knows about our tests and can show cleaner test failures and possibly test history insights
  • Loading branch information
raubitsj committed Jan 11, 2021
1 parent 7bb1c3b commit bbdeea6
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 25 deletions.
104 changes: 91 additions & 13 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,77 @@ version: 2.1
orbs:
win: circleci/windows@2.4.0

parameters:
manual:
type: boolean
default: false
manual_test:
type: boolean
default: false
manual_win:
type: boolean
default: false
manual_mac:
type: boolean
default: false
manual_test_image:
type: string
default: "python:3.7"
manual_test_toxenv:
type: string
default: "py37"
manual_win_toxenv:
type: string
default: "py37"
manual_mac_toxenv:
type: string
default: "py37"
manual_test_name:
type: string
default: "Python 3.7 [MANUAL]"
manual_win_name:
type: string
default: "Windows (Python 3.7) [MANUAL]"
manual_mac_name:
type: string
default: "MacOS (Python 3.7) [MANUAL]"

commands:
save-tox-cache:
description: "Save tox environment to cache"
steps:
- save_cache:
paths:
- ./.tox
key: v0.6-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
- unless:
condition: << pipeline.parameters.manual >>
steps:
- save_cache:
paths:
- ./.tox
key: v0.7-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
restore-tox-cache:
description: "Restore tox environment from cache"
steps:
- restore_cache:
keys:
- v0.6-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
- v0.6-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-
- v0.6-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-
- v0.6-toxenv-master-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
- v0.6-toxenv-master-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-
- v0.6-toxenv-master-{{ .Environment.CIRCLE_JOB }}-
- v0.7-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
- v0.7-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-
- v0.7-toxenv-{{ .Environment.CIRCLE_BRANCH }}-{{ .Environment.CIRCLE_JOB }}-
- v0.7-toxenv-master-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-{{ checksum "setup.py" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements_dev.txt" }}
- v0.7-toxenv-master-{{ .Environment.CIRCLE_JOB }}-{{ checksum "tox.ini" }}-
- v0.7-toxenv-master-{{ .Environment.CIRCLE_JOB }}-
save-test-results:
description: "Save test results"
steps:
- unless:
condition: << pipeline.parameters.manual >>
steps:
- store_test_results:
path: test-results
- store_artifacts:
path: test-results

workflows:
main:
unless: << pipeline.parameters.manual >>
jobs:
- test:
name: "Linters"
Expand Down Expand Up @@ -52,13 +101,34 @@ workflows:
toxenv: "py39"
- win:
name: "Windows (Python 3.7)"
toxenv: "py37"
- mac:
name: "MacOS (Python 3.7)"
toxenv: "py37"
- final:
name: "Upload Coverage"
requires:
- "Python 2.7"
- "Python 3.6"
manual_test:
when: << pipeline.parameters.manual_test >>
jobs:
- test:
name: << pipeline.parameters.manual_test_name >>
image: << pipeline.parameters.manual_test_image >>
toxenv: << pipeline.parameters.manual_test_toxenv >>
manual_win:
when: << pipeline.parameters.manual_win >>
jobs:
- win:
name: << pipeline.parameters.manual_win_name >>
toxenv: << pipeline.parameters.manual_win_toxenv >>
manual_mac:
when: << pipeline.parameters.manual_mac >>
jobs:
- mac:
name: << pipeline.parameters.manual_mac_name >>
toxenv: << pipeline.parameters.manual_mac_toxenv >>

jobs:
test:
Expand Down Expand Up @@ -86,7 +156,11 @@ jobs:
tox -vv -e << parameters.toxenv >>
no_output_timeout: 10m
- save-tox-cache
- save-test-results
win:
parameters:
toxenv:
type: string
executor: win/default
steps:
- checkout
Expand All @@ -104,11 +178,14 @@ jobs:
name: Run tests
shell: bash.exe
command: |
tox -vv -e py37
tox -vv -e << parameters.toxenv >>
no_output_timeout: 10m
- save-tox-cache

- save-test-results
mac:
parameters:
toxenv:
type: string
macos:
xcode: 11.4.1
steps:
Expand All @@ -123,9 +200,10 @@ jobs:
# Tests failed with Too many open files, so added ulimit
command: |
ulimit -n 1024
python3 -m tox -vv -e py37
python3 -m tox -vv -e << parameters.toxenv >>
no_output_timeout: 10m
- save-tox-cache
- save-test-results
final:
docker:
- image: python:3.7
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ tests/logs/*
!tests/logs/cleanup.sh
pip-wheel-metadata
.vscode/.ropeproject
xcuserdata/
xcuserdata/
test-results/
27 changes: 22 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,17 @@ def reset_ctx():
server.reset_ctx = reset_ctx

started = False
for i in range(5):
for i in range(10):
try:
res = requests.get("%s/ctx" % server.base_url, timeout=1)
res = requests.get("%s/ctx" % server.base_url, timeout=5)
if res.status_code == 200:
started = True
break
print("Attempting to connect but got: %s" % res)
except requests.exceptions.RequestException:
print("Timed out waiting for server to start...")
print(
"Timed out waiting for server to start...", server.base_url, time.time()
)
if server.poll() is None:
time.sleep(1)
else:
Expand All @@ -108,6 +110,14 @@ def reset_ctx():
else:
server.terminate()
print("Server failed to launch, see tests/logs/live_mock_server.log")
try:
print("=" * 40)
with open("tests/logs/live_mock_server.log") as f:
for l in f.readlines():
print(l.strip())
print("=" * 40)
except Exception as e:
print("EXCEPTION:", e)
raise ValueError("Failed to start server! Exit code %s" % server.returncode)
return server

Expand All @@ -117,10 +127,17 @@ def reset_ctx():


@pytest.fixture
def test_dir(request):
def test_name(request):
# change "test[1]" to "test__1__"
name = urllib.parse.quote(request.node.name.replace("[", "__").replace("]", "__"))
return name


@pytest.fixture
def test_dir(test_name):
orig_dir = os.getcwd()
root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
test_dir = os.path.join(root, "tests", "logs", request.node.name)
test_dir = os.path.join(root, "tests", "logs", test_name)
if os.path.exists(test_dir):
shutil.rmtree(test_dir)
mkdir_exists_ok(test_dir)
Expand Down
1 change: 1 addition & 0 deletions tests/integrations/test_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def conv3x3(in_channels, out_channels, **kwargs):


def test_all_logging(wandb_init_run):
# TODO(jhr): does not work with --flake-finder
net = ConvNet()
wandb.watch(net, log="all", log_freq=1)
for i in range(3):
Expand Down
1 change: 1 addition & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ def test_local_already_running(runner, docker, local_settings):
reason="The patch in mock_server.py doesn't work in windows",
)
def test_restore_no_remote(runner, mock_server, git_repo, docker, monkeypatch):
# TODO(jhr): does not work with --flake-finder
with open("patch.txt", "w") as f:
f.write("test")
git_repo.repo.index.add(["patch.txt"])
Expand Down
10 changes: 9 additions & 1 deletion tests/test_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,15 @@ def stop_backend(
mocked_run, hm, sm, sender, start_handle_thread, start_send_thread,
):
def stop_backend_func():
sender.communicate_exit(0, timeout=5)
sender.publish_exit(0)
for _ in range(10):
ret = sender.communicate_poll_exit()
assert ret, "poll exit timedout"
done = ret.response.poll_exit_response.done
if done:
break
time.sleep(1)
assert done, "backend didnt shutdown"

yield stop_backend_func

Expand Down
9 changes: 5 additions & 4 deletions tests/wandb_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_resume_allow_success(live_mock_server, test_settings):
platform.system() == "Windows", reason="File syncing is somewhat busted in windows"
)
# TODO: Sometimes wandb-summary.json didn't exist, other times requirements.txt didn't (on Windows)
def test_parallel_runs(live_mock_server, test_settings):
def test_parallel_runs(request, live_mock_server, test_settings, test_name):
with open("train.py", "w") as f:
f.write(fixture_open("train.py").read())
p1 = subprocess.Popen(["python", "train.py"], env=os.environ)
Expand All @@ -70,12 +70,13 @@ def test_parallel_runs(live_mock_server, test_settings):
num_runs = 0
# Assert we've stored 2 runs worth of files
# TODO: not confirming output.log because it is missing sometimes likely due to a BUG
# TODO: code saving sometimes doesn't work?
files_sorted = sorted(
[
"wandb-metadata.json",
"code/tests/logs/test_parallel_runs/train.py",
"requirements.txt",
"config.yaml",
"code/tests/logs/{}/train.py".format(test_name),
"requirements.txt",
"wandb-metadata.json",
"wandb-summary.json",
]
)
Expand Down
1 change: 1 addition & 0 deletions tests/wandb_tensorflow_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def spy_cb(row, step=None):
reason="TF has sketchy support for py2. TODO: Windows is legitimately busted",
)
def test_compat_tensorboard(live_mock_server, test_settings):
# TODO(jhr): does not work with --flake-finder
# TODO: we currently don't unpatch tensorflow so this is the only test that can do it...
wandb.init(sync_tensorboard=True, settings=test_settings)

Expand Down

0 comments on commit bbdeea6

Please sign in to comment.