Skip to content

Commit

Permalink
Add timeout support to ansible-test. (ansible#53302)
Browse files Browse the repository at this point in the history
* Add timeout support to ansible-test.
* Fix ansible-test tar filename filter bug.
* Update timeouts used on Shippable.
* Kill subprocesses when parent process terminates.
* Require explicit use of env --show option.
  • Loading branch information
mattclay committed Mar 5, 2019
1 parent 44b347a commit a8d829d
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 21 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ test/units/.coverage.*
/test/integration/cloud-config-azure.yml
/SYMLINK_CACHE.json
changelogs/.plugin-cache.yaml
.ansible-test-timeout.json
7 changes: 7 additions & 0 deletions test/runner/lib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from lib.env import (
EnvConfig,
command_env,
configure_timeout,
)

from lib.sanity import (
Expand Down Expand Up @@ -95,6 +96,7 @@ def main():
display.color = config.color
display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets)
check_startup()
configure_timeout(config)

display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2)
display.info('MAXFD: %d' % MAXFD, verbosity=2)
Expand Down Expand Up @@ -509,6 +511,11 @@ def parse_args():
action='store_true',
help='dump environment to disk')

env.add_argument('--timeout',
type=int,
metavar='MINUTES',
help='timeout for future ansible-test commands (0 clears)')

if argcomplete:
argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True)

Expand Down
3 changes: 3 additions & 0 deletions test/runner/lib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
# Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True.
# This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time.
SOFT_RLIMIT_NOFILE = 1024

# File used to track the ansible-test test execution timeout.
TIMEOUT_PATH = '.ansible-test-timeout.json'
129 changes: 128 additions & 1 deletion test/runner/lib/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@

import datetime
import json
import functools
import os
import platform
import re
import signal
import sys
import time

from lib.config import (
CommonConfig,
TestConfig,
)

from lib.util import (
Expand All @@ -34,6 +38,18 @@
docker_version
)

from lib.thread import (
WrappedThread,
)

from lib.constants import (
TIMEOUT_PATH,
)

from lib.test import (
TestTimeout,
)


class EnvConfig(CommonConfig):
"""Configuration for the tools command."""
Expand All @@ -43,14 +59,26 @@ def __init__(self, args):
"""
super(EnvConfig, self).__init__(args, 'env')

self.show = args.show or not args.dump
self.show = args.show
self.dump = args.dump
self.timeout = args.timeout


def command_env(args):
"""
:type args: EnvConfig
"""
show_dump_env(args)
set_timeout(args)


def show_dump_env(args):
"""
:type args: EnvConfig
"""
if not args.show and not args.dump:
return

data = dict(
ansible=dict(
version=get_ansible_version(args),
Expand Down Expand Up @@ -84,6 +112,105 @@ def command_env(args):
results_fd.write(json.dumps(data, sort_keys=True))


def set_timeout(args):
"""
:type args: EnvConfig
"""
if args.timeout is None:
return

if args.timeout:
deadline = (datetime.datetime.utcnow() + datetime.timedelta(minutes=args.timeout)).strftime('%Y-%m-%dT%H:%M:%SZ')

display.info('Setting a %d minute test timeout which will end at: %s' % (args.timeout, deadline), verbosity=1)
else:
deadline = None

display.info('Clearing existing test timeout.', verbosity=1)

if args.explain:
return

if deadline:
data = dict(
duration=args.timeout,
deadline=deadline,
)

with open(TIMEOUT_PATH, 'w') as timeout_fd:
json.dump(data, timeout_fd, indent=4, sort_keys=True)
elif os.path.exists(TIMEOUT_PATH):
os.remove(TIMEOUT_PATH)


def get_timeout():
"""
:rtype: dict[str, any] | None
"""
if not os.path.exists(TIMEOUT_PATH):
return None

with open(TIMEOUT_PATH, 'r') as timeout_fd:
data = json.load(timeout_fd)

data['deadline'] = datetime.datetime.strptime(data['deadline'], '%Y-%m-%dT%H:%M:%SZ')

return data


def configure_timeout(args):
"""
:type args: CommonConfig
"""
if isinstance(args, TestConfig):
configure_test_timeout(args) # only tests are subject to the timeout


def configure_test_timeout(args):
"""
:type args: TestConfig
"""
timeout = get_timeout()

if not timeout:
return

timeout_start = datetime.datetime.utcnow()
timeout_duration = timeout['duration']
timeout_deadline = timeout['deadline']
timeout_remaining = timeout_deadline - timeout_start

test_timeout = TestTimeout(timeout_duration)

if timeout_remaining <= datetime.timedelta():
test_timeout.write(args)

raise ApplicationError('The %d minute test timeout expired %s ago at %s.' % (
timeout_duration, timeout_remaining * -1, timeout_deadline))

display.info('The %d minute test timeout expires in %s at %s.' % (
timeout_duration, timeout_remaining, timeout_deadline), verbosity=1)

def timeout_handler(_dummy1, _dummy2):
"""Runs when SIGUSR1 is received."""
test_timeout.write(args)

raise ApplicationError('Tests aborted after exceeding the %d minute time limit.' % timeout_duration)

def timeout_waiter(timeout_seconds):
"""
:type timeout_seconds: int
"""
time.sleep(timeout_seconds)
os.kill(os.getpid(), signal.SIGUSR1)

signal.signal(signal.SIGUSR1, timeout_handler)

instance = WrappedThread(functools.partial(timeout_waiter, timeout_remaining.seconds))
instance.daemon = True
instance.start()


def show_dict(data, verbose, root_verbosity=0, path=None):
"""
:type data: dict[str, any]
Expand Down
9 changes: 7 additions & 2 deletions test/runner/lib/pytar.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
ABC,
)

from lib.constants import (
TIMEOUT_PATH,
)

# improve performance by disabling uid/gid lookups
tarfile.pwd = None
tarfile.grp = None
Expand Down Expand Up @@ -45,6 +49,7 @@ def __init__(self):
self.ignore_files = (
'.gitignore',
'.gitdir',
TIMEOUT_PATH,
)

self.ignore_extensions = (
Expand All @@ -58,7 +63,7 @@ def ignore(self, item):
:rtype: tarfile.TarInfo | None
"""
filename = os.path.basename(item.path)
name, ext = os.path.splitext(filename)
ext = os.path.splitext(filename)[1]
dirs = os.path.split(item.path)

if not item.isdir():
Expand All @@ -68,7 +73,7 @@ def ignore(self, item):
if item.path.startswith('./docs/docsite/_build/'):
return None

if name in self.ignore_files:
if filename in self.ignore_files:
return None

if ext in self.ignore_extensions:
Expand Down
38 changes: 36 additions & 2 deletions test/runner/lib/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,17 @@ def calculate_confidence(path, line, metadata):

class TestResult(object):
"""Base class for test results."""
def __init__(self, command, test, python_version=None):
def __init__(self, command, test, python_version=None, enable_junit=False):
"""
:type command: str
:type test: str
:type python_version: str
:type enable_junit: bool
"""
self.command = command
self.test = test
self.python_version = python_version
self.enable_junit = enable_junit
self.name = self.test or self.command

if self.python_version:
Expand All @@ -88,7 +90,7 @@ def write(self, args):
if args.lint:
self.write_lint()

if args.junit:
if args.junit or self.enable_junit:
if self.junit:
self.write_junit(args)
else:
Expand Down Expand Up @@ -159,6 +161,38 @@ def save_junit(self, args, test_case, properties=None):
xml.write(report.encode('utf-8', 'strict'))


class TestTimeout(TestResult):
"""Test timeout."""
def __init__(self, timeout_duration):
"""
:type timeout_duration: int
"""
super(TestTimeout, self).__init__(command='timeout', test='', enable_junit=True)

self.timeout_duration = timeout_duration

def write_junit(self, args):
"""
:type args: TestConfig
"""
message = 'Tests were aborted after exceeding the %d minute time limit.' % self.timeout_duration
output = '''One or more of the following situations may be responsible:
- Code changes have resulted in tests that hang or run for an excessive amount of time.
- Tests have been added which exceed the time limit when combined with existing tests.
- Test infrastructure and/or external dependencies are operating slower than normal.'''

if args.coverage:
output += '\n- Additional overhead from collecting code coverage has resulted in tests exceeding the time limit.'

output += '\n\nConsult the console log for additional details on where the timeout occurred.'

test_case = self.junit.TestCase(classname=self.command, name=self.name)
test_case.add_error_info(message, output)

self.save_junit(args, test_case)


class TestSuccess(TestResult):
"""Test success."""
def write_junit(self, args):
Expand Down
37 changes: 22 additions & 15 deletions test/runner/lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,23 +380,30 @@ def raw_command(cmd, capture=False, env=None, data=None, cwd=None, explain=False
stderr = None

start = time.time()
process = None

try:
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
except OSError as ex:
if ex.errno == errno.ENOENT:
raise ApplicationError('Required program "%s" not found.' % cmd[0])
raise

if communicate:
encoding = 'utf-8'
data_bytes = data.encode(encoding, 'surrogateescape') if data else None
stdout_bytes, stderr_bytes = process.communicate(data_bytes)
stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u''
stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u''
else:
process.wait()
stdout_text, stderr_text = None, None
try:
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
except OSError as ex:
if ex.errno == errno.ENOENT:
raise ApplicationError('Required program "%s" not found.' % cmd[0])
raise

if communicate:
encoding = 'utf-8'
data_bytes = data.encode(encoding, 'surrogateescape') if data else None
stdout_bytes, stderr_bytes = process.communicate(data_bytes)
stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u''
stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u''
else:
process.wait()
stdout_text, stderr_text = None, None
finally:
if process and process.returncode is None:
process.kill()
display.info('') # the process we're interrupting may have completed a partial line of output
display.notice('Killed command to avoid an orphaned child process during handling of an unexpected exception.')

status = process.returncode
runtime = time.time() - start
Expand Down
8 changes: 7 additions & 1 deletion test/utils/shippable/shippable.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ function cleanup

trap cleanup EXIT

ansible-test env --dump --show --color -v
if [[ "${COVERAGE:-}" ]]; then
timeout=60
else
timeout=45
fi

ansible-test env --dump --show --timeout "${timeout}" --color -v

"test/utils/shippable/${script}.sh" "${test}"
8 changes: 8 additions & 0 deletions test/utils/shippable/units.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,13 @@ IFS='/:' read -ra args <<< "$1"

version="${args[1]}"

if [[ "${COVERAGE:-}" ]]; then
timeout=90
else
timeout=10
fi

ansible-test env --timeout "${timeout}" --color -v

# shellcheck disable=SC2086
ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \

0 comments on commit a8d829d

Please sign in to comment.