Skip to content

Commit

Permalink
Retry requests on HTTP 503 and 504
Browse files Browse the repository at this point in the history
PR ostreedev#1594 added logic to retry downloads when they fail due to network
errors, or due to HTTP 408 Request Timeout.

In practice, pulling from Flathub frequently fails with 503 Service
Unavailable. This is a transient error, so it makes sense to retry when
it's encountered.

There is no good code in GIOErrorEnum for this, so this patch takes the
lazy route of mapping it to G_IO_ERROR_TIMED_OUT, which is already
treated as a transient error in _ostree_fetcher_should_retry_request().

While we're here, also map 504 Gateway Timeout to G_IO_ERROR_TIMED_OUT
(which does seem correct, if lossy).
  • Loading branch information
wjt committed May 1, 2019
1 parent 3ca1035 commit aac4364
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/libostree/ostree-fetcher-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ _ostree_fetcher_http_status_code_to_io_error (guint status_code)
case 410: /* SOUP_STATUS_GONE */
return G_IO_ERROR_NOT_FOUND;
case 408: /* SOUP_STATUS_REQUEST_TIMEOUT */
case 503: /* SOUP_STATUS_SERVICE_UNAVAILABLE */
case 504: /* SOUP_STATUS_GATEWAY_TIMEOUT */
return G_IO_ERROR_TIMED_OUT;
default:
return G_IO_ERROR_FAILED;
Expand Down
13 changes: 13 additions & 0 deletions src/ostree/ostree-trivial-httpd.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ static int opt_random_500s_percentage;
/* Upper bound on injected HTTP 500 responses (--random-500s-max, default 100). */
static int opt_random_500s_max = 100;
/* Approximate percentage of requests answered with HTTP 408 (--random-408s);
 * left at zero unless set on the command line. */
static int opt_random_408s_percentage;
/* Upper bound on injected HTTP 408 responses (--random-408s-max, default 100). */
static int opt_random_408s_max = 100;
/* Approximate percentage of requests answered with HTTP 503 (--random-503s);
 * left at zero unless set on the command line. */
static int opt_random_503s_percentage;
/* Upper bound on injected HTTP 503 responses (--random-503s-max, default 100). */
static int opt_random_503s_max = 100;
static gint opt_port = 0;
static gchar **opt_expected_cookies;
static gchar **opt_expected_headers;

/* Running totals of injected error responses, one counter per status code.
 * Each is compared against the matching *_max option before another error
 * of that kind is injected. */
static guint emitted_random_500s_count = 0;
static guint emitted_random_408s_count = 0;
static guint emitted_random_503s_count = 0;

typedef struct {
int root_dfd;
Expand All @@ -73,6 +76,8 @@ static GOptionEntry options[] = {
{ "force-range-requests", 0, 0, G_OPTION_ARG_NONE, &opt_force_ranges, "Force range requests by only serving half of files", NULL },
{ "random-500s", 0, 0, G_OPTION_ARG_INT, &opt_random_500s_percentage, "Generate random HTTP 500 errors approximately for PERCENTAGE requests", "PERCENTAGE" },
{ "random-500s-max", 0, 0, G_OPTION_ARG_INT, &opt_random_500s_max, "Limit HTTP 500 errors to MAX (default 100)", "MAX" },
{ "random-503s", 0, 0, G_OPTION_ARG_INT, &opt_random_503s_percentage, "Generate random HTTP 503 errors approximately for PERCENTAGE requests", "PERCENTAGE" },
{ "random-503s-max", 0, 0, G_OPTION_ARG_INT, &opt_random_503s_max, "Limit HTTP 503 errors to MAX (default 100)", "MAX" },
{ "random-408s", 0, 0, G_OPTION_ARG_INT, &opt_random_408s_percentage, "Generate random HTTP 408 errors approximately for PERCENTAGE requests", "PERCENTAGE" },
{ "random-408s-max", 0, 0, G_OPTION_ARG_INT, &opt_random_408s_max, "Limit HTTP 408 errors to MAX (default 100)", "MAX" },
{ "log-file", 0, 0, G_OPTION_ARG_FILENAME, &opt_log, "Put logs here (use - for stdout)", "PATH" },
Expand Down Expand Up @@ -304,6 +309,14 @@ do_get (OtTrivialHttpd *self,
soup_message_set_status (msg, SOUP_STATUS_REQUEST_TIMEOUT);
goto out;
}
else if (opt_random_503s_percentage > 0 &&
emitted_random_503s_count < opt_random_503s_max &&
g_random_int_range (0, 100) < opt_random_503s_percentage)
{
emitted_random_503s_count++;
soup_message_set_status (msg, SOUP_STATUS_SERVICE_UNAVAILABLE);
goto out;
}

while (path[0] == '/')
path++;
Expand Down
54 changes: 53 additions & 1 deletion tests/test-pull-repeated.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ set -euo pipefail

. $(dirname $0)/libtest.sh

echo "1..4"
echo "1..7"

COMMIT_SIGN="--gpg-homedir=${TEST_GPG_KEYHOME} --gpg-sign=${TEST_GPG_KEYID_1}"

Expand Down Expand Up @@ -98,3 +98,55 @@ ${CMD_PREFIX} ostree --repo=repo rev-parse main

popd
echo "ok big number of retries with one 408"

# Sanity check with no network retries and 503s given, pull should fail.
# Remove state left over from the previous test case before setting up a new remote.
rm ostree-srv httpd repo -rf
# --random-503s=99 asks the test HTTP server to answer roughly 99% of requests
# with 503 (presumably forwarded to ostree-trivial-httpd by the helper; the
# helper itself lives in libtest.sh).
setup_fake_remote_repo1 "archive" "${COMMIT_SIGN}" --random-503s=99

pushd ${test_tmpdir}
ostree_repo_init repo --mode=archive
${CMD_PREFIX} ostree --repo=repo remote add --set=gpg-verify=false origin $(cat httpd-address)/ostree/gnomerepo
# With retries disabled the pull is expected to fail; stderr is captured so the
# failure reason can be checked.
assert_fail ${CMD_PREFIX} ostree --repo=repo pull --mirror origin --network-retries=0 main 2>err.txt
# Accept either wording of the 503 error message.
assert_file_has_content err.txt "\(503.*Service Unavailable\)\|\(HTTP 503\)"

popd
echo "ok no retries after a 503"

# Test pulling a repo which gives error 503 (service unavailable) a lot of the time.
# Remove state left over from the previous test case before setting up a new remote.
rm ostree-srv httpd repo -rf
# Roughly half of all requests are answered with 503.
setup_fake_remote_repo1 "archive" "${COMMIT_SIGN}" --random-503s=50

pushd ${test_tmpdir}
ostree_repo_init repo --mode=archive
${CMD_PREFIX} ostree --repo=repo remote add --set=gpg-verify=false origin $(cat httpd-address)/ostree/gnomerepo
# Re-run the whole pull up to 40 times, each run allowing 2 network retries,
# and stop at the first run that succeeds.
for x in $(seq 40); do
if ${CMD_PREFIX} ostree --repo=repo pull --mirror origin --network-retries=2 main 2>err.txt; then
echo "Success on iteration ${x}"
break;
fi
# A failed run must have failed because of a 503, not for some other reason.
assert_file_has_content err.txt "\(503.*Service Unavailable\)\|\(HTTP 503\)"
done

# The loop itself does not report exhaustion of all 40 attempts; the checks
# below are what verify that the pull eventually produced a consistent repo
# containing the "main" ref.
${CMD_PREFIX} ostree --repo=repo fsck
${CMD_PREFIX} ostree --repo=repo rev-parse main

popd
echo "ok repeated pull after 503s"

# Test pulling a repo that gives 503s a lot of the time, with many network retries.
# Remove state left over from the previous test case before setting up a new remote.
rm ostree-srv httpd repo -rf
# Roughly half of all requests are answered with 503.
setup_fake_remote_repo1 "archive" "${COMMIT_SIGN}" --random-503s=50

pushd ${test_tmpdir}
ostree_repo_init repo --mode=archive
${CMD_PREFIX} ostree --repo=repo remote add --set=gpg-verify=false origin $(cat httpd-address)/ostree/gnomerepo

# Using 8 network retries gives error rate of <0.5%, when --random-503s=50
# A single pull is attempted here, with no outer retry loop: the in-client
# retry logic alone is expected to get past the injected 503s.
${CMD_PREFIX} ostree --repo=repo pull --mirror origin --network-retries=8 main
echo "Success with big number of network retries"

# Verify the resulting repo is consistent and contains the "main" ref.
${CMD_PREFIX} ostree --repo=repo fsck
${CMD_PREFIX} ostree --repo=repo rev-parse main

popd
echo "ok big number of retries with one 503"

0 comments on commit aac4364

Please sign in to comment.