Reduce replicator.retries_per_request value from 10 to 5

Previously, an individual failed request would be retried 10 times in a row with an exponential backoff starting at 0.25 seconds. So the intervals in seconds would be: `0.25, 0.5, 1, 2, 4, 8, 16, 32, 64, 128`, for a total of about 250 seconds (or about 4 minutes). This made sense before the scheduling replicator because if a replication job had crashed in the startup phase enough times it would not be retried anymore. With a scheduling replicator, it makes more sense to stop the whole task and let the scheduling replicator retry later. `retries_per_request` then becomes something used mainly for short, intermittent network issues. The new retry schedule is `0.25, 0.5, 1, 2, 4`, or about 8 seconds. An additional benefit is that when the job is stopped more quickly, the user can find out about the problem sooner from the _scheduler/docs and _scheduler/jobs status endpoints and can rectify it. Otherwise, a single request retrying for 4 minutes would be reported there as a healthy, running job. Fixes apache#810
willholley · May 22, 2018 · 0a5aae3 · 0a5aae3
1 parent 2812ef7
commit 0a5aae3
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 3 deletions.
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
@@ -370,7 +370,7 @@ connection_timeout = 30000
 ; Request timeout
 ;request_timeout = infinity
 ; If a request fails, the replicator will retry it up to N times.
-retries_per_request = 10
+retries_per_request = 5
 ; Use checkpoints
 ;use_checkpoints = true
 ; Checkpoint interval

diff --git a/src/couch_replicator/src/couch_replicator_api_wrap.hrl b/src/couch_replicator/src/couch_replicator_api_wrap.hrl
@@ -21,7 +21,7 @@
     ],
     timeout,            % milliseconds
     ibrowse_options = [],
-    retries = 10,
+    retries = 5,
     wait = 250,         % milliseconds
     httpc_pool = nil,
     http_connections,

diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl
@@ -466,7 +466,7 @@ make_options(Props) ->
     DefBatchSize = config:get("replicator", "worker_batch_size", "500"),
     DefConns = config:get("replicator", "http_connections", "20"),
     DefTimeout = config:get("replicator", "connection_timeout", "30000"),
-    DefRetries = config:get("replicator", "retries_per_request", "10"),
+    DefRetries = config:get("replicator", "retries_per_request", "5"),
     UseCheckpoints = config:get("replicator", "use_checkpoints", "true"),
     DefCheckpointInterval = config:get("replicator", "checkpoint_interval",
         "30000"),