Skip to content

Commit

Permalink
Add retrying to Azure downloads (#249)
Browse files Browse the repository at this point in the history
* Add retrying to Azure downloads

* Add default timeouts to Faraday requests

* Stub rand() and not the whole method to avoid randomness

* These are specs, no reason to wait
  • Loading branch information
zwolf committed May 27, 2021
1 parent 33ea06b commit 9ac3207
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 3 deletions.
25 changes: 25 additions & 0 deletions app/lib/retryable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module Retryable
SLEEP_INTERVAL = 0.4

def with_retries(retries: 3, rescue_class: )
tries = 0

begin
yield
rescue *rescue_class => e
tries += 1
if tries <= retries
sleep sleep_interval(tries)
retry
else
raise
end
end
end

private

def sleep_interval(tries)
(SLEEP_INTERVAL + rand(0.0..1.0)) * tries
end
end
10 changes: 8 additions & 2 deletions app/services/data_exports/aggregate_metadata_file_generator.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
require 'csv'
require 'retryable'

module DataExports
# Helper class for aggregating metadata from individual transcriptions
# within a group/workflow/project into a single csv file
class AggregateMetadataFileGenerator
class << self
include Retryable
# Public: add metadata csv file to group folder
def generate_group_file(transcriptions, output_folder)
metadata_rows = compile_transcription_metadata(transcriptions)
Expand Down Expand Up @@ -42,8 +44,12 @@ def compile_transcription_metadata(transcriptions)
transcription.export_files.each do |storage_file|
is_transcription_metadata_file = metadata_file_regex.match storage_file.filename.to_s
if is_transcription_metadata_file
rows = CSV.parse(storage_file.download)

tmp_file = with_retries(
rescue_class: [Faraday::TimeoutError, Errno::ECONNREFUSED]
) do
storage_file.download
end
rows = CSV.parse(tmp_file)
# add header if it's the first transcription being added
metadata_rows << rows[0] if metadata_rows.empty?
# add content regardless
Expand Down
6 changes: 5 additions & 1 deletion app/services/data_exports/data_storage.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
require 'fileutils'
require 'securerandom'
require 'retryable'

module DataExports
class DataStorage
include Retryable

class NoStoredFilesFoundError < StandardError; end

Expand Down Expand Up @@ -91,7 +93,9 @@ def download_file_from_storage(storage_file, transcription_folder)
download_path = File.join(transcription_folder, storage_file.filename.to_s)
file = File.open(download_path, 'w')
begin
file.write(storage_file.download.force_encoding('UTF-8'))
with_retries(
rescue_class: [Faraday::TimeoutError, Errno::ECONNREFUSED]
) { file.write(storage_file.download.force_encoding('UTF-8')) }
rescue Encoding::UndefinedConversionError => exception
# build new exception with message including the problematic file
message = exception.message + ". Filename: #{storage_file.filename}, Blob path: #{storage_file.key}, Blob id: #{storage_file.blob_id}"
Expand Down
6 changes: 6 additions & 0 deletions config/initializers/faraday.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
require 'faraday'

# Sets a 5 second default timeout for read & write requests
# Faraday is used under the hood by ActiveStorage, and therefore Azure Blob Storage
Faraday.default_connection_options.request.open_timeout = 5
Faraday.default_connection_options.request.timeout = 5
39 changes: 39 additions & 0 deletions spec/lib/retryable_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
require 'rails_helper'
require 'retryable'

RSpec.describe Retryable do
class DummyClass
def process
with_retries(
rescue_class: [Faraday::TimeoutError, Errno::ECONNREFUSED]
) { self.call_me_maybe }
end

# Can't raise the exception directly or it won't retry
def call_me_maybe
return "success"
end
end

let(:dummy_class) { DummyClass.new }

before(:each) do
# No waiting for the interval, no retry timing randomness
stub_const("Retryable::SLEEP_INTERVAL", 0.0)
allow_any_instance_of(Object).to receive(:rand).and_return(0.0)
dummy_class.extend(Retryable)
end

it 'tries 4 times, then re-raises the error' do
expect(dummy_class).to receive(:call_me_maybe).exactly(4).times.and_raise(Faraday::TimeoutError)
expect {
expect(dummy_class.process)
}.to raise_error(Faraday::TimeoutError)
end

it 'retries once then succeeds' do
expect(dummy_class).to receive(:call_me_maybe).once.and_raise(Errno::ECONNREFUSED)
expect(dummy_class).to receive(:call_me_maybe).once.and_return("success")
expect(dummy_class.process).to eq("success")
end
end

0 comments on commit 9ac3207

Please sign in to comment.