Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

retry failed elevation tile download #4461

Merged
merged 5 commits into from
Dec 18, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 71 additions & 34 deletions scripts/valhalla_build_elevation
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ from multiprocessing.dummy import Pool as ThreadPool
import os
from pathlib import Path
import sys
from time import sleep
from typing import List, Iterable, Set
from urllib import request
from urllib.error import URLError
from urllib.error import URLError, HTTPError

# hack so ArgumentParser can accept negative numbers
# see https://github.com/valhalla/valhalla/issues/3426
Expand Down Expand Up @@ -53,6 +54,12 @@ class TileCompression(Enum):
return ".lz4"


class DownloadStatus(Enum):
    """Outcome of trying to download a single elevation tile.

    ``download`` starts out with ``FAILED`` and updates the status while it
    retries; the final value decides whether the tile is kept on disk.
    """

    # The tile was fetched and written to disk in the requested format.
    OK = 1
    # No attempt succeeded; this is also the status before the first attempt.
    FAILED = 2
    # The payload was received but gzip decompression raised, which most
    # likely means the download was corrupted.
    CORRUPTED = 3


parser = argparse.ArgumentParser(description=description)
method = parser.add_mutually_exclusive_group()
method.add_argument(
Expand Down Expand Up @@ -225,7 +232,7 @@ def get_tiles_with_graph(graph_dir: Path) -> Set[Tile]:
return tile_infos


def download(tile: Tile, output_dir, compression: TileCompression):
def download(tile: Tile, output_dir, compression: TileCompression) -> bool:
dest_directory = Path(output_dir, tile.dir)
dest_directory.mkdir(parents=True, exist_ok=True)

Expand All @@ -237,39 +244,69 @@ def download(tile: Tile, output_dir, compression: TileCompression):
url = f"https://elevation-tiles-prod.s3.us-east-1.amazonaws.com/skadi/{tile.dir}/{tile.name}.gz"

LOGGER.info(f"Downloading tile {tile.name}")
try:
with request.urlopen(url) as res, open(filepath, "wb") as f:
if compression is TileCompression.GZIP:
f.write(res.read())
else:
with gzip.GzipFile(fileobj=res, mode="rb") as gz:
uncompressed = gz.read()
if compression is TileCompression.UNCOMPRESSED:
f.write(uncompressed)
elif compression is TileCompression.LZ4:
# Compression level 6 was chosen after some benchmarking as the approx efficient frontier
# between compression time and space savings (decompression time is roughly constant regardless
# of level). The end result is larger than the maximally gzipped tiles from AWS, but only
# by around 12%.
import lz4.frame

with lz4.frame.LZ4FrameCompressor(
block_size=lz4.frame.BLOCKSIZE_MAX4MB, compression_level=6
) as compressor:
# Optimization: we know the exact size of every uncompressed hgt file
f.write(compressor.begin(25934402))
f.write(compressor.compress(uncompressed))
f.write(compressor.flush())

LOGGER.debug(f"Successfully downloaded tile {tile.name}")

return True
except URLError as e:
LOGGER.critical(f"Download failed of elevation tile {tile.dir}/{tile.name}: {e.reason}")

download_status = DownloadStatus.FAILED
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the diff just wraps the previous logic in a for loop: we make at most 15 requests with increasing sleeps in between (a back-off of 15·i² seconds before attempt i, so quadratic rather than truly exponential), adding up to roughly 4.2 hours before we give up. We could give up much earlier though :D

# tries for up to ~4.2 hours in total, sleeping 15 * i**2 seconds before attempt i (quadratic back-off)
for i in range(15):
try:
LOGGER.debug(f"Downloading tile {tile.dir}/{tile.name} for the {i}th time.")
sleep((i**2) / 2 * 30)
with request.urlopen(url) as res, open(filepath, "wb") as f:

if compression is TileCompression.GZIP:
f.write(res.read())
else:
with gzip.GzipFile(fileobj=res, mode="rb") as gz:
try:
uncompressed = gz.read()
except Exception as e:
Comment on lines +261 to +263
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Protect against corrupted downloads.

download_status = DownloadStatus.CORRUPTED
LOGGER.error(
f"Decompression error on tile {tile.dir}/{tile.name}: {e}. Likely a corrupted download."
)
continue
if compression is TileCompression.UNCOMPRESSED:
f.write(uncompressed)
elif compression is TileCompression.LZ4:
# Compression level 6 was chosen after some benchmarking as the approx efficient frontier
# between compression time and space savings (decompression time is roughly constant regardless
# of level). The end result is larger than the maximally gzipped tiles from AWS, but only
# by around 12%.
import lz4.frame

with lz4.frame.LZ4FrameCompressor(
block_size=lz4.frame.BLOCKSIZE_MAX4MB, compression_level=6
) as compressor:
# Optimization: we know the exact size of every uncompressed hgt file
f.write(compressor.begin(25934402))
f.write(compressor.compress(uncompressed))
f.write(compressor.flush())

download_status = DownloadStatus.OK
LOGGER.debug(f"Successfully downloaded tile {tile.dir}/{tile.name}")
except HTTPError as e:
LOGGER.error(f"Download failed with HTTP error {e.code}: {e.reason}.\nTrying again...")
continue
except URLError as e:
LOGGER.error(
f"Download failed of elevation tile {tile.dir}/{tile.name}: {e.reason}.\nTrying again.."
)
continue
except ImportError:
LOGGER.critical(
"Could not import lz4. Please install lz4 or use another compression format."
)
sys.exit(1)

if download_status == DownloadStatus.CORRUPTED:
LOGGER.error(f"Tile {tile.dir}/{tile.name} was corrupted, removing it...")
filepath.unlink()
Comment on lines +303 to +305
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the tile ended up corrupted, remove it from the file system.

return False
except ImportError:
LOGGER.critical("Could not import lz4. Please install lz4 or use another compression format.")
sys.exit(1)
elif download_status == DownloadStatus.FAILED:
LOGGER.error(f"Tile {tile.dir}/{tile.name} couldn't be downloaded...")
return False

return True


if __name__ == "__main__":
Expand Down
Loading