<a href="https://colab.research.google.com/github/yassinehammoud/R-Bridge-Tutorial-Notebooks/blob/master/Maxar_download_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Step 1: Set Up the Colab Environment
#
# Installs the system tools, Python packages, DuckDB CLI and the Maxar
# open-data catalogue used by the following steps. Every command is safe to
# re-run, so "Restart and run all" works on an already-prepared runtime.

# Install necessary system packages
!sudo apt-get update
!sudo apt-get install -y gdal-bin jq libtiff-tools python3-virtualenv

# Python packages. requests, rich and typer are imported by download.py;
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install requests rich shapely typer duckdb

# Download and configure the DuckDB v0.9.1 command-line client.
# wget -c resumes/skips an already downloaded archive.
!wget -c https://github.com/duckdb/duckdb/releases/download/v0.9.1/duckdb_cli-linux-amd64.zip
# -o overwrites without asking, so a re-run does not stall on unzip's
# interactive "replace duckdb?" prompt; -j drops the archive's directory paths.
!unzip -o -j duckdb_cli-linux-amd64.zip
!chmod +x duckdb
!./duckdb -c "INSTALL json; INSTALL spatial;"

# Create DuckDB config file (loads the json and spatial extensions)
!echo -e ".timer on\n.width 180\nLOAD json;\nLOAD spatial;" > ~/.duckdbrc

# Clone the Maxar open data repository, unless a previous run already did:
# git clone refuses to write into an existing non-empty directory.
!test -d maxar-open-data || git clone https://github.com/opengeos/maxar-open-data/
!mkdir -p maxar-open-data/downloads

In [None]:
%%writefile download.py
# @title Step 2: Download Script
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from   glob     import glob
import json
from   os       import makedirs, path, remove, replace
from   random   import shuffle
import requests

from   rich.progress import track
import typer


# Typer command-line application; functions decorated with @app.command()
# are registered on it. rich_markup_mode='rich' asks Typer to render help
# text using Rich markup.
app = typer.Typer(rich_markup_mode='rich')


def _fetch_to_file(url, output_path, timeout):
    """Stream ``url`` into ``output_path``; return True on success, False otherwise.

    The body is written to ``output_path + '.part'`` and only renamed to
    ``output_path`` after the whole response has been received, so an
    interrupted transfer never leaves a truncated file that a later run would
    mistake for a finished download. Network failures and non-2xx HTTP
    statuses are printed and reported as False; filesystem errors propagate.
    """
    partial_path = output_path + '.part'
    chunk_size = 1024 * 1024  # stream in 1 MiB pieces instead of buffering the whole image

    try:
        with requests.get(url, timeout=timeout, stream=True) as r:
            # Without this an HTML/XML error page would be saved as the image.
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
    except requests.RequestException as exc:
        print(exc)
        if path.isfile(partial_path):
            remove(partial_path)
        return False

    replace(partial_path, output_path)
    return True


@app.command()
def download(timeout: int = 60,
             event: str = 'Morocco-Earthquake-Sept-2023'):
    """Download the 'visual' images listed in the GeoJSON catalogues of one event.

    Every ``maxar-open-data/datasets/*.geojson`` file whose path contains
    ``event`` (and does not contain 'union') is read, and the URL stored in
    each feature's ``properties.visual`` is saved under
    ``maxar-open-data/downloads/<catalogue name>/<date>/``. Files that already
    exist are skipped, so the command can be re-run to resume. Catalogues and
    URLs are processed in random order.

    ``timeout`` is the per-request timeout in seconds passed to requests.
    ``event`` is the substring a catalogue path must contain to be selected.
    """
    geojsons = [x
                for x in glob('maxar-open-data/datasets/*.geojson')
                if 'union' not in x and event in x]
    shuffle(geojsons)

    for geojson in geojsons:
        print(geojson)
        # Context manager closes the catalogue file as soon as it is parsed.
        with open(geojson, 'r', encoding='utf-8') as f:
            recs = json.load(f)

        urls = [x['properties']['visual']
                for x in recs['features']]
        shuffle(urls)

        # Catalogue file name up to its first dot becomes the output folder.
        folder = 'maxar-open-data/downloads/' + path.basename(geojson).split('.')[0]
        makedirs(folder, exist_ok=True)

        for url in track(urls, description='Downloading..'):
            # The second-to-last URL component is used as the per-date
            # sub-folder; it must have the length of a YYYY-MM-DD date.
            date = url.split('/')[-2]
            assert len(date) == len('YYYY-MM-DD')
            # Flatten everything after the last '/ard' into a single file name.
            out_filename = 'ard' + url.split('/ard')[-1].replace('/', '_')
            output_path = '%s/%s/%s' % (folder, date, out_filename)

            makedirs(folder + '/' + date, exist_ok=True)

            if path.isfile(output_path):
                continue

            print(out_filename)

            # Best effort: a failed URL is reported by the helper and skipped.
            _fetch_to_file(url, output_path, timeout)


# Run the Typer application only when this file is executed as a script
# (e.g. `python download.py`), not when it is imported.
if __name__ == "__main__":
    app()


In [None]:
# @title Step 3: Run the Download Script

# Execute download.py (the script written by the Step 2 cell) with its
# default options.
!python download.py