# Search and download data from Digital Earth Africa

https://docs.digitalearthafrica.org/en/latest/data_specs/Landsat_WOfS_specs.html#

In [65]:
import sys, os, importlib
import rasterio, boto3

import pandas as pd
import geopandas as gpd

# Config and UNSIGNED come from botocore (the library boto3 is built on);
# without these imports the client construction below raises NameError.
from botocore import UNSIGNED
from botocore.config import Config

# S3 location to search: bucket name, key prefix and AWS region
bucket = 'deafrica-services'
prefix = 'wofs_ls_summary_annual'
region = 'af-south-1'
# signature_version=UNSIGNED sends anonymous requests, so no AWS credentials are needed
s3client = boto3.client('s3', region_name=region, config=Config(signature_version=UNSIGNED))

In [128]:
# Define your input variables
iso3 = 'MWI'  # matched against the 'iso_a3' column of the country layer
year = 2020
# File-name suffix to download. It is compared with the LAST 14 CHARACTERS of
# each file name, so the other valid values are 'ount_clear.tif' and '_count_wet.tif'.
product = '_frequency.tif'
out_folder = f"/home/wb411133/temp/{iso3}"
# exist_ok=True removes the check-then-create race and is a no-op if the folder exists
os.makedirs(out_folder, exist_ok=True)

In [104]:
# Load the tile footprints; each 'region_code' looks like "<col>_<row>"
in_extents = gpd.read_file("wofs_ls_summary_alltime-regions-deafrica-data.geojson")
code_parts = in_extents['region_code'].str.split("_")
# Fixed offsets (+181 for columns, +77 for rows) shift the region_code indices
# to the numbering used for downloads
in_extents['COL'] = code_parts.str[0].astype(int) + 181
in_extents['ROW'] = code_parts.str[1].astype(int) + 77
in_extents['COL_ROW'] = in_extents['COL'].astype(str) + "_" + in_extents['ROW'].astype(str)

# Load country boundaries, pick the requested country, and keep only the
# tiles whose footprint intersects it
adm0 = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
sel_extent = adm0.loc[adm0['iso_a3'] == iso3]
country_shape = sel_extent.unary_union
sel_tiles = in_extents.loc[in_extents.intersects(country_shape)]

In [105]:
# Preview the first rows of the tiles selected for the country
sel_tiles.head()

Unnamed: 0,region_code,label,count,geometry,COL,ROW,COL_ROW
2757,32_-13,32_-13,1,"POLYGON ((31.83872 -9.06690, 32.83368 -9.06690...",213,64,213_64
2762,32_-18,32_-18,1,"POLYGON ((31.83872 -12.89847, 32.83368 -12.898...",213,59,213_59
2763,32_-19,32_-19,1,"POLYGON ((31.83872 -13.67113, 32.83368 -13.671...",213,58,213_58
2839,33_-13,33_-13,1,"POLYGON ((32.83368 -9.06690, 33.82864 -9.06690...",214,64,214_64
2840,33_-14,33_-14,1,"POLYGON ((32.83368 -9.82949, 33.82864 -9.82949...",214,63,214_63


In [27]:
# List every object under the prefix and keep the entries whose key ends in .tif.
# The paginator follows the continuation tokens itself, so there is no manual
# token bookkeeping and no stale `token` from an earlier run to clean up.
good_res = []
paginator = s3client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
for loops, objects in enumerate(pages):
    # 'Contents' is absent from a response that matched no keys
    good_res.extend(res for res in objects.get('Contents', []) if res['Key'].endswith('.tif'))
    print(f"Completed loop: {loops}")

Completed loop: 0
Completed loop: 1
Completed loop: 2
Completed loop: 3
Completed loop: 4
Completed loop: 5
Completed loop: 6
Completed loop: 7
Completed loop: 8
Completed loop: 9
Completed loop: 10
Completed loop: 11
Completed loop: 12
Completed loop: 13
Completed loop: 14
Completed loop: 15
Completed loop: 16
Completed loop: 17
Completed loop: 18
Completed loop: 19
Completed loop: 20
Completed loop: 21
Completed loop: 22
Completed loop: 23
Completed loop: 24
Completed loop: 25
Completed loop: 26
Completed loop: 27
Completed loop: 28
Completed loop: 29
Completed loop: 30
Completed loop: 31
Completed loop: 32
Completed loop: 33
Completed loop: 34
Completed loop: 35
Completed loop: 36
Completed loop: 37
Completed loop: 38
Completed loop: 39
Completed loop: 40
Completed loop: 41
Completed loop: 42
Completed loop: 43
Completed loop: 44
Completed loop: 45
Completed loop: 46
Completed loop: 47
Completed loop: 48
Completed loop: 49
Completed loop: 50
Completed loop: 51
Completed loop: 52
Com

Completed loop: 416
Completed loop: 417
Completed loop: 418
Completed loop: 419
Completed loop: 420
Completed loop: 421
Completed loop: 422
Completed loop: 423
Completed loop: 424
Completed loop: 425
Completed loop: 426
Completed loop: 427
Completed loop: 428
Completed loop: 429
Completed loop: 430
Completed loop: 431
Completed loop: 432
Completed loop: 433
Completed loop: 434
Completed loop: 435
Completed loop: 436
Completed loop: 437
Completed loop: 438
Completed loop: 439
Completed loop: 440
Completed loop: 441
Completed loop: 442
Completed loop: 443
Completed loop: 444
Completed loop: 445
Completed loop: 446
Completed loop: 447
Completed loop: 448
Completed loop: 449
Completed loop: 450
Completed loop: 451
Completed loop: 452
Completed loop: 453
Completed loop: 454
Completed loop: 455
Completed loop: 456
Completed loop: 457
Completed loop: 458
Completed loop: 459
Completed loop: 460
Completed loop: 461
Completed loop: 462
Completed loop: 463
Completed loop: 464
Completed loop: 465


In [118]:
# Build a table of the listed objects and derive tile/product/year columns
# from the "/"-separated parts of each key
s3res = pd.DataFrame(good_res)
key_parts = s3res['Key'].str.split("/")
# Parts 2 and 3 look like "x154" / "y096": drop the leading letter, parse the number
s3res['COL'] = key_parts.str[2].str[1:].astype(int)
s3res['ROW'] = key_parts.str[3].str[1:].astype(int)
s3res['COL_ROW'] = s3res['COL'].astype(str) + "_" + s3res['ROW'].astype(str)
# TYPE is the last 14 characters of the file name (so longer suffixes are truncated)
s3res['TYPE'] = key_parts.str[-1].str[-14:]
# YEAR stays a string: the first 4 characters of part 4
s3res['YEAR'] = key_parts.str[4].str[:4]

s3res.head()

Unnamed: 0,Key,LastModified,ETag,Size,StorageClass,COL,ROW,COL_ROW,TYPE,YEAR
0,wofs_ls_summary_annual/1-0-0/x154/y096/2013--P...,2021-09-08 02:33:16+00:00,"""d32ed33cb46effee73892e43e5da2e98""",390002,STANDARD,154,96,154_96,ount_clear.tif,2013
1,wofs_ls_summary_annual/1-0-0/x154/y096/2013--P...,2021-09-08 02:33:16+00:00,"""e5eeea7c323a984c4bb159b1cb8ea3c1""",312745,STANDARD,154,96,154_96,_count_wet.tif,2013
2,wofs_ls_summary_annual/1-0-0/x154/y096/2013--P...,2021-09-08 02:33:18+00:00,"""a9ba372ec73d855e2783f285a0935834""",983081,STANDARD,154,96,154_96,_frequency.tif,2013
3,wofs_ls_summary_annual/1-0-0/x154/y096/2014--P...,2021-09-08 02:35:41+00:00,"""c5feac7a5edc8a9d8d1ff67fae83ab36""",485506,STANDARD,154,96,154_96,ount_clear.tif,2014
4,wofs_ls_summary_annual/1-0-0/x154/y096/2014--P...,2021-09-08 02:35:40+00:00,"""84adbc9a2c8ede3d1addd8b87a7d5ff5""",411290,STANDARD,154,96,154_96,_count_wet.tif,2014


In [130]:
# Keep only the objects for the selected tiles, the requested year and product
in_area = s3res['COL_ROW'].isin(sel_tiles['COL_ROW'])
in_year = s3res['YEAR'] == str(year)  # YEAR is stored as a string
is_product = s3res['TYPE'] == product
cur_s3 = s3res.loc[in_area & in_year & is_product]

In [131]:
# Download each selected object into out_folder, keeping its S3 file name
total = len(cur_s3)  # row count; the original printed the (rows, cols) shape tuple
for loop, key in enumerate(cur_s3['Key'], start=1):
    out_file = os.path.join(out_folder, os.path.basename(key))
    s3client.download_file(bucket, key, out_file)
    print(f'{loop} of {total}')

1 of (27, 10)
2 of (27, 10)
3 of (27, 10)
4 of (27, 10)
5 of (27, 10)
6 of (27, 10)
7 of (27, 10)
8 of (27, 10)
9 of (27, 10)
10 of (27, 10)
11 of (27, 10)
12 of (27, 10)
13 of (27, 10)
14 of (27, 10)
15 of (27, 10)
16 of (27, 10)
17 of (27, 10)
18 of (27, 10)
19 of (27, 10)
20 of (27, 10)
21 of (27, 10)
22 of (27, 10)
23 of (27, 10)
24 of (27, 10)
25 of (27, 10)
26 of (27, 10)
27 of (27, 10)
