Skip to content

Commit

Permalink
feat(data-pipeline): new datasets (#337)
Browse files Browse the repository at this point in the history
* feat(datasets): add new sets

* feat(data-pipeline): add new (reprojected) datasets
  • Loading branch information
pwambach committed Apr 27, 2020
1 parent 9fa16a9 commit 12d33ab
Show file tree
Hide file tree
Showing 44 changed files with 852 additions and 241 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ dist-electron
dist-src-electron
.vscode
storage/**/*.zip
download
*.ipynb
46 changes: 46 additions & 0 deletions ci/cloudbuild-tiles-reproject.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Cloud Build pipeline that turns the NetCDF files of one layer into
# reprojected map tiles and uploads them to the tile bucket.
#
# Substitutions used below: _LAYER_ID, _VARIABLE_ID, _ZOOM_LEVELS, _MIN, _MAX,
# _MIN_LON, _MIN_LAT, _MAX_LON, _MAX_LAT and _VERSION.
steps:
  # Copy the layer's NetCDF files from the bucket into /data/netcdfs.
  # `mkdir -p` keeps the step from failing if the folder already exists.
  - name: gcr.io/cloud-builders/gsutil
    id: 'download-netcdfs'
    entrypoint: '/bin/bash'
    args:
      - "-c"
      - "mkdir -p /data/netcdfs && gsutil -m cp -r gs://esa-cfs-cate-data/${_LAYER_ID}/*.nc /data/netcdfs/"

  # Run data/write-zarr.py inside the cate conda environment.
  - name: gcr.io/esa-climate-from-space/cate:latest
    id: 'cate-export-data-cube'
    entrypoint: '/bin/bash'
    args:
      - "-c"
      - "conda run -n cate-env python data/write-zarr.py --layer ${_LAYER_ID} --variable ${_VARIABLE_ID} --zoom-levels ${_ZOOM_LEVELS} --min ${_MIN} --max ${_MAX}"

  # Run data/gdal-reproject.sh; the four bounding-box values are passed
  # together as ONE space-separated argument.
  - name: geographica/gdal2:2.4.0
    id: 'gdal-generate-tiles'
    entrypoint: '/bin/bash'
    args:
      - "./data/gdal-reproject.sh"
      - "${_VARIABLE_ID}"
      - "${_MIN_LON} ${_MIN_LAT} ${_MAX_LON} ${_MAX_LAT}"
      - "${_ZOOM_LEVELS}"

  # Run data/prepare-tile-upload.sh for this variable and layer.
  - name: gcr.io/esa-climate-from-space/tile-mover
    id: 'prepare-upload'
    entrypoint: '/bin/bash'
    args:
      - "./data/prepare-tile-upload.sh"
      - "${_VARIABLE_ID}"
      - "${_LAYER_ID}"

  # Copy everything below /data/upload/<layer> into the versioned tile bucket.
  - name: gcr.io/cloud-builders/gsutil
    id: 'upload-to-storage'
    args:
      - "-m"
      - "cp"
      - "-r"
      - "/data/upload/${_LAYER_ID}/*"
      - "gs://esa-cfs-tiles/${_VERSION}/${_LAYER_ID}/"

options:
  diskSizeGb: 50
  # Declared under `options` so the volume is mounted at /data for the steps
  # above, which exchange their files through that path.
  volumes:
    - name: 'vol1'
      path: '/data'
17 changes: 17 additions & 0 deletions data/add-time-coordinate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import os
import xarray as xr
import pandas as pd
from argparse import ArgumentParser

# Command line: the NetCDF file to rewrite in place and the timestamp that
# becomes the single value of its new leading "time" dimension.
parser = ArgumentParser(
    description="Add a one-element time dimension to a NetCDF file in place."
)
parser.add_argument("-f", "--file", dest="file", required=True)
parser.add_argument("-t", "--timestamp", dest="timestamp", required=True)
args = parser.parse_args()

# xarray reads lazily, so the source file must still exist while the result is
# written. Write to a sibling temp file first and swap it in afterwards; a
# failed write then leaves the original download untouched.
tmp_file = args.file + ".tmp"

# decode_cf=False: open the data as stored, without CF decoding.
with xr.open_dataset(args.file, decode_cf=False) as ds:
    new_time = pd.to_datetime([args.timestamp])
    expanded = ds.expand_dims(dim={'time': new_time}, axis=0)
    expanded.to_netcdf(tmp_file, format='NETCDF4_CLASSIC', mode='w')

# The dataset is closed at this point; replace the input with the new file.
os.replace(tmp_file, args.file)
20 changes: 20 additions & 0 deletions data/downloads/odp-ftp-aerosol.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# Download 25 consecutive monthly AATSR aerosol files (v4.21) from the CEDA
# FTP server, beginning with 2002-07. Each file is saved as
# ./download/YYYYMMDD.nc, passed through drop-unused-vars.py for AOD550_mean
# and then through add-time-coordinate.py with the month's ISO date.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/AATSR_SU/L3/v4.21/MONTHLY/"
FIRST_MONTH=2002-07-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 24; offset++)); do
  year=$(shifted_date %Y "$offset")
  year_month=$(shifted_date %Y%m "$offset")
  iso_date=$(shifted_date %Y-%m-%d "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}${year}/${year_month}-ESACCI-L3C_AEROSOL-AER_PRODUCTS-AATSR_ENVISAT-SU_MONTHLY-v4.21.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable AOD550_mean
  python ./data/add-time-coordinate.py --file "$target" --timestamp "$iso_date"
done
18 changes: 18 additions & 0 deletions data/downloads/odp-ftp-cloud.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Download 11 consecutive monthly files from the CEDA FTP server, beginning
# with 1997-09, save each as ./download/YYYYMMDD.nc and pass it through
# drop-unused-vars.py for the chlor_a variable.
#
# NOTE(review): the script is named "cloud" but the URL below points at the
# ocean colour chlor_a product - confirm which dataset is intended.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/ocean_colour/data/v4.0-release/geographic/netcdf/chlor_a/monthly/v4.0/"
FIRST_MONTH=1997-09-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 10; offset++)); do
  year=$(shifted_date %Y "$offset")
  year_month=$(shifted_date %Y%m "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}${year}/ESACCI-OC-L3S-CHLOR_A-MERGED-1M_MONTHLY_4km_GEO_PML_OCx-${year_month}-fv4.0.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable chlor_a
done
18 changes: 18 additions & 0 deletions data/downloads/odp-ftp-fire.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Download 5 consecutive monthly MODIS burned-area grid files (v5.1) from the
# CEDA FTP server, beginning with 2001-01, save each as
# ./download/YYYYMMDD.nc and pass it through drop-unused-vars.py for the
# burned_area variable.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/grid/v5.1"
FIRST_MONTH=2001-01-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 4; offset++)); do
  year=$(shifted_date %Y "$offset")
  compact_date=$(shifted_date %Y%m%d "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}/${year}/${compact_date}-ESACCI-L4_FIRE-BA-MODIS-fv5.1.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable burned_area
done
21 changes: 21 additions & 0 deletions data/downloads/odp-ftp-greenland-ice.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
#
# Download seven Greenland surface-elevation-change files (v1.2) from the
# CEDA FTP server. Every file covers a five-year period and is stored as
# ./download/<end-year>-01-01.nc; afterwards add-time-coordinate.py is run on
# each file with that same date as timestamp.

URL_PREFIX="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/ice_sheets_greenland/data/greenland_surface_elevation_change/v1.2/RT_XO_"

TARGET_DIR=./download

# Periods as they appear in the remote file names (<first-year>_<last-year>).
PERIODS=(1996_2000 1997_2001 1998_2002 2003_2007 2004_2008 2005_2009 2006_2010)

# First pass: fetch all files.
for period in "${PERIODS[@]}"; do
  # Strip everything up to the underscore to get the period's last year.
  stamp="${period#*_}-01-01"
  curl --silent "${URL_PREFIX}${period}.nc" > "$TARGET_DIR/$stamp.nc"
done

# Second pass: add the time coordinate to every downloaded file.
for period in "${PERIODS[@]}"; do
  stamp="${period#*_}-01-01"
  python ./data/add-time-coordinate.py --file "$TARGET_DIR/$stamp.nc" --timestamp "$stamp"
done
18 changes: 18 additions & 0 deletions data/downloads/odp-ftp-land-cover.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Download three yearly land cover maps (v2.0.7b) from the CEDA FTP server,
# beginning with 1992. Each file is saved as ./download/YYYYMMDD.nc, passed
# through drop-unused-vars.py for lccs_class and then through
# add-time-coordinate.py with the year's ISO date.

BASE_URL="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/land_cover/data/land_cover_maps/v2.0.7/ESACCI-LC-L4-LCCS-Map-300m-P1Y-"
START_DATE=1992-01-01
OUTPUT_FOLDER=./download

# The output redirect below fails if the target folder does not exist.
mkdir -p "$OUTPUT_FOLDER"

for i in {0..2}
do
  NEXT_YEAR=$(date +%Y -d "$START_DATE + $i year")
  # ISO date handed to add-time-coordinate.py; it has to be assigned here,
  # otherwise --timestamp is passed without a value and argparse aborts.
  NEXT_DATE=$(date +%Y-%m-%d -d "$START_DATE + $i year")
  FILENAME=$OUTPUT_FOLDER/$(date +%Y%m%d -d "$START_DATE + $i year").nc
  FTP_URL=${BASE_URL}${NEXT_YEAR}-v2.0.7b.nc
  echo "$FTP_URL"

  curl --silent "$FTP_URL" > "$FILENAME"

  python ./data/drop-unused-vars.py --file "$FILENAME" --variable lccs_class
  python ./data/add-time-coordinate.py --file "$FILENAME" --timestamp "$NEXT_DATE"
done
20 changes: 20 additions & 0 deletions data/downloads/odp-ftp-ozone.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# Download 11 consecutive monthly merged total-column ozone files (fv0100)
# from the CEDA FTP server, beginning with 2001-04. Each file is saved as
# ./download/YYYYMMDD.nc, passed through drop-unused-vars.py for
# atmosphere_mole_content_of_ozone and then through add-time-coordinate.py
# with the month's ISO date.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/ozone/data/total_columns/l3/merged/v0100"
FIRST_MONTH=2001-04-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 10; offset++)); do
  year=$(shifted_date %Y "$offset")
  compact_date=$(shifted_date %Y%m%d "$offset")
  iso_date=$(shifted_date %Y-%m-%d "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}/${year}/ESACCI-OZONE-L3S-TC-MERGED-DLR_1M-${compact_date}-fv0100.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable atmosphere_mole_content_of_ozone
  python ./data/add-time-coordinate.py --file "$target" --timestamp "$iso_date"
done
18 changes: 18 additions & 0 deletions data/downloads/odp-ftp-permafrost.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Download six permafrost extent files (fv01.0) from the CEDA FTP server,
# beginning with 2003. Each file is saved as ./download/YYYYMMDD.nc and then
# passed through add-time-coordinate.py with the matching ISO date.

BASE_URL="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost/data/permafrost_extent/L4/area4/pp/v01.0/ESACCI-PERMAFROST-L4-MODIS-PFR-AREA4_PP"
START_DATE=2003-01-01
OUTPUT_FOLDER=./download

# The output redirect below fails if the target folder does not exist.
mkdir -p "$OUTPUT_FOLDER"

for i in {0..5}
do
  # The remote file name contains only the year, so the loop has to advance
  # by year; stepping by month would fetch the 2003 file six times and label
  # the copies with six different monthly timestamps.
  NEXT_YEAR=$(date +%Y -d "$START_DATE + $i year")
  NEXT_DATE=$(date +%Y-%m-%d -d "$START_DATE + $i year")
  FILENAME=$OUTPUT_FOLDER/$(date +%Y%m%d -d "$START_DATE + $i year").nc
  FTP_URL=${BASE_URL}-${NEXT_YEAR}-fv01.0.nc
  echo "$FTP_URL"

  curl --silent "$FTP_URL" > "$FILENAME"

  python ./data/add-time-coordinate.py --file "$FILENAME" --timestamp "$NEXT_DATE"
done
25 changes: 25 additions & 0 deletions data/downloads/odp-ftp-sea-ice.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# Download 6 consecutive monthly sea ice concentration files (AMSR, 25 km,
# v2.1) for both hemispheres from the CEDA FTP server, beginning with
# 2002-06. Files are saved as ./download/SH/YYYYMMDD.nc and
# ./download/NH/YYYYMMDD.nc.

BASE_URL_SH="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice/data/sea_ice_concentration/L4/amsr/25km/v2.1/SH"
BASE_URL_NH="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice/data/sea_ice_concentration/L4/amsr/25km/v2.1/NH"
START_DATE=2002-06-01
OUTPUT_FOLDER=./download

# -p creates ./download as well when it is missing and does not fail when the
# script is run a second time.
mkdir -p "$OUTPUT_FOLDER/NH" "$OUTPUT_FOLDER/SH"

for i in {0..5}
do
  # Remote files are grouped in <year>/<month> sub-folders.
  YEAR_MONTH_PATH=$(date +%Y/%m -d "$START_DATE + $i month")
  NEXT_DATE=$(date +%Y%m%d -d "$START_DATE + $i month")
  FILENAME_SH=$OUTPUT_FOLDER/SH/$NEXT_DATE.nc
  FILENAME_NH=$OUTPUT_FOLDER/NH/$NEXT_DATE.nc
  FTP_URL_SH=$BASE_URL_SH/$YEAR_MONTH_PATH/ESACCI-SEAICE-L4-SICONC-AMSR_25.0kmEASE2-SH-$NEXT_DATE-fv2.1.nc
  FTP_URL_NH=$BASE_URL_NH/$YEAR_MONTH_PATH/ESACCI-SEAICE-L4-SICONC-AMSR_25.0kmEASE2-NH-$NEXT_DATE-fv2.1.nc

  echo "$FTP_URL_SH"
  curl --silent "$FTP_URL_SH" > "$FILENAME_SH"
  # Variable filtering is intentionally left disabled, as in the original:
  # python ./data/drop-unused-vars.py --file $FILENAME_SH --variable ice_conc

  echo "$FTP_URL_NH"
  curl --silent "$FTP_URL_NH" > "$FILENAME_NH"
  # python ./data/drop-unused-vars.py --file $FILENAME_NH --variable ice_conc
done
20 changes: 20 additions & 0 deletions data/downloads/odp-ftp-sea-level.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# Download 11 consecutive monthly sea level anomaly files (MSLA, fv02) from
# the CEDA FTP server, beginning with 1994-01-15. Each file is saved as
# ./download/YYYYMMDD.nc, passed through drop-unused-vars.py for sla and then
# through add-time-coordinate.py with the matching ISO date.

# No trailing slash: the URL below adds the separator itself, and a doubled
# slash would put an empty path segment into the FTP URL.
BASE_URL="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_level/data/L4/MSLA/v2.0"
START_DATE=1994-01-15
OUTPUT_FOLDER=./download

# The output redirect below fails if the target folder does not exist.
mkdir -p "$OUTPUT_FOLDER"

for i in {0..10}
do
  NEXT_YEAR=$(date +%Y -d "$START_DATE + $i month")
  NEXT_MONTH=$(date +%Y%m%d -d "$START_DATE + $i month")
  NEXT_DATE=$(date +%Y-%m-%d -d "$START_DATE + $i month")
  FILENAME=$OUTPUT_FOLDER/$NEXT_MONTH.nc
  # Remote names carry a full YYYYMMDDhhmmss stamp; the time part is always 000000.
  FTP_URL=$BASE_URL/$NEXT_YEAR/ESACCI-SEALEVEL-L4-MSLA-MERGED-${NEXT_MONTH}000000-fv02.nc
  echo "$FTP_URL"

  curl --silent "$FTP_URL" > "$FILENAME"

  python ./data/drop-unused-vars.py --file "$FILENAME" --variable sla
  python ./data/add-time-coordinate.py --file "$FILENAME" --timestamp "$NEXT_DATE"
done
19 changes: 19 additions & 0 deletions data/downloads/odp-ftp-sea-state.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# Download 11 consecutive monthly sea state files (v1.1, fv01) from the CEDA
# FTP server, beginning with 1993-01, save each as ./download/YYYYMMDD.nc and
# pass it through drop-unused-vars.py for the swh_mean variable.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_state/data/v1.1_release/l4/v1.1"
FIRST_MONTH=1993-01-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 10; offset++)); do
  year=$(shifted_date %Y "$offset")
  year_month=$(shifted_date %Y%m "$offset")
  # Computed as in the original script; not used further down.
  iso_date=$(shifted_date %Y-%m-%d "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}/${year}/ESACCI-SEASTATE-L4-SWH-MULTI_1M-${year_month}-fv01.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable swh_mean
done
22 changes: 22 additions & 0 deletions data/downloads/odp-ftp-soil-moisture.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Download six soil moisture files (COMBINED, v04.5) from the CEDA FTP
# server - the first-of-month file for 6 consecutive months beginning with
# 1987-11 - save each as ./download/YYYYMMDD.nc and pass it through
# drop-unused-vars.py for the sm variable.
#
# Example of a remote file URL (kept as a comment; as a bare line the shell
# would try to execute it as a command):
# ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/daily_files/COMBINED/v04.5/1988/ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-198802000000-fv04.5.nc

BASE_URL="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/daily_files/COMBINED/v04.5"
START_DATE=1987-11-01
OUTPUT_FOLDER=./download

# The output redirect below fails if the target folder does not exist.
mkdir -p "$OUTPUT_FOLDER"

for i in {0..5}
do
  NEXT_YEAR=$(date +%Y -d "$START_DATE + $i month")
  NEXT_MONTH=$(date +%Y%m%d -d "$START_DATE + $i month")
  FILENAME=$OUTPUT_FOLDER/$NEXT_MONTH.nc
  # Remote names carry a full YYYYMMDDhhmmss stamp; the time part is always 000000.
  FTP_URL="$BASE_URL/$NEXT_YEAR/ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-${NEXT_MONTH}000000-fv04.5.nc"
  echo "$FTP_URL"

  curl --silent "$FTP_URL" > "$FILENAME"

  python ./data/drop-unused-vars.py --file "$FILENAME" --variable sm
done
18 changes: 18 additions & 0 deletions data/downloads/odp-ftp-sss.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Download 4 consecutive monthly sea surface salinity files (v01.8, 30-day
# product) from the CEDA FTP server, beginning with 2010-01, save each as
# ./download/YYYYMMDD.nc and pass it through drop-unused-vars.py for the sss
# variable.

FTP_ROOT="ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_surface_salinity/data/v01.8/30days"
FIRST_MONTH=2010-01-01
TARGET_DIR=./download

# Print the date $2 months after FIRST_MONTH using the date(1) format $1.
shifted_date() {
  date "+$1" -d "$FIRST_MONTH + $2 month"
}

for ((offset = 0; offset <= 3; offset++)); do
  year=$(shifted_date %Y "$offset")
  compact_date=$(shifted_date %Y%m%d "$offset")
  target="$TARGET_DIR/$(shifted_date %Y%m%d "$offset").nc"
  url="${FTP_ROOT}/${year}/ESACCI-SEASURFACESALINITY-L4-SSS-MERGED_OI_Monthly_CENTRED_15Day_25km-${compact_date}-fv1.8.nc"
  echo "$url"

  curl --silent "$url" > "$target"

  python ./data/drop-unused-vars.py --file "$target" --variable sss
done
8 changes: 3 additions & 5 deletions data/drop-unused-vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@
parser.add_argument("-v", "--variable", dest="variable")
args = parser.parse_args()

vars_to_keep = [args.variable, 'lat', 'lon', 'time']

ds = xr.open_dataset(args.file)
drop_vars = [v for v in ds.variables if v not in vars_to_keep]
ds_new = ds.drop_vars(drop_vars)
ds = xr.open_dataset(args.file, decode_coords=False, decode_cf=False)
ds_new = ds[args.variable].to_dataset()
ds_new.attrs = ds.attrs

os.remove(args.file)
ds_new.to_netcdf(args.file, format='NETCDF4_CLASSIC', mode='w')
7 changes: 7 additions & 0 deletions data/gdal-colors/colors-PFR.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
100 0 0 255
80 0 155 100
60 100 155 0
40 255 255 0
20 155 255 0
1 0 255 0
nv 0 255 255 0
6 changes: 6 additions & 0 deletions data/gdal-colors/colors-SEC.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
2 0 255 0
1 255 255 0
0 0 0 255
-1 255 255 0
-2 0 255 255
nv 0 0 0 0
3 changes: 3 additions & 0 deletions data/gdal-colors/colors-greenland.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1.1 255 0 255
-1.8 0 255 0
nv 0 0 0 0
3 changes: 3 additions & 0 deletions data/gdal-colors/colors-ice_conc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
10000 255 255 0
0 255 0 0
nv 0 255 255 0
38 changes: 38 additions & 0 deletions data/gdal-colors/colors-lccs_class.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
0.00000 0 0 0
10.000000 255 255 100
11.000000 255 255 100
12.000000 255 255 0
20.000000 170 240 240
30.000000 220 240 100
40.000000 200 200 100
50.000000 0 100 0
60.000000 0 160 0
61.000000 0 160 0
62.000000 170 200 0
70.000000 0 60 0
71.000000 0 60 0
72.000000 0 80 0
80.000000 40 80 0
81.000000 40 80 0
82.000000 40 100 0
90.000000 120 130 0
100.00000 140 160 0
110.00000 190 150 0
120.00000 150 100 0
121.00000 120 75 0
122.00000 150 100 0
130.00000 255 180 50
140.00000 255 220 210
150.00000 255 235 175
151.00000 255 200 100
152.00000 255 210 120
153.00000 255 235 175
160.00000 0 120 90
170.00000 0 150 120
180.00000 0 220 130
190.00000 195 20 0
200.00000 255 245 215
201.00000 220 220 220
202.00000 255 245 215
210.00000 0 70 200
220.00000 255 255 255
Loading

0 comments on commit 12d33ab

Please sign in to comment.