OPeNDAP Testing
===============

**Author:** Xavier R Nogueira

**Resources:**
* [Walk-thru / tutorial](https://www.earthdata.nasa.gov/engage/open-data-services-and-software/api/opendap).
* [User guide](https://www.earthdata.nasa.gov/opendap-user-guide) - inlcudes info on how to structure URLs for specific subsets.
* [Publication](https://www.opendap.org/pdf/ESE-RFC-004v1.1.pdf) - includes specific info on the protocol.
* opendap endpoint for testing: `http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz`

# Using a base OPeNDAP server via `requests` (broswer interface)

**Notes:**
* Responses are returned in binary, therefore to covert to traditional strings use `.decode('utf-8')`.

In [6]:
import requests

### Data Descriptor Structure (DDS) request

In [13]:
dds = requests.get(r'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.dds')

In [37]:
print(dir(dds))

['__attrs__', '__bool__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_content', '_content_consumed', '_next', 'apparent_encoding', 'close', 'connection', 'content', 'cookies', 'elapsed', 'encoding', 'headers', 'history', 'is_permanent_redirect', 'is_redirect', 'iter_content', 'iter_lines', 'json', 'links', 'next', 'ok', 'raise_for_status', 'raw', 'reason', 'request', 'status_code', 'text', 'url']


In [30]:
# content includes a "b" a t the start of the line
dds.content

b'Dataset {\n    Float32 lat[lat = 89];\n    Float32 lon[lon = 180];\n    Float64 time[time = 1857];\n    Float64 time_bnds[time = 1857][nbnds = 2];\n    Grid {\n      Array:\n        Int16 sst[time = 1857][lat = 89][lon = 180];\n      Maps:\n        Float64 time[time = 1857];\n        Float32 lat[lat = 89];\n        Float32 lon[lon = 180];\n    } sst;\n} sst.mnmean.nc;\n'

In [34]:
dds.text

'Dataset {\n    Float32 lat[lat = 89];\n    Float32 lon[lon = 180];\n    Float64 time[time = 1857];\n    Float64 time_bnds[time = 1857][nbnds = 2];\n    Grid {\n      Array:\n        Int16 sst[time = 1857][lat = 89][lon = 180];\n      Maps:\n        Float64 time[time = 1857];\n        Float32 lat[lat = 89];\n        Float32 lon[lon = 180];\n    } sst;\n} sst.mnmean.nc;\n'

In [44]:
# iterlines is a generator object
dds.iter_lines()

<generator object Response.iter_lines at 0x000001AD5056DA40>

In [67]:
for i in dds.iter_lines():
    print(i.decode('utf-8'))

Dataset {
    Float32 lat[lat = 89];
    Float32 lon[lon = 180];
    Float64 time[time = 1857];
    Float64 time_bnds[time = 1857][nbnds = 2];
    Grid {
      Array:
        Int16 sst[time = 1857][lat = 89][lon = 180];
      Maps:
        Float64 time[time = 1857];
        Float32 lat[lat = 89];
        Float32 lon[lon = 180];
    } sst;
} sst.mnmean.nc;


### Data Attribute Structure (DAS) request

In [39]:
das = requests.get(r'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.das')

In [68]:
for i in das.iter_lines():
    print(i.decode('utf-8'))

Attributes {
    lat {
        String units "degrees_north";
        String long_name "Latitude";
        Float32 actual_range 88.0000000, -88.0000000;
        String standard_name "latitude_north";
        String axis "y";
        String coordinate_defines "center";
    }
    lon {
        String units "degrees_east";
        String long_name "Longitude";
        Float32 actual_range 0.00000000, 358.000000;
        String standard_name "longitude_east";
        String axis "x";
        String coordinate_defines "center";
    }
    time {
        String units "days since 1800-1-1 00:00:00";
        String long_name "Time";
        Float64 actual_range 19723.00000000000, 76214.00000000000;
        String delta_t "0000-01-00 00:00:00";
        String avg_period "0000-01-00 00:00:00";
        String prev_avg_period "0000-00-07 00:00:00";
        String standard_name "time";
        String axis "t";
    }
    time_bnds {
        String long_name "Time Boundaries";
    }
    sst {
        S

### Getting a dimension's array

In [63]:
# lets get the lat longs of the dataset as zipped list
lats = requests.get(r'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?lat')

In [83]:
lats_list = [float(i) for i in lats.text.split(', ') if i.isnumeric()]

In [85]:
print(lats_list)

[88.0, 86.0, 84.0, 82.0, 80.0, 78.0, 76.0, 74.0, 72.0, 70.0, 68.0, 66.0, 64.0, 62.0, 60.0, 58.0, 56.0, 54.0, 52.0, 50.0, 48.0, 46.0, 44.0, 42.0, 40.0, 38.0, 36.0, 34.0, 32.0, 30.0, 28.0, 26.0, 24.0, 22.0, 20.0, 18.0, 16.0, 14.0, 12.0, 10.0, 8.0, 6.0, 4.0, 2.0, 0.0]


In [122]:
# lets get the lat longs of the dataset as zipped list
lons = requests.get(r'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?lon')

In [123]:
lons_list = [float(i) for i in lons.text.split(', ') if i.isnumeric()]

In [125]:
print(lons_list)

[0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0, 50.0, 52.0, 54.0, 56.0, 58.0, 60.0, 62.0, 64.0, 66.0, 68.0, 70.0, 72.0, 74.0, 76.0, 78.0, 80.0, 82.0, 84.0, 86.0, 88.0, 90.0, 92.0, 94.0, 96.0, 98.0, 100.0, 102.0, 104.0, 106.0, 108.0, 110.0, 112.0, 114.0, 116.0, 118.0, 120.0, 122.0, 124.0, 126.0, 128.0, 130.0, 132.0, 134.0, 136.0, 138.0, 140.0, 142.0, 144.0, 146.0, 148.0, 150.0, 152.0, 154.0, 156.0, 158.0, 160.0, 162.0, 164.0, 166.0, 168.0, 170.0, 172.0, 174.0, 176.0, 178.0, 180.0, 182.0, 184.0, 186.0, 188.0, 190.0, 192.0, 194.0, 196.0, 198.0, 200.0, 202.0, 204.0, 206.0, 208.0, 210.0, 212.0, 214.0, 216.0, 218.0, 220.0, 222.0, 224.0, 226.0, 228.0, 230.0, 232.0, 234.0, 236.0, 238.0, 240.0, 242.0, 244.0, 246.0, 248.0, 250.0, 252.0, 254.0, 256.0, 258.0, 260.0, 262.0, 264.0, 266.0, 268.0, 270.0, 272.0, 274.0, 276.0, 278.0, 280.0, 282.0, 284.0, 286.0, 288.0, 290.0, 292.0, 294.0, 296.0, 298.0, 300.

### Getting a subset of `sst` data using dimension indexing

* Note that the first item is time (always?), the second is latitude, the third is longitude. The latter two order seems to match the return from the DDS.
* **The indexing is inclusive!** For example [0:1:5] included 6 records not 5.
* **The middle number in the indexing controls the step!**
* **32767** represent missing data!

In [224]:
subset = 'sst[0:1:5][5:1:9][0:1:10]'

# note that the first item is time, the second is latitude, the third is longitude
data_response = requests.get(f'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?{subset}')

In [225]:
data_response.url

'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?sst%5B0:1:5%5D%5B5:1:9%5D%5B0:1:10%5D'

In [226]:
print(str(data_response.content.decode('utf-8')))

Dataset: sst.mnmean.nc
sst.lon, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20
sst.sst[sst.time=19723][sst.lat=78], -65, -10, 49, 94, 176, 147, 136, 135, 79, -180, -176
sst.sst[sst.time=19723][sst.lat=76], -9, 36, 100, 122, 159, 146, 155, 220, 183, 89, -124
sst.sst[sst.time=19723][sst.lat=74], 10, 55, 126, 212, 239, 229, 260, 345, 338, 223, 176
sst.sst[sst.time=19723][sst.lat=72], 168, 212, 270, 332, 369, 379, 409, 441, 453, 443, 384
sst.sst[sst.time=19723][sst.lat=70], 365, 391, 422, 448, 476, 495, 499, 497, 503, 495, 32767
sst.sst[sst.time=19754][sst.lat=78], -87, -37, 22, 69, 155, 126, 105, 77, -50, -180, -180
sst.sst[sst.time=19754][sst.lat=76], -30, 13, 77, 103, 145, 135, 139, 195, 150, -54, -180
sst.sst[sst.time=19754][sst.lat=74], -3, 43, 113, 199, 229, 223, 255, 338, 326, 206, 159
sst.sst[sst.time=19754][sst.lat=72], 157, 204, 260, 319, 357, 372, 405, 436, 443, 427, 368
sst.sst[sst.time=19754][sst.lat=70], 349, 378, 408, 432, 461, 483, 484, 477, 477, 468, 32767
sst.sst[sst.time=19782][s

### Getting the same subset of `sst` data using Hydax geospatial query

* Hydrax is a DAP server developed by the core project devs. **Essentially it allows one to query georeferenced data using lat/lon coordinates.**
* **Syntax:** `?geogrid(grid variable, upper lat, left lon, lower lat, right lon, *other expressions in double quotes.)`
    * Other expressions can include variable conditional queries (i.e., `"197<time<=203"`).
    * Other expressions must be comma delineated.
* **The double quotes and brackets can mess up the string url. We must use a URL parser like `urllib.parse.quote(url, safe=':/')`**

In [142]:
import urllib.parse

In [214]:
request_str = 'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?geogrid(sst,78,0,70,20,"19723<time<19874")'
print(request_str)

http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?geogrid(sst,78,0,70,20,"19723<time<19874")


In [215]:
# parse the url to ignore the double quotes
url_encoded = urllib.parse.quote(request_str, safe=':/')
print(url_encoded)

http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii%3Fgeogrid%28sst%2C78%2C0%2C70%2C20%2C%2219723%3Ctime%3C19874%22%29


In [216]:
# note that the first item is time, the second is latitude, the third is longitude
data_response = requests.get(url_encoded)

In [217]:
print(data_response.content.decode('utf-8'))

Dataset: function_result_sst.mnmean.nc
sst.lon, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20
sst.sst[sst.time=19754][sst.lat=78], -87, -37, 22, 69, 155, 126, 105, 77, -50, -180, -180
sst.sst[sst.time=19754][sst.lat=76], -30, 13, 77, 103, 145, 135, 139, 195, 150, -54, -180
sst.sst[sst.time=19754][sst.lat=74], -3, 43, 113, 199, 229, 223, 255, 338, 326, 206, 159
sst.sst[sst.time=19754][sst.lat=72], 157, 204, 260, 319, 357, 372, 405, 436, 443, 427, 368
sst.sst[sst.time=19754][sst.lat=70], 349, 378, 408, 432, 461, 483, 484, 477, 477, 468, 32767
sst.sst[sst.time=19782][sst.lat=78], -93, -45, 14, 65, 160, 139, 118, 99, 24, -180, -180
sst.sst[sst.time=19782][sst.lat=76], -32, 11, 77, 110, 165, 164, 163, 209, 164, 35, -100
sst.sst[sst.time=19782][sst.lat=74], 4, 52, 122, 210, 248, 248, 274, 351, 338, 221, 175
sst.sst[sst.time=19782][sst.lat=72], 163, 214, 267, 322, 360, 376, 408, 435, 440, 426, 367
sst.sst[sst.time=19782][sst.lat=70], 343, 375, 403, 423, 450, 471, 472, 463, 460, 452, 32767
sst.sst[sst

### Converting data in request via `linear_scale`

* We can use the following syntax `linear_scale(variable/query, scale_factor/m, offset/b)` where y = m*variable + b.

In [231]:
request_str = 'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?linear_scale(geogrid(sst,78,0,70,20,"19723<time<19874"),0.01,0)'
print(request_str)

http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?linear_scale(geogrid(sst,78,0,70,20,"19723<time<19874"),0.01,0)


In [232]:
# parse the url to ignore the double quotes
url_encoded = urllib.parse.quote(request_str, safe=':/')
print(url_encoded)

http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii%3Flinear_scale%28geogrid%28sst%2C78%2C0%2C70%2C20%2C%2219723%3Ctime%3C19874%22%29%2C0.01%2C0%29


In [233]:
# note that the first item is time, the second is latitude, the third is longitude
data_response = requests.get(url_encoded)

In [234]:
print(data_response.content.decode('utf-8'))

Dataset: function_result_sst.mnmean.nc
sst.lon, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20
sst.sst[sst.time=19754][sst.lat=78], -0.87, -0.37, 0.22, 0.69, 1.55, 1.26, 1.05, 0.77, -0.5, -1.8, -1.8
sst.sst[sst.time=19754][sst.lat=76], -0.3, 0.13, 0.77, 1.03, 1.45, 1.35, 1.39, 1.95, 1.5, -0.54, -1.8
sst.sst[sst.time=19754][sst.lat=74], -0.03, 0.43, 1.13, 1.99, 2.29, 2.23, 2.55, 3.38, 3.26, 2.06, 1.59
sst.sst[sst.time=19754][sst.lat=72], 1.57, 2.04, 2.6, 3.19, 3.57, 3.72, 4.05, 4.36, 4.43, 4.27, 3.68
sst.sst[sst.time=19754][sst.lat=70], 3.49, 3.78, 4.08, 4.32, 4.61, 4.83, 4.84, 4.77, 4.77, 4.68, 327.67
sst.sst[sst.time=19782][sst.lat=78], -0.93, -0.45, 0.14, 0.65, 1.6, 1.39, 1.18, 0.99, 0.24, -1.8, -1.8
sst.sst[sst.time=19782][sst.lat=76], -0.32, 0.11, 0.77, 1.1, 1.65, 1.64, 1.63, 2.09, 1.64, 0.35, -1
sst.sst[sst.time=19782][sst.lat=74], 0.04, 0.52, 1.22, 2.1, 2.48, 2.48, 2.74, 3.51, 3.38, 2.21, 1.75
sst.sst[sst.time=19782][sst.lat=72], 1.63, 2.14, 2.67, 3.22, 3.6, 3.76, 4.08, 4.35, 4.4, 4.26, 3

## Testing out Series data

* Endpoint is a Rhode Island dock weather station -  `http://test.opendap.org/dap/data/ff/gsodock.dat`.
* The series are stored more as a relational data table where rows correspond to sets of values across each variable column (including time).
* **The requests are a bit different, once needs to specify the variable(s) and threshold variables using a `Sequence_Name.VariableName` notation**.
    * Example: `URI_GSO-Dock.Salinity&URI_GSO-Dock.Time>35234.5&URI_GSO-Dock.Depth>2` woudl return just salinity at the described thresholds.
    * Note that url encoding is required for thresholding.
    * To pull multiple variables at once use commas to separate them! Once you use the "&" sign the following text is intepreted as query thresholds.

In [240]:
# get the dds
dds_series = requests.get(r'http://test.opendap.org/dap/data/ff/gsodock.dat.dds')
print(str(dds_series.content.decode('utf-8')))

Dataset {
    Sequence {
        Float64 Time;
        Float32 Depth;
        Float32 Sea_Temp;
        Float32 Salinity;
        Float32 DO_percent;
        Float32 pH;
        Float32 Turbidity;
        Float32 Air_Temp;
        Float32 Wind_Speed;
        Float32 Wind_Direction;
        Float32 Barometric_Pres;
        Float32 Solar_Radiation;
    } URI_GSO-Dock;
} gsodock.dat;



In [241]:
# get the das
das_series = requests.get(r'http://test.opendap.org/dap/data/ff/gsodock.dat.das')
print(str(das_series.content.decode('utf-8')))

Attributes {
    FF_GLOBAL {
        String Server "DODS FreeFrom based on FFND release 4.2.3";
        String text "Time    Dep     Temp    Sal     DO%     pH      Turb    AirT    WndSpd  WndDir  Baro    Solar";
        String location "University of Rhode Island/Oceanography School dock";
        Float32 latitude 41.4926;
        Float32 longitude -71.4201;
        String start_date "19 June 1996";
    }
    Time {
        String units "Days since 1/1/1900";
        Float64 min 35234.0;
        Float64 max 35235.0;
    }
    Depth {
        String units "m";
    }
    Sea_Temp {
        String units "degC";
    }
    Salinity {
        String units "psu";
    }
    DO_percent {
    }
    pH {
        String units "pct";
    }
    Turbidity {
        String units "FTU";
    }
    Air_Temp {
        String units "degC";
    }
    Wind_Speed {
        String units "m/s";
    }
    Wind_Direction {
        String units "deg";
    }
    Barometric_Pres {
        String units "mbar";
    }

In [292]:
# make a data request to get Ph and Barometric_Pres after time 35234.75
series_url = r'http://test.opendap.org/dap/data/ff/gsodock.dat.ascii?URI_GSO-Dock.pH,URI_GSO-Dock.Barometric_Pres&URI_GSO-Dock.Time>35234.75' #&URI_GSO-Dock.Depth>2'

In [293]:
# parse the url to ignore the double quotes
series_url_encoded = urllib.parse.quote(series_url, safe=':/')
print(series_url_encoded)

http://test.opendap.org/dap/data/ff/gsodock.dat.ascii%3FURI_GSO-Dock.pH%2CURI_GSO-Dock.Barometric_Pres%26URI_GSO-Dock.Time%3E35234.75


In [294]:
# note that the first item is time, the second is latitude, the third is longitude
series_data_response = requests.get(series_url_encoded)

In [295]:
print(series_data_response.content.decode('utf-8'))

Dataset: gsodock.dat
URI_GSO-Dock.pH, URI_GSO-Dock.Barometric_Pres
8.013, 768.1
8.006, 768.1
7.994, 768.1
8.013, 768.1
8.005, 768.1
8.005, 768.1
8.019, 768.1
7.94, 768.1
8.023, 768.1
8.039, 768.35
8.04, 768.35
8.044, 768.1
8.029, 767.84
8.031, 768.1
8.031, 768.1
8.011, 768.35
8.013, 768.1
8.015, 768.35
8.008, 768.35
8.007, 768.6
7.996, 768.86
7.978, 768.6
7.971, 768.86
8.006, 768.6
8.002, 768.6
8.007, 768.6
7.99, 768.86
7.976, 768.6
7.94, 768.6
7.948, 768.6
7.943, 768.6
7.942, 768.6
7.941, 768.6
7.968, 768.35
8.022, 768.35

