# hydro_opendata.data.minio模块
该模块整合了对 MinIO 中数据的读写操作。下文依次演示 ERA5-Land、GPM 和 GFS 三类数据的检索（`catalog`）与读取（`reader`）。

## era5-land数据

In [1]:
from hydro_opendata.data.minio import Era5L
import numpy as np
import geopandas as gpd

In [2]:
era5 = Era5L()

### 主要通过 `catalog`（查询元数据与数据集）和 `reader`（读取数据）两个属性对象进行数据操作

In [3]:
# dir(era5.catalog), dir(era5.reader)

print(era5.catalog.spatial_resolution)

0.1 x 0.1; Native resolution is 9 km.


### 读取流域范围矢量数据（作为后续检索与读取所用的 `aoi`）

In [4]:
aoi = gpd.read_file('basin.geojson')
aoi

Unnamed: 0,id,geometry
0,0,"MULTIPOLYGON (((122.44241 39.80139, 122.39342 ..."


### 利用`catalog`获取era5数据的数据集

In [5]:
era5.catalog.datasets

{'wis': {'start_time': numpy.datetime64('2015-07-01T00:00:00'),
  'end_time': numpy.datetime64('2021-12-31T23:00:00'),
  'bbox': [115, 38, 136, 54]}}

- 利用 `catalog.search()` 方法搜索 era5 数据的数据集，可指定时间范围和空间范围（本例仅通过 `aoi` 指定空间范围）

In [6]:
e = era5.catalog.search(aoi=aoi)
e

Unnamed: 0,id,dataset,start_time,end_time,geometry
0,era5-land,wis,2015-07-01T00:00:00,2021-12-31T23:00:00,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


### 利用`reader`读取era5数据

- 通过指定四至范围（`bbox`，本例中依次为最小经度、最小纬度、最大经度、最大纬度）读取 era5 数据

In [7]:
bbox=(121,39,123,40)
start_time=np.datetime64("2021-06-01T00:00:00.000000000")
end_time=np.datetime64("2021-06-30T23:00:00.000000000")

e1 = era5.reader.open_dataset(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, dataset='wis', bbox=bbox)
e1

Cannot find the ecCodes library


Unnamed: 0,Array,Chunk
Bytes,649.69 kiB,21.66 kiB
Shape,"(720, 21, 11)","(24, 21, 11)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 649.69 kiB 21.66 kiB Shape (720, 21, 11) (24, 21, 11) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",11  21  720,

Unnamed: 0,Array,Chunk
Bytes,649.69 kiB,21.66 kiB
Shape,"(720, 21, 11)","(24, 21, 11)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过矢量数据文件读取

In [8]:
shp = 'basin.geojson'
e2 = era5.reader.from_shp(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, dataset='wis', shp=shp)
e2

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 84.38 kiB 2.81 kiB Shape (720, 6, 5) (24, 6, 5) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",5  6  720,

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过已有的 `aoi` 对象（由 `gpd.read_file` 读入的 GeoDataFrame）读取

In [9]:
e3 = era5.reader.from_aoi(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, dataset='wis', aoi=aoi)
e3

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 84.38 kiB 2.81 kiB Shape (720, 6, 5) (24, 6, 5) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",5  6  720,

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## gpm数据

In [10]:
from hydro_opendata.data.minio import GPM

In [11]:
gpm = GPM()

### 主要通过 `catalog`（查询元数据与数据集）和 `reader`（读取数据）两个属性对象进行数据操作

In [12]:
# dir(gpm.catalog), dir(gpm.reader)

print(gpm.catalog.temporal_resolution)

half-hourly; 1 day


### 利用`catalog`获取gpm数据的数据集

In [13]:
gpm.catalog.datasets

{'wis': [{'time_resolution': '30 minutes',
   'start_time': numpy.datetime64('2016-01-01T00:00:00.000000000'),
   'end_time': numpy.datetime64('2023-10-11T08:30:00.000000000'),
   'bbox': [73, 3, 136, 54]},
  {'time_resolution': '1 day',
   'start_time': numpy.datetime64('2000-06-01T00:00:00.000000000'),
   'end_time': numpy.datetime64('2023-09-26T23:59:59.000000000'),
   'bbox': [73, 3, 136, 54]}],
 'camels': [{'time_resolution': '30 minutes',
   'start_time': numpy.datetime64('2022-01-01T00:00:00.000000000'),
   'end_time': numpy.datetime64('2023-08-31T23:30:00.000000000'),
   'bbox': [-125, 25, -66, 50]},
  {'time_resolution': '1 day',
   'start_time': numpy.datetime64('2000-06-01T00:00:00.000000000'),
   'end_time': numpy.datetime64('2014-12-31T23:59:59.000000000'),
   'bbox': [-125, 25, -66, 50]}]}

- 利用 `catalog.search()` 方法搜索 gpm 数据的数据集，可指定时间范围和空间范围（本例仅通过 `aoi` 指定空间范围）

In [14]:
g = gpm.catalog.search(aoi=aoi)
g

Unnamed: 0,id,dataset,time_resolution,start_time,end_time,geometry
0,gpm-imerg-early,wis,30 minutes,2016-01-01T00:00:00.000000000,2023-10-11T08:30:00.000000000,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."
0,gpm-imerg-early,wis,1 day,2000-06-01T00:00:00.000000000,2023-09-26T23:59:59.000000000,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


### 利用`reader`读取gpm数据

- 通过指定四至范围（`bbox`，本例中依次为最小经度、最小纬度、最大经度、最大纬度）读取 gpm 数据

In [15]:
bbox=(121,39,123,40)
start_time=np.datetime64("2023-06-01T00:00:00.000000000")
end_time=np.datetime64("2023-06-30T23:30:00.000000000")

g1 = gpm.reader.open_dataset(start_time=start_time, end_time=end_time, dataset='wis', bbox=bbox, time_resolution='30m')
g1

Unnamed: 0,Array,Chunk
Bytes,1.10 MiB,37.50 kiB
Shape,"(1440, 20, 10)","(48, 20, 10)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.10 MiB 37.50 kiB Shape (1440, 20, 10) (48, 20, 10) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",10  20  1440,

Unnamed: 0,Array,Chunk
Bytes,1.10 MiB,37.50 kiB
Shape,"(1440, 20, 10)","(48, 20, 10)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过矢量数据文件读取

In [16]:
shp = 'basin.geojson'
g2 = gpm.reader.from_shp(start_time=start_time, end_time=end_time, dataset='wis', shp=shp, time_resolution='30m')
g2

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 3.75 kiB Shape (1440, 5, 4) (48, 5, 4) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",4  5  1440,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过已有aoi对象读取

In [17]:
aoi = gpd.read_file(shp)
g3 = gpm.reader.from_aoi(start_time=start_time, end_time=end_time, dataset='wis', aoi=aoi, time_resolution='30m')
g3

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 3.75 kiB Shape (1440, 5, 4) (48, 5, 4) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",4  5  1440,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## gfs数据

In [18]:
from hydro_opendata.data.minio import GFS

In [19]:
gfs = GFS('tp')    # 需要指定变量名，tp表示降雨

### 主要通过 `catalog`（查询元数据与数据集）和 `reader`（读取数据）两个属性对象进行数据操作

In [20]:
# dir(gfs.catalog), dir(gfs.reader)

print(gfs.catalog.data_sources)

NOAA


### 利用`catalog`获取gfs数据的数据集

In [21]:
gfs.catalog.datasets

{'wis': [{'start': '2016-07-10T00',
   'end': '2022-08-31T18',
   'bbox': [115, 38, 136, 54]},
  {'start': '2022-09-01T00',
   'end': '2023-10-11T06',
   'bbox': [73, 3, 136, 54]}]}

- 利用 `catalog.search()` 方法搜索 gfs 数据的数据集，可指定时间范围和空间范围（本例仅通过 `aoi` 指定空间范围）

In [22]:
f = gfs.catalog.search(aoi=aoi)
f

Unnamed: 0,id,dataset,start_time,end_time,geometry
0,gfs_atmos.tp,wis,2016-07-10T00,2022-08-31T18,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."
0,gfs_atmos.tp,wis,2022-09-01T00,2023-10-11T06,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


### 利用`reader`读取gfs数据

- 通过指定四至范围（`bbox`，本例中依次为最小经度、最小纬度、最大经度、最大纬度）读取 gfs 数据

In [23]:
bbox=(121,39,123,40)
creation_date=np.datetime64("2023-06-01")

f1 = gfs.reader.open_dataset(creation_date=creation_date, creation_time='00', dataset='wis', bbox=bbox)
f1

Unnamed: 0,Array,Chunk
Bytes,4.94 MiB,8.44 kiB
Shape,"(120, 120, 5, 9)","(24, 1, 5, 9)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.94 MiB 8.44 kiB Shape (120, 120, 5, 9) (24, 1, 5, 9) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  9  5  120,

Unnamed: 0,Array,Chunk
Bytes,4.94 MiB,8.44 kiB
Shape,"(120, 120, 5, 9)","(24, 1, 5, 9)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


- 通过矢量数据文件读取

In [24]:
shp = 'basin.geojson'
f2 = gfs.reader.from_shp(creation_date=creation_date, creation_time='00', dataset='wis', shp=shp)
f2

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 0.99 MiB 1.69 kiB Shape (120, 120, 3, 3) (24, 1, 3, 3) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  3  3  120,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


- 通过已有aoi对象读取

In [25]:
aoi = gpd.read_file(shp)
f3 = gfs.reader.from_aoi(creation_date=creation_date, creation_time='00', dataset='wis', aoi=aoi)
f3

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 0.99 MiB 1.69 kiB Shape (120, 120, 3, 3) (24, 1, 3, 3) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  3  3  120,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
