# 整合 MinIO 中的数据读写

## ERA5-Land 搜索和读取

In [1]:
from hydro_opendata.data.minio import Era5L
import numpy as np
import geopandas as gpd

In [2]:
# Create the Era5L data handle for the 'wis' dataset.
era5 = Era5L('wis')    # 'wis' is the dataset name and covers the domestic extent; a CAMELS dataset extent is to be added later

- 主要通过catalog和reader两个属性对象进行数据操作

In [3]:
# Uncomment to list the attributes exposed by the catalog and reader objects.
# dir(era5.catalog), dir(era5.reader)

# Print catalog metadata: spatial resolution, then the catalog's start and end times.
print(era5.catalog.spatial_resolution)
print(era5.catalog.start_time)
print(era5.catalog.end_time)

0.1 x 0.1; Native resolution is 9 km.
2015-07-01T00:00:00
2021-12-31T23:00:00


In [4]:
# Load the area of interest (AOI) from a GeoJSON file with geopandas.
# `aoi` is reused by later cells (catalog searches and from_aoi reads).
aoi = gpd.read_file('basin.geojson')
# Bare trailing expression: the notebook renders the loaded table.
aoi

Unnamed: 0,id,geometry
0,0,"MULTIPOLYGON (((122.44241 39.80139, 122.39342 ..."


- 获取数据时间范围和空间范围

In [5]:
# Query the ERA5 catalog with the AOI. The rendered result has the columns
# id, start_time, end_time and geometry.
e = era5.catalog.search(aoi=aoi)
# Bare trailing expression: the notebook renders the search result.
e

Unnamed: 0,id,start_time,end_time,geometry
0,era5-land,2015-07-01,2021-12-31 23:00:00,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


- 通过指定四至范围读取

In [6]:
# Bounding box for the read.
# NOTE(review): presumably (min_lon, min_lat, max_lon, max_lat) — confirm against the reader API.
bbox=(121,39,123,40)
# Time window covering June 2021; these two names are reused by the next ERA5 cells.
start_time=np.datetime64("2021-06-01T00:00:00.000000000")
end_time=np.datetime64("2021-06-30T23:00:00.000000000")

# Open the 'Total precipitation' variable for the window and bounding box above.
# The repr rendered below reports a chunked dask array with 720 steps on the first axis.
e1 = era5.reader.open_dataset(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, bbox=bbox)
# Bare trailing expression: the notebook renders the dataset repr.
e1

Cannot find the ecCodes library


Unnamed: 0,Array,Chunk
Bytes,649.69 kiB,21.66 kiB
Shape,"(720, 21, 11)","(24, 21, 11)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 649.69 kiB 21.66 kiB Shape (720, 21, 11) (24, 21, 11) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",11  21  720,

Unnamed: 0,Array,Chunk
Bytes,649.69 kiB,21.66 kiB
Shape,"(720, 21, 11)","(24, 21, 11)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过矢量数据文件读取

In [7]:
# Path of the vector file that delimits the read (same file the AOI was loaded from).
shp = 'basin.geojson'
# Read 'Total precipitation' for the extent of the vector file.
# start_time / end_time are not set here; they come from the earlier bbox cell.
e2 = era5.reader.from_shp(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, shp=shp)
# Bare trailing expression: the notebook renders the dataset repr.
e2

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 84.38 kiB 2.81 kiB Shape (720, 6, 5) (24, 6, 5) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",5  6  720,

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过已有aoi对象读取

In [8]:
# Read 'Total precipitation' using the already-loaded `aoi` object instead of a file path.
# start_time / end_time are not set here; they come from the earlier bbox cell.
e3 = era5.reader.from_aoi(data_variables=['Total precipitation'], start_time=start_time, end_time=end_time, aoi=aoi)
# Bare trailing expression: the notebook renders the dataset repr.
e3

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 84.38 kiB 2.81 kiB Shape (720, 6, 5) (24, 6, 5) Dask graph 30 chunks in 6 graph layers Data type float32 numpy.ndarray",5  6  720,

Unnamed: 0,Array,Chunk
Bytes,84.38 kiB,2.81 kiB
Shape,"(720, 6, 5)","(24, 6, 5)"
Dask graph,30 chunks in 6 graph layers,30 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## GPM 搜索和读取

In [9]:
from hydro_opendata.data.minio import GPM

In [10]:
# Create the GPM data handle for the 'wis' dataset.
gpm = GPM('wis')    # 'wis' is the dataset name and covers the domestic extent; 'camels' covers the CAMELS dataset extent

- 主要通过catalog和reader两个属性对象进行数据操作

In [11]:
# Uncomment to list the attributes exposed by the catalog and reader objects.
# dir(gpm.catalog), dir(gpm.reader)

# Print catalog metadata: temporal resolution, then the catalog's start and end times.
print(gpm.catalog.temporal_resolution)
print(gpm.catalog.start_time)
print(gpm.catalog.end_time)

half-hourly
2016-01-01T00:00:00.000000000
2023-09-05T05:30:00.000000000


- 获取数据时间范围和空间范围

In [12]:
# Query the GPM catalog with the AOI loaded earlier. The rendered result has
# the columns id, start_time, end_time and geometry.
g = gpm.catalog.search(aoi=aoi)
# Bare trailing expression: the notebook renders the search result.
g

Unnamed: 0,id,start_time,end_time,geometry
0,gpm-imerg-early,2016-01-01,2023-09-05 05:30:00,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


- 通过指定四至范围读取

In [13]:
# Bounding box for the read.
# NOTE(review): presumably (min_lon, min_lat, max_lon, max_lat) — confirm against the reader API.
bbox=(121,39,123,40)
# Time window covering June 2023. This rebinds start_time / end_time, replacing
# the values set in the ERA5 section; the following GPM cells reuse them.
start_time=np.datetime64("2023-06-01T00:00:00.000000000")
end_time=np.datetime64("2023-06-30T23:30:00.000000000")

# Open the GPM data for the window and bounding box above (no data_variables argument is passed).
# The repr rendered below reports a chunked dask array with 1440 steps on the first axis.
g1 = gpm.reader.open_dataset(start_time=start_time, end_time=end_time, bbox=bbox)
# Bare trailing expression: the notebook renders the dataset repr.
g1

Unnamed: 0,Array,Chunk
Bytes,1.10 MiB,37.50 kiB
Shape,"(1440, 20, 10)","(48, 20, 10)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.10 MiB 37.50 kiB Shape (1440, 20, 10) (48, 20, 10) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",10  20  1440,

Unnamed: 0,Array,Chunk
Bytes,1.10 MiB,37.50 kiB
Shape,"(1440, 20, 10)","(48, 20, 10)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过矢量数据文件读取

In [14]:
# Path of the vector file that delimits the read.
shp = 'basin.geojson'
# Read GPM data for the extent of the vector file.
# start_time / end_time are not set here; they come from the GPM bbox cell.
g2 = gpm.reader.from_shp(start_time=start_time, end_time=end_time, shp=shp)
# Bare trailing expression: the notebook renders the dataset repr.
g2

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 3.75 kiB Shape (1440, 5, 4) (48, 5, 4) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",4  5  1440,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


- 通过已有aoi对象读取

In [15]:
# Re-load the AOI from the path held in `shp` (set in the previous cell).
# NOTE(review): `shp` names the same file the AOI was first loaded from, so this looks redundant — confirm.
aoi = gpd.read_file(shp)
# Read GPM data using the AOI object instead of a file path.
g3 = gpm.reader.from_aoi(start_time=start_time, end_time=end_time, aoi=aoi)
# Bare trailing expression: the notebook renders the dataset repr.
g3

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 3.75 kiB Shape (1440, 5, 4) (48, 5, 4) Dask graph 30 chunks in 3 graph layers Data type float32 numpy.ndarray",4  5  1440,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,3.75 kiB
Shape,"(1440, 5, 4)","(48, 5, 4)"
Dask graph,30 chunks in 3 graph layers,30 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## GFS 搜索和读取

In [16]:
from hydro_opendata.data.minio import GFS

In [17]:
# Create the GFS data handle for the 'wis' dataset.
# NOTE(review): the second argument 'tp' presumably selects the variable (the catalog id shown later is 'gfs_atmos.tp') — confirm.
gfs = GFS('wis', 'tp')    # 'wis' is the dataset name and covers the domestic extent; 'camels' covers the CAMELS dataset extent

- 主要通过catalog和reader两个属性对象进行数据操作

In [18]:
# Uncomment to list the attributes exposed by the catalog and reader objects.
# dir(gfs.catalog), dir(gfs.reader)

# Print catalog metadata: data source, then the catalog's start and end times.
print(gfs.catalog.data_sources)
print(gfs.catalog.start_time)
print(gfs.catalog.end_time)

NOAA
2016-07-10T00
2023-09-05T06


- 获取数据时间范围和空间范围

In [19]:
# Query the GFS catalog with the current AOI. The rendered result has the
# columns id, start_time, end_time and geometry, and here contains two rows.
f = gfs.catalog.search(aoi=aoi)
# Bare trailing expression: the notebook renders the search result.
f

Unnamed: 0,id,start_time,end_time,geometry
1,gfs_atmos.tp,2022-09-01,2023-09-05 06:00:00,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."
0,gfs_atmos.tp,2016-07-10,2022-08-31 18:00:00,"POLYGON ((122.39342 39.81027, 122.31744 39.833..."


- 通过指定四至范围读取

In [20]:
# Bounding box for the read.
# NOTE(review): presumably (min_lon, min_lat, max_lon, max_lat) — confirm against the reader API.
bbox=(121,39,123,40)
# Date passed as creation_date; reused by the following GFS cells.
# NOTE(review): presumably the forecast issue date — confirm.
creation_date=np.datetime64("2023-06-01")

# Open the GFS data for the given creation date/time and bounding box.
# NOTE(review): creation_time='00' presumably selects the forecast cycle hour — confirm.
# The repr rendered below reports a 4-D chunked dask array.
f1 = gfs.reader.open_dataset(creation_date=creation_date, creation_time='00', bbox=bbox)
# Bare trailing expression: the notebook renders the dataset repr.
f1

Unnamed: 0,Array,Chunk
Bytes,4.94 MiB,8.44 kiB
Shape,"(120, 120, 5, 9)","(24, 1, 5, 9)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.94 MiB 8.44 kiB Shape (120, 120, 5, 9) (24, 1, 5, 9) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  9  5  120,

Unnamed: 0,Array,Chunk
Bytes,4.94 MiB,8.44 kiB
Shape,"(120, 120, 5, 9)","(24, 1, 5, 9)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


- 通过矢量数据文件读取

In [21]:
# Path of the vector file that delimits the read.
shp = 'basin.geojson'
# Read GFS data for the extent of the vector file.
# creation_date is not set here; it comes from the GFS bbox cell.
f2 = gfs.reader.from_shp(creation_date=creation_date, creation_time='00', shp=shp)
# Bare trailing expression: the notebook renders the dataset repr.
f2

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 0.99 MiB 1.69 kiB Shape (120, 120, 3, 3) (24, 1, 3, 3) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  3  3  120,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


- 通过已有aoi对象读取

In [22]:
# Re-load the AOI from the path held in `shp` (set in the previous cell).
# NOTE(review): `shp` names the same file the AOI was first loaded from, so this looks redundant — confirm.
aoi = gpd.read_file(shp)
# Read GFS data using the AOI object instead of a file path.
f3 = gfs.reader.from_aoi(creation_date=creation_date, creation_time='00', aoi=aoi)
# Bare trailing expression: the notebook renders the dataset repr.
f3

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 0.99 MiB 1.69 kiB Shape (120, 120, 3, 3) (24, 1, 3, 3) Dask graph 600 chunks in 3 graph layers Data type float64 numpy.ndarray",120  1  3  3  120,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,1.69 kiB
Shape,"(120, 120, 3, 3)","(24, 1, 3, 3)"
Dask graph,600 chunks in 3 graph layers,600 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
