In [11]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import numpy.ma as ma

In [12]:
import matplotlib as mpl
import matplotlib.pyplot as plt

In [13]:
import xarray as xar

In [14]:
# Relative path of the netCDF file that the cells below read.
file_path = "./data/sanjioil.nc"

In [15]:
# Open the file through netCDF4, read-only (the library's default mode).
# NOTE(review): this handle is not used or closed in the cells shown here;
# confirm it is still needed.
ds = nc.Dataset(file_path, mode="r")

In [16]:
# Load the same file as an xarray Dataset; the following cells work with this object.
ds_xr = xar.open_dataset(file_path)

In [17]:
# Show the dataset's dimension names and their sizes.
ds_xr.dims

Frozen(SortedKeysDict({'trajectory': 2958, 'time': 73}))

In [19]:
# List the data variables together with their dimensions and dtypes.
ds_xr.data_vars

Data variables:
    age_seconds                               (trajectory, time) float32 ...
    interfacial_area                          (trajectory, time) float32 ...
    mass_evaporated                           (trajectory, time) float32 ...
    water_fraction                            (trajectory, time) float32 ...
    y_wind                                    (trajectory, time) float32 ...
    sea_ice_area_fraction                     (trajectory, time) float32 ...
    density                                   (trajectory, time) float32 ...
    oil_film_thickness                        (trajectory, time) float32 ...
    x_sea_water_velocity                      (trajectory, time) float32 ...
    bulltime                                  (trajectory, time) float32 ...
    x_wind                                    (trajectory, time) float32 ...
    age_emulsion_seconds                      (trajectory, time) float32 ...
    sea_surface_wave_stokes_drift_y_velocity  (trajectory, t

In [18]:
# List the coordinates attached to the dataset.
ds_xr.coords

Coordinates:
  * trajectory  (trajectory) int32 1 2 3 4 5 6 ... 2953 2954 2955 2956 2957 2958
  * time        (time) datetime64[ns] 2018-01-14T22:20:00 ... 2018-01-17T22:20:00
    lon         (trajectory, time) float32 ...
    lat         (trajectory, time) float32 ...

## 获取指定维度的`DataArray`  
如下可知：
    以`mass_oil`为例，该物理量有两个维度，分别为`trajectory`（长度2958）和`time`（长度73）

In [29]:
# Read the x_wind variable at time index 1.
ds_xr['x_wind'].isel(time=1)

### 1- 尝试将xarray转为series

## 2 剔除掉nan的数据，只获取有效数据

#### 查看不同时刻的数据
先通过`where`把大于`max`的值置为`nan`，再剔除所有`nan`值  
使用xarray的dropna方法，注意与pandas中的方法有所区别

提示错误：
`ValueError: any must be a single dataset dimension`  
报错的直接原因是把`'any'`传给了`dim`参数：`dim`必须是一个维度名（如`trajectory`），而`'any'`应传给`how`参数。  
另外，查看文档后发现`xarray.Dataset.dropna`尚不支持同时沿多个维度删除缺失值  

[参考](http://xarray.pydata.org/en/stable/generated/xarray.Dataset.dropna.html)  

考虑将xarray转换为`DataFrame`，然后再清洗？  
[上网参考](https://stackoverflow.com/questions/52553925/python-xarray-remove-coordinates-with-all-missing-variables)
比较好的解决办法是：
1. 将`Dataset`（或`DataArray`）->`DataFrame`
2. 在`DataFrame`中通过`pandas` `dropna` 即可

In [52]:
# Take the x_wind values at time index 30 and display the resulting DataArray.
xr_temp = ds_xr['x_wind'].isel(time=30)
xr_temp

In [44]:
# `dim` must name a dimension, and the 'any'/'all' switch belongs to `how`.
# Passing dim='any' is what raised
# "ValueError: any must be a single dataset dimension".
# xr_temp is a single time slice, so `trajectory` is the dimension to drop along.
xr_temp.where(xr_temp <= 2000).dropna(dim='trajectory', how='any')

In [53]:
# Mask values that are not below 2000 as NaN, convert the DataArray to a
# pandas DataFrame, and preview the first rows.
(
    xr_temp
    .where(xr_temp < 2000)
    .to_dataframe()
    .head()
)

Unnamed: 0_level_0,time,lon,lat,x_wind
trajectory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2018-01-16 04:20:00,9.969210000000001e+36,9.969210000000001e+36,
2,2018-01-16 04:20:00,125.8521,28.50434,-2.856885
3,2018-01-16 04:20:00,9.969210000000001e+36,9.969210000000001e+36,
4,2018-01-16 04:20:00,125.8375,28.53567,-3.510324
5,2018-01-16 04:20:00,125.7466,28.53434,-3.477507


* 这么看此种方式比较可行

In [54]:
# Mask values that are not below 2000 as NaN, convert to a DataFrame, and
# drop every row that contains a NaN.
# Despite the `xr_` prefix, the result is a pandas DataFrame.
# NOTE(review): the preview rows with NaN x_wind carry 9.96921e+36 in lon/lat;
# confirm no such placeholder values survive in the rows kept here.
xr_final = xr_temp.where(xr_temp < 2000).to_dataframe().dropna(how='any')
# Display the shape so the cell reproduces its recorded output.
xr_final.shape

(901, 4)