In [1]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import numpy.ma as ma

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

In [3]:
import xarray as xar

In [4]:
# Absolute path of the NetCDF file analysed in this notebook.
# NOTE(review): machine-specific location — adjust before running elsewhere.
file_path='/Users/evaseemefly/Documents/03Data/nc/1/FORECAST/OIL/2020/4/10/ceshi_code_20200508125917369902.nc'

In [5]:
# Low-level netCDF4 handle on the file.
ds=nc.Dataset(file_path)
# xarray view of the same file. Every later cell reads from `ds_xr`, so it must
# be created here for the notebook to survive "Restart Kernel -> Run All".
# NOTE(review): default decoding options are assumed — confirm they match the
# options used when the recorded outputs were produced.
ds_xr=xar.open_dataset(file_path)

## 获取dataset中的variables以及维度

In [28]:
# List the data variables stored in the dataset.
ds_xr.data_vars

Data variables:
    status   (trajectory, time) int32 0 0 0 ... -2147483647 -2147483647

In [24]:
# Show the dimension names and their sizes.
ds_xr.dims

Frozen(SortedKeysDict({'trajectory': 3000, 'time': 145}))

In [25]:
# Show the coordinates attached to the dataset.
ds_xr.coords

Coordinates:
  * trajectory  (trajectory) int32 1 2 3 4 5 6 ... 2995 2996 2997 2998 2999 3000
  * time        (time) datetime64[ns] 2020-04-10 ... 2020-04-16
    lon         (trajectory, time) float32 ...
    lat         (trajectory, time) float32 ...

In [26]:
# Show the global attributes (file-level metadata) of the dataset.
ds_xr.attrs

{'Conventions': 'CF-1.6',
 'standard_name_vocabulary': 'CF-1.6',
 'featureType': 'trajectory',
 'history': 'Created 2020-05-08 12:59:24.885743',
 'source': 'Output from simulation with OpenDrift',
 'model_url': 'https://github.com/OpenDrift/opendrift',
 'opendrift_class': 'OpenOil',
 'opendrift_module': 'opendrift.models.openoil',
 'readers': "odict_keys(['/Users/evaseemefly/Documents/03Data/nc/COMMON/DAILY/2020/4/10/ecs_new_current_20200410.nc', 'global_landmask'])",
 'time_coverage_start': '2020-04-10 00:00:00',
 'time_step_calculation': '0:30:00',
 'time_step_output': '1:00:00',
 'config_seed:oil_type': 'AASGAR',
 'config_seed:ocean_only': 'True',
 'config_general:use_auto_landmask': 'True',
 'config_general:coastline_action': 'stranding',
 'config_general:time_step_minutes': 60,
 'config_general:time_step_output_minutes': 'None',
 'config_drift:max_age_seconds': 'None',
 'config_drift:scheme': 'euler',
 'config_drift:stokes_drift': 'True',
 'config_drift:current_uncertainty': 0.1,


获取所有的维度  
* 注意获取维度时，不可以使用下标进行索引，会报错

## 获取`几个指定维度`的`DataArray`  
如下可知：
    `mass_oil`有2958个值，该物理量有两个维度，分别为`trajectory`,`time`

#### 查看不同时刻的
并通过`where`剔除掉大于`max`以及`nan`的值  
使用xarray的dropna方法，注意与pandas中的方法有所区别

提示错误：
`ValueError: any must be a single dataset dimension`  
查看文档后发现`xarray.Dataset.dropna`尚不支持同时沿多个维度删除缺失值（一次只能指定单个维度）  

[参考](http://xarray.pydata.org/en/stable/generated/xarray.Dataset.dropna.html)  

考虑将xarray转换为DataFrame，然后再清洗？  
[上网参考](https://stackoverflow.com/questions/52553925/python-xarray-remove-coordinates-with-all-missing-variables)
比较好的解决办法是：
1. 将`Dataset`->`DataFrame`
2. 在`DataFrame`中通过`pandas` `dropna` 即可

In [7]:
# Select time index 30 and display the `status` variable.
ds_xr.isel(time=30)['status']

In [8]:
# Select time index 30 and display the `mass_evaporated` variable.
ds_xr.isel(time=30)['mass_evaporated']

In [9]:
# Display `status` at time index 30 again (same expression as an earlier cell).
ds_xr.isel(time=30)['status']

对于未做任何处理的`dataarray`的len为`2958`  
使用max与min方法操作dataset  
`max`为1  
`min`为-2147xxxx

In [9]:
# Keep `status` at time index 30 in a variable for the filtering steps that follow.
xr_temp_x=ds_xr.isel(time=30)['status']

In [10]:
# Largest status value at this time step, before any filtering.
xr_temp_x.max()

In [11]:
# Smallest status value at this time step, before any filtering.
xr_temp_x.min()

In [12]:
# Mean of `lat` at time index 30 over the unfiltered data.
# The recorded output is `inf`, which is why the values are filtered before averaging.
ds_xr.isel(time=30)['lat'].mean().data

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


array(inf, dtype=float32)

## 1-先对dataarray进行过滤，先剔除<0的  
先剔除<0的部分后发现len并未减少？  
* 原因：`where`不会删除元素，只是把不满足条件的值替换成`nan`（掩码），所以长度不变，还需要再把这些`nan`剔除

In [13]:
# Keep status values >= 0; entries failing the condition are masked, not removed.
xr_temp_x.where(xr_temp_x>=0)

In [14]:
# Additionally keep only status < 1; status == 1 means the particle is dead
# (it has reached land — this still needs to be checked against the API docs).
xr_temp_x.where(xr_temp_x>=0).where(xr_temp_x<1)

### dropna的方式1：直接对DataArray进行dropna  
* 注意必须传入`dim`参数，否则会报错：`TypeError: dropna() missing 1 required positional argument: 'dim'`

In [19]:
# Mask status values below 0, then drop the masked entries along `trajectory`.
xr_filter=(
    xr_temp_x
    .where(xr_temp_x>=0)
    .dropna(dim='trajectory',how='any')
)
# Preview the first few remaining entries.
xr_filter.head()

## 问题:
输出的nc文件 变量有问题

In [16]:
# Check how many entries remain after filtering.
len(xr_filter)

1377

### dropna的方式2：先转为`DataFrame`再dropna（xarray中`DataArray`的dropna等缺失值方法本身就借鉴自`pandas`）  
* xarray objects borrow the isnull(), notnull(), count(), dropna(), fillna(), ffill(), and bfill() methods for working with missing data from pandas:   
 
[missing values](https://xarray-test.readthedocs.io/en/latest/computation.html)

In [19]:
# Same >= 0 mask, but drop the NaN rows after converting to a pandas DataFrame,
# then count the rows that are left.
len(
    xr_temp_x
    .where(xr_temp_x>=0)
    .to_dataframe()
    .dropna(how='any')
)

901

## 2 求均值

In [17]:
# Mean of the `lat` coordinate over the filtered entries.
xr_filter['lat'].mean()

In [18]:
# Mean of the `lon` coordinate over the filtered entries.
xr_filter['lon'].mean()

In [22]:
# Mean of the filtered status values themselves.
xr_filter.mean()

获取均值结果  
对0维的均值结果使用`.data.tolist()`，可以得到Python标量

In [23]:
# Extract the single value held by the 0-d mean as a plain Python number.
xr_filter['lat'].mean().data.tolist()

28.479509353637695

In [24]:
# Mean of `lon` as a plain Python number.
xr_filter['lon'].mean().data.tolist()

125.88218688964844

In [39]:
# Same mean accessed through `.values`, which yields a 0-d NumPy array.
xr_filter['lon'].mean().values

array(125.88219, dtype=float32)

In [69]:
# Mean of the filtered status values as a plain Python number.
xr_filter.mean().data.tolist()

0.016648168701442843

## 20-05-08 处理动态生成的nc文件时出现的问题