In [1]:
import os
from glob import glob
import numpy as np
import dask
import xarray as xr
import xrft
import cartopy.crs as ccrs
from cmocean import cm
import h5py

from matplotlib import pyplot as plt
%matplotlib inline

from mitequinox.utils import *

  return f(*args, **kwds)
  return f(*args, **kwds)
  if not mpl.cbook.is_string_like(rgbin[0]):
  return f(*args, **kwds)


In [2]:
# Start a dask cluster whose workers are submitted as PBS batch jobs.
from dask_jobqueue import PBSCluster
# TMPDIR is handed to the cluster as its `local_directory`;
# os.getenv returns None when the variable is not set.
local_dir = os.getenv('TMPDIR')
cluster = PBSCluster(local_directory=local_dir)
#print(cluster.job_script())
# Request 5 workers.
# NOTE(review): `start_workers` is the older dask_jobqueue API; newer releases
# expose `cluster.scale(n)` instead — confirm against the installed version.
w = cluster.start_workers(5)



In [3]:
# Connect a dask.distributed Client to the cluster created in the previous cell;
# the `client` handle is displayed in the next cell to check the scheduler status.
from dask.distributed import Client
client = Client(cluster)

In [4]:
# Display the client summary (scheduler address, dashboard link, workers/cores/memory).
client

0,1
Client  Scheduler: tcp://10.148.0.124:58029  Dashboard: http://10.148.0.124:8787/status,Cluster  Workers: 4  Cores: 16  Memory: 200.00 GB


---


# 1. Drifter data files

In [5]:
# The processed dataset includes both the GPS- and Argo-tracked surface drifters.
# Trajectories are cut into fixed-length blocks overlapping by 50%.
# NOTE(review): earlier comments described 2-month (60-day) blocks, but this file
# is named "..._30days" and holds 720 samples per block — confirm the block length.

data_dir = '/home1/datawork/xyu/Drifters/drifter_data_v1.02/'

# The HDF5 handle is opened read-only and kept open: later cells read
# 'mlat2', 'mlon2', 'qd' and 'uv2' directly from it.
drifter_dataset = h5py.File(data_dir+'Drifter_dataset_processed_30days.mat','r') 
list(drifter_dataset.items())

# Variables used below (shapes as reported by h5py for this file):
# mlat2: mean latitude of each block,      352579*1
# mlon2: mean longitude of each block,     352579*1

# qd: drogued drifters,                    352579*1
# qmed200: median water depth > 200 m,     352579*1
# qu: undrogued drifters,                  352579*1

# uv2: (u, v),                             352579*720 (16-byte compound type)

[('dpth2', <HDF5 dataset "dpth2": shape (352579, 720), type "<f8">),
 ('mdpth', <HDF5 dataset "mdpth": shape (352579, 1), type "<f8">),
 ('mlat2', <HDF5 dataset "mlat2": shape (352579, 1), type "<f8">),
 ('mlon2', <HDF5 dataset "mlon2": shape (352579, 1), type "<f8">),
 ('qall1000', <HDF5 dataset "qall1000": shape (352579, 1), type "|u1">),
 ('qall200', <HDF5 dataset "qall200": shape (352579, 1), type "|u1">),
 ('qall500', <HDF5 dataset "qall500": shape (352579, 1), type "|u1">),
 ('qd', <HDF5 dataset "qd": shape (352579, 1), type "|u1">),
 ('qmed1000', <HDF5 dataset "qmed1000": shape (352579, 1), type "|u1">),
 ('qmed200', <HDF5 dataset "qmed200": shape (352579, 1), type "|u1">),
 ('qmed500', <HDF5 dataset "qmed500": shape (352579, 1), type "|u1">),
 ('qu', <HDF5 dataset "qu": shape (352579, 1), type "|u1">),
 ('u2_err', <HDF5 dataset "u2_err": shape (352579, 720), type "<f8">),
 ('uv2', <HDF5 dataset "uv2": shape (352579, 720), type "|V16">),
 ('v2_err', <HDF5 dataset "v2_err": shape

In [6]:
# Mean latitude of every block: read the (N, 1) HDF5 column into memory and
# expose it as a 1-D DataArray called 'lat' along the 'lat' dimension.
lat2 = np.array(drifter_dataset['mlat2'])
lat = np.real(
    xr.DataArray(lat2, dims=['lat', 'none']).squeeze(dim='none')
).rename('lat')
lat

<xarray.DataArray 'lat' (lat: 352579)>
array([ 44.071905,  43.645862,  44.406323, ..., -36.974844, -37.257942,
       -37.413325])
Dimensions without coordinates: lat

In [7]:
# Mean longitude of every block: read the (N, 1) HDF5 column into memory and
# expose it as a 1-D DataArray called 'lon' along the 'lon' dimension.
lon2 = np.array(drifter_dataset['mlon2'])
lon = np.real(
    xr.DataArray(lon2, dims=['lon', 'none']).squeeze(dim='none')
).rename('lon')
lon

<xarray.DataArray 'lon' (lon: 352579)>
array([310.075746, 308.454926, 307.887965, ..., 325.537856, 325.246672,
       325.474971])
Dimensions without coordinates: lon

In [8]:
# Drogue flag of every block: read the (N, 1) HDF5 column into memory and
# expose it as a 1-D DataArray called 'qd' along the 'qd' dimension.
qd = np.array(drifter_dataset['qd'])
qd = np.real(
    xr.DataArray(qd, dims=['qd', 'none']).squeeze(dim='none')
).rename('qd')
qd

<xarray.DataArray 'qd' (qd: 352579)>
array([1, 1, 1, ..., 1, 1, 1], dtype=uint8)
Dimensions without coordinates: qd

In [9]:
# (u,v) --> u+iv
# 'uv2' is stored with a 16-byte compound dtype (|V16 in the h5py listing),
# i.e. two float64 values per sample.
# `[()]` reads the whole dataset into memory; it replaces `Dataset.value`,
# which is deprecated and no longer available in h5py 3.
uv_raw = drifter_dataset['uv2'][()]
# Reinterpret each 16-byte record as a pair of doubles. The trailing axis of
# length 2 is appended to the dataset's own shape instead of hard-coding it.
uv = uv_raw.view(np.double).reshape(uv_raw.shape + (2,))
uv_drifters = uv[:,:,0] + 1j*uv[:,:,1]
uv_drifters

array([[ 1.269e-01-0.2472j,  1.300e-01-0.2155j,  9.600e-02-0.2988j, ...,
         3.000e-04+0.1005j, -8.000e-04+0.0989j, -4.530e-02+0.055j ],
       [-2.048e-01+0.4936j, -3.023e-01+0.4859j, -2.971e-01+0.4856j, ...,
        -5.280e-02+0.1353j, -1.040e-02+0.1598j, -4.630e-02+0.1763j],
       [-7.770e-02+0.0613j, -9.660e-02+0.0699j, -9.630e-02+0.0702j, ...,
        -1.682e-01-0.0165j, -4.370e-02+0.0247j,  3.300e-02+0.0324j],
       ...,
       [ 6.220e-02+0.164j ,  5.890e-02+0.1348j,  6.710e-02+0.121j , ...,
         5.890e-02+0.0466j,  5.130e-02+0.0552j,  3.950e-02+0.0682j],
       [ 5.470e-02+0.034j ,  6.370e-02+0.0215j,  7.460e-02+0.0064j, ...,
         1.762e-01+0.2945j,  1.135e-01+0.2859j,  4.960e-02+0.2669j],
       [ 2.960e-02+0.0901j, -2.600e-03+0.1134j, -3.220e-02+0.1428j, ...,
        -1.189e-01+0.0092j, -1.027e-01-0.0244j, -7.390e-02-0.0525j]])

In [10]:
# Drifter Dataset: complex velocity u+iv on (block, time), where the block
# dimension is named 'lat'. Longitude and the drogue flag ride along as
# coordinates of that same dimension; time is in seconds at hourly steps.
ds = xr.Dataset(
    data_vars=dict(uv_drifters=(['lat', 'time'], uv_drifters)),
    coords=dict(
        lat=(['lat'], lat),
        lon=(['lat'], lon),
        time=(['time'], np.arange(720)*3600),
        qd=(['lat'], qd),
    ),
)
print(ds)
print('\n data size: %.1f GB' % (ds.nbytes / 1e9))

<xarray.Dataset>
Dimensions:      (lat: 352579, time: 720)
Coordinates:
  * lat          (lat) float64 44.07 43.65 44.41 44.79 44.91 45.02 45.08 ...
    lon          (lat) float64 310.1 308.5 307.9 307.5 306.9 306.4 305.7 ...
  * time         (time) int64 0 3600 7200 10800 14400 18000 21600 25200 ...
    qd           (lat) uint8 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 ...
Data variables:
    uv_drifters  (lat, time) complex128 (0.1269-0.2472j) (0.13-0.2155j) ...

 data size: 4.1 GB


# 2. FFT

In [11]:
# Fourier transform of every block along 'time': NaNs are first replaced by 0,
# then xrft applies a linear detrend and a window; frequencies are left in
# unshifted order (shift=False).
uv_fft_complex = xrft.dft(ds.uv_drifters.fillna(0.), dim=['time'], shift=False, detrend='linear', window=True, chunks_to_segments=False)

# Squared modulus of the transform; entries where the transform is exactly 0
# are masked (set to NaN) by `where`.
# NOTE(review): the /720*3600 scaling presumably uses the 720 samples and the
# 3600 s step of the `time` coordinate — confirm the intended normalisation.
E_uv_blocks = (uv_fft_complex*uv_fft_complex.conj()).where(uv_fft_complex!=0., drop=False)/720*3600
# The product above is complex-typed; keep only its real part.
E_uv_blocks = np.real(E_uv_blocks).rename('E_uv_blocks')
# Extra frequency coordinate: freq_time multiplied by 86400
# (presumably cycles per second -> cycles per day, since time is in seconds).
E_uv_blocks = E_uv_blocks.assign_coords(freq_cpd=E_uv_blocks.freq_time * 86400.) 
# Carry the block longitude over from the input dataset.
E_uv_blocks = E_uv_blocks.assign_coords(lon=ds.lon) 
print(E_uv_blocks)

<xarray.DataArray 'E_uv_blocks' (lat: 352579, freq_time: 720)>
array([[8.275758e+02, 4.555162e+03, 2.000022e+03, ..., 1.875648e+04,
        3.435330e+04, 2.136664e+04],
       [1.425809e+02, 2.871336e+02, 6.629847e+02, ..., 9.242418e+01,
        3.532689e+02, 3.925055e+02],
       [5.310037e+01, 1.618015e+01, 4.191200e+01, ..., 2.495134e+02,
        2.545770e+02, 1.969597e+02],
       ...,
       [1.351784e+03, 4.948018e+02, 5.883382e+02, ..., 1.912037e+03,
        4.437057e+03, 5.022744e+03],
       [2.005802e+03, 7.705751e+02, 5.910980e+03, ..., 1.517837e+03,
        1.061298e+03, 1.545760e+03],
       [2.073965e+03, 8.435541e+03, 1.347270e+04, ..., 2.856774e+02,
        1.410359e+03, 1.297634e+03]])
Coordinates:
  * lat                (lat) float64 44.07 43.65 44.41 44.79 44.91 45.02 ...
  * freq_time          (freq_time) float64 0.0 3.858e-07 7.716e-07 1.157e-06 ...
    freq_time_spacing  float64 3.858e-07
    freq_cpd           (freq_time) float64 0.0 0.03333 0.06667 0.1 0.1333 ..

In [12]:
# Frequency resolution: difference between the second and the first
# `freq_time` value, used below as the integration step.
df = E_uv_blocks['freq_time'].isel(freq_time=1) - E_uv_blocks['freq_time'].isel(freq_time=0)
print(df)

<xarray.DataArray 'freq_time' ()>
array(3.858025e-07)
Coordinates:
    freq_time_spacing  float64 3.858e-07


In [13]:
# f = 2 * (2*pi/86400) * sin(latitude), evaluated at each block's mean latitude
# (the `lat` coordinate of E_uv_blocks, in degrees).
# NOTE(review): this has the form of the Coriolis frequency with an 86400 s
# rotation period (solar rather than sidereal day) — confirm this is intended.
f_drifter = 2.*2.*np.pi/86400. * np.sin(np.deg2rad(E_uv_blocks.lat))
# Same quantity rescaled by 86400/(2*pi), i.e. presumably in cycles per day to match `freq_cpd`.
f_cpd_drifter = f_drifter*86400/2./np.pi

In [14]:
def _band_energy(in_band, name):
    """Integrate ``E_uv_blocks`` over the frequencies selected by ``in_band``.

    Values outside the band are replaced by 0, the result is multiplied by the
    frequency step ``df`` and summed along ``freq_time``. The real part is
    returned as a DataArray called ``name``.
    """
    integrated = (E_uv_blocks.where(in_band, other=0.)*df).sum(dim='freq_time')
    return np.real(integrated).rename(name)


# Semi-diurnal band: 1.86 < |freq| < 2.06 cpd
E_semi = _band_energy(
    (abs(E_uv_blocks.freq_cpd)>1.86) & (abs(E_uv_blocks.freq_cpd)<2.06),
    'E_semi',
)

# Diurnal band: 0.9 < |freq| < 1.1 cpd
E_diurnal = _band_energy(
    (abs(E_uv_blocks.freq_cpd)>0.9) & (abs(E_uv_blocks.freq_cpd)<1.1),
    'E_diurnal',
)

# High-frequency band: |freq| >= 0.5 cpd
E_high = _band_energy(abs(E_uv_blocks.freq_cpd)>=0.5, 'E_high')

# Band within 0.1 cpd of freq = -f_cpd_drifter (latitude dependent)
E_f = _band_energy(abs(E_uv_blocks.freq_cpd+f_cpd_drifter)<0.1, 'E_f')

In [15]:
E_drifter = xr.merge([E_semi.to_dataset(), E_diurnal.to_dataset(), E_high.to_dataset(), E_f.to_dataset()])
file_E = data_dir+'E_drifter_shift_30days.nc'
%time E_drifter.to_netcdf(file_E, mode='w')  

CPU times: user 1.28 s, sys: 116 ms, total: 1.39 s
Wall time: 1.73 s




In [17]:
# Tear everything down in order: disconnect the client first, then close the
# cluster. The earlier `cluster.scheduler.close()` closed only the scheduler
# and returned a Future, leaving the client and the cluster object open.
# NOTE(review): `cluster.close()` is the public dask_jobqueue entry point —
# confirm it is available in the installed version.
client.close()
cluster.close()

<Future finished result=None>