In [1]:
import numpy as np
import matplotlib as mpl
import glob
import os
import matplotlib.pyplot as plt
from scipy.io import savemat,loadmat,whosmat
import pandas as pd
import datetime, time
import xarray as xr
import sys

In [5]:
# Read the variable-metadata table and re-key it by netCDF variable name, so
# that attr_dict[<netcdf_var>][<column>] looks up one attribute of one variable.
df = pd.read_csv('../SWIFT/SWIFT_README.csv').set_index('netcdf_var').T
attr_dict = df.to_dict()

# Spot-check a single entry of the lookup table.
print(attr_dict['lat']['units'])

degrees


In [6]:
# List the units recorded for every variable in the metadata table.
for var, var_attrs in attr_dict.items():
    print(var,var_attrs['units'])

uplooking_tkedissipationrate W/kg
uplooking_z m
downlooking_velocityprofile m/s
downlooking_z m
winddirT degrees_north
winddirstddev degrees
windspd m/s
windspdstddev m/s
airtemp degC
airtempstddev degC
sigwaveheight m
peakwaveperiod s
peakwavedirT degrees_north
wavespectra_energy m^2/Hz
wavespectra_freq Hz
wavespectra_a1 Hz
wavespectra_b1 Hz
wavespectra_a2 Hz
wavespectra_b2 Hz
lat degrees
lon degrees
watertemp degC
salinity PSU
puck nan
driftdirT degrees_true
driftspd m/s
z m
x m
y m
u m/s
v m/s


In [7]:
def matlab2datetime(matlab_datenum):
    """Convert a MATLAB datenum to a Python ``datetime.datetime``.

    Parameters
    ----------
    matlab_datenum : float
        Scalar MATLAB serial date number (whole days plus a fractional day).

    Returns
    -------
    datetime.datetime
        The corresponding naive datetime.
    """
    # The integer part is fed to fromordinal(); the 366-day shift applied
    # below accounts for the different day-count origins of the two systems.
    whole_days = int(matlab_datenum)
    fractional_day = matlab_datenum % 1
    origin_shift = datetime.timedelta(days=366)

    midnight = datetime.datetime.fromordinal(whole_days)
    offset = datetime.timedelta(days=fractional_day) - origin_shift
    return midnight + offset

In [8]:
def swift_mat_to_netcdf(input_mat,identifier):
    """Convert one MATLAB structure file to a netCDF file via xarray.

    The structure array stored under ``identifier`` is loaded, its ``time``
    field (MATLAB datenums) is converted to ``datetime64[s]``, and its fields
    are assembled into a single ``xarray.Dataset`` indexed by ``time``:

    * fields whose last value is not an ``np.ndarray`` are treated as plain
      time series;
    * the remaining fields are treated as nested structures. Each one that
      contains a ``'z'`` or ``'freq'`` sub-field becomes a set of 2-D
      variables named ``<field>_<subfield>`` on the dimensions
      ``('time', '<dim>_<field>')``. Nested structures without such a
      sub-field, and any whose name contains ``'puck'``, are left out.

    Units and descriptions are copied from the notebook-level ``attr_dict``
    for every variable name found in both places, and the result is written
    next to the input file as ``<input name without extension>_converted.nc``.

    Parameters
    ----------
    input_mat : str
        Path to the input ``.mat`` file.
    identifier : str
        Name of the structure variable inside the file, e.g. ``'SWIFT'``
        (or ``'AWAC'`` for BGEP mooring data).

    Returns
    -------
    xarray.Dataset or int
        The merged dataset. If the structure's ``time`` field is
        zero-dimensional nothing is converted or written and ``0`` is
        returned.
    """
    test_dims = ['z','freq']
    struct = loadmat(input_mat,squeeze_me=True)
    my_dict = struct[identifier]
    allds = 0  # returned as-is when there is no time axis to convert

    if len(my_dict['time'].shape) >0:
        my_dict['time'] = [matlab2datetime(tval) for tval in my_dict['time']]
        my_dict['time'] = np.array(my_dict['time'],dtype='datetime64[s]')
        ntime = len(my_dict['time'])
        df = pd.DataFrame(my_dict)
        df = df.set_index('time')

        # the 1D (time) data reads directly; a column is classified by the
        # type of its last value only
        headers = list(df.columns)
        headers_1d = [h for h in headers if type(df[h].values[-1]) is not np.ndarray]
        df_1d = df[headers_1d]

        # for those that are not 1d
        headers_multd = [h for h in headers if h not in headers_1d]
        list_ds = []
        print(headers_multd)
        headers_multd = [f for f in headers_multd if 'puck' not in f]

        for mult in headers_multd:
            records = df[mult].values
            first_record = records[0][()]  # unwrap the 0-d array into its record
            nvar = len(first_record)
            print(mult,nvar)
            ndim = len(first_record[0])
            print(ndim)
            # Take the sub-field names from the dtype itself rather than by
            # slicing its string representation.
            varheaders = list(first_record.dtype.names)
            multidimdata = np.zeros([nvar,ntime,ndim])
            for t in range(ntime):
                record = records[t][()]
                for v in range(nvar):
                    multidimdata[v,t,:] = record[v][:]

            # If several candidates are present, the last one in test_dims wins.
            my_dim = ''
            for candidate in test_dims:
                if candidate in varheaders:
                    my_dim = candidate

            # Structures with no recognised dimension sub-field are skipped.
            if my_dim !='': # found a dimension for multidimensional data
                ind_dim = varheaders.index(my_dim)
                varheaders.remove(my_dim)
                # Coordinate values are taken from the first time step only.
                my_dim_vals = multidimdata[ind_dim,0,:]
                multidimdata = np.delete(multidimdata,ind_dim,axis=0)

                # NOTE(review): the coordinate is named '<dim>_<field>', while
                # the metadata keys printed from attr_dict look like
                # '<field>_<dim>' (e.g. 'uplooking_z'), so the coordinates
                # presumably never receive units/description below - confirm.
                dim_name = my_dim+'_'+mult
                ds = xr.Dataset(
                    data_vars={
                        mult+'_'+name : xr.DataArray(multidimdata[v,:,:],dims=['time',dim_name])
                        for v,name in enumerate(varheaders)
                    },
                    coords={'time' : my_dict['time'], dim_name : my_dim_vals}
                )
                list_ds.append(ds)

        # combine everything
        list_ds.append(df_1d.to_xarray())
        allds = xr.merge(list_ds)

        # add some metadata
        allds.attrs= {'description' : identifier, # add something more here? 
                'processed by' : 'Jim Thomson, APL' }

        # attr_dict is the notebook-level lookup built from the README table
        for var in attr_dict:
            if var in allds:
                allds[var].attrs['units'] = attr_dict[var]['units']
                allds[var].attrs['description'] = attr_dict[var]['description']

        # creates netcdf file in directory where data is; splitext drops the
        # extension whatever its length
        allds.to_netcdf(os.path.splitext(input_mat)[0]+'_converted.nc')
        print('saved')
    return allds

In [9]:
# Collect every 30-minute .mat file in the working directory, in name order.
allfiles = sorted(name for name in os.listdir('.') if '30min.mat' in name)
print(allfiles)

# write files without time slicing; after the loop `ds` holds the dataset
# returned for the last file
for file in allfiles:
    print(file)
    ds = swift_mat_to_netcdf(file,'SWIFT')

['SWIFT09_02Nov2015_30min.mat', 'SWIFT09_06-08Oct2015_30min.mat', 'SWIFT09_11-14Oct2015_30min.mat', 'SWIFT09_23-24Oct2015_30min.mat', 'SWIFT09_31Oct-01Nov2015_30min.mat', 'SWIFT11_04Oct2015_30min.mat', 'SWIFT11_10Oct2015_30min.mat', 'SWIFT11_11-14Oct2015_30min.mat', 'SWIFT11_16-18Oct2015_30min.mat', 'SWIFT11_23-24Oct2015_30min.mat', 'SWIFT11_31Oct-01Nov2015_30min.mat', 'SWIFT12_02Oct2015_30min.mat', 'SWIFT12_04Oct2015_30min.mat', 'SWIFT12_06-08Oct2015_30min.mat', 'SWIFT12_11-14Oct2015_30min.mat', 'SWIFT12_16-17Oct2015_30min.mat', 'SWIFT12_18Oct2015_30min.mat', 'SWIFT12_23-24Oct2015_30min.mat', 'SWIFT13_11-14Oct2015_30min.mat', 'SWIFT13_23-24Oct2015_30min.mat', 'SWIFT13_31Oct-01Nov2015_30min.mat', 'SWIFT14_04Oct2015_30min.mat', 'SWIFT14_10Oct2015_30min.mat', 'SWIFT14_11-13Oct2015_30min.mat', 'SWIFT14_16-18Oct2015_30min.mat', 'SWIFT14_23-24Oct2015_30min.mat', 'SWIFT15_11-13Oct2015_30min.mat', 'SWIFT15_16-18Oct2015_30min.mat', 'SWIFT15_23-25Oct2015_30min.mat', 'SWIFT15_31Oct-01Nov2015_30m