In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import os, sys
import glob

Multiple NetCDF files can be joined into one with the NCO utilities; see
[this CMEMS forum thread](http://forum.marine.copernicus.eu/discussion/205/how-to-mergeconcatenate-cmems-netcdf-files-tips/p1) for tips.

The following routine concatenates the files without needing NCO, but it can be slow when there are many files.

In [3]:
def pressure_from_path(path):
    """Return the pressure level encoded in a result file name.

    File names are expected to look like ``mTempData_<pressure>.csv``: the
    text after the last underscore (with the extension removed) is parsed
    as a float.

    Parameters
    ----------
    path : str
        Path to a result CSV file.

    Returns
    -------
    float
        The parsed pressure, or ``inf`` when the name does not follow the
        expected pattern, so such files sort last instead of aborting the sort.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    try:
        return float(stem.rsplit('_', 1)[-1])
    except ValueError:
        return float('inf')


# glob.glob returns paths in arbitrary (filesystem-dependent) order. Sort them
# numerically by pressure so that anything built from this list is ordered
# deterministically; a plain string sort would place 1050.0 before 112.5.
files = sorted(glob.glob('/storage/ShenResults/*.csv'), key=pressure_from_path)

In [4]:
files

['/storage/ShenResults/mTempData_3.75.csv',
 '/storage/ShenResults/mTempData_9.75.csv',
 '/storage/ShenResults/mTempData_16.0.csv',
 '/storage/ShenResults/mTempData_25.0.csv',
 '/storage/ShenResults/mTempData_40.0.csv',
 '/storage/ShenResults/mTempData_62.5.csv',
 '/storage/ShenResults/mTempData_87.5.csv',
 '/storage/ShenResults/mTempData_112.5.csv',
 '/storage/ShenResults/mTempData_137.5.csv',
 '/storage/ShenResults/mTempData_175.0.csv',
 '/storage/ShenResults/mTempData_225.0.csv',
 '/storage/ShenResults/mTempData_275.0.csv',
 '/storage/ShenResults/mTempData_350.0.csv',
 '/storage/ShenResults/mTempData_450.0.csv',
 '/storage/ShenResults/mTempData_550.0.csv',
 '/storage/ShenResults/mTempData_650.0.csv',
 '/storage/ShenResults/mTempData_750.0.csv',
 '/storage/ShenResults/mTempData_850.0.csv',
 '/storage/ShenResults/mTempData_950.0.csv',
 '/storage/ShenResults/mTempData_1050.0.csv',
 '/storage/ShenResults/mTempData_1150.0.csv',
 '/storage/ShenResults/mTempData_1250.0.csv',
 '/storage/She

In [5]:
# Turn each CSV into its own xarray Dataset, then stack them along 'pres'.
# The file names suggest one pressure level per file -- TODO confirm.
datasets = []
for file in files:
    # The first CSV column is read as the index and then replaced by the
    # four coordinate columns, which become a MultiIndex.
    df = pd.read_csv(file, index_col=0).set_index(['lat', 'long', 'tdx', 'pres'])
    ds = df.to_xarray()  # each MultiIndex level becomes a dimension
    datasets.append(ds)
combined = xr.concat(datasets, dim='pres')

In [6]:
# Request zlib compression at the maximum level (9) for every data variable
# and write the combined dataset to a NetCDF file.
comp = {'zlib': True, 'complevel': 9}
encoding = dict.fromkeys(combined.data_vars, comp)
combined.to_netcdf('mTempData.nc', encoding=encoding)

# Check if file was created properly

In [8]:
# Re-open the NetCDF file written by the previous cell to confirm it can be read back.
# NOTE(review): this rebinds `ds`, which the CSV-loading loop also uses for its
# per-file Dataset.
ds = xr.open_dataset('mTempData.nc')

In [9]:
ds

<xarray.Dataset>
Dimensions:    (lat: 552, long: 1233, pres: 26, tdx: 2)
Coordinates:
  * lat        (lat) float64 -89.0 -74.25 -73.75 -73.5 ... 72.0 74.5 75.25 78.25
  * long       (long) float64 -179.5 -179.2 -179.0 -178.8 ... 179.2 179.5 179.8
  * tdx        (tdx) int64 1093 1094
  * pres       (pres) float64 3.75 9.75 16.0 ... 1.45e+03 1.625e+03 1.875e+03
Data variables:
    mean       (lat, long, tdx, pres) float64 ...
    std        (lat, long, tdx, pres) float64 ...
    dof        (lat, long, tdx, pres) float64 ...
    startDate  (lat, long, tdx, pres) object ...
    endDate    (lat, long, tdx, pres) object ...

In [42]:
# Drop every row that contains at least one missing value.
# NOTE(review): in the visible cells `df` is only assigned inside the
# CSV-loading loop, so it presumably holds the last file read -- confirm.
# The jump in execution count (9 -> 42) suggests intermediate cells were
# removed, so this cell may not reproduce on a fresh kernel.
df = df.dropna(axis=0)

In [43]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,std,dof,startDate,endDate
lat,long,pres,tdx,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-89.0,-0.0,3.75,1094,-1.471143,0.004502,14.0,2018-12-27,2018-12-31
-89.0,-0.0,2.25,1094,-1.4705,0.003775,8.0,2018-12-27,2018-12-31
-89.0,-0.0,10.0,1094,0.823216,1.202438,37.0,2018-12-27,2018-12-31
-89.0,-0.0,4.0,1094,-1.277733,0.158675,15.0,2018-12-27,2018-12-31
-89.0,-0.0,5.0,1094,-0.959263,0.280545,19.0,2018-12-27,2018-12-31
