In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import pymongo
import pdb
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from scipy.io import loadmat
import os
import glob

In [45]:
def transform_lon(lon):
    '''
    Transforms longitude from absolute to -180 to 180 deg
    '''
    if lon >= 180:
        lon -= 360
    return lon

def make_docs(df, date, presLevel, dataVariable, param, measurement, gridName, units, nChunks):
    '''
    Takes df and converts it into a document for mongodb
    '''
    docs = []
    df = df.rename(index=str, columns={dataVariable: 'value'})
    dataDict = df.to_dict(orient='records')
    for idx in range(0, nChunks):
        doc = {}
        doc['gridName'] = gridName
        doc['measurement'] = measurement #temperature or psal
        doc['units'] = units # degrees celsius or psu
        doc['data'] = list(divide_chunks(dataDict, nChunks))
        doc['variable'] = dataVariable # ARGO_TEMPERATURE_ANOMALY or ARGO_TEMPERATURE_MEAN or total
        doc['date'] = date
        doc['pres'] = presLevel
        doc['param'] = param # anomaly, mean, or total
        doc['cellsize'] = 1  #  Degree
        doc['NODATA_value'] = np.NaN
        docs.append(doc)
    return docs

def create_grid_df(chunk, latName, longName):
    df = chunk.to_dataframe()
    df = df.reset_index()
    df = df.rename(columns={latName:'lat', longName: 'lon', 'Z': 'pres'})
    df['lon'] = df['lon'].apply(lambda lon: transform_lon(lon))
    return df

def divide_chunks(gridData, nChunks): 
    # Yield successive nChunks
    # chunks from list gridData. 
    # looping till length l 
    for i in range(0, len(gridData), nChunks):  
        yield gridData[i:i + nChunks]
def format_date(date):
    return datetime.strptime(str(date.year)+'-'+str(date.month), '%Y-%m')
    
def insert_pres_time_grid(da, coll, param, measurement, gridName, latName, longName, units,
                          dataVariable, maskName, nChunks=5):
    baseDate = datetime.strptime('2008-01-01', '%Y-%m-%d')
    for sec, chunk in da.groupby('time'):
        date = baseDate + timedelta(seconds=int(sec))
        date = format_date(date)
        print(date)
        df = create_grid_df(chunk, latName, longName)
        pdb.set_trace()
        df[param] = df[param] = df.loc[(df[maskName] != 1), param] = np.NaN
        for pdx, presDf in df.groupby('pres'):
            if not pdx in [-2.1, -12.15, -95, -1800]:
                continue
            presDf['pres'] = presDf['pres'].apply(lambda x: np.round(-1 * x, 5))
            presDf = presDf.drop(['time', 'pres', 'iter', 'drF', 'Depth', 'rA', 'hFacC'], axis=1)
            docs = make_docs(presDf, date, pdx, dataVariable, param, measurement, gridName, units, nChunks)
            coll.insert_many(docs)


In [46]:
def create_collection(dbName, collectionName):
    dbUrl = 'mongodb://localhost:27017/'
    client = pymongo.MongoClient(dbUrl)
    db = client[dbName]
    coll = db[collectionName]
    coll = init_profiles_collection(coll)
    return coll

def init_profiles_collection(coll):
    try:
        coll.create_index([('date', pymongo.DESCENDING)])
        coll.create_index([('pres', pymongo.DESCENDING)])
        coll.create_index([('data.lat', pymongo.DESCENDING)])
        coll.create_index([('data.lon', pymongo.ASCENDING)])
        
        #may want to store as geojson feature collection one day
        #coll.create_index([('data.geometries', pymongo.GEOSPHERE)])

    except:
        logging.warning('not able to get collections or set indexes')
    return coll

so.info()

[(np.round(x, 5), x) for x in np.unique(so.Z.data).astype(np.float64)]

np.unique(so.Depth.data)

In [47]:
np.unique(so.hFacC.data).()

SyntaxError: invalid syntax (<ipython-input-47-901dfe36f455>, line 1)

In [48]:
soFilename = '/storage/bsose/bsose_i105_2008to2012_monthly_O2.nc'
so = xr.open_dataset(soFilename, decode_times=False)
so.info()

xarray.Dataset {
dimensions:
	XC = 1080 ;
	YC = 294 ;
	Z = 52 ;
	time = 60 ;

variables:
	int64 iter(time) ;
		iter:long_name = model timestep number ;
		iter:standard_name = timestep ;
	int64 time(time) ;
		time:long_name = Time ;
		time:standard_name = time ;
		time:axis = T ;
		time:units = seconds since 2008-01-01 ;
		time:calendar = proleptic_gregorian ;
	float32 XC(XC) ;
		XC:coordinate = YC XC ;
		XC:units = degrees_east ;
		XC:standard_name = longitude ;
		XC:long_name = longitude ;
		XC:axis = X ;
	float32 YC(YC) ;
		YC:coordinate = YC XC ;
		YC:units = degrees_north ;
		YC:standard_name = latitude ;
		YC:long_name = latitude ;
		YC:axis = Y ;
	float32 Z(Z) ;
		Z:units = m ;
		Z:positive = down ;
		Z:standard_name = depth ;
		Z:long_name = vertical coordinate of cell center ;
		Z:axis = Z ;
	float32 Depth(YC, XC) ;
		Depth:coordinate = XC YC ;
		Depth:units = m ;
		Depth:standard_name = ocean_depth ;
		Depth:long_name = ocean depth ;
	float32 rA(YC, XC) ;
		rA:coordinate = YC 

In [49]:
dataVariable='TRAC03'
soDataArray = so[dataVariable]
param='TRAC03'
maskName='hFacC'
measurement='doxy'
latName = 'YC'
longName = 'XC'
gridName='soseDoxy'
units = 'mol O/m'
dbName = 'argo'
nChunks = 5

# Sea Ice

In [80]:
def format_date(date):
    '''keeps only the year and month of the date object'''
    return datetime.strptime(str(date.year)+'-'+str(date.month), '%Y-%m')

def insert_time_grid(da, coll, param, measurement, gridName, latName, longName, units,
                          dataVariable, maskName, nChunks=5):
    baseDate = datetime.strptime('2013-01-01', '%Y-%m-%d')
    presLevel = 0
    for sec, chunk in da.groupby('t'):
        date = baseDate + timedelta(seconds=int(sec))
        date = format_date(date)
        print(date)
        df = create_grid_df(chunk, latName, longName)
        df = df.rename(columns={'t': 'time'})
        df = df.drop(['time'], axis=1)
        df.lat = df.lat.round(3)
        df.lon = df.lon.round(3)
        docs = make_docs(df, date, presLevel, dataVariable, param, measurement, gridName, units, nChunks)
        coll.insert_many(docs)

In [81]:
soFilename = '/storage/bsose/bsose_i133_2013to2018_1deg_monthly_SeaIceArea.nc'
so = xr.open_dataset(soFilename, decode_times=False)
so.info()

xarray.Dataset {
dimensions:
	t = 72 ;
	x = 360 ;
	y = 98 ;

variables:
	float32 t(t) ;
		t:units = seconds since 2013-01-01 ;
	float32 x(x) ;
		x:units = degrees East ;
	float32 y(y) ;
		y:units = degrees North ;
	float64 SIarea(t, y, x) ;
		SIarea:units = m^2/m^2 ;
		SIarea:long_name = SEAICE fractional ice-covered area [0 to 1] ;
		SIarea:standard_name = SIarea ;

// global attributes:
}

In [89]:
so.y.to

<xarray.DataArray 'y' (y: 98)>
array([-77.89544 , -77.683945, -77.46881 , -77.24999 , -77.0274  , -76.800995,
       -76.57072 , -76.33649 , -76.09825 , -75.85595 , -75.60951 , -75.358864,
       -75.10396 , -74.84472 , -74.58106 , -74.31294 , -74.04026 , -73.76299 ,
       -73.481026, -73.1943  , -72.90275 , -72.6063  , -72.30486 , -71.99836 ,
       -71.686745, -71.36991 , -71.0478  , -70.72034 , -70.38742 , -70.04899 ,
       -69.70496 , -69.355255, -68.99979 , -68.63849 , -68.27126 , -67.89803 ,
       -67.518715, -67.13325 , -66.741516, -66.34345 , -65.93899 , -65.528015,
       -65.11046 , -64.68624 , -64.25529 , -63.8175  , -63.3728  , -62.92111 ,
       -62.46234 , -61.996414, -61.52326 , -61.04279 , -60.554928, -60.05959 ,
       -59.556713, -59.0462  , -58.52799 , -58.00201 , -57.46819 , -56.926456,
       -56.376743, -55.818985, -55.25311 , -54.67906 , -54.096783, -53.506214,
       -52.9073  , -52.29999 , -51.68424 , -51.059998, -50.427227, -49.785877,
       -49.135937, -4

In [90]:
np.diff([-77.89544 , -77.683945, -77.46881 , -77.24999 , -77.0274  , -76.800995,
       -76.57072 , -76.33649 , -76.09825 , -75.85595 , -75.60951 , -75.358864,
       -75.10396 , -74.84472 , -74.58106 , -74.31294 , -74.04026 , -73.76299 ,
       -73.481026, -73.1943  , -72.90275 , -72.6063  , -72.30486 , -71.99836 ,
       -71.686745, -71.36991 , -71.0478  , -70.72034 , -70.38742 , -70.04899 ,
       -69.70496 , -69.355255, -68.99979 , -68.63849 , -68.27126 , -67.89803 ,
       -67.518715, -67.13325 , -66.741516, -66.34345 , -65.93899 , -65.528015,
       -65.11046 , -64.68624 , -64.25529 , -63.8175  , -63.3728  , -62.92111 ,
       -62.46234 , -61.996414, -61.52326 , -61.04279 , -60.554928, -60.05959 ,
       -59.556713, -59.0462  , -58.52799 , -58.00201 , -57.46819 , -56.926456,
       -56.376743, -55.818985, -55.25311 , -54.67906 , -54.096783, -53.506214,
       -52.9073  , -52.29999 , -51.68424 , -51.059998, -50.427227, -49.785877,
       -49.135937, -48.47737 , -47.81012 , -47.134186, -46.449554, -45.75621 ,
       -45.054134, -44.343334, -43.62381 , -42.895573, -42.158634, -41.413017,
       -40.658756, -39.89586 , -39.124363, -38.344326, -37.555805, -36.75885 ,
       -35.95352 , -35.139896, -34.318054, -33.488094, -32.650097, -31.804174,
       -30.95044 , -30.08657 ])

array([0.211495, 0.215135, 0.21882 , 0.22259 , 0.226405, 0.230275,
       0.23423 , 0.23824 , 0.2423  , 0.24644 , 0.250646, 0.254904,
       0.25924 , 0.26366 , 0.26812 , 0.27268 , 0.27727 , 0.281964,
       0.286726, 0.29155 , 0.29645 , 0.30144 , 0.3065  , 0.311615,
       0.316835, 0.32211 , 0.32746 , 0.33292 , 0.33843 , 0.34403 ,
       0.349705, 0.355465, 0.3613  , 0.36723 , 0.37323 , 0.379315,
       0.385465, 0.391734, 0.398066, 0.40446 , 0.410975, 0.417555,
       0.42422 , 0.43095 , 0.43779 , 0.4447  , 0.45169 , 0.45877 ,
       0.465926, 0.473154, 0.48047 , 0.487862, 0.495338, 0.502877,
       0.510513, 0.51821 , 0.52598 , 0.53382 , 0.541734, 0.549713,
       0.557758, 0.565875, 0.57405 , 0.582277, 0.590569, 0.598914,
       0.60731 , 0.61575 , 0.624242, 0.632771, 0.64135 , 0.64994 ,
       0.658567, 0.66725 , 0.675934, 0.684632, 0.693344, 0.702076,
       0.7108  , 0.719524, 0.728237, 0.736939, 0.745617, 0.754261,
       0.762896, 0.771497, 0.780037, 0.788521, 0.796955, 0.805

In [83]:
soFilename = '/storage/bsose/bsose_i133_2013to2018_1deg_monthly_SeaIceArea.nc'
so = xr.open_dataset(soFilename, decode_times=False)
dataVariable='SIarea'
soDataArray = so[dataVariable]
param='SIarea'
maskName=None
measurement='SIarea'
gridName='soseSIarea'
latName = 'y'
longName = 'x'
units = 'm^2/m^2'
dbName = 'argo'
nChunks = 1

In [84]:
coll = create_collection(dbName, gridName)
coll.drop()
insert_time_grid(soDataArray, coll, param, 
                      measurement, gridName, latName, longName, units,
                      param, maskName, nChunks)

2013-01-01 00:00:00
2013-02-01 00:00:00
2013-03-01 00:00:00
2013-04-01 00:00:00
2013-05-01 00:00:00
2013-06-01 00:00:00
2013-07-01 00:00:00
2013-08-01 00:00:00
2013-09-01 00:00:00
2013-10-01 00:00:00
2013-11-01 00:00:00
2013-12-01 00:00:00
2014-01-01 00:00:00
2014-02-01 00:00:00
2014-03-01 00:00:00
2014-04-01 00:00:00
2014-05-01 00:00:00
2014-06-01 00:00:00
2014-07-01 00:00:00
2014-08-01 00:00:00
2014-09-01 00:00:00
2014-10-01 00:00:00
2014-11-01 00:00:00
2014-12-01 00:00:00
2015-01-01 00:00:00
2015-02-01 00:00:00
2015-03-01 00:00:00
2015-04-01 00:00:00
2015-05-01 00:00:00
2015-06-01 00:00:00
2015-07-01 00:00:00
2015-08-01 00:00:00
2015-09-01 00:00:00
2015-10-01 00:00:00
2015-11-01 00:00:00
2015-12-01 00:00:00
2016-01-01 00:00:00
2016-02-01 00:00:00
2016-03-01 00:00:00
2016-04-01 00:00:00
2016-05-01 00:00:00
2016-06-01 00:00:00
2016-07-01 00:00:00
2016-08-01 00:00:00
2016-09-01 00:00:00
2016-10-01 00:00:00
2016-11-01 00:00:00
2016-12-01 00:00:00
2017-01-01 00:00:00
2017-02-01 00:00:00


In [32]:
datetime.strptime('2013-1', '%Y-%m')

datetime.datetime(2013, 1, 1, 0, 0)