In [1]:
import glob
import logging
import os
import pdb
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pymongo
import xarray as xr
from dateutil.relativedelta import *
from scipy.io import loadmat

In [2]:
# NetCDF file to load; the path is relative, so it is resolved against the
# kernel's current working directory.
rgFilename = 'RG_ArgoClim_Temp.nc'
# decode_times=False is passed so the time axis is not decoded on load;
# insert_pres_time_grid does integer/modulo arithmetic on the TIME values.
rg = xr.open_dataset(rgFilename, decode_times=False)

In [3]:
# Display the PRESSURE coordinate of the dataset; insert_pres_time_grid
# groups by this coordinate when building documents.
rg.PRESSURE

<xarray.DataArray 'PRESSURE' (PRESSURE: 58)>
array([   2.5,   10. ,   20. ,   30. ,   40. ,   50. ,   60. ,   70. ,   80. ,
         90. ,  100. ,  110. ,  120. ,  130. ,  140. ,  150. ,  160. ,  170. ,
        182.5,  200. ,  220. ,  240. ,  260. ,  280. ,  300. ,  320. ,  340. ,
        360. ,  380. ,  400. ,  420. ,  440. ,  462.5,  500. ,  550. ,  600. ,
        650. ,  700. ,  750. ,  800. ,  850. ,  900. ,  950. , 1000. , 1050. ,
       1100. , 1150. , 1200. , 1250. , 1300. , 1350. , 1412.5, 1500. , 1600. ,
       1700. , 1800. , 1900. , 1975. ], dtype=float32)
Coordinates:
  * PRESSURE  (PRESSURE) float32 2.5 10.0 20.0 30.0 ... 1800.0 1900.0 1975.0
Attributes:
    units:          dbar
    positive:       down
    point_spacing:  uneven
    axis:           Z
    standard_name:  depth
    bounds:         PRESSURE_bnds

In [4]:
def transform_lon(lon):
    '''
    Maps a longitude of 180 degrees or more onto its negative equivalent by
    subtracting 360; any smaller value is returned unchanged.
    '''
    return lon - 360 if lon >= 180 else lon

def make_doc(df, date, presLevel, dataVal):
    '''
    Builds the mongodb document for one grid.

    The dataVal column of df is renamed to 'value', missing entries are
    replaced by -9999.0, and the rows are stored as a list of records under
    'data' next to the date, pressure level and grid metadata.
    '''
    records = (
        df.rename(index=str, columns={dataVal: 'value'})
          .fillna(float(-9999))
          .to_dict(orient='records')
    )
    return {
        'dataVal': dataVal,
        'data': records,
        'date': date,
        'pres': float(presLevel),
        'cellsize': 1,  #  Degree
        'NODATA_value': -9999,
    }

def insert_pres_time_grid(tempAnom, coll, dataVal='ARGO_TEMPERATURE_ANOMALY', insertOne=False):
    '''
    Inserts one document into coll for every (TIME, PRESSURE) slice of tempAnom.

    Each TIME value is treated as a month offset counted from January 2004
    and converted to the document date. Longitudes are shifted by -20 degrees
    and passed through transform_lon before the slice is handed to make_doc.
    With insertOne=True the function returns right after the first insert.
    '''
    for timeValue, timeSlice in tempAnom.groupby('TIME'):
        year = int(2004 + timeValue // 12)
        month = int(timeValue % 12 + 1)
        date = datetime.strptime('{0}-{1}'.format(year, month), '%Y-%m')
        print(date)

        frame = timeSlice.to_dataframe().reset_index()
        frame['LONGITUDE'] = (frame['LONGITUDE'] - 20).apply(transform_lon)

        for presLevel, levelFrame in frame.groupby('PRESSURE'):
            gridFrame = levelFrame.drop(columns=['TIME', 'PRESSURE'])
            coll.insert_one(make_doc(gridFrame, date, presLevel, dataVal))
            if insertOne:  # Use for testing
                return

In [5]:
def create_collection(dbName='argo2', collectionName='rgTempAnom',
                      dbUrl='mongodb://localhost:27017/'):
    '''
    Returns the mongodb collection dbName.collectionName with its indexes set up.

    dbName: name of the database to use.
    collectionName: name of the collection inside that database.
    dbUrl: connection string handed to pymongo.MongoClient; defaults to the
        local server the notebook used originally.

    The collection is passed through init_profiles_collection before it is
    returned.
    '''
    client = pymongo.MongoClient(dbUrl)
    coll = client[dbName][collectionName]
    return init_profiles_collection(coll)

def init_profiles_collection(coll):
    '''
    Creates the query indexes on coll and returns coll.

    Indexes are created on date, pres and data.LATITUDE (descending) and on
    data.LONGITUDE (ascending). Index creation is best-effort: a pymongo
    error is logged as a warning (with traceback) and the collection is
    returned anyway. Any other exception propagates to the caller.
    '''
    indexSpecs = (
        ('date', pymongo.DESCENDING),
        ('pres', pymongo.DESCENDING),
        ('data.LATITUDE', pymongo.DESCENDING),
        ('data.LONGITUDE', pymongo.ASCENDING),
    )
    try:
        for field, direction in indexSpecs:
            coll.create_index([(field, direction)])

        # TODO: may want to store as geojson feature collection one day
        # coll.create_index([('data.geometries', pymongo.GEOSPHERE)])
    except pymongo.errors.PyMongoError:
        # Only database errors are tolerated here; a bare except would also
        # swallow KeyboardInterrupt and programming errors.
        logging.warning('not able to get collections or set indexes', exc_info=True)
    return coll

In [7]:
# Full load: write the temperature-anomaly grids into the default collection
# (create_collection defaults to database 'argo2', collection 'rgTempAnom').
coll = create_collection()
# Name of the variable in rg to insert; also stored on each document.
dataVal='ARGO_TEMPERATURE_ANOMALY'
tempAnom = rg[dataVal]
# insertOne is left at False, so every (time step, pressure level) slice is inserted.
insert_pres_time_grid(tempAnom, coll, dataVal)

2004-01-01 00:00:00


KeyboardInterrupt: 

In [8]:
# Build a small collection in the 'argo-express-test' database for testing:
# insertOne=True makes insert_pres_time_grid return after the first document.
# Relies on tempAnom defined in the previous cell.
testColl = create_collection(dbName='argo-express-test', collectionName='rgTempAnom')
insert_pres_time_grid(tempAnom, testColl, dataVal='ARGO_TEMPERATURE_ANOMALY', insertOne=True)

2004-01-01 00:00:00
