In [3]:
import glob
import logging
import os
import pdb
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pymongo
import xarray as xr
from dateutil.relativedelta import *
from scipy.io import loadmat

ModuleNotFoundError: No module named 'boobs'

In [6]:
# Scratch: single-entry dictionary used by the next cell.
a_dict = dict(boobs=True)

In [10]:
# Scratch: display the values view of a_dict.
a_dict.values()

dict_values([True])

In [2]:
def transform_lon(lon):
    '''
    Shifts a longitude of 180 degrees or more down by one full turn.

    Values below 180 are returned unchanged; values of 180 and above have
    360 subtracted once (so 180 maps to -180 and 379.5 maps to 19.5).
    '''
    return lon - 360 if lon >= 180 else lon

def make_doc(df, date, presLevel, dataVariable, param, measurement, gridName, units):
    '''
    Takes df and converts it into a document for mongodb

    df: DataFrame holding one grid slice; the column named dataVariable is
        stored under the key 'value', every other column keeps its name.
    date: stored as-is under 'date'.
    presLevel: pressure level, stored as a float under 'pres'.
    dataVariable: name of the value column in df, also stored under 'variable'.
    param: anomaly, mean, or total.
    measurement: temperature or psal.
    gridName: stored as-is under 'gridName'.
    units: degrees celsius or psu.

    Returns a dict whose 'data' entry is a list with one record per row of df.
    '''
    # index=str is kept from the original rename; the index is not part of
    # the 'records' output either way.
    records = (
        df.rename(index=str, columns={dataVariable: 'value'})
          .to_dict(orient='records')
    )
    # Key order matches the original insertion order.
    return {
        'gridName': gridName,
        'measurement': measurement,
        'units': units,
        'data': records,
        'variable': dataVariable,
        'date': date,
        'pres': float(presLevel),
        'param': param,
        'cellsize': 1,  # Degree
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0
        'NODATA_value': np.nan,
    }

def create_grid_df(chunk):
    '''
    Flattens chunk (anything exposing to_dataframe) into a DataFrame with a
    fresh integer index, columns LATITUDE/LONGITUDE renamed to lat/lon, and
    every lon value passed through transform_lon.
    '''
    grid = (
        chunk.to_dataframe()
             .reset_index()
             .rename(columns={'LATITUDE': 'lat', 'LONGITUDE': 'lon'})
    )
    grid['lon'] = grid['lon'].apply(transform_lon)
    return grid
    
def insert_pres_time_grid(xrDataArray, coll, param, measurement, gridName, units,
                          dataVariable, insertOne=False, meanDf=None):
    '''
    Inserts one document per (TIME step, PRESSURE level) of xrDataArray into coll.

    xrDataArray: named xarray DataArray that can be grouped by 'TIME'; each
        group is flattened with create_grid_df and must yield TIME and
        PRESSURE columns.
    coll: object exposing insert_one (a pymongo collection in this notebook).
    param: anomaly, mean, or total. For 'total', the matching *_MEAN column of
        meanDf is added to the anomaly column before the documents are built.
    measurement, gridName, units: passed through to make_doc unchanged.
    dataVariable: name of the column make_doc stores as 'value'. For 'total'
        this is the name given to the computed anomaly + mean column.
    insertOne: if True, return after inserting the first document (use for testing).
    meanDf: DataFrame from create_grid_df for the mean field; required when
        param == 'total'.

    Raises ValueError if param == 'total' and meanDf is None.
    '''
    isTotal = param == 'total'
    if isTotal:
        if meanDf is None:
            raise ValueError("meanDf is required when param == 'total'")
        # Take the column names from the array itself, not a hard-coded
        # salinity name, so temperature and salinity both work.
        anomalyVariable = xrDataArray.name
        meanVariable = anomalyVariable.replace('ANOMALY', 'MEAN')

    for tdx, chunk in xrDataArray.groupby('TIME'):
        # TIME is used as a month counter whose month 0 is January 2004.
        month = int(tdx % 12 + 1)
        year = int(2004 + tdx // 12)
        date = datetime(year, month, 1)
        print(date)
        df = create_grid_df(chunk)

        if isTotal:
            # The addition aligns on the integer row index, so it assumes df and
            # meanDf list grid points in the same order -- TODO confirm.
            df[dataVariable] = df[anomalyVariable] + meanDf[meanVariable]
            # Drop the anomaly column, NOT the total that was just computed.
            if anomalyVariable != dataVariable:
                df = df.drop(columns=[anomalyVariable])

        for pdx, presDf in df.groupby('PRESSURE'):
            presDf = presDf.drop(columns=['TIME', 'PRESSURE'])
            doc = make_doc(presDf, date, pdx, dataVariable, param, measurement, gridName, units)
            coll.insert_one(doc)
            if insertOne:  # Use for testing
                return


In [3]:
def create_collection(dbName, collectionName, dbUrl='mongodb://localhost:27017/'):
    '''
    Returns the collection collectionName of database dbName after passing it
    through init_profiles_collection.

    dbName: name of the database to open.
    collectionName: name of the collection inside that database.
    dbUrl: MongoDB connection string; defaults to the local server that was
        previously hard-coded, so existing calls behave as before.
    '''
    client = pymongo.MongoClient(dbUrl)
    coll = client[dbName][collectionName]
    return init_profiles_collection(coll)

def init_profiles_collection(coll):
    '''
    Creates the indexes used by this notebook on coll and returns coll.

    Indexes: date, pres, gridName, the compound (date, pres, gridName),
    data.lat (descending) and data.lon.

    Index creation is best effort: a pymongo error is logged as a warning
    (with traceback) and coll is still returned. Other exceptions propagate.
    '''
    asc = pymongo.ASCENDING
    indexSpecs = [
        [('date', asc)],
        [('pres', asc)],
        [('gridName', asc)],
        [('date', asc), ('pres', asc), ('gridName', asc)],
        [('data.lat', pymongo.DESCENDING)],
        [('data.lon', asc)],
    ]
    try:
        for spec in indexSpecs:
            coll.create_index(spec)
    except pymongo.errors.PyMongoError:
        # Narrowed from a bare except so KeyboardInterrupt and programming
        # errors are no longer swallowed.
        logging.warning('not able to get collections or set indexes', exc_info=True)
    return coll

In [12]:
# Temperature: source file, variable name and labels used by the insert cells.
rgFilename = '/storage/RG_ArgoClim_Temp.nc'
dataVariable = 'ARGO_TEMPERATURE_ANOMALY'
param, measurement = 'anomaly', 'temperature'
gridName = 'rgTempAnom'
units = 'Degrees Celsius'

# decode_times=False keeps TIME undecoded; insert_pres_time_grid does
# arithmetic on the raw TIME values.
rg = xr.open_dataset(rgFilename, decode_times=False)
rgDataArray = rg[dataVariable]
meanDf = create_grid_df(rg['ARGO_TEMPERATURE_MEAN'])

In [None]:
# Temperature totals (anomaly + mean) go into their own collection.
totalColl = create_collection('argo', 'rgTempTotal')
# Uncomment to empty the collection first:
# totalColl.drop()
insert_pres_time_grid(
    xrDataArray=rgDataArray,
    coll=totalColl,
    param='total',
    measurement=measurement,
    gridName='rgTempTotal',
    units=units,
    dataVariable='total',
    insertOne=False,
    meanDf=meanDf,
)

In [22]:
# Re-bind totalColl so later cells can use it without re-running the total insert.
totalColl = create_collection('argo', 'rgTempTotal')

# Temperature anomalies.
anomColl = create_collection('argo', 'rgTempAnom')
# Uncomment to empty the collection first:
# anomColl.drop()
insert_pres_time_grid(
    xrDataArray=rgDataArray,
    coll=anomColl,
    param=param,
    measurement=measurement,
    gridName=gridName,
    units=units,
    dataVariable=dataVariable,
    insertOne=False,
)

# Salinity

In [4]:
# Salinity: source file, variable name and labels used by the insert cells.
rgFilename = '/storage/RG_ArgoClim_Psal.nc'
dataVariable = 'ARGO_SALINITY_ANOMALY'
param, measurement = 'anomaly', 'salinity'
gridName = 'rgPsalAnom'
units = 'Practical Salinity Units'

# decode_times=False keeps TIME undecoded; insert_pres_time_grid does
# arithmetic on the raw TIME values.
rg = xr.open_dataset(rgFilename, decode_times=False)
rgDataArray = rg[dataVariable]
meanDf = create_grid_df(rg['ARGO_SALINITY_MEAN'])

In [None]:
# Salinity anomalies; the collection is named after gridName.
anomColl = create_collection('argo', gridName)
# Uncomment to empty the collection first:
# anomColl.drop()
insert_pres_time_grid(
    xrDataArray=rgDataArray,
    coll=anomColl,
    param=param,
    measurement=measurement,
    gridName=gridName,
    units=units,
    dataVariable=dataVariable,
    insertOne=False,
)

In [None]:
# Salinity totals (anomaly + mean) go into their own collection.
totalColl = create_collection('argo', 'rgPsalTotal')
# Uncomment to empty the collection first:
# totalColl.drop()
insert_pres_time_grid(
    xrDataArray=rgDataArray,
    coll=totalColl,
    param='total',
    measurement=measurement,
    gridName='rgPsalTotal',
    units=units,
    dataVariable='total',
    insertOne=False,
    meanDf=meanDf,
)

# Scratch

In [21]:
# One-off repair: rewrite every document in totalColl so that records keyed
# 'ARGO_SALINITY_ANOMALY' are keyed 'value' instead.
cursor = totalColl.find()

for doc in cursor:
    df = pd.DataFrame(doc['data']).rename(columns={'ARGO_SALINITY_ANOMALY': 'value'})
    doc['data'] = df.to_dict(orient='records')
    totalColl.replace_one({'_id': doc['_id']}, doc)

In [17]:
# Scratch: show the _id of whatever document doc currently refers to.
doc['_id']

ObjectId('5f3851aaafc6ec0f5923e3d4')

In [13]:
# Scratch: preview of the rename only; the result is displayed, not assigned back to df.
df.rename(columns={'ARGO_SALINITY_ANOMALY':'value'})

Unnamed: 0,ARGO_SALINITY_ANOMALY,lat,lon
0,-0.0,-64.5,20.5
1,-0.001,-64.5,21.5
2,-0.001,-64.5,22.5
3,-0.001,-64.5,23.5
4,-0.0,-64.5,24.5


In [19]:
# Fixture for the express tests: insertOne=True stops after a single document.
testColl = create_collection('argo-express-test', 'rgTempAnom')
# Uncomment to empty the collection first:
# testColl.drop()
insert_pres_time_grid(
    xrDataArray=rgDataArray,
    coll=testColl,
    param=param,
    measurement=measurement,
    gridName=gridName,
    units=units,
    dataVariable=dataVariable,
    insertOne=True,
)

2012-01-01 00:00:00
rgTempAnom


# Add mean field

In [5]:
# Rebuild the temperature mean collection from scratch.
rgFilename = '/home/tyler/Desktop/RG_ArgoClim_Temp.nc'
rg = xr.open_dataset(rgFilename, decode_times=False)
coll = create_collection(dbName='argo', collectionName='rgTempMean')
# drop() removes the collection together with the indexes create_collection
# just built, so the indexes are created again before any data is inserted.
coll.drop()
coll = init_profiles_collection(coll)
dataVariable='ARGO_TEMPERATURE_MEAN'
param='mean'
measurement='temperature'
# NOTE(review): every other cell uses a gridName equal to its collection name;
# 'rgTemperatureAnomaly' for the rgTempMean collection looks like a leftover -- confirm.
gridName='rgTemperatureAnomaly'
units = 'Degrees Celsius'

rgDataArray = rg[dataVariable]
# NOTE(review): insert_pres_time_grid groups by 'TIME'; confirm the mean field
# actually carries a TIME dimension.
insert_pres_time_grid(rgDataArray, coll, param, measurement, gridName, units,
                          dataVariable, insertOne=False)

NameError: name 'insert_pres_time_grid' is not defined

In [7]:
# Scratch: print the 'pres' field of every document in coll.
cursor = coll.find()

for doc in cursor:
    print(doc['pres'])

NameError: name 'coll' is not defined

In [8]:
# Scratch: display whatever is currently bound to coll.
coll

NameError: name 'coll' is not defined