In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import pymongo
import pdb
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from scipy.io import loadmat
import os
import glob
import itertools

In [2]:
def create_collection(dbName, collectionName, init_collection):
    dbUrl = 'mongodb://localhost:27017/'
    client = pymongo.MongoClient(dbUrl)
    db = client[dbName]
    coll = db[collectionName]
    coll = init_collection(coll)
    return coll

def init_anom_collection(coll):
    coll.create_index([('date', pymongo.DESCENDING)])
    coll.create_index([('pres', pymongo.DESCENDING)])
    coll.create_index([('data.LATITUDE', pymongo.DESCENDING)])
    coll.create_index([('data.LONGITUDE', pymongo.ASCENDING)])

    return coll

def init_param_collection(coll):
    coll.create_index([('pres', pymongo.DESCENDING)])
    coll.create_index([('data.LATITUDE', pymongo.DESCENDING)])
    coll.create_index([('data.LONGITUDE', pymongo.ASCENDING)])
    coll.create_index([('param', pymongo.DESCENDING),
                       ('trend', pymongo.DESCENDING),
                       ('model', pymongo.DESCENDING),
                       ('modelParam', pymongo.DESCENDING)])
        
    return coll

In [11]:
def transform_lon(lon):
    '''
    Transforms longitude from absolute to -180 to 180 deg
    '''
    if lon >= 180:
        lon -= 360
    return lon

def make_anom_docs(files, gridName, dataVal='predGrid'):
    docs = []
    for file in files:
        doc = {}
        anomData = loadmat(file)
        fa = file.split('/')[-1].split('_')
        year = fa[-1].replace('.mat', '')
        month = fa[-2]
        year_month = year + month

        date = datetime.strptime(year_month, '%Y%m')
        presLevel = float(fa[-6].replace('at', '').replace('dbar', ''))
        latGrid = anomData['latGrid'].flatten()
        lonGrid = anomData['longGrid'].flatten()
        values = anomData[dataVal].flatten()
        df = pd.DataFrame()
        df['LATITUDE'] = latGrid
        df['LONGITUDE'] = lonGrid
        df['LONGITUDE'] = df['LONGITUDE'].apply(lambda lon: transform_lon(lon))
        df['value'] = values
        df = df.fillna(float(-9999))
        dataDict = df.to_dict(orient='records')
        
        doc['measurement'] = 'Temperature'
        doc['param'] = 'anomaly'
        doc['gridName'] = gridName
        doc['dataVal'] = dataVal
        doc['data'] = dataDict
        doc['date'] = date
        doc['pres'] = float(presLevel)
        doc['cellsize'] = 1  #  Degree
        doc['NODATA_value'] = -9999
        docs.append(doc)
    return docs

def make_param_docs(grids):
    docs = []
    for pres, measurement, model, trend in grids:
        paramFiles = '{0}Trend{1}*.mat'.format(model, measurement)

        path = os.path.join(kuuselaBase, pres,\
                                'outliers_removed', trend, 'Results',\
                                paramFiles)
        filename = glob.glob(path) # should be one
        print(paramFiles)
        if not filename:
            print('file not found: {}'.format(path))
            continue
        paramData = loadmat(filename[0])
        if model == 'localMLESpace':
            modelParams = spaceParams
        else:
            modelParams = spaceTimeParams

        for modelParam in modelParams:
            print(pres, measurement, model, trend, modelParam)
            doc = make_param_doc(paramData, modelParam, model, trend, measurement, pres)
            docs.append(doc)
    return docs

def make_param_doc(paramData, modelParam, model, trend, measurement, pres):
    values = paramData[modelParam].flatten()
    latGrid = paramData['latGrid'].flatten()
    lonGrid = paramData['longGrid'].flatten()
    gridName = 'ks' + model + modelParam + trend

    df = pd.DataFrame()
    df['LATITUDE'] = latGrid
    df['LONGITUDE'] = lonGrid
    df['LONGITUDE'] = df['LONGITUDE'].apply(lambda lon: transform_lon(lon))
    df['value'] = values
    df = df.fillna(-9999)
    dataDict = df.to_dict(orient='records')
    doc = {}
    doc['gridName'] = gridName
    doc['model'] = model
    doc['param'] = modelParam
    doc['trend'] = trend
    doc['measurement'] = measurement
    doc['data'] = dataDict
    doc['pres'] = float(pres)
    doc['cellsize'] = 1  #  Degree
    doc['NODATA_value'] = -9999
    return doc

In [12]:
kuuselaBase = os.path.join('/home','tyler','Kuusela-Stein', 'Data', 'Data')
pressures = [str(10.0)]
measurements = ['Temp']
trends = ['Trend', 'NoTrend', 'Trend2']
models = ['localMLESpace', 'localMLESpaceTime']
spaceTimeParams  = ['nResGrid', 'nll', 'sigmaOpt', 'thetaLatOpt', 'thetaLongOpt', 'thetasOpt', 'thetatOpt']
spaceParams = ['aOpt', 'latGrid', 'longGrid', 'nResGrid', 'nll', 'sigmaOpt', 'theta1Opt', 'theta2Opt']

allIters = [pressures, measurements, models, trends]
grids = list(itertools.product(*allIters))

docs = make_param_docs(grids)


localMLESpaceTrendTemp*.mat
10.0 Temp localMLESpace Trend aOpt
10.0 Temp localMLESpace Trend latGrid
10.0 Temp localMLESpace Trend longGrid
10.0 Temp localMLESpace Trend nResGrid
10.0 Temp localMLESpace Trend nll
10.0 Temp localMLESpace Trend sigmaOpt
10.0 Temp localMLESpace Trend theta1Opt
10.0 Temp localMLESpace Trend theta2Opt
localMLESpaceTrendTemp*.mat
file not found: /home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/NoTrend/Results/localMLESpaceTrendTemp*.mat
localMLESpaceTrendTemp*.mat
file not found: /home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/Trend2/Results/localMLESpaceTrendTemp*.mat
localMLESpaceTimeTrendTemp*.mat
10.0 Temp localMLESpaceTime Trend nResGrid
10.0 Temp localMLESpaceTime Trend nll
10.0 Temp localMLESpaceTime Trend sigmaOpt
10.0 Temp localMLESpaceTime Trend thetaLatOpt
10.0 Temp localMLESpaceTime Trend thetaLongOpt
10.0 Temp localMLESpaceTime Trend thetasOpt
10.0 Temp localMLESpaceTime Trend thetatOpt
localMLESpaceTimeTrendTemp*.mat
file not

In [13]:
collName = 'ksParams'
coll = create_collection('argo', collName, init_param_collection)
coll.drop()
coll.insert_many(docs)

<pymongo.results.InsertManyResult at 0x7f38bd530d88>

In [32]:
allIters = [pressures, measurements, models, trends]
grids = list(itertools.product(*allIters))

for pres, measurement, model, trend in grids:
    model_label = model.strip('localMLE')
    pres_label = '_at{}dbar'.format(pres)
    anomFiles = 'anomaly{0}{1}{2}{3}*.mat'.format(model_label, trend, measurement, pres_label)
    path = os.path.join(kuuselaBase, pres,\
                        'outliers_removed', trend, 'Results',\
                        anomFiles)
    print(path)
    anomMats = glob.glob(path)
    if not anomMats:
        print('file not found: {}'.format(path))
        continue
    print(len(anomMats))
    collName = 'ks' + model_label + measurement + trend
    coll = create_collection('argo', collName, init_anom_collection)
    coll.drop()
    print(collName)
    for fileChunk in np.array_split(anomMats, 3):
        docs = make_anom_docs(fileChunk, collName)
        print(len(docs))
        coll.insert_many(docs)
# make for express testing
testColl = create_collection('argo-express-test', collName, init_anom_collection)
testColl.drop()
testColl.insert_many(docs)

/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/Trend/Results/anomalySpaceTrendTemp_at10.0dbar*.mat
144
ksSpaceTempTrend
48
48
48
/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/NoTrend/Results/anomalySpaceNoTrendTemp_at10.0dbar*.mat
12
ksSpaceTempNoTrend
4
4
4
/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/Trend2/Results/anomalySpaceTrend2Temp_at10.0dbar*.mat
12
ksSpaceTempTrend2
4
4
4
/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/Trend/Results/anomalySpaceTimeTrendTemp_at10.0dbar*.mat
12
ksSpaceTimeTempTrend
4
4
4
/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/NoTrend/Results/anomalySpaceTimeNoTrendTemp_at10.0dbar*.mat
file not found: /home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/NoTrend/Results/anomalySpaceTimeNoTrendTemp_at10.0dbar*.mat
/home/tyler/Kuusela-Stein/Data/Data/10.0/outliers_removed/Trend2/Results/anomalySpaceTimeTrend2Temp_at10.0dbar*.mat
12
ksSpaceTimeTempTrend2
4
4
4


<pymongo.results.InsertManyResult at 0x7f38b8da2848>

In [30]:
testColl = create_collection('argo-express-test', collName, init_anom_collection)
testColl.drop()
testColl.insert_many(docs)

<pymongo.results.InsertManyResult at 0x7f38a9c09fc8>

In [39]:


for trend in trends:
    path = os.path.join(kuuselaBase, pres,\
                        'outliers_removed', trend, 'Results',\
                        anomFiles)
    anomMats = glob.glob(path)
    print(len(anomMats))
    collName = 'ks' + 'Space' + param + trend
    coll = create_collection('argo', collName, init_anom_collection)
    coll.drop()
    print(collName)
    for fileChunk in np.array_split(anomMats, 10):
        docs = make_docs(fileChunk, collName)
        print(len(docs))
        coll.insert_many(docs)
    # make for express testing
    testColl = create_collection('argo-express-test', collName)
    testColl.drop()
    testColl.insert_many(docs)

12
ksSpaceTempTrend
2
2
1
1
1
1
1
1
1
1
12
ksSpaceTempNoTrend
2
2
1
1
1
1
1
1
1
1
12
ksSpaceTempTrend2
2
2
1
1
1
1
1
1
1
1
