In [1]:
import numpy as np
import pandas as pd
import pymongo


In [2]:
import json

def make_doc(filename):
    """Build a MongoDB-ready profile document from one exported JSON file.

    The file must contain a JSON object with these keys:

    * ``attrs``  -- mapping of metadata, copied into the document as-is;
    * ``name``   -- stored under ``'name'``;
    * ``coords`` -- mapping whose values each carry a ``'data'`` entry; it
      must include ``pressure``, ``longitude`` and ``latitude``;
    * ``data``   -- sequence of measured values, indexed in step with the
      ``pressure`` coordinate.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        dict: The entries of ``attrs``, ``name``, one entry per coordinate
        other than ``pressure``, and additionally

        * ``measurements`` -- list of ``{'data': value, 'pres': pressure}``
          dicts, one per pressure entry, in file order;
        * ``geoLocation`` -- ``{'type': 'Point', 'coordinates':
          [longitude, latitude]}``.

    Raises:
        KeyError: If one of the keys listed above is missing.
        IndexError: If ``data`` has fewer entries than ``pressure``.
    """
    # JSON text is UTF-8; be explicit rather than rely on the platform's
    # default encoding.  Only the parsing needs the file handle open.
    with open(filename, encoding='utf-8') as f:
        raw = json.load(f)

    doc = dict(raw['attrs'])
    doc['name'] = raw['name']
    # Coordinates are applied last, so on a name clash the coordinate value
    # replaces the attribute (or the name) of the same key.
    for key, coord in raw['coords'].items():
        doc[key] = coord['data']

    # Pressure is folded into the per-level measurements instead of being
    # kept as a separate top-level array.
    pres = doc.pop('pressure')
    values = raw['data']
    # Index into `values` (rather than zip) so that a `data` array shorter
    # than `pressure` still fails loudly with IndexError.
    doc['measurements'] = [
        {'data': values[idx], 'pres': level} for idx, level in enumerate(pres)
    ]
    doc['geoLocation'] = {
        'type': 'Point',
        'coordinates': [doc['longitude'], doc['latitude']],
    }
    return doc

# Build the example profile document from a single JSON file located in the
# current working directory.
doc = make_doc('./7.005_165.075_20020706T1206_ctd_salinity.json')

In [4]:
# Persist the assembled document next to the notebook so it can be inspected
# or reused without re-running make_doc.
with open('example_profile.json', 'w') as example_file:
    json.dump(doc, example_file)

In [72]:
# Show the assembled document to check its structure before inserting it.
doc

{'whp_name': 'CTDSAL',
 'whp_unit': 'PSS-78',
 'standard_name': 'sea_water_practical_salinity',
 'units': '1',
 'reference_scale': 'PSS-78',
 'ancillary_variables': 'ctd_salinity_qc',
 'name': 'ctd_salinity',
 'latitude': 7.005,
 'longitude': 165.075,
 'time': '2002-07-06T12:06:00',
 'expocode': '33KMGP402_1',
 'station': '2',
 'cast': 1,
 'sample': ['13', '12', '11', '10', '9', '8', '7', '6', '5', '4', '3', '1'],
 'measurements': [{'data': 33.90599822998047, 'pres': 3.2},
  {'data': 33.90700149536133, 'pres': 9.9},
  {'data': 33.92660140991211, 'pres': 24.5},
  {'data': 33.93960189819336, 'pres': 39.6},
  {'data': 34.27519989013672, 'pres': 60.0},
  {'data': 34.880699157714844, 'pres': 100.3},
  {'data': 34.55080032348633, 'pres': 150.5},
  {'data': 34.605098724365234, 'pres': 200.0},
  {'data': 34.625099182128906, 'pres': 400.8},
  {'data': 34.56570053100586, 'pres': 600.2},
  {'data': 34.546600341796875, 'pres': 799.0},
  {'data': 34.561100006103516, 'pres': 1001.0}],
 'geoLocation': {'type': 'Point', 'coordinates': [165.075, 7.005]}}

In [73]:
def create_collection(dbName, collectionName, dbUrl='mongodb://localhost:27017/'):
    """Connect to MongoDB and return an index-initialised collection handle.

    Args:
        dbName: Name of the database to use.
        collectionName: Name of the collection inside that database.
        dbUrl: MongoDB connection URL.  Defaults to the local server this
            notebook was written against, so existing two-argument calls
            behave as before.

    Returns:
        The collection object, after ``init_profiles_collection`` has been
        applied to it.
    """
    # The client is deliberately left open: the returned collection is
    # obtained from it and is used by the caller afterwards.
    client = pymongo.MongoClient(dbUrl)
    coll = client[dbName][collectionName]
    return init_profiles_collection(coll)

def init_profiles_collection(coll):
    """Create the query indexes for a profiles collection and return it.

    The indexed fields follow the documents built by ``make_doc``:
    top-level ``time``, ``latitude`` and ``longitude``, the GeoJSON point in
    ``geoLocation``, and the ``data`` / ``pres`` keys of each entry in the
    ``measurements`` array.

    Args:
        coll: The pymongo collection to index.

    Returns:
        The same collection object, for call chaining.
    """
    for field in ('time', 'latitude', 'longitude'):
        coll.create_index([(field, pymongo.DESCENDING)])

    # geoLocation stores a GeoJSON Point, so it needs a 2dsphere index; a
    # plain descending index on the sub-document cannot serve geospatial
    # queries.
    coll.create_index([('geoLocation', pymongo.GEOSPHERE)])

    # Index the keys the measurement entries actually carry.  The previous
    # 'measurements.psal' / 'measurements.lon' paths do not exist in these
    # documents, so those indexes never covered any value.
    for field in ('measurements.data', 'measurements.pres'):
        coll.create_index([(field, pymongo.DESCENDING)])
    return coll

dbName = 'goship'
collName = 'profiles'
coll = create_collection(dbName, collName)
# Start from an empty collection.  Dropping a collection also removes its
# indexes, so they are rebuilt right after the drop; otherwise the document
# below would be inserted into a collection with no indexes.
coll.drop()
coll = init_profiles_collection(coll)
coll.insert_one(doc)

<pymongo.results.InsertOneResult at 0x7f4e26831248>

In [75]:
# Read the document back by its expocode to confirm the insert succeeded.
coll.find_one( {'expocode': '33KMGP402_1'} )

{'_id': ObjectId('5f7bbba5afc6ec18cbc70aff'),
 'whp_name': 'CTDSAL',
 'whp_unit': 'PSS-78',
 'standard_name': 'sea_water_practical_salinity',
 'units': '1',
 'reference_scale': 'PSS-78',
 'ancillary_variables': 'ctd_salinity_qc',
 'name': 'ctd_salinity',
 'latitude': 7.005,
 'longitude': 165.075,
 'time': '2002-07-06T12:06:00',
 'expocode': '33KMGP402_1',
 'station': '2',
 'cast': 1,
 'sample': ['13', '12', '11', '10', '9', '8', '7', '6', '5', '4', '3', '1'],
 'measurements': [{'data': 33.90599822998047, 'pres': 3.2},
  {'data': 33.90700149536133, 'pres': 9.9},
  {'data': 33.92660140991211, 'pres': 24.5},
  {'data': 33.93960189819336, 'pres': 39.6},
  {'data': 34.27519989013672, 'pres': 60.0},
  {'data': 34.880699157714844, 'pres': 100.3},
  {'data': 34.55080032348633, 'pres': 150.5},
  {'data': 34.605098724365234, 'pres': 200.0},
  {'data': 34.625099182128906, 'pres': 400.8},
  {'data': 34.56570053100586, 'pres': 600.2},
  {'data': 34.546600341796875, 'pres': 799.0},
  {'data': 34.561