In [3]:
import DataSetQuery
import datetime

# --- Connect to Malard -------------------------------------------------------
# Base URL of the Malard ServiceGateway that every call below goes through.
MALARD_URL = 'http://localhost:9000'
query = DataSetQuery.DataSetQuery(MALARD_URL)

# Register the 'test' environment together with its output directory (shared
# by all users of the server), then read it back to confirm what was stored.
query.setEnvironment('test', '/data/puma1/scratch/malard/export/')
print(query.getEnvironment())

# --- Explore the catalogue ---------------------------------------------------
# Parent data sets, i.e. the databases that each hold several data sets.
print(query.getParentDataSets())
# Data sets available under the 'mtngla' parent.
print(query.getDataSets('mtngla'))
# Overall bounding box (space and time) of each of the two data sets.
for dataSetName in ('tandemx', 'srtm'):
    print(query.getDataSetBoundingBox('mtngla', dataSetName))

# --- Bounding box used by all following queries ------------------------------
minX, maxX = 0, 100000
minY, maxY = 0, 100000
# NOTE(review): these datetimes are timezone-naive. The export file name printed
# by a later cell encodes minT as 1277938800000 ms (2010-06-30 23:00 UTC), so
# they appear to be converted using the local timezone - confirm this is intended.
minT = datetime.datetime(2010, 7, 1, 0, 0)
maxT = datetime.datetime(2010, 12, 31, 0, 0)

# Grid cells inside the bounding box, grouped by x and y, each with its min and
# max time. The result is a JSON string that a later cell parses.
gcs = query.getGridCells('mtngla', 'tandemx', minX, maxX, minY, maxY, minT, maxT)
# print(gcs)  # uncomment to inspect the raw response

# Shards that make up the bounding box of interest (also a JSON string).
shards = query.getShards('mtngla', 'tandemx', minX, maxX, minY, maxY, minT, maxT)
# print(shards)  # uncomment to inspect the raw response



{"name":"test","outputCdfPath":"/data/puma1/scratch/malard/export/"}
{"dataSets":[{"name":"mtngla"},{"name":"tempstore"}]}
{"dataSets":[{"name":"srtm"},{"name":"tandemx"}]}
{"gridCellMinX":-1900000,"gridCellMaxX":1700000,"gridCellMinY":-1300000,"gridCellMaxY":900000,"minTime":1279266673000,"maxTime":1546296307000,"totalPoints":5307063627,"numberOfShards":53566}
{"gridCellMinX":-1900000,"gridCellMaxX":1700000,"gridCellMinY":-1300000,"gridCellMaxY":900000,"minTime":1279266673000,"maxTime":1546296307000,"totalPoints":5307063627,"numberOfShards":53828}


In [4]:
# Parse the JSON string returned by getGridCells and flatten its 'boxes' list
# into a DataFrame (one row per entry of 'boxes').
import json

# pandas.io.json.json_normalize was deprecated in pandas 1.0 in favour of the
# top-level pandas.json_normalize and is not importable from pandas.io.json in
# recent releases. Prefer the top-level name and fall back to the old location
# only for pandas < 1.0. The name json_normalize stays bound at kernel level
# because the shard cell further down relies on it.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

data = json.loads(gcs)
df = json_normalize(data['boxes'])

print(df)

# Total number of points over all returned grid cells, and the point count of
# the most populated single cell.
totalPoints = df['totalPoints'].sum()
maxNumPoints = df['totalPoints'].max()

print('TP=[%d] MaxPoints in Cell=[%d]'%(totalPoints,maxNumPoints))



   gridCellMaxX  gridCellMaxY  gridCellMinX  gridCellMinY        maxTime  \
0        200000        200000        100000        100000  1292715924000   
1             0        200000       -100000        100000  1293061242000   
2        100000             0             0       -100000  1292888583000   
3        200000             0        100000       -100000  1292543264000   
4        100000        100000             0             0  1293061242000   
5             0        100000       -100000             0  1293233898000   
6        200000        100000        100000             0  1292543264000   
7        100000        200000             0        100000  1292888583000   
8             0             0       -100000       -100000  1293061242000   

         minTime  numberOfShards  totalPoints  
0  1279824627000               5       588262  
1  1280515145000               6       950622  
2  1280169839000               5       903633  
3  1279997229000               5       828783  

In [5]:
# Parse the JSON string returned by getShards and flatten its 'shards' list
# into a DataFrame. json and json_normalize come from the previous cell.
dataShard = json.loads(shards)
dfShards = json_normalize(dataShard['shards'])

# Show only the column holding the shard names (file paths in the output).
shardNames = dfShards['shardName']
print(shardNames)


0    /data/snail1/scratch/tandemx/swath/y2010/m9/ce...
1    /data/snail1/scratch/tandemx/swath/y2010/m11/c...
2    /data/snail1/scratch/tandemx/swath/y2010/m10/c...
3    /data/snail1/scratch/tandemx/swath/y2010/m12/c...
4    /data/snail1/scratch/tandemx/swath/y2010/m7/ce...
Name: shardName, dtype: object


In [6]:
# Ask the catalogue which columns are available in the NetCDFs that fall inside
# the bounding box, then show the raw response.
columnsResponse = query.getDataSetColumns(
    'mtngla', 'tandemx',
    minX, maxX,
    minY, maxY,
    minT, maxT,
)
print(columnsResponse)

http://localhost:9000/point/datasetcolumns/mtngla/tandemx
{"column":[{"name":"lon"},{"name":"lat"},{"name":"elev"},{"name":"heading"},{"name":"demDiff"},{"name":"demDiffMad"},{"name":"demDiffMad2"},{"name":"phaseAmb"},{"name":"meanDiffSpread"},{"name":"wf_number"},{"name":"sampleNb"},{"name":"power"},{"name":"powerdB"},{"name":"phase"},{"name":"phaseS"},{"name":"phaseSSegment"},{"name":"phaseConfidence"},{"name":"coh"},{"name":"x"},{"name":"y"},{"name":"time"},{"name":"swathFileId"}]}


In [7]:
import MalardHelpers

# Ask Malard for a single NetCDF that merges and filters all the shards in the
# bounding box. The value returned is printed below and shows up as the path of
# the exported .nc file.
fileName = query.getNetCdfFile(
    'mtngla', 'tandemx',
    minX, maxX,
    minY, maxY,
    minT, maxT,
)
print("File created: %s" % fileName)

# Load the exported NetCDF into a DataFrame.
# NOTE: this rebinds df, which an earlier cell used for the grid-cell summary.
df = MalardHelpers.getDataFrameFromNetCDF(fileName)

# Elevation range of the points in the export.
elev = df['elev']
print("Max Elevation %f" % elev.max())
print("Min Elevation %f" % elev.min())


File created: /data/puma1/scratch/malard/export/mtngla_tandemx_0_100000_0_100000_1277938800000_1293753600000.nc
Max Elevation 8791.354492
Min Elevation 4438.363770


In [11]:

# Columns wanted in the output (x, y and time are added automatically).
projections = ['elev', 'power', 'coh']

# Filters are dictionaries and are combined as AND conditions. Only numeric
# fields are supported, with these operations:
#   gt (greater than), gte (greater than or equal),
#   lt (less than),    lte (less than or equal)
filters = [
    {'column': 'power', 'op': 'gt', 'threshold': 10000},
    {'column': 'coh', 'op': 'gt', 'threshold': 0.8},
]


def printCohPowerSummary(frame):
    """Print the max/min of 'coh' and 'power' and the count of 'power' in frame."""
    coh = frame['coh']
    power = frame['power']
    print("Max Coh [%f] Min Coh [%f] Max Power [%f] Min Power [%f] Count [%d]"
          % (coh.max(), coh.min(), power.max(), power.min(), power.count()))


# Baseline: the same projection with an empty filter list.
nofilters = query.executeQuery(
    'mtngla', 'tandemx', minX, maxX, minY, maxY, minT, maxT, projections, [])
# Load the resulting NetCDF into a DataFrame.
dfNoFilter = MalardHelpers.getDataFrameFromNetCDF(nofilters)
printCohPowerSummary(dfNoFilter)

# Same query with the power and coherence filters applied.
withfilters = query.executeQuery(
    'mtngla', 'tandemx', minX, maxX, minY, maxY, minT, maxT, projections, filters)
# Load the resulting NetCDF into a DataFrame.
withFilter = MalardHelpers.getDataFrameFromNetCDF(withfilters)
printCohPowerSummary(withFilter)


Max Coh [1.000000] Min Coh [0.600000] Max Power [60915.500000] Min Power [1000.004211] Count [868704]
Max Coh [1.000000] Min Coh [0.800000] Max Power [60325.578125] Min Power [10000.048828] Count [189280]
