In [1]:
import DataSetQuery
import MalardHelpers
import datetime

# URL of the Malard ServiceGateway this notebook talks to.
gatewayUrl = 'http://localhost:9000'
query = DataSetQuery.DataSetQuery(gatewayUrl)

# Environment name and the output directory shared by all users of the server.
environmentName = 'test'
exportDir = '/data/puma1/scratch/malard/export/'
query.setEnvironment(environmentName, exportDir)




'Environment Set [test]'

In [2]:
# Query the 'tandemx' and 'srtm' datasets of 'mtngla' over the same bounding
# box, load both results as DataFrames, left-join them on lat/lon and write the
# joined table to CSV.
import pandas as pd

# Bounding box: spatial extent (x/y) and time window.
minX = 0
maxX = 100000
minY = 0
maxY = 100000
minT = datetime.datetime(2012, 1, 1, 0, 0)
maxT = datetime.datetime(2012, 1, 31, 0, 0)

# Columns to see in the output (x, y and time will be added).
projections = ['lat', 'lon', 'elev', 'power', 'coh', 'swathFileId']

# A list of dictionaries that specify the filters.
# Only numeric fields are supported, with the operations gt (greater than),
# gte (greater than or equal), lt (less than) and lte (less than or equal).
# Multiple filters are treated as AND conditions.
filters = [{'column': 'swathFileId', 'op': 'gte', 'threshold': 11783}]

# NOTE: despite the "nofilters" prefix in the variable names, both queries
# below DO apply `filters`. The names are kept so that any other cell that
# refers to them keeps working.
nofiltersTandemX = None
nofiltersSrtm = None
try:
    nofiltersTandemX = query.executeQuery('mtngla', 'tandemx', minX, maxX, minY, maxY, minT, maxT, projections, filters)
    print(nofiltersTandemX)
    # Converts the NetCDF into a DataFrame.
    dfTx = MalardHelpers.getDataFrameFromNetCDF(nofiltersTandemX)

    nofiltersSrtm = query.executeQuery('mtngla', 'srtm', minX, maxX, minY, maxY, minT, maxT, projections, filters)
    dfSrtm = MalardHelpers.getDataFrameFromNetCDF(nofiltersSrtm)

    print("TandemX MaxElev:[%f] MinElev:[%f] Points:[%d]" % (dfTx['elev'].max(), dfTx['elev'].min(), dfTx['elev'].count()))
    print("Srtm MaxElev:[%f] MinElev:[%f] Points:[%d]" % (dfSrtm['elev'].max(), dfSrtm['elev'].min(), dfSrtm['elev'].count()))

    # Keep every TandemX row and attach the Srtm row with the same lat/lon.
    # NOTE(review): this joins on exact equality of the lat/lon values, so
    # rows whose coordinates differ at all between the two results will not
    # be matched - confirm that this is intended.
    df = pd.merge(dfTx, dfSrtm, on=['lat', 'lon'], how='left')

    df.to_csv("/data/puma1/scratch/test/alljoin.csv")
finally:
    # Always release whatever query results were obtained, even when loading,
    # merging or the CSV export fails, so no cache files are left behind.
    for cacheFile in (nofiltersTandemX, nofiltersSrtm):
        if cacheFile is not None:
            print(query.releaseCache(cacheFile))



/data/puma1/scratch/malard/export/mtngla_tandemx_-270025030.nc
TandemX MaxElev:[8665.295898] MinElev:[4781.474121] Points:[2632]
Srtm MaxElev:[8665.295898] MinElev:[4781.474121] Points:[2430]


'Released cache file /data/puma1/scratch/malard/export/mtngla_srtm_-270025030.nc '

In [7]:
# Non-null value count of every column, per swath file, in the TandemX result.
swathCountsTx = dfTx.groupby(['swathFileId']).agg(['count'])
print(swathCountsTx)

              elev  power    coh      x      y   time
             count  count  count  count  count  count
swathFileId                                          
1836         22532  22532  22506  22532  22532  22532
2161         98289  98289  98202  98289  98289  98289
8984         91214  91214  91140  91214  91214  91214
11783         2632   2632   2579   2632   2632   2632


In [8]:
# Non-null value count of every column, per swath file, in the Srtm result.
swathCountsSrtm = dfSrtm.groupby(['swathFileId']).agg(['count'])
print(swathCountsSrtm)

              elev  power    coh      x      y   time
             count  count  count  count  count  count
swathFileId                                          
1836         22309  22309  22283  22309  22309  22309
2161         98289  98289  98202  98289  98289  98289
8984         91214  91214  91140  91214  91214  91214
11783         2430   2430   2377   2430   2430   2430


In [13]:

# Read one swath file straight from disk for each source (the file name is the
# same in both directories), without going through the query object.
sourceTandemX = '/data/puma1/scratch/test/tandemx/CS_LTA__SIR_SIN_2S_20120122T040316_20120122T040631_C001.nc'
rawTx = MalardHelpers.getDataFrameFromNetCDF(sourceTandemX)

sourceSrtm = '/data/puma1/scratch/test/srtm/CS_LTA__SIR_SIN_2S_20120122T040316_20120122T040631_C001.nc'
rawSrtm = MalardHelpers.getDataFrameFromNetCDF(sourceSrtm)


In [15]:
# Number of non-null latitudes in each raw frame (TandemX first, then Srtm).
for frame in (rawTx, rawSrtm):
    print(frame['lat'].count())

# Sum of the longitudes in each raw frame, in the same order.
for frame in (rawTx, rawSrtm):
    print(frame['lon'].sum())



850805
850805
71864560.56501755
71865290.96876465


In [16]:
# Keep only the rows whose (lat, lon) pair is present in both raw frames.
df = pd.merge(rawTx, rawSrtm, on=['lat', 'lon'], how='inner')

In [17]:
# Summarise the merged frame: row count, columns, dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 838949 entries, 0 to 838948
Data columns (total 34 columns):
lon                  838949 non-null float64
lat                  838949 non-null float64
elev_x               838949 non-null float32
heading_x            838949 non-null float32
demDiff_x            836598 non-null float32
demDiffMad_x         836630 non-null float32
demDiffMad2_x        836630 non-null float32
phaseAmb_x           838949 non-null int16
meanDiffSpread_x     838949 non-null float32
wf_number_x          838949 non-null int16
sampleNb_x           838949 non-null int16
power_x              838949 non-null float32
powerdB_x            838949 non-null float32
phase_x              838949 non-null float32
phaseS_x             0 non-null float32
phaseSSegment_x      0 non-null float32
phaseConfidence_x    0 non-null float32
coh_x                837953 non-null float32
elev_y               838949 non-null float32
heading_y            838949 non-null float32
demDiff_y 

In [18]:
# Number of non-null 'lat' values in the merged frame.
df['lat'].count()

838949