Creating a SQLite3 database of earthquake travel-time data
The input files start out in the format used by the struct3dp inversion program.

First read in the travel time data from the different files

In [54]:
import pandas as pd
import numpy as np

# Where the struct3dp inversion run keeps its input data files.
modelname = 'Run_230'
rundir = '/Users/ulberg/research/MSH/crosson/Runs'
datadir = '{}/{}/Data'.format(rundir, modelname)

# File names that always accompany an inversion run.
stafile = 'combined.sta'    # stations that recorded travel times
eqobsfile = 'quakes.obs'    # travel-time observations for earthquakes
eqlocfile = 'quakes.loc'    # earthquake locations
exobsfile = 'explos.obs'    # travel-time observations for explosions
exlocfile = 'explos.loc'    # explosion locations

# Column layouts; the earthquake and explosion files share the same two layouts.
sta_columns = ['Station', 'Longitude', 'Latitude', 'Depth']
obs_columns = ['SourceID', 'Station', 'Phase', 'ArrivalTime', 'Uncertainty']
loc_columns = ['SourceID', 'Longitude', 'Latitude', 'Depth', 'EventTime']


def read_s3dp_table(filename, columns):
    """Read one header-less, space-separated file from ``datadir``.

    Parameters
    ----------
    filename : str
        Name of the file inside ``datadir``.
    columns : list of str
        Column names to assign, in file order.

    Returns
    -------
    pandas.DataFrame
    """
    path = '{}/{}'.format(datadir, filename)
    # skipinitialspace collapses the padding that follows each separator
    return pd.read_csv(path, names=columns, sep=' ', skipinitialspace=True)


sta = read_s3dp_table(stafile, sta_columns)
obs_eq = read_s3dp_table(eqobsfile, obs_columns)
loc_eq = read_s3dp_table(eqlocfile, loc_columns)
obs_ex = read_s3dp_table(exobsfile, obs_columns)
loc_ex = read_s3dp_table(exlocfile, loc_columns)


Also read in files with information relating PNSN events to Antelope events

In [158]:
# Files that tie struct3dp SourceIDs and PNSN events to Antelope origins.
mapfile = 'map.map'
antUWfile = 'AntUWreview.txt'

mapcols = ['dbname', 'orid', 'SourceID', 'method',
           'Longitude-ant', 'Latitude-ant', 'Depth-ant', 'FullTime']
antUWcols = ['PNSNid', 'dbname', 'orid', 'picker',
             'Latitude-uw', 'Longitude-uw', 'Depth-uw']

# map.map sits next to the other inversion inputs in datadir
map_path = '{}/{}'.format(datadir, mapfile)
datamap = pd.read_csv(map_path, names=mapcols, sep=' ', skipinitialspace=True)

# the Antelope/UW review file is kept in the run's ANT directory instead
antUW_path = '{}/{}/ANT/{}'.format(rundir, modelname, antUWfile)
antUW = pd.read_csv(antUW_path, names=antUWcols, sep=' ', skipinitialspace=True)

We need to relate the source IDs to the Antelope database so that we can get a full origin time for each event. Start with the events that were recorded on the iMUSH broadband instruments (their SourceID starts with '4', '5', or '6').

In [147]:
# Sources are organised by the first one or two characters of their SourceID.
# The first character says where the source came from:
src_antelope = ['4', '5', '6']  # events located in the Antelope databases
src_pnsn = ['9']
src_shot = ['8']  # or anything else

# The first two characters of an Antelope SourceID select the database:
# '201' + first character gives the year, the second character the quarter,
# e.g. '43' -> '2014_Q3'.
dict_ant = {
    year_digit + str(quarter): '201{}_Q{}'.format(year_digit, quarter)
    for year_digit in src_antelope
    for quarter in range(1, 5)
}

# Leading character of every SourceID. Not used further in this cell; kept
# because other cells may inspect it (e.g. srcID0.unique()).
srcID0 = loc_eq['SourceID'].str[0]

# Antelope database name per event; NaN when the two-character prefix is not
# an Antelope prefix (i.e. the source did not come from Antelope).
loc_eq['dbname'] = loc_eq['SourceID'].str[:2].map(dict_ant)

# For Antelope sources the orid is the four characters that precede the final
# character of the SourceID ('4301394p' -> '1394'). This could also be taken
# from datamap, or skipped here entirely.
#
# Implemented with vectorised string slicing rather than a row-wise apply
# using x[0] / x[1]: integer keys on a label-indexed row rely on a positional
# fallback that pandas has deprecated.
#
# NOTE: non-Antelope sources receive the literal string 'NaN' (not a float
# NaN), exactly as before, so pd.isnull() will NOT flag them.
is_antelope = loc_eq['dbname'].notna()
loc_eq['orid'] = loc_eq['SourceID'].str[-5:-1].where(is_antelope, 'NaN')

In [150]:
# Preview the first rows of loc_eq, including the derived dbname and orid columns.
loc_eq.head()

Unnamed: 0,SourceID,Longitude,Latitude,Depth,EventTime,dbname,orid
0,4301394p,-122.4719,45.8537,0.0,26.897,2014_Q3,1394
1,4301395p,-122.6557,45.5633,15.5,41.314,2014_Q3,1395
2,4301396p,-122.4267,46.1724,0.123,28.559,2014_Q3,1396
3,4301401p,-122.4558,46.3526,1.42,30.893,2014_Q3,1401
4,4301404p,-121.9153,46.8328,10.4,52.527,2014_Q3,1404


In [160]:
# Preview the first rows of the SourceID-to-Antelope mapping read from map.map.
datamap.head()

Unnamed: 0,dbname,orid,SourceID,method,Longitude-ant,Latitude-ant,Depth-ant,FullTime
0,SHOTS,151,8000151p,shots2s3dp,-121.55266,46.7137,-0.668,20140724T113500.020
1,SHOTS,152,8000152p,shots2s3dp,-121.77356,46.20077,-1.029,20140725T050000.020
2,SHOTS,155,8000155p,shots2s3dp,-121.99266,46.18471,-1.009,20140725T072000.020
3,SHOTS,156,8000156p,shots2s3dp,-122.4447,46.04884,-0.834,20140725T073500.020
4,SHOTS,164,8000164p,shots2s3dp,-122.13281,46.08131,-0.594,20140725T085000.020
