Creating a SQLite3 database of earthquake travel-time data
Files begin in input format for struct3dp inversion program

First read in the travel time data from the different files

In [2]:
import pandas as pd
import numpy as np

# define the directory that has the struct3dp data files
modelname='Run_230'
rundir='/Users/ulberg/research/MSH/crosson/Runs'
datadir='{}/{}/Data'.format(rundir,modelname)

# These file names are always associated with an inversion
stafile='combined.sta' # the stations that recorded travel times
eqobsfile='quakes.obs' # the travel time observations for earthquakes
eqlocfile='quakes.loc' # the locations of earthquakes
exobsfile='explos.obs' # the travel time observations for explosions
exlocfile='explos.loc' # the locations of explosions

sta=pd.read_csv(datadir + '/' + stafile, names=['Station','Longitude','Latitude','Depth'], sep=' ', skipinitialspace=True)
obs_eq=pd.read_csv(datadir + '/' + eqobsfile, names=['SourceID','Station','Phase','ArrivalTime','Uncertainty'], sep=' ', skipinitialspace=True)
loc_eq=pd.read_csv(datadir + '/' + eqlocfile, names=['SourceID','Longitude','Latitude','Depth','EventTime'], sep=' ', skipinitialspace=True)
obs_ex=pd.read_csv(datadir + '/' + exobsfile, names=['SourceID','Station','Phase','ArrivalTime','Uncertainty'], sep=' ', skipinitialspace=True)
loc_ex=pd.read_csv(datadir + '/' + exlocfile, names=['SourceID','Longitude','Latitude','Depth','EventTime'], sep=' ', skipinitialspace=True)


Also read in files with information relating PNSN events to Antelope events

In [3]:
mapfile='map.map'
antUWfile='AntUWreview.txt'

mapcols=['dbname','orid','SourceID','method','Longitude-ant','Latitude-ant','Depth-ant','FullTime']
antUWcols=['PNSNid','dbname','orid','picker','Latitude-uw','Longitude-uw','Depth-uw']

datamap=pd.read_csv(datadir + '/' + mapfile, names=mapcols, sep=' ', skipinitialspace=True)
# antUW=pd.read_csv(rundir + '/' + modelname + '/ANT/' + antUWfile, names=antUWcols, sep=' ', skipinitialspace=True)
antUW=pd.read_csv('{}/{}/ANT/{}'.format(rundir,modelname,antUWfile), names=antUWcols, sep=' ', skipinitialspace=True)

We need to relate the source id's to the antelope database so we can get a full origin time for the event. Start with the ones that were recorded on the iMUSH broadband instruments (SourceID starts with '4', '5', '6')

In [8]:
# sources were organized based on the first 1 or 2 digits of the id
# define what the initial character means
src_antelope=['4','5','6']
dict_ant={}
for k in src_antelope:
    for j in range(1,5):
        dict_ant[k+str(j)]='201' + k + '_Q' + str(j)
        
src_pnsn=['9']
src_shot=['8'] # or anything else

# read in the first and second characters in the SourceID, this bit could use some cleaning
srcID0=loc_eq['SourceID'].apply(lambda x: x[0])
# srcID1=loc_eq['SourceID'].apply(lambda x: x[1])
# srcID01=loc_eq['SourceID'].apply(lambda x: x[:2])
# print('Data types: ' + str(srcID0.unique()))

# is it from antelope?
# isAnt=srcID0.apply(lambda x: x in src_antelope)
loc_eq['dbname']=loc_eq['SourceID'].apply(lambda x: x[:2]).map(dict_ant)
# loc_eq.dbname.value_counts() # how many events from each quarter are there?

# if the source is in antelope, get the antelope orid (This could also be done with datamap, or doesn't have to be done here at all)
loc_eq['SourceID'][loc_eq['dbname'].notnull()]
loc_eq['orid']=loc_eq[['SourceID','dbname']].apply(lambda x: x[0][-5:-1] if pd.notnull(x[1]) else 'NaN', axis=1)

join the loc_eq and datamap 

In [66]:
df=loc_eq.merge(datamap[['SourceID','dbname','orid','FullTime']],left_on=['SourceID','dbname','orid'],right_on=['SourceID','dbname','orid'],how='left')
len(loc_eq)

899

In [5]:
loc_eq.head()
# datamap.head()

Unnamed: 0,SourceID,Longitude,Latitude,Depth,EventTime,dbname,orid
0,4301394p,-122.4719,45.8537,0.0,26.897,2014_Q3,1394
1,4301395p,-122.6557,45.5633,15.5,41.314,2014_Q3,1395
2,4301396p,-122.4267,46.1724,0.123,28.559,2014_Q3,1396
3,4301401p,-122.4558,46.3526,1.42,30.893,2014_Q3,1401
4,4301404p,-121.9153,46.8328,10.4,52.527,2014_Q3,1404


In [34]:
datamap[['dbname', 'orid', 'FullTime']].head()
datamap.head()

Unnamed: 0,dbname,orid,SourceID,method,Longitude-ant,Latitude-ant,Depth-ant,FullTime
0,SHOTS,151,8000151p,shots2s3dp,-121.55266,46.7137,-0.668,20140724T113500.020
1,SHOTS,152,8000152p,shots2s3dp,-121.77356,46.20077,-1.029,20140725T050000.020
2,SHOTS,155,8000155p,shots2s3dp,-121.99266,46.18471,-1.009,20140725T072000.020
3,SHOTS,156,8000156p,shots2s3dp,-122.4447,46.04884,-0.834,20140725T073500.020
4,SHOTS,164,8000164p,shots2s3dp,-122.13281,46.08131,-0.594,20140725T085000.020


In [63]:
np.where(datamap["SourceID"] == "4301395p")
datamap.iloc[[ 24, 409]]

Unnamed: 0,dbname,orid,SourceID,method,Longitude-ant,Latitude-ant,Depth-ant,FullTime
24,2014_Q3,1395,4301395p,ant2s3dp,-122.6557,45.5633,15.5,20140703T132841.314
409,UW,60058633,4301395p,uw2s3dp,-122.65567,45.56317,15.5,20140703T132841.314


In [67]:
df.head()

Unnamed: 0,SourceID,Longitude,Latitude,Depth,EventTime,dbname,orid,FullTime
0,4301394p,-122.4719,45.8537,0.0,26.897,2014_Q3,1394,20140701T222626.897
1,4301395p,-122.6557,45.5633,15.5,41.314,2014_Q3,1395,20140703T132841.314
2,4301396p,-122.4267,46.1724,0.123,28.559,2014_Q3,1396,20140709T181928.559
3,4301401p,-122.4558,46.3526,1.42,30.893,2014_Q3,1401,20140714T173030.893
4,4301404p,-121.9153,46.8328,10.4,52.527,2014_Q3,1404,20140715T135452.527


In [60]:
loc_eq.head()

Unnamed: 0,SourceID,Longitude,Latitude,Depth,EventTime,dbname,orid
0,4301394p,-122.4719,45.8537,0.0,26.897,2014_Q3,1394
1,4301395p,-122.6557,45.5633,15.5,41.314,2014_Q3,1395
2,4301396p,-122.4267,46.1724,0.123,28.559,2014_Q3,1396
3,4301401p,-122.4558,46.3526,1.42,30.893,2014_Q3,1401
4,4301404p,-121.9153,46.8328,10.4,52.527,2014_Q3,1404


In [61]:

for k in range(len(df)):
    print('{}: {} - {}'.format(k,loc_eq.iloc[k]['SourceID'],df.iloc[k]['SourceID']))


0: 4301394p - 4301394p
1: 4301395p - 4301395p
2: 4301396p - 4301395p
3: 4301401p - 4301396p
4: 4301404p - 4301401p
5: 4301408p - 4301404p
6: 4301414p - 4301404p
7: 4301415p - 4301408p
8: 4301444p - 4301414p
9: 4301446p - 4301414p
10: 4301473p - 4301415p
11: 4301479p - 4301415p
12: 4301489p - 4301444p
13: 4301507p - 4301446p
14: 4301508p - 4301473p
15: 4301529p - 4301479p
16: 4301545p - 4301489p
17: 4301546p - 4301507p
18: 4301551p - 4301507p
19: 4301552p - 4301508p
20: 4301554p - 4301529p
21: 4301555p - 4301545p
22: 4301557p - 4301546p
23: 4301558p - 4301551p
24: 4301560p - 4301551p
25: 4301563p - 4301552p
26: 4301564p - 4301552p
27: 4301565p - 4301554p
28: 4301567p - 4301554p
29: 4301568p - 4301555p
30: 4301569p - 4301555p
31: 4301570p - 4301557p
32: 4301571p - 4301558p
33: 4301573p - 4301558p
34: 4301575p - 4301560p
35: 4301576p - 4301560p
36: 4301579p - 4301563p
37: 4301583p - 4301563p
38: 4301584p - 4301564p
39: 4301585p - 4301565p
40: 4301586p - 4301565p
41: 4301590p - 4301567p
42

709: 9060563581p - 9010799238p
710: 9060564227p - 9010799263p
711: 9060565272p - 9010798743p
712: 9060566067p - 9010801233p
713: 9060566701p - 9010802478p
714: 9060566721p - 9010802718p
715: 9060566831p - 9010802938p
716: 9060567171p - 9010802993p
717: 9060567316p - 9010803098p
718: 9060569236p - 9010803208p
719: 9060572321p - 9010803723p
720: 9060572731p - 9010804053p
721: 9060573291p - 9010804068p
722: 9060578187p - 9010804223p
723: 9060578217p - 9010794843p
724: 9060578232p - 9010798768p
725: 9060578277p - 9010802703p
726: 9060578337p - 9010803048p
727: 9060578342p - 9010803078p
728: 9060578602p - 9010803123p
729: 9060580297p - 9010803168p
730: 9060580512p - 9010803883p
731: 9060581247p - 9010794713p
732: 9060582486p - 9010796123p
733: 9060584536p - 9010797133p
734: 9060588317p - 9010798758p
735: 9060591012p - 9010799068p
736: 9060598967p - 9010800243p
737: 9060602732p - 9010801018p
738: 9060603257p - 9010794633p
739: 9060604262p - 9010795808p
740: 9060606652p - 9010796153p
741: 906

IndexError: single positional indexer is out-of-bounds