Creating a SQLite3 database of earthquake travel-time data

The input files start out in the format used by the struct3dp inversion program.

First read in the travel time data from the different files

In [1]:
import pandas as pd
import numpy as np

# Location of the struct3dp run whose data files are loaded below
rundir='/Users/ulberg/research/MSH/crosson/Runs'
modelname='Run_230'
datadir='/'.join([rundir, modelname, 'Data'])

# File names that always accompany an inversion
stafile='combined.sta'   # stations that recorded travel times
eqobsfile='quakes.obs'   # travel time observations for earthquakes
eqlocfile='quakes.loc'   # locations of earthquakes
exobsfile='explos.obs'   # travel time observations for explosions
exlocfile='explos.loc'   # locations of explosions

# Column layouts; the earthquake and explosion files are read with the same columns
sta_cols=['Station','Longitude','Latitude','Depth']
obs_cols=['SourceID','Station','Phase','ArrivalTime','Uncertainty']
loc_cols=['SourceID','Longitude','Latitude','Depth','EventTime']

# Every file is parsed as space-separated, ignoring extra spaces after a delimiter
read_opts=dict(sep=' ', skipinitialspace=True)

sta=pd.read_csv('{}/{}'.format(datadir, stafile), names=sta_cols, **read_opts)
obs_eq=pd.read_csv('{}/{}'.format(datadir, eqobsfile), names=obs_cols, **read_opts)
loc_eq=pd.read_csv('{}/{}'.format(datadir, eqlocfile), names=loc_cols, **read_opts)
obs_ex=pd.read_csv('{}/{}'.format(datadir, exobsfile), names=obs_cols, **read_opts)
loc_ex=pd.read_csv('{}/{}'.format(datadir, exlocfile), names=loc_cols, **read_opts)


Also read in files with information relating PNSN events to Antelope events

In [4]:
# Files relating struct3dp source IDs, Antelope origins and PNSN events
mapfile='map.map'
antUWfile='AntUWreview.txt'

mapcols=['dbname','orid','SourceID','method','Longitude-ant','Latitude-ant','Depth-ant','FullTime']
antUWcols=['PNSNid','dbname','orid','picker','Latitude-uw','Longitude-uw','Depth-uw']

# map.map sits next to the other struct3dp data files
datamap=pd.read_csv('{}/{}'.format(datadir, mapfile), names=mapcols, sep=' ', skipinitialspace=True)

# the Antelope/UW review file lives in the run's ANT directory; orid is read as text
antUW=pd.read_csv(rundir + '/' + modelname + '/ANT/' + antUWfile, names=antUWcols, sep=' ', skipinitialspace=True, dtype={'orid': str})

# cast both columns to object dtype so that merging later will work
antUW=antUW.astype({'orid': 'object', 'PNSNid': 'object'})

We need to relate the source IDs to the Antelope database so we can get a full origin time for each event. Start with the events that were recorded on the iMUSH broadband instruments (SourceID starts with '4', '5', or '6').

In [5]:
# Sources were organized based on the first 1 or 2 characters of the SourceID.
# IDs whose first character is listed in src_antelope came from Antelope.
src_antelope=['4','5','6']

# Map the two-character SourceID prefix (first character + a digit 1-4) to the
# Antelope database name, e.g. '43' -> '2014_Q3'.
dict_ant={k+str(j): '201' + k + '_Q' + str(j) for k in src_antelope for j in range(1,5)}

src_pnsn=['9']
src_shot=['8'] # or anything else

# First character of every SourceID (not used further in this cell).
srcID0=loc_eq['SourceID'].str[0]

# Antelope database name for each event; prefixes missing from dict_ant map to
# NaN, which marks the event as not coming from Antelope.
loc_eq['dbname']=loc_eq['SourceID'].str[:2].map(dict_ant)

# If the source is in Antelope, take the Antelope orid from the SourceID: the
# four characters preceding its final character (e.g. '4301394p' -> '1394').
# (This could also be done with datamap, or doesn't have to be done here at all.)
# Other events get the literal string 'NaN', exactly as before, so the merge
# keys built from this column are unchanged.
# Vectorized string slicing replaces the row-wise lambda that indexed each row
# with x[0]/x[1]; integer keys on a label-indexed Series are deprecated as
# positional lookups in recent pandas.
loc_eq['orid']=loc_eq['SourceID'].str[-5:-1].where(loc_eq['dbname'].notnull(), 'NaN')

Add the full origin time from datamap to loc_eq, along with the PNSN location from antUW.

In [6]:
# Left-join the full origin time from datamap onto the struct3dp locations;
# events with no matching (SourceID, dbname, orid) keep a missing FullTime.
origin_keys=['SourceID','dbname','orid']
df=pd.merge(loc_eq, datamap[origin_keys + ['FullTime']], how='left', on=origin_keys)

# Left-join the PNSN id and location from antUW, matched on database name and orid.
uw_cols=['PNSNid','dbname','orid','Latitude-uw','Longitude-uw','Depth-uw']
df2=pd.merge(df, antUW[uw_cols], how='left', on=['dbname','orid'])
df2.head()

Unnamed: 0,SourceID,Longitude,Latitude,Depth,EventTime,dbname,orid,FullTime,PNSNid,Latitude-uw,Longitude-uw,Depth-uw
0,4301394p,-122.4719,45.8537,0.0,26.897,2014_Q3,1394,20140701T222626.897,60810642,45.8663,-122.4492,0.0
1,4301395p,-122.6557,45.5633,15.5,41.314,2014_Q3,1395,20140703T132841.314,60058633,45.5633,-122.6557,15.9
2,4301396p,-122.4267,46.1724,0.123,28.559,2014_Q3,1396,20140709T181928.559,60814162,46.1728,-122.4225,0.0
3,4301401p,-122.4558,46.3526,1.42,30.893,2014_Q3,1401,20140714T173030.893,0,46.3526,-122.4558,1.4
4,4301404p,-121.9153,46.8328,10.4,52.527,2014_Q3,1404,20140715T135452.527,60818252,46.8328,-121.9153,11.8


In [7]:
## Compare row counts: df and df2 (merged), loc_eq (s3dp), datamap (antelope), antUW (pnsn)
frames_to_check=(
    ('df', df),
    ('df2', df2),
    ('loc_eq', loc_eq),
    ('datamap', datamap),
    ('antUW', antUW),
)
for label, frame in frames_to_check:
    print('{}: {}'.format(label, len(frame)))

df: 899
df2: 899
loc_eq: 899
datamap: 2177
antUW: 449


Now put this into a SQLite database

In [41]:
# following https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html

import sqlite3
import os

dbdir='/Users/ulberg/Documents/GitHub/travelTimeDB/DB' # make sure this directory is created before running
filename=dbdir + '/tt_db.sqlite'

# remove database to start (DANGEROUS): an existing file is deleted so that the
# CREATE TABLE below always runs against a fresh database
if os.path.exists(filename):
    os.remove(filename)
else:
    print("The file does not exist")

tableA='station'
fieldA1='name'
typeA1='TEXT'

fieldA2='lat'
typeA2='REAL'

# open a connection to the database; the try/finally guarantees it is closed
# even if one of the statements below raises
conn=sqlite3.connect(filename)
try:
    c=conn.cursor()

    # Create the table with all of its columns in one statement. Table and
    # column names cannot be bound as SQL parameters, so they are formatted in;
    # they come only from the constants defined above, never from the data.
    c.execute("CREATE TABLE {tn} ({cn1} {ct1} PRIMARY KEY, {cn2} {ct2}, lon REAL, depth REAL)".\
             format(tn=tableA, cn1=fieldA1, ct1=typeA1, cn2=fieldA2, ct2=typeA2))

    # inserts one row (the first station). Values are passed as bound
    # parameters, so sqlite3 handles quoting and a station name containing a
    # quote character cannot break or alter the statement.
    first_sta=sta.iloc[0]
    c.execute("INSERT INTO {tn} ({idf}, lat, lon, depth) VALUES (?, ?, ?, ?)".\
             format(tn=tableA, idf=fieldA1),
             (str(first_sta['Station']), float(first_sta['Latitude']),
              float(first_sta['Longitude']), float(first_sta['Depth'])))

    conn.commit()
finally:
    conn.close()

# in sqlite, use "PRAGMA table_info('station')" to check column names
# SELECT * FROM station limit 100 to show first 100 entries


In [37]:
# name of the first station (the row inserted into the station table)
sta['Station'].iloc[0]

'MB05'