## This version does almost all of the work in SQL (apart from the Python wrapper)

In [2]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# define the directory that has the struct3dp data files
modelname='Run_230'
rundir='/Users/ulberg/research/MSH/crosson/Runs'
datadir='{}/{}/Data'.format(rundir,modelname)

# These file names are always associated with an inversion
stafile='combined.sta' # the stations that recorded travel times
eqobsfile='quakes.obs' # the travel time observations for earthquakes
eqlocfile='quakes.loc' # the locations of earthquakes
exobsfile='explos.obs' # the travel time observations for explosions
exlocfile='explos.loc' # the locations of explosions

# read into dataframes
sta=pd.read_csv(datadir + '/' + stafile, names=['Station','Longitude','Latitude','Depth'], sep=' ', skipinitialspace=True)
obs_eq=pd.read_csv(datadir + '/' + eqobsfile, names=['SourceID','Station','Phase','ArrivalTime','Uncertainty'], sep=' ', skipinitialspace=True)
loc_eq=pd.read_csv(datadir + '/' + eqlocfile, names=['SourceID','Longitude','Latitude','Depth','EventTime'], sep=' ', skipinitialspace=True)
obs_ex=pd.read_csv(datadir + '/' + exobsfile, names=['SourceID','Station','Phase','ArrivalTime','Uncertainty'], sep=' ', skipinitialspace=True)
loc_ex=pd.read_csv(datadir + '/' + exlocfile, names=['SourceID','Longitude','Latitude','Depth','EventTime'], sep=' ', skipinitialspace=True)

# read in more files that provide some mapping between other files
mapfile='map.map'
antUWfile='AntUWreview.txt'

mapcols=['dbname','orid','SourceID','method','Longitude-ant','Latitude-ant','Depth-ant','FullTime']
antUWcols=['PNSNid','dbname','orid','picker','Latitude-uw','Longitude-uw','Depth-uw']

datamap=pd.read_csv(datadir + '/' + mapfile, names=mapcols, sep=' ', skipinitialspace=True)
antUW=pd.read_csv('{}/{}/ANT/{}'.format(rundir,modelname,antUWfile), names=antUWcols, sep=' ', skipinitialspace=True, dtype={'orid': str})

In [4]:
# Preview the first rows of both mapping tables, each under its own label
print('datamap', datamap.head(), sep='\n')
print('antUW', antUW.head(), sep='\n')

datamap
  dbname orid  SourceID      method  Longitude-ant  Latitude-ant  Depth-ant  \
0  SHOTS  151  8000151p  shots2s3dp     -121.55266      46.71370     -0.668   
1  SHOTS  152  8000152p  shots2s3dp     -121.77356      46.20077     -1.029   
2  SHOTS  155  8000155p  shots2s3dp     -121.99266      46.18471     -1.009   
3  SHOTS  156  8000156p  shots2s3dp     -122.44470      46.04884     -0.834   
4  SHOTS  164  8000164p  shots2s3dp     -122.13281      46.08131     -0.594   

              FullTime  
0  20140724T113500.020  
1  20140725T050000.020  
2  20140725T072000.020  
3  20140725T073500.020  
4  20140725T085000.020  
antUW
     PNSNid   dbname  orid picker  Latitude-uw  Longitude-uw  Depth-uw
0  60810532  2014_Q3     2   Carl      46.5397     -122.9978       0.0
1  60810642  2014_Q3  1394   Carl      45.8663     -122.4492       0.0
2  60058633  2014_Q3  1395   Carl      45.5633     -122.6557      15.9
3  60814162  2014_Q3  1396   Carl      46.1728     -122.4225       0.0
4     

In [5]:
### add data to sqlite database

dbdir='/Users/ulberg/Documents/GitHub/travelTimeDB/DB' # created below if it does not exist yet
filename=dbdir + '/tt_db_sql.sqlite'

# SQLite creates the database file on demand but not a missing parent
# directory, so make sure the directory exists before anything connects
os.makedirs(dbdir, exist_ok=True)

# create engine
engine=create_engine('sqlite:///' + filename)

### create tables in 'raw' state (with '_r'), with all columns
### will work on these in sql to create the final tables
# raw table name -> dataframe it is filled from (written in this order)
# NOTE(review): obs_ex and loc_ex are loaded earlier but are not written here;
# confirm whether the explosion data is meant to be added to the database too
raw_tables={
    'sta_r': sta,        # station table
    'obs_r': obs_eq,
    'loc_r': loc_eq,
    'map_r': datamap,
    'antUW_r': antUW,
}
for table_name, frame in raw_tables.items():
    # convert dataframe to sqlite table, replace old version (DANGEROUS);
    # index=False keeps the dataframe index out of the table
    frame.to_sql(table_name, con=engine, if_exists='replace', index=False)


## Now start manipulating tables to get final usable products
Run SQL commands with `engine.execute("sqlCommand")`.
Example: `engine.execute("SELECT * FROM station")`