This notebook generates the AK135-predicted P and S arrival times for every event<->station pair and stores them as CSV files for the continuous-data results.

In [1]:
import pandas as pd
from obspy.taup import TauPyModel
import obspy
from tqdm import tqdm
from pandarallel import pandarallel

In [2]:
# Progress reporting: hook tqdm into pandas and start the pandarallel workers
# with their progress bar enabled.
tqdm.pandas()
pandarallel.initialize(progress_bar=True)

# Travel-time calculator built on the AK135 model; get_p_s() reads this global.
model = TauPyModel(model="ak135")

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [3]:
# Event catalog from the association step (whitespace-delimited file).
associated_df = pd.read_csv(
    "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/continious_associated_catalog.csv",
    sep=r"\s+",
    usecols=["time", "longitude", "latitude", "z(km)", "event_index"],
)
associated_df["time"] = pd.to_datetime(associated_df["time"])
# Only the depth column needs a new name; the other columns already match.
associated_df = associated_df.rename(columns={"z(km)": "depth"})

# Pick-to-event assignments; the file's own header line is skipped and
# replaced by explicit column names.
assignment_columns = [
    "id",
    "date",
    "time",
    "amp",
    "type",
    "prob",
    "event_index",
    "gamma_score",
]
associated_assignment_df = pd.read_csv(
    "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/continious_associated_assignment.csv",
    sep=r"\s+",
    skiprows=1,
    names=assignment_columns,
)
has_positive_score = associated_assignment_df["gamma_score"] > 0
associated_assignment_df = associated_assignment_df[has_positive_score]

# Keep only events with strictly more than 10 remaining assignments
# (the comparison is `> 10`, so an event with exactly 10 is dropped).
picks_per_event = associated_assignment_df["event_index"].value_counts()
well_observed_events = picks_per_event[picks_per_event > 10].index
associated_df = associated_df[associated_df["event_index"].isin(well_observed_events)]
associated_df.head()

Unnamed: 0,time,depth,event_index,longitude,latitude
7,2010-03-22 10:19:17.558,37.395109,857,-174.019518,-22.143588
10,2010-04-15 01:53:59.089,356.973,1513,-179.353219,-21.176853
11,2010-04-15 01:53:49.567,475.973,1514,-179.209502,-20.870826
15,2010-04-27 22:08:58.514,316.537,27,-173.970578,-23.51037
20,2010-09-23 23:30:12.772,629.213524,538,-177.499262,-18.009047


In [4]:
# Relocated catalog: whitespace-delimited, no usable header, so columns are
# selected by position and named here. The origin time is stored as six
# separate date/time fields.
origin_time_parts = ["year", "month", "day", "hour", "minute", "second"]
relocated_df = pd.read_csv(
    "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/continious_relocated.csv",
    sep=r"\s+",
    usecols=[0, 1, 2, 3, 10, 11, 12, 13, 14, 15],
    names=["event_index", "latitude", "longitude", "depth"] + origin_time_parts,
)
# Collapse the six fields into a single datetime column, then discard them.
relocated_df = relocated_df.assign(
    time=pd.to_datetime(relocated_df[origin_time_parts])
).drop(columns=origin_time_parts)
relocated_df.head()

Unnamed: 0,event_index,latitude,longitude,depth,time
0,27,-23.287991,-173.793732,336.826,2010-04-27 22:08:59.620
1,55,-21.058491,-177.236542,369.412,2010-01-18 06:48:53.230
2,68,-16.277441,-174.479782,200.926,2010-01-26 01:47:50.640
3,74,-20.470518,-176.965103,474.381,2010-06-28 04:23:34.690
4,81,-18.396822,-174.699707,20.341,2009-12-03 04:17:37.290


In [5]:
# Bootstrapped catalog: take the first five columns and align the column
# names with the other catalogs ("event_index", "time").
bootstrapped_df = pd.read_csv(
    "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/continious_bootstrapped.csv",
    usecols=[0, 1, 2, 3, 4],
).rename(columns={"id": "event_index", "datetime": "time"})
bootstrapped_df = bootstrapped_df.assign(time=pd.to_datetime(bootstrapped_df["time"]))
bootstrapped_df.head()

Unnamed: 0,event_index,latitude,longitude,depth,time
0,55,-21.078356,-177.22974,365.65588,2010-01-18 06:48:53.679099136
1,68,-16.359084,-174.50576,204.380435,2010-01-26 01:47:51.560340480
2,74,-20.49493,-176.895955,478.382117,2010-06-28 04:23:34.643994112
3,88,-17.442211,-177.878019,646.372351,2010-04-16 04:21:24.130119936
4,93,-18.800355,-175.508731,28.436913,2010-07-12 04:08:03.839479552


In [6]:
# Semi catalog: take the first five columns and align the column names with
# the other catalogs ("event_index", "time").
semi_df = pd.read_csv(
    "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/continious_semi.csv",
    usecols=[0, 1, 2, 3, 4],
).rename(columns={"id": "event_index", "datetime": "time"})
semi_df = semi_df.assign(time=pd.to_datetime(semi_df["time"]))
semi_df.head()

Unnamed: 0,event_index,latitude,longitude,depth,time
0,49,-24.055055,-178.753175,581.541092,2010-09-05 06:20:43.693259008
1,59,-22.996563,-175.655182,85.202101,2010-02-03 07:50:12.698582784
2,64,-23.101836,-178.642513,554.767645,2010-07-21 13:36:11.897348096
3,78,-21.863043,-175.590572,140.706452,2010-02-07 15:23:13.122398720
4,95,-20.365366,-177.123331,446.403303,2010-04-16 07:59:40.670819840


In [7]:
# Number of events in each catalog: associated, relocated, bootstrapped, semi.
print(*map(len, (associated_df, relocated_df, bootstrapped_df, semi_df)))

13111 13019 9427 13799


In [8]:
# Station table (whitespace-delimited). The separator is a raw string so that
# "\s" reaches the regex engine instead of being parsed as an (invalid) Python
# string escape, matching the r"\s+" used by the other cells.
stations = pd.read_csv(
    '/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/stations/stations.csv',
    sep=r"\s+",
).rename(columns={'station': 'sta', "lat": "stla", "lon": "stlo", "elev": "stel_m"})
# Constant join key: get_arrivals() merges events and stations on "key" to
# pair every event with every station.
stations["key"] = 1
stations.head()

Unnamed: 0,sta,stla,stlo,stel_m,key
0,A01,-21.53,-175.62,-1059.0,1
1,A02W,-21.49,-175.79,-2015.0,1
2,A03,-21.44,-175.9,-1955.0,1
3,A05,-21.35,-176.17,-2368.0,1
4,A06W,-21.3,-176.32,-2137.0,1


In [16]:
def get_p_s(row):
    """Predict the first P and S arrival times for one event-station pair.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged event/station table. The fields read are
        ``latitude``, ``longitude``, ``depth`` (km) and ``time`` for the event,
        ``stla``, ``stlo`` and ``sta`` for the station, plus ``event_index``.

    Returns
    -------
    pandas.Series
        ``event_index``, ``sta``, ``p_arrival_time`` and ``s_arrival_time``.
        An arrival time is ``NaT`` when the depth is negative or missing, or
        when the travel-time model returns no arrival for that phase list.

    Notes
    -----
    Relies on the notebook-level ``model`` (TauPyModel) and on ``obspy``.
    A Series with the same four fields is returned for every row, including
    rows that cannot be modelled, so that a row-wise ``apply`` always receives
    results of one shape instead of a mix of ``None`` and Series.
    """
    evla = row['latitude']
    evlo = row['longitude']
    evdp = row['depth']
    stla = row['stla']
    stlo = row['stlo']
    origin_time = row['time']

    predicted = {
        "event_index": row["event_index"],
        "sta": row["sta"],
        "p_arrival_time": pd.NaT,
        "s_arrival_time": pd.NaT,
    }

    # `not (evdp >= 0)` rejects negative depths and NaN alike; neither can be
    # passed to the travel-time calculation.
    if not (evdp >= 0):
        return pd.Series(predicted)

    # Epicentral distance in degrees between event and station.
    distance_in_degree = obspy.geodetics.locations2degrees(
        lat1=evla, long1=evlo, lat2=stla, long2=stlo
    )

    def first_arrival_time(phase_list):
        """Origin time plus the travel time of the first returned arrival, or NaT."""
        arrivals = model.get_travel_times(
            source_depth_in_km=evdp,
            distance_in_degree=distance_in_degree,
            phase_list=phase_list,
        )
        # Indexing an empty result with [0] would raise IndexError and abort
        # the whole apply; report a missing value for this pair instead.
        if len(arrivals) == 0:
            return pd.NaT
        return origin_time + pd.Timedelta(seconds=arrivals[0].time)

    # Theoretical P and S arrival times (up-going and down-going branches).
    predicted["p_arrival_time"] = first_arrival_time(["p", "P"])
    predicted["s_arrival_time"] = first_arrival_time(["s", "S"])
    return pd.Series(predicted)

def get_arrivals(df,stations):
    """Compute predicted P/S arrivals for every event-station combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Event catalog; each row must carry the fields ``get_p_s`` reads for
        an event.
    stations : pandas.DataFrame
        Station table; must already contain a constant ``key`` column, which
        is used as the join key.

    Returns
    -------
    The result of applying ``get_p_s`` row-wise (via pandarallel's
    ``parallel_apply``) to the event x station table.
    """
    # assign() returns a new frame, so the caller's catalog does not gain a
    # "key" column (and no SettingWithCopyWarning is raised when `df` is a
    # filtered slice of another frame).
    event_station_pairs = df.assign(key=1).merge(stations, on="key")
    return event_station_pairs.parallel_apply(get_p_s, axis=1)

# For each catalog: predict arrivals for all event-station pairs, then write
# the table to CSV (without the index) before moving on to the next catalog.

# --- associated catalog ---
associated_taup_csv = "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/associated_df_taup.csv"
associated_df_taup = get_arrivals(associated_df, stations)
associated_df_taup.to_csv(associated_taup_csv, index=False)

# --- relocated catalog ---
relocated_taup_csv = "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/relocated_df_taup.csv"
relocated_df_taup = get_arrivals(relocated_df, stations)
relocated_df_taup.to_csv(relocated_taup_csv, index=False)

# --- bootstrapped catalog ---
bootstrapped_taup_csv = "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/bootstrapped_df_taup.csv"
bootstrapped_df_taup = get_arrivals(bootstrapped_df, stations)
bootstrapped_df_taup.to_csv(bootstrapped_taup_csv, index=False)

# --- semi catalog ---
semi_taup_csv = "/Users/ziyixi/Library/CloudStorage/OneDrive-MichiganStateUniversity/Paper/PhaseNetTF_myturn/PhaseNet-TF-Figures/phasenettf/data/catalog/semi_df_taup.csv"
semi_df_taup = get_arrivals(semi_df, stations)
semi_df_taup.to_csv(semi_taup_csv, index=False)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=88530), Label(value='0 / 88530')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=64104), Label(value='0 / 64104')))…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=93834), Label(value='0 / 93834')))…