In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as DT
import pytz
from sklearn.metrics.pairwise import haversine_distances

In [2]:
# Radius that divides the distance threshold below so it can be compared with
# the output of haversine_distances (presumably Earth's mean radius in km,
# with haversine_distances returning unit-sphere distances -- TODO confirm).
R0 = 6371
# Base spatial radius, in the same units as R0; events are kept when their
# distance to the mainshock is at most R1 * 2 * sqrt(2).
R1 = 150
# Length, in days, of the time window that follows each mainshock.
nDays = 45
# Minimum magnitude an event must have to be included in a sequence.
min_event_mag = 3

In [3]:
def t2dt(atime):
    """
    Turn a fractional-year timestamp into a DT.datetime.

    The integer part of ``atime`` selects the calendar year; the fractional
    part is interpreted as the elapsed fraction of that particular year
    (so leap years are 366 days long).

    Described in the original notebook as the inverse of ``dt2t`` (which is
    not defined in this part of the file), i.e.
    ``dt2t(t2dt(atime)) == atime``.
    """
    whole_year = int(atime)
    year_start = DT.datetime(whole_year, 1, 1)
    year_length = DT.datetime(whole_year + 1, 1, 1) - year_start
    # Scale the fractional part by the number of seconds in this year.
    offset_seconds = (atime - whole_year) * year_length.total_seconds()
    return year_start + DT.timedelta(seconds=offset_seconds)

In [4]:
# The raw catalog stores event times as fractional years in the JST time zone;
# convert them to datetimes here and export a UTC version below.
jap_cat_raw = pd.read_csv(
    'japan_catalog.dat',
    sep=r'\s+',  # raw string: '\s' is an invalid escape in a normal literal
    header=None,
    names=['lon', 'lat', 'frtime', 'month', 'day', 'mag',
           'stuff1', 'stuff2', 'stuff3', 'stuff4'],
)
# Build the cleaned frame without mutating the raw one, so re-running this
# cell always starts from the same input.
jap_cat = (
    jap_cat_raw
    .assign(datetime=lambda df: pd.to_datetime(df['frtime'].apply(t2dt)))
    .loc[:, ['datetime', 'lat', 'lon', 'mag']]
    .set_index('datetime')
)
# Save the new UTC file: mark the naive index as JST, convert to UTC, then
# drop the tz info so plain (naive) UTC timestamps are written.
(
    jap_cat
    .tz_localize('Asia/Tokyo')
    .tz_convert('UTC')
    .tz_localize(None)
    .to_csv('jap_cat_utc.csv', sep=' ', index_label='datetime')
)

In [9]:
# Load the mainshock list and the UTC catalog written by the previous step.
# Both files are whitespace-separated; the regex is a raw string because '\s'
# is an invalid escape sequence in a normal string literal.
mainshocks_list = pd.read_csv('japan-ms-list.csv', sep=r'\s+', parse_dates=['datetime'])
jap_cat = pd.read_csv('jap_cat_utc.csv', sep=r'\s+', parse_dates=['datetime'])

In [10]:
# Build one sequence per mainshock: all catalog events with magnitude
# >= min_event_mag that occur within nDays after the mainshock and within
# R1 * 2 * sqrt(2) of it. In each sequence the first row is the mainshock
# (type 1) and every other row is type 2.

# Loop-invariant pieces, computed once instead of once per mainshock.
catalog_coords_rad = np.radians(jap_cat[['lat', 'lon']].values.astype('float'))
max_angular_dist = R1 * 2 * np.sqrt(2) / R0  # threshold scaled by R0 to match dists
time_window = np.timedelta64(nDays, 'D')
magnitude_mask = jap_cat.mag >= min_event_mag

sequence_id = 0
sequences = []
for _, ms in mainshocks_list.iterrows():
    # `&` creates a fresh Series, so the in-place updates below never touch
    # the shared magnitude_mask.
    earthquakes_mask = magnitude_mask & (jap_cat.datetime >= ms.datetime)
    earthquakes_mask &= jap_cat.datetime <= ms.datetime + time_window
    ms_coords_rad = np.radians(ms[['lat', 'lon']].values.astype('float'))[None, :]
    dists = haversine_distances(catalog_coords_rad, ms_coords_rad)[:, 0]
    earthquakes_mask &= dists <= max_angular_dist
    sequence = jap_cat[earthquakes_mask].reset_index(drop=True)

    # The first selected row must be the mainshock itself. An empty selection
    # used to raise KeyError on `.loc[0, 'mag']`; report and skip it like any
    # other mismatch. (Assumes the catalog rows are in chronological order so
    # that row 0 is the earliest event -- TODO confirm.)
    if sequence.empty or sequence.loc[0, 'mag'] != ms.mag:
        print('Error: first selected event does not match mainshock at',
              ms.datetime, 'with mag', ms.mag)
        continue

    sequence['type'] = 2
    sequence.loc[0, 'type'] = 1
    sequence['seq_id'] = sequence_id
    sequence_id += 1
    sequences.append(sequence)
sequences = pd.concat(sequences)

In [11]:
# Each concatenated sequence kept its own 0-based index; replace it with a
# single continuous index and discard the old one.
sequences = sequences.reset_index(drop=True)

In [12]:
# Write the combined sequences to a space-separated file, without the row index.
sequences.to_csv('giuseppe_catalog.csv', index=False, sep=' ')