In [162]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
import re
import dateutil
from datetime import datetime, timedelta
from math import ceil, floor
import matplotlib.pyplot as plt

### Download

In [None]:
def jma_monthly_fetch(verbose=False, min_year=0):
    """Download the monthly catalog files and merge them into one DataFrame.

    The index page at ``BASE`` is scraped for ``<td class="cdir">`` cells, whose
    text is read as a year. For every year >= ``min_year`` the twelve files
    ``data/<year>/<year><month>_UT.txt`` are read as whitespace-separated tables
    (lines starting with ``#`` are treated as comments).

    Parameters
    ----------
    verbose : bool
        If True, print one line per month fetched and per month not available.
    min_year : int
        Years strictly below this value are not downloaded.

    Returns
    -------
    pandas.DataFrame
        All fetched months concatenated in the order they were requested, with a
        fresh 0..n-1 index. ``Origin_Time(UT)`` is converted to datetime and the
        ``;`` in ``Strike``/``Dip``/``Rake`` is replaced by ``.``.

    Raises
    ------
    requests.HTTPError
        If the index page request returns an error status.
    ValueError
        If not a single monthly file could be fetched.
    """
    # Local imports: only this function needs them.
    import warnings
    from urllib.error import HTTPError

    BASE = "https://www.fnet.bosai.go.jp/event/mcata/"
    URL_DATA = BASE + "data/"
    HEADERS = {"User-Agent": "research/academic use (schimmenti@pks.mpg.de)"}

    response = requests.get(BASE, headers=HEADERS, timeout=30)
    # Fail loudly on an error page instead of parsing it and finding no years.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    years = [
        int(cell.text.strip())
        for cell in soup.find_all("td")
        if cell.get('class') == ['cdir']
    ]

    monthly_frames = []
    for year in years:
        if year < min_year:
            continue
        for month in range(1, 13):
            url = URL_DATA + f"{year}/{year:04d}{month:02d}_UT.txt"
            try:
                df = pd.read_csv(url, comment='#', sep=r'\s+')
                # "YYYY/MM/DD,hh:mm:ss" -> "YYYY-MM-DDThh:mm:ss" before parsing.
                origin = df['Origin_Time(UT)'].str.replace(",", "T", regex=False)
                df['Origin_Time(UT)'] = pd.to_datetime(origin.str.replace("/", "-", regex=False))
                for column in ('Strike', 'Dip', 'Rake'):
                    df[column] = df[column].str.replace(";", ".", regex=False)
            except HTTPError as exc:
                # An HTTP error status for one file (e.g. a month that is not
                # published) is the expected skip: carry on with the next month.
                if verbose:
                    print(f"No data for year {year} and month {month} ({exc})")
                continue
            except Exception as exc:
                # Still best-effort, but never silent: a parsing or column error
                # would otherwise drop a whole month without any trace.
                warnings.warn(f"Skipping {url}: {exc!r}", stacklevel=2)
                continue
            monthly_frames.append(df)
            if verbose:
                print(f"Fetched year {year} and month {month}")

    if not monthly_frames:
        raise ValueError(f"No monthly catalog file could be fetched from {URL_DATA}")
    return pd.concat(monthly_frames, ignore_index=True)


In [None]:
# Download the full catalog with progress output. The fetcher skips years
# strictly below min_year, so -1 keeps every non-negative year.
dataframes = jma_monthly_fetch(min_year=-1, verbose=True)

In [None]:
# Persist the merged catalog as a space-separated file without the row index,
# so the download does not have to be repeated.
dataframes.to_csv(
    'japanese-cat.csv',
    sep=' ',
    index=False,
)

### Load

In [109]:
# Reload the saved catalog with the origin-time column parsed as datetimes,
# order the events chronologically and renumber the rows from 0.
catalog = (
    pd.read_csv('japanese-cat.csv', sep=' ', parse_dates=['Origin_Time(UT)'])
    .sort_values(by='Origin_Time(UT)')
    .reset_index(drop=True)
)

In [None]:
# Discretise origin times into bins of 0.1 day counted from 1997-01-01.
time_origin = np.datetime64(datetime(1997,1,1))
# Bin width expressed in seconds, as a float.
time_resolution = np.timedelta64(timedelta(days=0.1)).astype('timedelta64[s]').astype('float')
# Use .dt.total_seconds() rather than astype('timedelta64[s]'): the latter
# returns float seconds on pandas 1.x but a second-resolution timedelta on
# pandas >= 2.0 (sub-second part dropped), which can shift the ceil() below
# by one bin for events just after a bin boundary.
elapsed_seconds = (catalog['Origin_Time(UT)'] - time_origin).dt.total_seconds().values
# 'gap' is the time-bin index: 0 only for an event exactly at time_origin,
# k for an event in the interval ((k-1), k] * time_resolution after it.
catalog['gap'] = np.ceil(elapsed_seconds / time_resolution).astype('int')

In [None]:
# Bounding box (degrees) and cell size of the regular latitude/longitude grid.
min_lat = 18.0
max_lat = 50.0
min_lon = 119.0
max_lon = 156.0
space_resolution = 0.1

# Grid cell of every event (row = latitude bin, column = longitude bin).
catalog['lat_idx'] = np.floor((catalog['Latitude(deg)'] - min_lat)/space_resolution).astype('int')
catalog['lon_idx'] = np.floor((catalog['Longitude(deg)'] - min_lon)/space_resolution).astype('int')
spatial_shape = (ceil((max_lat-min_lat)/space_resolution), ceil((max_lon-min_lon)/space_resolution))

# Only events inside the box are accumulated: a negative index would silently
# wrap around to the opposite edge of the grid and an index >= the grid size
# would raise IndexError. Inspect (~in_grid).sum() to see how many were left out.
in_grid = (
    catalog['lat_idx'].between(0, spatial_shape[0] - 1)
    & catalog['lon_idx'].between(0, spatial_shape[1] - 1)
)
lat_cells = catalog.loc[in_grid, 'lat_idx'].values
lon_cells = catalog.loc[in_grid, 'lon_idx'].values

# Per-cell event count and per-cell magnitude sum. np.add.at is used because
# several events can fall in the same cell and each must be counted.
counts = np.zeros(spatial_shape)
avg_m = np.zeros(spatial_shape)
np.add.at(counts, (lat_cells, lon_cells), 1.0)
np.add.at(avg_m, (lat_cells, lon_cells), catalog.loc[in_grid, 'MT_Magnitude(Mw)'].values)

# Mean magnitude per cell. `out` must be supplied together with `where`:
# without it the cells where counts == 0 are left uninitialised. Empty cells
# are set to 0 here.
avg_m = np.divide(avg_m, counts, out=np.zeros_like(avg_m), where=counts > 0)