In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from iris import irisRequests
import ngl
import datetime
from numba import njit
from numba.typed import Dict
from sklearn.metrics.pairwise import haversine_distances

In [5]:
# Directory where the data files used in this earthquake study are stored.
# It is passed to ngl.ngl_retrieve_24h when the per-station series are loaded.

rootpath = "csv_24/"

In [43]:
# regions[<region>] = (min latitude, max latitude, min longitude, max longitude),
# i.e. the bounding box of the region in degrees.
regions = {
    'california': (30, 41, -125, -113),
    'japan': (20, 50, 120, 150),
    'italy': (35, 46, 6, 19),
}

# regions_m[<region>] = pair of floats attached to each region.
# NOTE(review): presumably magnitude thresholds (upper, lower) - confirm where regions_m is consumed.
# NOTE(review): 'greece' has an entry here but no bounding box in `regions`.
regions_m = {
    'japan': (6.5, 4.0),
    'italy': (5.0, 3.0),
    'greece': (6.0, 4.0),
    'california': (5.0, 3.0),
}

In [7]:
# List of all stations from the daily (24 h) solutions published with a 2-week latency
# (data becomes available about two weeks after the measurement), as returned by
# ngl.ngl_process_list.
# NOTE(review): each entry presumably carries the station's latitude/longitude and the
# first/last day it was operating - confirm against ngl.ngl_process_list.
station_list = ngl.ngl_process_list(ngl.ngl_24h_2w)

In [26]:
# station_info[<region>] = (station names, station latitudes, station longitudes)
# for every station of station_list that lies inside the region's bounding box.
station_info = {}
for name in regions:
    region = regions[name]
    # The box is forwarded as (min lat, max lat, min lon, max lon).
    station_names, station_lats, station_lons = ngl.get_all_stations_box(station_list, *region)
    station_info[name] = station_names, station_lats, station_lons

In [27]:
# station_data[<station name>] = table returned by ngl.ngl_retrieve_24h for that station,
# read from rootpath. It is later used to look up station positions around earthquake times.
# NOTE(review): presumably one row per day with the station's position - confirm against ngl.

station_data = {}
for name, stationn_info in station_info.items():
    # stationn_info[0] holds the station names of the region. Iterate over it directly:
    # the former `for _, x in enumerate(...)` discarded the index and bound `_` at notebook
    # top level, which shadows IPython's "last output" variable.
    for stationn_name in stationn_info[0]:
        df, status = ngl.ngl_retrieve_24h(rootpath, stationn_name)
        print(stationn_name, status)
        station_data[stationn_name] = df

5PTS loaded
7OAK loaded
7ODM loaded
ACSB loaded
ACSX loaded
AERO loaded
AGMT loaded
AIAH loaded
AISD loaded
ALAM loaded
ALPP loaded
ALTH loaded
ANA1 loaded
ANTB loaded
ANTE loaded
ANTV loaded
AOA1 loaded
APEX loaded
AQUA loaded
AR27 loaded
AR53 loaded
ARGU loaded
ARM1 loaded
ARM2 loaded
ARMY loaded
ARTE loaded
ASHM loaded
AST1 loaded
AST2 loaded
ASTA loaded
AVRY loaded
AZBH loaded
AZDS loaded
AZFM loaded
AZGV loaded
AZKG loaded
AZLH loaded
AZLQ loaded
AZMP loaded
AZNE loaded
AZPK loaded
AZQZ loaded
AZRY loaded
AZU1 loaded
AZWE loaded
AZYC loaded
AZYU loaded
AZYW loaded
BADL loaded
BAK1 loaded
BAKR loaded
BALD loaded
BAMO loaded
BAR1 loaded
BATM loaded
BATT loaded
BBDM loaded
BBRY loaded
BCUT loaded
BCWR loaded
BDSP loaded
BEAT loaded
BEDE loaded
BEER loaded
BEES loaded
BEMT loaded
BEPK loaded
BERR loaded
BFLD loaded
BFLO loaded
BFLT loaded
BFSH loaded
BGIS loaded
BHIL loaded
BIGP loaded
BIGS loaded
BILL loaded
BIRD loaded
BKAP loaded
BKMS loaded
BKR1 loaded
BKR2 loaded
BLAC loaded
BLIN

In [52]:
# Download one earthquake catalog per region.
# catalogs[<region>] = DataFrame of the events inside the region's bounding box between
# start_time and end_time, sorted by origin time and re-indexed from 0.

start_time = datetime.datetime(2012, 1, 1, 0, 0, 0)
end_time =  datetime.datetime(2024, 1, 1, 0, 0, 0)
catalogs = {}
for name, region in regions.items():
    min_lat, max_lat, min_long, max_long = region
    download_url = irisRequests.url_events_box(
        start_time, end_time, min_lat, max_lat, min_long, max_long, minmag=3, magtype="MW"
    )
    df = pd.read_csv(download_url, sep="|", comment="#")
    # Unparseable timestamps become NaT, so the dropna below removes those rows.
    df["Time"] = pd.to_datetime(df["Time"], errors='coerce')
    # dropna(axis=0) removes every row that has a missing value in ANY column.
    df = df.dropna(axis=0).sort_values(by="Time").reset_index(drop=True)
    catalogs[name] = df

In [None]:
maximal_time_shift = np.timedelta64(7, 'D')
minimal_time_shift = np.timedelta64(1, 'D')
dlat = 0.1
dlon = 0.1
datasets = {}
for name, catalog in catalogs.items():
    dataset = []
    print(name)

    # Discretize the bounding box into N_lat x N_lon cells of size dlat x dlon.
    # NOTE(review): int() truncates, so an extent whose ratio to the step lands just
    # below an integer in floating point would lose its last cell - confirm this is acceptable.
    region = regions[name]
    N_lat = int((region[1] - region[0]) / dlat)
    N_lon = int((region[3] - region[2]) / dlon)
    lat_axis = region[0] + np.arange(0, N_lat) * dlat
    lon_axis = region[2] + np.arange(0, N_lon) * dlon

    # Row-major list of cell coordinates: latitude varies slowest, longitude fastest.
    grid_latlat = np.repeat(lat_axis, lon_axis.size)
    grid_lonlon = np.tile(lon_axis, lat_axis.size)
    grid = np.column_stack((grid_latlat, grid_lonlon))

    # (latitude, longitude) of every station of the region, one row per station.
    stations_coords = np.column_stack((station_info[name][1], station_info[name][2]))

    # Haversine distance between every cell and every station, then folded back onto
    # the (N_lat, N_lon) grid with the station index as the last axis.
    # NOTE(review): this array is overwritten on every iteration, so only the last
    # region's distances survive the loop.
    grid_stations_dists = haversine_distances(np.radians(grid), np.radians(stations_coords))
    grid_stations_dists = grid_stations_dists.reshape(N_lat, N_lon, -1)

In [135]:
# Build the cell grid and the cell-to-station distances for a single region.
name = 'japan'
region = regions[name]
dlat = 0.1
dlon = 0.1

# Number of cells along each axis (int() truncates towards zero).
N_lat = int((region[1] - region[0]) / dlat)
N_lon = int((region[3] - region[2]) / dlon)

grid_latlat, grid_lonlon = np.meshgrid(
    region[0] + np.arange(0, N_lat) * dlat,
    region[2] + np.arange(0, N_lon) * dlon,
    indexing='ij',
)
grid_latlat = grid_latlat.flatten()
grid_lonlon = grid_lonlon.flatten()

# Coordinates of all cells: latitude is held fixed while longitude varies.
grid = np.column_stack((grid_latlat, grid_lonlon))
# Coordinates of all stations of the region, one (lat, lon) row per station.
stations_coords = np.column_stack((station_info[name][1], station_info[name][2]))

# Angular distance between each grid point and each station, reshaped so that
# the first two axes index the cell and the last axis indexes the station.
grid_stations_dists = haversine_distances(
    np.radians(grid), np.radians(stations_coords)
).reshape(N_lat, N_lon, -1)

In [132]:
grid.shape

(90000, 2)

In [136]:
grid_stations_dists.shape

(300, 300, 1683)

In [61]:
@njit(nogil=True)
def construct_map(u, idx, d, d_cutoff = 0.01):
    """Average per-station vectors onto a grid, cell by cell.

    For every cell (i, j), the rows of ``u`` belonging to stations whose distance
    ``d[i, j, station]`` is at most ``d_cutoff`` are averaged. Cells with no station
    within the cutoff are filled with NaN.

    Parameters
    ----------
    u : 2-D array, shape (N_s, n_comp)
        One vector per selected station (n_comp = 3 in the original use). Row ``k``
        belongs to station ``idx[k]``.
    idx : 1-D integer sequence, length N_s
        Station indices into the last axis of ``d``.
    d : 3-D array, shape (N_lat, N_lon, N_s_tot)
        Distance from every grid cell to every station.
    d_cutoff : float, optional
        Inclusive distance threshold, in the same units as ``d``.
        NOTE(review): presumably radians, if ``d`` comes from haversine_distances - confirm.

    Returns
    -------
    u_map : float64 array, shape (N_lat, N_lon, n_comp)
        Mean of the selected vectors in each cell, or NaN where no station is in range.
    """
    n_lat = d.shape[0]
    n_lon = d.shape[1]
    # Take the component count from the input instead of hard-coding 3.
    n_comp = u.shape[1]
    u_map = np.zeros((n_lat, n_lon, n_comp))
    for i in range(n_lat):
        for j in range(n_lon):
            cnt = 0
            for i_n, n in enumerate(idx):
                if d[i, j, n] <= d_cutoff:
                    # Accumulate component by component to avoid a temporary array per station.
                    for c in range(n_comp):
                        u_map[i, j, c] += u[i_n, c]
                    cnt += 1
            if cnt > 0:
                norm = float(cnt)
                for c in range(n_comp):
                    u_map[i, j, c] = u_map[i, j, c] / norm
            else:
                # No station within the cutoff: mark the cell as missing.
                for c in range(n_comp):
                    u_map[i, j, c] = np.nan
    return u_map

0       2012-01-01 02:25:26+00:00
1       2012-01-01 02:30:12+00:00
2       2012-01-01 02:45:34+00:00
3       2012-01-01 05:27:55+00:00
4       2012-01-01 05:49:06+00:00
                   ...           
30696   2023-10-14 09:46:40+00:00
30697   2023-10-14 14:04:04+00:00
30698   2023-10-14 16:25:12+00:00
30699   2023-10-14 23:44:59+00:00
30700   2023-10-15 09:13:26+00:00
Name: Time, Length: 30701, dtype: datetime64[ns, UTC]