In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import floor, ceil
from datetime import datetime, time, timedelta

In [None]:
# Short names for the four CSV columns that the rest of the notebook uses.
short_names = {
    'Origin_Time(UT)': 'time',
    'Latitude(deg)': 'lat',
    'Longitude(deg)': 'lon',
    'MT_Magnitude(Mw)': 'mag',
}
min_mag = 3.0  # rows with mag below this threshold are discarded

# Read the whitespace-separated file, keep only the four columns above under
# their short names, apply the magnitude cut and renumber the rows from 0.
data = (
    pd.read_csv('japanese-cat.csv', sep=r'\s+', parse_dates=['Origin_Time(UT)'])
    .loc[:, list(short_names)]
    .rename(columns=short_names)
    .loc[lambda frame: frame['mag'] >= min_mag]
    .reset_index(drop=True)
)

In [None]:
# Whole-number bounding box of the events: minima are rounded down and maxima
# rounded up, so every event lies inside [min, max] on both axes.
min_lat, max_lat = floor(data['lat'].min()), ceil(data['lat'].max())
min_lon, max_lon = floor(data['lon'].min()), ceil(data['lon'].max())

# Earliest value in the 'time' column.
time0 = data['time'].min()

In [141]:
# Spatial bin sizes to scan.
space_deg_deltas = np.array([0.5, 1.0, 2.0])

# Time-window lengths to scan: 50 log-spaced factors from 10**0 to 10**2, each
# multiplied by a one-day timedelta64; np.unique sorts them and drops repeats.
time_deltas = np.unique(np.multiply(np.logspace(0, 2, 50), np.timedelta64(1, 'D')))

# Event coordinates measured from the lower corner of the bounding box, and
# event times measured from the earliest event.
lat_relative = data['lat'].to_numpy() - min_lat
lon_relative = data['lon'].to_numpy() - min_lon
time_relative = (data['time'] - data['time'].min()).to_numpy()

In [176]:
from scipy.ndimage import binary_fill_holes, binary_dilation
from skimage.measure import label, regionprops
def get_largest_connected_component(mask, connectivity=1):
    """Return a boolean mask that keeps only the largest connected region.

    Parameters
    ----------
    mask : array_like
        Input image; it is handed unchanged to ``label`` (imported from
        ``skimage.measure`` in this notebook), which treats 0/False as
        background.
    connectivity : int, optional
        Forwarded to ``label``. The default, 1, matches the previous
        hard-coded value; in 2D, 2 would give 8-connectivity.

    Returns
    -------
    numpy.ndarray of bool
        Same shape as the labelled image. True on the label with the most
        pixels (the lowest label number wins ties, because ``argmax`` returns
        the first maximum). All False when there is no foreground at all.
    """
    labels = label(mask, connectivity=connectivity)
    counts = np.bincount(labels.ravel())

    # Only the background label (or nothing at all) is present: there is no
    # component to select. Without this guard argmax would pick label 0 and
    # the function would return the background itself as an all-True mask.
    if counts.size <= 1:
        return np.zeros(labels.shape, dtype=bool)

    counts[0] = 0  # exclude the background from the size ranking
    largest_label = counts.argmax()
    return labels == largest_label

# For every spatial bin size: count events on a lat/lon grid, turn the occupied
# cells into one contiguous region mask, then count events per time window for
# every window length in time_deltas.
min_mask_coverage = 0.25  # the mask is grown until it covers this fraction of the grid

for (space_deg_i, space_deg_delta) in enumerate(space_deg_deltas):
    num_space_bins_lat = ceil((max_lat - min_lat) / space_deg_delta)
    num_space_bins_lon = ceil((max_lon - min_lon) / space_deg_delta)
    # max_lat / max_lon are the ceil of the largest coordinate, so when that
    # coordinate is a whole number the event sits exactly on the upper edge and
    # floor-divides to index == number of bins. Fold it into the last bin
    # instead of letting np.add.at raise IndexError.
    lat_int = np.minimum((lat_relative // space_deg_delta).astype(int), num_space_bins_lat - 1)
    lon_int = np.minimum((lon_relative // space_deg_delta).astype(int), num_space_bins_lon - 1)
    total_events = np.zeros((num_space_bins_lat, num_space_bins_lon), dtype=int)
    # np.add.at is unbuffered, so several events in the same cell are all counted.
    np.add.at(total_events, (lat_int, lon_int), 1)
    spatial_mask = total_events > 0
    if not spatial_mask.any():
        # Dilating an empty mask leaves it empty, so the loop below could never exit.
        raise ValueError('no events fall inside the spatial grid')
    # Fill holes; if the filled mask is still too small, dilate by one cell and retry.
    while True:
        candidate = binary_fill_holes(spatial_mask)
        if candidate.mean() >= min_mask_coverage:
            spatial_mask = candidate
            break
        spatial_mask = binary_dilation(spatial_mask, iterations=1)
    spatial_mask = get_largest_connected_component(spatial_mask)
    for (dt_i, dt) in enumerate(time_deltas):
        # Index of the time window (of length dt) each event falls into.
        time_int = (time_relative // dt).astype(int)
        max_time_int = time_int.max()
        num_time_windows = max_time_int + 1
        num_events = np.zeros((num_time_windows, num_space_bins_lat, num_space_bins_lon), dtype=int)
        np.add.at(num_events, (time_int, lat_int, lon_int), 1)
        # NOTE(review): `== 1` is True only for cells with exactly one event in
        # the window; cells with two or more count as False. The disabled chi_4
        # cell further down tests `> 0` instead - confirm which one is intended.
        event_occurred = num_events == 1
        # np.diff on a boolean array compares neighbouring windows, so this is
        # True wherever the state changes (False->True as well as True->False).
        # prepend=False makes the first window compare against "no event".
        # Computed once and shared by both results below.
        state_changed = np.diff(event_occurred, axis=0, prepend=False) == 1
        # (time window, lat bin, lon bin) index triples of every change.
        occurrence_lags = np.argwhere(state_changed)
        # Per cell, the first window with a change (0 also when there is none).
        occurrence_times = np.argmax(state_changed, axis=0)
        
    #plt.imshow(spatial_mask, origin='lower', extent=(min_lon, max_lon, min_lat, max_lat))
    #plt.gca().set_aspect('equal', adjustable='box')
    #plt.colorbar(label='Number of Events')
    #plt.xlabel('Longitude')
    #plt.ylabel('Latitude')
    #plt.title(f'Spatial Distribution of Events (space_deg_delta={space_deg_delta})')
    #plt.show()

In [None]:
# DISABLED CELL - every line below is commented out, so nothing here runs.
# This is the only place visible in the notebook where `tau_units` and
# `chi_4_values` are assigned; the plotting / inspection cells after this one
# read both names and will raise NameError on a fresh kernel unless this cell
# is re-enabled (or the names are provided some other way).
#
# Outline of the disabled computation: for every spatial bin size and every
# time-window length, count events per (time window, lat bin, lon bin). Then,
# for each lag `tau` (in windows), compute per time step the fraction of masked
# spatial bins that had an event in the window at t or in the window at t + tau,
# and store the variance of that fraction multiplied by
# num_spatial_bins * space_deg_delta**2. Lags that do not fit in the series are
# set to NaN.
#tau_units = np.arange(1, 1001, 10)
#chi_4_values = np.zeros((len(space_deg_deltas), len(time_deltas), len(tau_units)), dtype=float)
#for (space_deg_i, space_deg_delta) in enumerate(space_deg_deltas):
#    print(f"Processing space_deg_delta={space_deg_delta}")
#    num_space_bins_lat = ceil((max_lat - min_lat) / space_deg_delta)
#    num_space_bins_lon = ceil((max_lon - min_lon) / space_deg_delta)
#    lat_int = (lat_relative // space_deg_delta).astype(int)
#    lon_int = (lon_relative // space_deg_delta).astype(int)
#    total_events = np.zeros((num_space_bins_lat, num_space_bins_lon), dtype=int)
#    np.add.at(total_events, (lat_int, lon_int), 1)
#    spatial_mask = total_events > 1  # Only consider spatial bins with more than 1 event
#    num_spatial_bins = np.sum(spatial_mask)
#    for (dt_i, dt) in enumerate(time_deltas):
#        print(f"  Processing time_delta={dt}")
#        time_int = (time_relative // dt).astype(int)
#        max_time_int = time_int.max()
#        num_time_windows = max_time_int + 1
#        num_events = np.zeros((num_time_windows, num_space_bins_lat, num_space_bins_lon), dtype=int)
#        
#        np.add.at(num_events, (time_int, lat_int, lon_int), 1)
#
#        #avg_mag = np.zeros((num_time_windows, num_space_bins_lat, num_space_bins_lon), dtype=float)
#        #np.add.at(avg_mag, (time_int, lat_int, lon_int), data['mag'].values)
#        #np.divide(avg_mag, num_events, out=avg_mag, where=num_events>0)
#        
#        for (tau_i, tau) in enumerate(tau_units):
#            if tau >= num_time_windows:
#                chi_4_values[space_deg_i, dt_i, tau_i:] = np.nan
#                break
#            delta_ev = (num_events[tau:]  > 0) + ( num_events[:-tau] > 0)
#            pi_t = (delta_ev[:, spatial_mask] > 0).mean(axis=1)   
#            chi_4_values[space_deg_i, dt_i, tau_i] = pi_t.var() * num_spatial_bins * (space_deg_delta**2)
#        #non_zero_mask = num_events > 0
#        #non_zero_indices = np.argwhere(non_zero_mask)
#        #corr_function = np.zeros((num_time_windows, num_time_windows), dtype=float)
#        

In [None]:
# One figure per (spatial bin size, time-window length) pair: chi_4 against lag.
# NOTE: `tau_units` and `chi_4_values` are only assigned in the commented-out
# cell above; that cell has to be re-enabled and run before this one.
for ds_i, ds in enumerate(space_deg_deltas):
    for dt_i, dt in enumerate(time_deltas):
        fig, ax = plt.subplots()
        # tau_units is a number of windows of length dt; dividing dt by one day
        # gives a plain float so the x axis is in days rather than a
        # timedelta64 array with no stated unit.
        lag_days = tau_units * (dt / np.timedelta64(1, 'D'))
        ax.plot(lag_days, chi_4_values[ds_i, dt_i], label=f"space={ds}, time={dt}")
        ax.set_xlabel('lag (days)')
        ax.set_ylabel('chi_4')
        ax.legend()  # without a legend the label= given to plot() is never drawn
        plt.show()
        plt.close(fig)  # many figures are produced; release each one after display

In [None]:
# Display the shapes of the chi_4 result array and of the three parameter
# arrays (spatial bin sizes, time-window lengths, lags), in that order.
tuple(arr.shape for arr in (chi_4_values, space_deg_deltas, time_deltas, tau_units))

In [None]:
# Filled-contour map of chi_4 for the first spatial bin size: lag (in windows)
# on x, window length on y.
# NOTE: `tau_units` and `chi_4_values` are only assigned in the commented-out
# cell above; that cell has to be re-enabled and run before this one.
fig, ax = plt.subplots()
# Pass the window lengths as float days so the y axis has an explicit unit
# instead of relying on how a timedelta64 array gets converted.
window_days = time_deltas / np.timedelta64(1, 'D')
contours = ax.contourf(tau_units, window_days, chi_4_values[0, :, :], levels=100)
fig.colorbar(contours, ax=ax, label='Chi_4')
ax.set_xlabel('tau (number of time windows)')
ax.set_ylabel('time window length (days)')
ax.set_title(f'space_deg_delta={space_deg_deltas[0]}')
plt.show()