# Fitting data for the Gutenberg–Richter Model of Earthquake Magnitudes

Here, we plot the number of earthquakes N with magnitude
greater than or equal to M, as a function of M.

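To do this, we bin the earthquakes by magnitude and store the
number of events in each bin. We then compute the cumulative
distribution, so that the value at magnitude M is the sum of the
frequency distribution over all magnitudes >= M. The
Gutenberg–Richter model predicts that this cumulative count obeys
$\log_{10} N = a - bM$, i.e. a straight line when $\log_{10} N$ is
plotted against $M$, so we fit a line to extract $a$ and $b$.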

The data we use are from [here](http://earthquake.usgs.gov/earthquakes/search/). 

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt

from least_squares import least_squares

In [None]:
# Raise the figure resolution so the plots render larger and are easier to read.
# NOTE: This has to live in its own cell, separate from the matplotlib import.
plt.rcParams.update({'figure.dpi': 100})

In [None]:
# Load one column (index 4, used below as the earthquake magnitudes) from the
# catalogue downloaded from http://earthquake.usgs.gov/earthquakes/search/.
# The first row of the file is a header and is skipped.
print('Earthquake data: Gutenberg-Richter Model')
magvalues = np.genfromtxt(
    fname='california_earthquakes_2010_to_2013.csv',
    delimiter=',',
    skip_header=1,
    usecols=4,
)
print(magvalues)

In [None]:
# Bin the magnitudes into 90 equal-width bins spanning min..max magnitude.
# dN_values[i] is the number of events in bin i; edges holds the bin boundaries.
dN_values, edges = np.histogram(magvalues, bins=90)

# x data = M values sorted in increasing order
# y data = log_10(N) where N = number of events with magnitude >= M
# Use the left edge of each bin, since N(M) counts everything from there upwards.
M_values = edges[:-1]
# np.cumsum() sums from index 0 up to i, but we want i to the end, so reverse
# the counts, accumulate, and reverse back. With the default histogram range
# the top bin contains the largest magnitude, so every cumulative count is >= 1
# and log10 stays finite.
log10N_values = np.log10(np.cumsum(dN_values[::-1]))[::-1]
print(log10N_values)

# Perform a least-squares straight-line fit of log_10(N) against M.
# least_squares returns (intercept, slope, sigma, sigma_intercept, sigma_slope).
yint, slope, sigma, sigma_yint, sigma_slope = least_squares(M_values, log10N_values)
print('least_squares fit to data:')
print(f'slope = {slope:6.3f} +- {sigma_slope:6.3f}')
# Report the fitted intercept itself, followed by its uncertainty.
print(f'intercept = {yint:6.3f} +- {sigma_yint:6.3f}')
# NOTE(review): label kept as-is; confirm whether least_squares returns a
# variance or a standard deviation for sigma.
print(f'log_10(N) variance = {sigma:6.3f}')

# Evaluate the fitted line at the data points so it can be overlaid on the plot
fit_y = yint + slope * M_values

# Cumulative counts and fit on a logarithmic y axis (a straight line if G-R holds)
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.scatter(M_values, 10**log10N_values)
ax.plot(M_values, 10**fit_y)
ax.set_xlabel("Magnitude (M)")
ax.set_ylabel(r"N ($\geq$M)")
ax.set_yscale("log")

# Alternative: let matplotlib build the reverse-cumulative histogram directly
#plt.figure(1)
#plt.hist(magvalues, bins=90, range=[1.0,10.0], log=True, bottom=0.1,cumulative=-1)
#plt.xlabel( 'Magnitude (M)' )
#plt.ylabel( 'N' )

# Same plot but linear y scale
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))
ax2.scatter(M_values, 10**log10N_values)
ax2.plot(M_values, 10**fit_y)
ax2.set_xlabel("Magnitude (M)")
ax2.set_ylabel(r"N ($\geq$M)")



In [None]:
# Try to improve the fit
# https://doi.org/10.1063/1.1387590 only fits M>4, let's try that.
# Also, wider bins
threshold = 4.0
binwidth = 0.5

# Re-read the catalogue so this cell works regardless of what earlier cells
# did to magvalues, then keep only events above the magnitude threshold.
magvalues = np.genfromtxt(
    fname='california_earthquakes_2010_to_2013.csv',
    usecols=4,
    skip_header=1,
    delimiter=',',
)
magvalues = magvalues[magvalues > threshold]

# Snap the histogram range outwards to whole multiples of binwidth
magmin = np.min(magvalues)
magmax = np.max(magvalues)
binmin = math.floor(magmin / binwidth) * binwidth
binmax = math.ceil(magmax / binwidth) * binwidth
print(binmin)
print(binmax)

# binmax + binwidth as the (exclusive) stop so that binmax itself is an edge
bins = np.arange(binmin, binmax + binwidth, binwidth)
dN_values, edges = np.histogram(magvalues, bins=bins)
#print(dN_values)
#print(edges)

# x data = M values sorted in increasing order
# y data = log_10(N) where N = number of events with magnitude >= M
# np.cumsum() sums from index 0 up to i, but we want i to the end, so reverse
# the counts, accumulate, and reverse back.
N_values = np.cumsum(dN_values[::-1])[::-1]
# Drop any bin with no events at or above it: log10(0) is -inf, which
# least_squares can't handle. (Floating-point rounding in np.arange can
# produce such an empty extra bin at the top.)
populated = N_values > 0
M_values = edges[:-1][populated]  # Left edge of each bin, since N(M) counts from there upwards
log10N_values = np.log10(N_values[populated])

# Perform a least-squares straight-line fit of log_10(N) against M.
# least_squares returns (intercept, slope, sigma, sigma_intercept, sigma_slope).
yint, slope, sigma, sigma_yint, sigma_slope = least_squares(M_values, log10N_values)
print('least_squares fit to data:')
print(f'slope = {slope:6.3f} +- {sigma_slope:6.3f}')
# Report the fitted intercept itself, followed by its uncertainty.
print(f'intercept = {yint:6.3f} +- {sigma_yint:6.3f}')
# NOTE(review): label kept as-is; confirm whether least_squares returns a
# variance or a standard deviation for sigma.
print(f'log_10(N) variance = {sigma:6.3f}')

# Evaluate the fitted line at the data points so it can be overlaid on the plot
fit_y = yint + slope * M_values

# Cumulative counts and fit on a logarithmic y axis (a straight line if G-R holds)
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.scatter(M_values, 10**log10N_values)
ax.plot(M_values, 10**fit_y)
ax.set_xlabel("Magnitude (M)")
ax.set_ylabel(r"N ($\geq$M)")
ax.set_yscale("log")

# Alternative: let matplotlib build the reverse-cumulative histogram directly
#plt.figure(1)
#plt.hist(magvalues, bins=90, range=[1.0,10.0], log=True, bottom=0.1,cumulative=-1)
#plt.xlabel( 'Magnitude (M)' )
#plt.ylabel( 'N' )

# Same plot but linear y scale
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))
ax2.scatter(M_values, 10**log10N_values)
ax2.plot(M_values, 10**fit_y)
ax2.set_xlabel("Magnitude (M)")
ax2.set_ylabel(r"N ($\geq$M)")
