Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools", "setuptools-scm"]
requires = ["setuptools", "setuptools-scm", "cython", "numpy"]
build-backend = "setuptools.build_meta"

[project]
Expand Down
16 changes: 16 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Build script: compiles the Cython SRF reader extension against numpy headers."""
import numpy as np
from Cython.Build import cythonize
from setuptools import Extension, setup

# The fast SRF point reader lives in a .pyx module; it needs the numpy C API,
# hence the include directory from the installed numpy.
srf_reader_extension = Extension(
    name="source_modelling.srf_reader",
    sources=["source_modelling/srf_reader.pyx"],
    include_dirs=[np.get_include()],
)

setup(name="source_modelling", ext_modules=cythonize([srf_reader_extension]))
279 changes: 209 additions & 70 deletions source_modelling/srf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,54 @@
import dataclasses
import functools
import re
from collections.abc import Sequence
from pathlib import Path
from typing import Optional, TextIO

import numpy as np
import pandas as pd
import scipy as sp

from source_modelling import srf_reader

PLANE_COUNT_RE = r"PLANE (\d+)"
POINT_COUNT_RE = r"POINTS (\d+)"


class Segments(Sequence):
    """A read-only sequence view over the segments (planes) of an SRF.

    Each segment is the contiguous slice of the points dataframe belonging
    to one plane in the SRF header, in header order.
    """

    def __init__(self, header: pd.DataFrame, points: pd.DataFrame):
        self._header = header
        self._points = points

    def __getitem__(self, index: int) -> pd.DataFrame:
        """Get the nth segment in the SRF.

        Parameters
        ----------
        index : int
            The index of the segment. Negative indices count from the end.

        Returns
        -------
        pd.DataFrame
            The points belonging to the nth segment.

        Raises
        ------
        TypeError
            If index is not an integer.
        IndexError
            If index is out of range.
        """
        if not isinstance(index, int):
            raise TypeError("Segment index must be an integer, not slice or tuple")
        segment_count = len(self._header)
        if index < 0:
            index += segment_count
        if not 0 <= index < segment_count:
            raise IndexError(f"Segment index {index} out of range")
        # Each plane contributes nstk * ndip points; a leading 0 lets the same
        # offsets array give both the start and end of every segment.
        counts = (self._header["nstk"] * self._header["ndip"]).to_numpy()
        offsets = np.concatenate(([0], np.cumsum(counts)))
        return self._points.iloc[offsets[index] : offsets[index + 1]]

    def __len__(self) -> int:
        """int: The number of segments in the SRF."""
        return len(self._header)


@dataclasses.dataclass
class SrfFile:
    """An in-memory representation of an SRF file.

    Attributes
    ----------
    version : str
        The version of this SrfFile
    header : pd.DataFrame
        A dataframe representing the header of the SRF file.
        The columns of the header are:

        - elon: The centre longitude of the plane.
        - elat: The centre latitude of the plane.
        - nstk: The number of patches along strike for the plane.
        - ndip: The number of patches along dip for the plane.
        - len: The length of the plane (in km).
        - wid: The width of the plane (in km).
        - stk: The plane strike.
        - dip: The plane dip.
        - dtop: The top of the plane.
        - dbottom: The bottom of the plane.
        - shyp: The hypocentre location in strike coordinates.
        - dhyp: The hypocentre location in dip coordinates.

    points : pd.DataFrame
        A dataframe representing the points in the SRF file.
        The columns of the points dataframe are:

        - lon: longitude of the patch.
        - lat: latitude of the patch.
        - dep: depth of the patch (in kilometres).
        - stk: local strike.
        - dip: local dip.
        - area: area of the patch (in cm^2).
        - tinit: initial rupture time for this patch (in seconds).
        - dt: the timestep for all slipt* columns (in seconds).
        - rake: local rake.
        - slip1: total slip in the first component.
        - slip2: total slip in the second component.
        - slip3: total slip in the third component.
        - slip: total slip.

        The final column is computed from the SRF and is not saved to
        disk. See the linked documentation on the SRF format for more details.

    slipt{i}_array : csr_array
        A sparse array containing the ith component of slip for each point and
        at each timestep, where slipt{i}_array[i, j] is the slip for the ith
        patch at time t = j * dt. May be None when the component is absent.
        See also: SrfFile.slip.

    References
    ----------
    SRF File Format Doc: https://wiki.canterbury.ac.nz/display/QuakeCore/File+Formats+Used+On+GM
    """

    version: str
    header: pd.DataFrame
    points: pd.DataFrame
    slipt1_array: sp.sparse.csr_array
    slipt2_array: sp.sparse.csr_array
    slipt3_array: sp.sparse.csr_array

    @property
    def slip(self):
        """csr_array: sparse array representing total slip across all components."""
        # NOTE: the optional second/third components use None as the "absent"
        # sentinel (see write_srf_point). We must compare against None here:
        # truthiness of a multi-element scipy sparse array raises ValueError.
        slip_array = self.slipt1_array.power(2)
        if self.slipt2_array is not None:
            slip_array += self.slipt2_array.power(2)
        if self.slipt3_array is not None:
            slip_array += self.slipt3_array.power(2)
        return slip_array.sqrt()

    @property
    def nt(self):
        """int: The number of timeslices in the SRF."""
        return self.slipt1_array.shape[1]

    @property
    def dt(self):
        """float: time resolution of SRF."""
        return self.points["dt"].iloc[0]

    @property
    def segments(self) -> "Segments":
        """Segments: A sequence of segments in the SRF."""
        return Segments(self.header, self.points)


class SrfParseError(Exception):
Expand Down Expand Up @@ -151,50 +259,6 @@ def read_int(srf_file: TextIO, label: Optional[str] = None) -> int:
raise SrfParseError(f'Expecting int, got: "{int_str}"')


def read_srf_point(srf_file: TextIO) -> dict[str, int | float]:
    """Read a single SRF point from a file handle.

    Parameters
    ----------
    srf_file : TextIO
        The file handle of the SRF file, pointing to the start of the
        line.

    Returns
    -------
    dict[str, int | float]
        A dictionary containing the values for the point, including the
        slipt1/slipt2/slipt3 time series as numpy arrays.
    """
    # skip optional first spaces
    row = {
        "lon": read_float(srf_file, label="lon"),
        "lat": read_float(srf_file, label="lat"),
        "dep": read_float(srf_file, label="dep"),
        "stk": read_float(srf_file, label="stk"),
        "dip": read_float(srf_file, label="dip"),
        "area": read_float(srf_file, label="area"),
        "tinit": read_float(srf_file, label="tinit"),
        "dt": read_float(srf_file, label="dt"),
        "rake": read_float(srf_file, label="rake"),
        "slip1": read_float(srf_file, label="slip1"),
    }
    # nt1/nt2/nt3 give the length of the slip time series for each component.
    nt1 = read_int(srf_file, label="nt1")
    row["slip2"] = read_float(srf_file, label="slip2")
    nt2 = read_int(srf_file, label="nt2")
    row["slip3"] = read_float(srf_file, label="slip3")
    nt3 = read_int(srf_file, label="nt3")
    row["slipt1"] = np.fromiter(
        (read_float(srf_file, label="slipt1") for _ in range(nt1)), float
    )
    row["slipt2"] = np.fromiter(
        (read_float(srf_file, label="slipt2") for _ in range(nt2)), float
    )
    # Bug fix: the third series previously reported label="slipt2" in its
    # parse errors; it reads the slipt3 values.
    row["slipt3"] = np.fromiter(
        (read_float(srf_file, label="slipt3") for _ in range(nt3)), float
    )
    return row


def read_srf(srf_ffp: Path) -> SrfFile:
"""Read an SRF file into an SrfFile object.

Expand All @@ -209,7 +273,7 @@ def read_srf(srf_ffp: Path) -> SrfFile:
The filepath of the SRF file.
"""
with open(srf_ffp, mode="r", encoding="utf-8") as srf_file_handle:
version = float(srf_file_handle.readline())
version = srf_file_handle.readline().strip()

plane_count_line = srf_file_handle.readline().strip()
plane_count_match = re.match(PLANE_COUNT_RE, plane_count_line)
Expand Down Expand Up @@ -237,6 +301,8 @@ def read_srf(srf_ffp: Path) -> SrfFile:
}
)
headers = pd.DataFrame(segments)
headers["nstk"] = headers["nstk"].astype(int)
headers["ndip"] = headers["ndip"].astype(int)

points_count_line = srf_file_handle.readline().strip()
points_count_match = re.match(POINT_COUNT_RE, points_count_line)
Expand All @@ -246,11 +312,33 @@ def read_srf(srf_ffp: Path) -> SrfFile:
)
point_count = int(points_count_match.group(1))

points = pd.DataFrame(
(read_srf_point(srf_file_handle) for _ in range(point_count))
points_metadata, slipt1_array, slipt2_array, slipt3_array = (
srf_reader.read_srf_points(srf_file_handle, point_count)
)
points_df = pd.DataFrame(
data=points_metadata,
columns=[
"lon",
"lat",
"dep",
"stk",
"dip",
"area",
"tinit",
"dt",
"rake",
"slip1",
"slip2",
"slip3",
],
)

return SrfFile(version, headers, points)
points_df["slip"] = np.sqrt(
points_df["slip1"] ** 2 + points_df["slip2"] ** 2 + points_df["slip3"] ** 2
)
return SrfFile(
version, headers, points_df, slipt1_array, slipt2_array, slipt3_array
)


def write_slip(srf_file: TextIO, slips: np.ndarray) -> None:
Expand All @@ -271,28 +359,74 @@ def write_slip(srf_file: TextIO, slips: np.ndarray) -> None:
)


def write_srf_point(srf_file: TextIO, point: pd.Series) -> None:
def write_srf_point(srf_file: TextIO, srf: SrfFile, point: pd.Series) -> None:
"""Write out a single SRF point.

Parameters
----------
srf_file : TextIO
The SRF file to write to.
srf : SrfFile
The SRF file object to write.
point : pd.Series
The point to write.
"""
index = int(point["point_index"])
# We need to get the raw slip values for the slip arrays to write out to the SRF.
# The slip values for the ith point in the SRF are stored in the ith row of the slip array.
# The indptr array contains the indices for each row in the slip array, so:
#
# - indptr[i] is the start index in the data array for the ith row and
# - indptr[i + 1] is the start index in the data array for the (i + 1)th row.
#
# Hence, data[indptr[i]:indptr[i+1]] collects the slip values for the ith row.
# Note we cannot use standard multi-dimensional array indexing arr[i, :]
# because scipy sparse arrays do not support this kind of indexing.
#
# Visually:
#
# +----+-----+----+---+---+--+
# | | | | | | | indptr
# +----+---\-+----+---+-\-+--+
# \ \
# \ \
# +----------+\-----------+\----------+-------------+
# | ... | row i | row i + 1 | ... | data
# +----------+------------+-----------+-------------+
# row_i = data[indptr[i]:indptr[i+1]]
slipt1 = (
srf.slipt1_array.data[
srf.slipt1_array.indptr[index] : srf.slipt1_array.indptr[index + 1]
]
if srf.slipt1_array is not None
else None
)
slipt2 = (
srf.slipt2_array.data[
srf.slipt2_array.indptr[index] : srf.slipt2_array.indptr[index + 1]
]
if srf.slipt2_array is not None
else None
)
slipt3 = (
srf.slipt3_array.data[
srf.slipt3_array.indptr[index] : srf.slipt3_array.indptr[index + 1]
]
if srf.slipt3_array is not None
else None
)
srf_file.write(
f"{point['lon']:.6f} {point['lat']:.6f} {point['dep']:g} {point['stk']:g} {point['dip']:g} {point['area']:.4E} {point['tinit']:.4f} {point['dt']:.6E}\n"
)
srf_file.write(
f"{point['rake']:g} {point['slip1']:.4f} {len(point['slipt1'])} {point['slip2']:.4f} {len(point['slipt2'])} {point['slip3']:.4f} {len(point['slipt3'])}\n"
f"{point['rake']:g} {point['slip1']:.4f} {len(slipt1) if slipt1 is not None else 0} {point['slip2']:.4f} {len(slipt2) if slipt2 is not None else 0} {point['slip3']:.4f} {len(slipt3) if slipt3 is not None else 0}\n"
)
if len(point["slipt1"]):
write_slip(srf_file, point["slipt1"])
if len(point["slipt2"]):
write_slip(srf_file, point["slipt2"])
if len(point["slipt3"]):
write_slip(srf_file, point["slipt3"])
if slipt1 is not None:
write_slip(srf_file, slipt1)
if slipt2 is not None:
write_slip(srf_file, slipt2)
if slipt3 is not None:
write_slip(srf_file, slipt3)


def write_srf(srf_ffp: Path, srf: SrfFile) -> None:
Expand All @@ -308,18 +442,23 @@ def write_srf(srf_ffp: Path, srf: SrfFile) -> None:
with open(srf_ffp, mode="w", encoding="utf-8") as srf_file_handle:
srf_file_handle.write("1.0\n")
srf_file_handle.write(f"PLANE {len(srf.header)}\n")
srf_file_handle.write(
srf.header.to_string(
index=False,
header=None,
formatters={
"elon": lambda elon: f"{elon:.6f}",
"elat": lambda elat: f"{elat:.6f}",
"len": lambda len: f"{len:.4f}",
"wid": lambda wid: f"{wid:.4f}",
},
# Cannot use srf.header.to_string because the newline separating headers is significant!
# This is ok because the number of headers is typically very small (< 100)
for _, plane in srf.header.iterrows():
srf_file_handle.write(
"\n".join(
[
f"{plane['elon']:.6f} {plane['elat']:.6f} {int(plane['nstk'])} {int(plane['ndip'])} {plane['len']:.4f} {plane['wid']:.4f}",
f"{plane['stk']:.4f} {plane['dip']:.4f} {plane['dtop']:.4f} {plane['shyp']:.4f} {plane['dhyp']:.4f}",
"",
]
)
)
+ "\n"
)

srf_file_handle.write(f"POINTS {len(srf.points)}\n")
srf.points.apply(functools.partial(write_srf_point, srf_file_handle), axis=1)
srf.points["point_index"] = np.arange(len(srf.points))

srf.points.apply(
functools.partial(write_srf_point, srf_file_handle, srf), axis=1
)
srf.points = srf.points.drop(columns="point_index")
Loading