Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools", "setuptools-scm"]
requires = ["setuptools", "setuptools-scm", "cython", "numpy"]
build-backend = "setuptools.build_meta"

[project]
Expand Down
16 changes: 16 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Build script: compiles the Cython SRF reader extension against numpy headers."""
import numpy as np
from Cython.Build import cythonize
from setuptools import Extension, setup

# The fast SRF point reader lives in a .pyx module; it needs the numpy C API,
# hence the include directory from the installed numpy.
srf_reader_extension = Extension(
    name="source_modelling.srf_reader",
    sources=["source_modelling/srf_reader.pyx"],
    include_dirs=[np.get_include()],
)

setup(name="source_modelling", ext_modules=cythonize([srf_reader_extension]))
279 changes: 209 additions & 70 deletions source_modelling/srf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,54 @@
import dataclasses
import functools
import re
from collections.abc import Sequence
from pathlib import Path
from typing import Optional, TextIO

import numpy as np
import pandas as pd
import scipy as sp

from source_modelling import srf_reader

PLANE_COUNT_RE = r"PLANE (\d+)"
POINT_COUNT_RE = r"POINTS (\d+)"


class Segments(Sequence):
    """A read-only sequence view over the segments (planes) of an SRF.

    Each segment is the contiguous slice of the points dataframe belonging
    to one plane in the SRF header, in header order.
    """

    def __init__(self, header: pd.DataFrame, points: pd.DataFrame):
        self._header = header
        self._points = points

    def __getitem__(self, index: int) -> pd.DataFrame:
        """Get the nth segment in the SRF.

        Parameters
        ----------
        index : int
            The index of the segment. Negative indices count from the end.

        Returns
        -------
        pd.DataFrame
            The points belonging to the nth segment.

        Raises
        ------
        TypeError
            If index is not an integer.
        IndexError
            If index is out of range.
        """
        if not isinstance(index, int):
            raise TypeError("Segment index must be an integer, not slice or tuple")
        segment_count = len(self._header)
        if index < 0:
            index += segment_count
        if not 0 <= index < segment_count:
            raise IndexError(f"Segment index {index} out of range")
        # Each plane contributes nstk * ndip points; a leading 0 lets the same
        # offsets array give both the start and end of every segment.
        counts = (self._header["nstk"] * self._header["ndip"]).to_numpy()
        offsets = np.concatenate(([0], np.cumsum(counts)))
        return self._points.iloc[offsets[index] : offsets[index + 1]]

    def __len__(self) -> int:
        """int: The number of segments in the SRF."""
        return len(self._header)


@dataclasses.dataclass
class SrfFile:
    """An in-memory representation of an SRF file.

    Attributes
    ----------
    version : str
        The version of this SrfFile
    header : pd.DataFrame
        A dataframe representing the header of the SRF file.
        The columns of the header are:

        - elon: The centre longitude of the plane.
        - elat: The centre latitude of the plane.
        - nstk: The number of patches along strike for the plane.
        - ndip: The number of patches along dip for the plane.
        - len: The length of the plane (in km).
        - wid: The width of the plane (in km).
        - stk: The plane strike.
        - dip: The plane dip.
        - dtop: The top of the plane.
        - dbottom: The bottom of the plane.
        - shyp: The hypocentre location in strike coordinates.
        - dhyp: The hypocentre location in dip coordinates.

    points : pd.DataFrame
        A dataframe representing the points in the SRF file.
        The columns of the points dataframe are:

        - lon: longitude of the patch.
        - lat: latitude of the patch.
        - dep: depth of the patch (in kilometres).
        - stk: local strike.
        - dip: local dip.
        - area: area of the patch (in cm^2).
        - tinit: initial rupture time for this patch (in seconds).
        - dt: the timestep for all slipt* columns (in seconds).
        - rake: local rake.
        - slip1: total slip in the first component.
        - slip2: total slip in the second component.
        - slip3: total slip in the third component.
        - slip: total slip.

        The final column is computed from the SRF and is not saved to
        disk. See the linked documentation on the SRF format for more details.

    slipt{i}_array : csr_array
        A sparse array containing the ith component of slip for each point and
        at each timestep, where slipt{i}_array[i, j] is the slip for the ith
        patch at time t = j * dt. May be None when the component is absent.
        See also: SrfFile.slip.

    References
    ----------
    SRF File Format Doc: https://wiki.canterbury.ac.nz/display/QuakeCore/File+Formats+Used+On+GM
    """

    version: str
    header: pd.DataFrame
    points: pd.DataFrame
    slipt1_array: sp.sparse.csr_array
    slipt2_array: sp.sparse.csr_array
    slipt3_array: sp.sparse.csr_array

    @property
    def slip(self):
        """csr_array: sparse array representing total slip across all components."""
        # NOTE: the optional second/third components use None as the "absent"
        # sentinel (see write_srf_point). We must compare against None here:
        # truthiness of a multi-element scipy sparse array raises ValueError.
        slip_array = self.slipt1_array.power(2)
        if self.slipt2_array is not None:
            slip_array += self.slipt2_array.power(2)
        if self.slipt3_array is not None:
            slip_array += self.slipt3_array.power(2)
        return slip_array.sqrt()

    @property
    def nt(self):
        """int: The number of timeslices in the SRF."""
        return self.slipt1_array.shape[1]

    @property
    def dt(self):
        """float: time resolution of SRF."""
        return self.points["dt"].iloc[0]

    @property
    def segments(self) -> "Segments":
        """Segments: A sequence of segments in the SRF."""
        return Segments(self.header, self.points)


class SrfParseError(Exception):
Expand Down Expand Up @@ -151,50 +259,6 @@ def read_int(srf_file: TextIO, label: Optional[str] = None) -> int:
raise SrfParseError(f'Expecting int, got: "{int_str}"')


def read_srf_point(srf_file: TextIO) -> dict[str, int | float]:
    """Read a single SRF point from a file handle.

    Parameters
    ----------
    srf_file : TextIO
        The file handle of the SRF file, pointing to the start of the
        line.

    Returns
    -------
    dict[str, int | float]
        A dictionary containing the values for the point, including the
        slipt1/slipt2/slipt3 time series as numpy arrays.
    """
    # skip optional first spaces
    row = {
        "lon": read_float(srf_file, label="lon"),
        "lat": read_float(srf_file, label="lat"),
        "dep": read_float(srf_file, label="dep"),
        "stk": read_float(srf_file, label="stk"),
        "dip": read_float(srf_file, label="dip"),
        "area": read_float(srf_file, label="area"),
        "tinit": read_float(srf_file, label="tinit"),
        "dt": read_float(srf_file, label="dt"),
        "rake": read_float(srf_file, label="rake"),
        "slip1": read_float(srf_file, label="slip1"),
    }
    # nt1/nt2/nt3 give the length of the slip time series for each component.
    nt1 = read_int(srf_file, label="nt1")
    row["slip2"] = read_float(srf_file, label="slip2")
    nt2 = read_int(srf_file, label="nt2")
    row["slip3"] = read_float(srf_file, label="slip3")
    nt3 = read_int(srf_file, label="nt3")
    row["slipt1"] = np.fromiter(
        (read_float(srf_file, label="slipt1") for _ in range(nt1)), float
    )
    row["slipt2"] = np.fromiter(
        (read_float(srf_file, label="slipt2") for _ in range(nt2)), float
    )
    # Bug fix: the third series previously reported label="slipt2" in its
    # parse errors; it reads the slipt3 values.
    row["slipt3"] = np.fromiter(
        (read_float(srf_file, label="slipt3") for _ in range(nt3)), float
    )
    return row


def read_srf(srf_ffp: Path) -> SrfFile:
"""Read an SRF file into an SrfFile object.

Expand All @@ -209,7 +273,7 @@ def read_srf(srf_ffp: Path) -> SrfFile:
The filepath of the SRF file.
"""
with open(srf_ffp, mode="r", encoding="utf-8") as srf_file_handle:
version = float(srf_file_handle.readline())
version = srf_file_handle.readline().strip()

plane_count_line = srf_file_handle.readline().strip()
plane_count_match = re.match(PLANE_COUNT_RE, plane_count_line)
Expand Down Expand Up @@ -237,6 +301,8 @@ def read_srf(srf_ffp: Path) -> SrfFile:
}
)
headers = pd.DataFrame(segments)
headers["nstk"] = headers["nstk"].astype(int)
headers["ndip"] = headers["ndip"].astype(int)

points_count_line = srf_file_handle.readline().strip()
points_count_match = re.match(POINT_COUNT_RE, points_count_line)
Expand All @@ -246,11 +312,33 @@ def read_srf(srf_ffp: Path) -> SrfFile:
)
point_count = int(points_count_match.group(1))

points = pd.DataFrame(
(read_srf_point(srf_file_handle) for _ in range(point_count))
points_metadata, slipt1_array, slipt2_array, slipt3_array = (
srf_reader.read_srf_points(srf_file_handle, point_count)
)
points_df = pd.DataFrame(
data=points_metadata,
columns=[
"lon",
"lat",
"dep",
"stk",
"dip",
"area",
"tinit",
"dt",
"rake",
"slip1",
"slip2",
"slip3",
],
)

return SrfFile(version, headers, points)
points_df["slip"] = np.sqrt(
points_df["slip1"] ** 2 + points_df["slip2"] ** 2 + points_df["slip3"] ** 2
)
return SrfFile(
version, headers, points_df, slipt1_array, slipt2_array, slipt3_array
)


def write_slip(srf_file: TextIO, slips: np.ndarray) -> None:
Expand All @@ -271,28 +359,74 @@ def write_slip(srf_file: TextIO, slips: np.ndarray) -> None:
)


def write_srf_point(srf_file: TextIO, point: pd.Series) -> None:
def write_srf_point(srf_file: TextIO, srf: SrfFile, point: pd.Series) -> None:
"""Write out a single SRF point.

Parameters
----------
srf_file : TextIO
The SRF file to write to.
srf : SrfFile
The SRF file object to write.
point : pd.Series
The point to write.
"""
index = int(point["point_index"])
# We need to get the raw slip values for the slip arrays to write out to the SRF.
# The slip values for the ith point in the SRF are stored in the ith row of the slip array.
# The indptr array contains the indices for each row in the slip array, so:
#
# - indptr[i] is the start index in the data array for the ith row and
# - indptr[i + 1] is the start index in the data array for the (i + 1)th row.
#
# Hence, data[indptr[i]:indptr[i+1]] collects the slip values for the ith row.
# Note we cannot use standard multi-dimensional array indexing arr[i, :]
# because scipy sparse arrays do not support this kind of indexing.
#
# Visually:
#
# +----+-----+----+---+---+--+
# | | | | | | | indptr
# +----+---\-+----+---+-\-+--+
# \ \
# \ \
# +----------+\-----------+\----------+-------------+
# | ... | row i | row i + 1 | ... | data
# +----------+------------+-----------+-------------+
# row_i = data[indptr[i]:indptr[i+1]]
slipt1 = (
srf.slipt1_array.data[
srf.slipt1_array.indptr[index] : srf.slipt1_array.indptr[index + 1]
]
if srf.slipt1_array is not None
else None
)
slipt2 = (
srf.slipt2_array.data[
srf.slipt2_array.indptr[index] : srf.slipt2_array.indptr[index + 1]
]
if srf.slipt2_array is not None
else None
)
slipt3 = (
srf.slipt3_array.data[
srf.slipt3_array.indptr[index] : srf.slipt3_array.indptr[index + 1]
]
if srf.slipt3_array is not None
else None
)
srf_file.write(
f"{point['lon']:.6f} {point['lat']:.6f} {point['dep']:g} {point['stk']:g} {point['dip']:g} {point['area']:.4E} {point['tinit']:.4f} {point['dt']:.6E}\n"
)
srf_file.write(
f"{point['rake']:g} {point['slip1']:.4f} {len(point['slipt1'])} {point['slip2']:.4f} {len(point['slipt2'])} {point['slip3']:.4f} {len(point['slipt3'])}\n"
f"{point['rake']:g} {point['slip1']:.4f} {len(slipt1) if slipt1 is not None else 0} {point['slip2']:.4f} {len(slipt2) if slipt2 is not None else 0} {point['slip3']:.4f} {len(slipt3) if slipt3 is not None else 0}\n"
)
if len(point["slipt1"]):
write_slip(srf_file, point["slipt1"])
if len(point["slipt2"]):
write_slip(srf_file, point["slipt2"])
if len(point["slipt3"]):
write_slip(srf_file, point["slipt3"])
if slipt1 is not None:
write_slip(srf_file, slipt1)
if slipt2 is not None:
write_slip(srf_file, slipt2)
if slipt3 is not None:
write_slip(srf_file, slipt3)


def write_srf(srf_ffp: Path, srf: SrfFile) -> None:
Expand All @@ -308,18 +442,23 @@ def write_srf(srf_ffp: Path, srf: SrfFile) -> None:
with open(srf_ffp, mode="w", encoding="utf-8") as srf_file_handle:
srf_file_handle.write("1.0\n")
srf_file_handle.write(f"PLANE {len(srf.header)}\n")
srf_file_handle.write(
srf.header.to_string(
index=False,
header=None,
formatters={
"elon": lambda elon: f"{elon:.6f}",
"elat": lambda elat: f"{elat:.6f}",
"len": lambda len: f"{len:.4f}",
"wid": lambda wid: f"{wid:.4f}",
},
# Cannot use srf.header.to_string because the newline separating headers is significant!
# This is ok because the number of headers is typically very small (< 100)
for _, plane in srf.header.iterrows():
srf_file_handle.write(
"\n".join(
[
f"{plane['elon']:.6f} {plane['elat']:.6f} {int(plane['nstk'])} {int(plane['ndip'])} {plane['len']:.4f} {plane['wid']:.4f}",
f"{plane['stk']:.4f} {plane['dip']:.4f} {plane['dtop']:.4f} {plane['shyp']:.4f} {plane['dhyp']:.4f}",
"",
]
)
)
+ "\n"
)

srf_file_handle.write(f"POINTS {len(srf.points)}\n")
srf.points.apply(functools.partial(write_srf_point, srf_file_handle), axis=1)
srf.points["point_index"] = np.arange(len(srf.points))

srf.points.apply(
functools.partial(write_srf_point, srf_file_handle, srf), axis=1
)
srf.points = srf.points.drop(columns="point_index")
Loading