Support videos with more bits per channel

Multiple other improvements
zwicker-group · Mar 18, 2024 · b3c58ff · b3c58ff
1 parent f5e4eff
commit b3c58ff
Show file tree

Hide file tree

Showing 6 changed files with 228 additions and 122 deletions.
diff --git a/pde/storage/_ffmpeg.py b/pde/storage/_ffmpeg.py
diff --git a/pde/storage/movie.py b/pde/storage/movie.py
@@ -7,13 +7,9 @@
 .. codeauthor:: David Zwicker <david.zwicker@ds.mpg.de> 
 """
 
-# TODO:
-#     - allow more bits for colorchannels
-#     - track whether times roughly work (checking for frame drops)
-#     - we could embedd extra information (like time, and maybe colorscaling) in
-#       the individual frames if we extended the shape (or we could potentially use
-#       subtitles?)
-
+# TODO: write time as the time stamps (potentially using a factor to convert simulation
+#       time to real time); this might not be possible with rawvideo. An alternative
+#       might be to store the time stamps and apply them later, e.g., using `mkvmerge`
 from __future__ import annotations
 
 import json
@@ -27,12 +23,12 @@
 from matplotlib.colors import Normalize
 from numpy.typing import ArrayLike
 
-from ..fields import FieldCollection, ScalarField
+from ..fields import FieldCollection
 from ..fields.base import DataFieldBase, FieldBase
+from ..tools import ffmpeg as FFmpeg
 from ..tools.docstrings import fill_in_docstring
 from ..tools.misc import module_available
 from ..trackers.interrupts import ConstantInterrupts
-from . import _ffmpeg as FFmpeg
 from .base import InfoDict, StorageBase, StorageTracker, WriteModeType
 
 
@@ -52,8 +48,8 @@ class MovieStorage(StorageBase):
     Warning:
         This storage potentially compresses data and can thus lead to loss of some
         information. The data quality depends on many parameters, but most important are
-        the bit depth of the video format, the range that is encoded (determined by
-        `vmin` and `vmax`), and the target bitrate.
+        the bits per channel of the video format, the range that is encoded (determined
+        by `vmin` and `vmax`), and the target bitrate.
 
         Note also that selecting individual time points might be quite slow since the
         video needs to be read from the beginning each time. Instead, it is much more
@@ -67,6 +63,7 @@ def __init__(
         *,
         vmin: float | ArrayLike = 0,
         vmax: float | ArrayLike = 1,
+        bits_per_channel: int = 8,
         video_format: str = "auto",
         bitrate: int = -1,
         info: InfoDict | None = None,
@@ -77,31 +74,38 @@ def __init__(
         Args:
             filename (str):
                 The path where the movie is stored. The file extension determines the
-                container format of the movie.
+                container format of the movie. The standard codec FFV1 plays well with
+                the ".avi" and ".mkv" container format.
             vmin (float or array):
-                Lowest values that are encoded (per field). Lower values are clipped to
-                this value.
+                Lowest values that are encoded (per field). Smaller values are clipped
+                to this value.
             vmax (float or array):
                 Highest values that are encoded (per field). Larger values are clipped
                 to this value.
+            bits_per_channel (int):
+                The number of bits used per color channel. Typical values are 8 and 16.
+                The relative accuracy of stored values is 0.01 and 0.0001, respectively.
             video_format (str):
-                How to write data to the movie. This determines the number of color
-                channels and the bit depth of individual colors. Available options are
-                listed in :func:`~pde.storage._ffmpeg.formats`. The special value
-                `auto` tries to find a suitable format automatically.
+                Identifier for a video format from :data:`~pde.tools.ffmpeg.formats`,
+                which determines the number of channels, the bit depth of individual
+                colors, and the codec. The special value `auto` tries to find a suitable
+                format automatically, taking `bits_per_channel` into account.
             bitrate (float):
                 The bitrate of the movie (in kilobits per second). The default value of
-                -1 let's the encode choose an appropriate bit rate.
+                -1 lets the encoder choose an appropriate bit rate.
             info (dict):
-                Supplies extra information that is stored in the storage
+                Supplies extra information that is stored in the storage alongside
+                additional information necessary to reconstruct fields and grids.
             write_mode (str):
                 Determines how new data is added to already existing data. Possible
                 values are: 'append' (data is always appended), 'truncate' (data is
                 cleared every time this storage is used for writing), or 'truncate_once'
                 (data is cleared for the first writing, but appended subsequently).
                 Alternatively, specifying 'readonly' will disable writing completely.
             loglevel (str):
-                FFmpeg log level
+                FFmpeg log level determining the amount of data sent to stdout. The
+                default only emits warnings and errors, but setting this to `"info"` can
+                be useful to get additional information about the encoding.
         """
         if not module_available("ffmpeg"):
             raise ModuleNotFoundError("`MovieStorage` needs `ffmpeg-python` package")
@@ -110,6 +114,7 @@ def __init__(
         self.filename = Path(filename)
         self.vmin = vmin
         self.vmax = vmax
+        self.bits_per_channel = bits_per_channel
         self.video_format = video_format
         self.bitrate = bitrate
         self.loglevel = loglevel
@@ -257,10 +262,14 @@ def start_writing(self, field: FieldBase, info: InfoDict | None = None) -> None:
 
         # get color channel information
         if self.video_format == "auto":
-            if isinstance(field, ScalarField):
-                self.info["video_format"] = "gray"
-            else:
-                self.info["video_format"] = "rgb24"
+            channels = field._data_flat.shape[0]
+            video_format = FFmpeg.find_format(channels, self.bits_per_channel)
+            if video_format is None:
+                raise RuntimeError(
+                    f"Could not find a video format with {channels} channels and "
+                    f"{self.bits_per_channel} bits per channel."
+                )
+            self.info["video_format"] = video_format
         else:
             self.info["video_format"] = self.video_format
         self._format = FFmpeg.formats[self.info["video_format"]]
@@ -283,12 +292,13 @@ def start_writing(self, field: FieldBase, info: InfoDict | None = None) -> None:
         # set output format
         output_args = {
             "vcodec": self._format.codec,
-            "crf": "0",  # Constant Rate Factor - lower values for less compression
             "pix_fmt": self._format.pix_fmt_file,
             "metadata": "comment=" + shlex.quote(self._get_metadata()),
         }
         if "264" in self._format.codec:
-            # make the H.264 codec use the full color range
+            # set extra options for the H.264 codec
+            output_args["crf"] = "0"  # Constant Rate Factor (lower = less compression)
+            # make the H.264 codec use the full color range:
             output_args["bsf"] = "h264_metadata=video_full_range_flag=1"
         if self.bitrate > 0:
             # set the specified bitrate
@@ -438,6 +448,7 @@ def __iter__(self) -> Iterator[FieldBase]:
         frame_shape = (self.info["width"], self.info["height"], self._format.channels)
         data_shape = (len(self._norms), self.info["width"], self.info["height"])
         data = np.empty(data_shape, dtype=self._dtype)
+        frame_bytes = np.prod(frame_shape) * self._format.bytes_per_channel
 
         # iterate over entire movie
         f_input = ffmpeg.input(self.filename, loglevel=self.loglevel)
@@ -446,7 +457,7 @@ def __iter__(self) -> Iterator[FieldBase]:
         )
         proc = f_output.run_async(pipe_stdout=True)
         while True:
-            read_bytes = proc.stdout.read(np.prod(frame_shape))
+            read_bytes = proc.stdout.read(frame_bytes)
             if not read_bytes:
                 break
             frame = np.frombuffer(read_bytes, self._format.dtype).reshape(frame_shape)

diff --git a/pde/tools/ffmpeg.py b/pde/tools/ffmpeg.py
@@ -0,0 +1,122 @@
+"""
+Functions for interacting with FFmpeg
+
+.. codeauthor:: David Zwicker <david.zwicker@ds.mpg.de>
+"""
+
+from dataclasses import dataclass
+
+# import subprocess as sp
+from typing import Optional
+
+import numpy as np
+from numpy.typing import DTypeLike
+
+# def _run_ffmpeg(args: list[str]):
+#     return sp.check_output(["ffmpeg"] + args)
+#
+#
+# def codecs() -> list[str]:
+#     """list: all supported ffmpeg codecs"""
+#     res = _run_ffmpeg(["-codecs"])
+#
+#
+# def get_pixel_formats(encoder=None):
+#     if encoder is None:
+#         res = _run_ffmpeg(["-pix_fmts"])
+#     else:
+#         res = _run_ffmpeg(["-h", f"encoder={encoder}"])
+
+
+@dataclass
+class FFmpegFormat:
+    """defines a FFmpeg format used for storing field data in a video
+
+    Attributes:
+        pix_fmt_file (str):
+            FFmpeg pixel format used in the video file
+        channels (int):
+            Number of color channels of the format
+        bits_per_channel (int):
+            Number of bits per color channel; only 8 and 16 are supported
+        codec (str):
+            FFmpeg codec used for the video; defaults to "ffv1"
+    """
+
+    pix_fmt_file: str
+    channels: int
+    bits_per_channel: int
+    codec: str = "ffv1"
+
+    @property
+    def pix_fmt_data(self) -> str:
+        """str: a suitable pixel format for the field data
+
+        Raises:
+            NotImplementedError: If the bits per channel or the number of channels
+                are not supported
+        """
+        # known pixel formats, indexed by bits per channel and number of channels
+        pix_fmts = {
+            8: {1: "gray", 3: "rgb24", 4: "rgba"},
+            16: {1: "gray16le", 3: "gbrp16le", 4: "rgba64le"},
+        }
+        if self.bits_per_channel not in pix_fmts:
+            raise NotImplementedError(f"Cannot use {self.bits_per_channel} bits")
+        by_channels = pix_fmts[self.bits_per_channel]
+        if self.channels not in by_channels:
+            raise NotImplementedError(f"Cannot deal with {self.channels} channels")
+        return by_channels[self.channels]
+
+    @property
+    def bytes_per_channel(self) -> int:
+        """int: number of bytes that a single color channel occupies"""
+        return self.bits_per_channel // 8
+
+    @property
+    def dtype(self) -> DTypeLike:
+        """numpy dtype holding the value of a single color channel
+
+        Raises:
+            NotImplementedError: If the bits per channel are not supported
+        """
+        if self.bits_per_channel == 8:
+            return np.uint8
+        elif self.bits_per_channel == 16:
+            return np.uint16
+        else:
+            raise NotImplementedError(f"Cannot use {self.bits_per_channel} bits")
+
+    @property
+    def value_max(self) -> int:
+        """int: largest value that can be stored in a single color channel"""
+        return 2**self.bits_per_channel - 1  # type: ignore
+
+    def data_to_frame(self, normalized_data: np.ndarray) -> np.ndarray:
+        """convert normalized data to the integer values stored in a frame
+
+        Args:
+            normalized_data (:class:`~numpy.ndarray`):
+                Data normalized to the interval [0, 1]. Values outside this interval
+                are clipped to the closest bound.
+
+        Returns:
+            :class:`~numpy.ndarray`: Integer data of type :attr:`dtype`
+        """
+        # clip first, since casting out-of-range values to unsigned integers would
+        # wrap around instead of saturating
+        scaled = np.clip(normalized_data, 0, 1) * self.value_max
+        # round to the nearest level; truncation would bias values downwards and
+        # break the round trip when the product falls just below an integer
+        return np.rint(scaled).astype(self.dtype)
+
+    def data_from_frame(self, frame_data: np.ndarray) -> np.ndarray:
+        """convert integer values stored in a frame to normalized data
+
+        Args:
+            frame_data (:class:`~numpy.ndarray`):
+                Integer data read from a frame
+
+        Returns:
+            :class:`~numpy.ndarray`: Float data, where :attr:`value_max` maps to 1
+        """
+        return frame_data.astype(float) / self.value_max
+
+
+# all defined video formats, indexed by the pixel format used in the video file
+formats = {
+    video_format.pix_fmt_file: video_format
+    for video_format in (
+        FFmpegFormat(pix_fmt_file="gray", channels=1, bits_per_channel=8),
+        FFmpegFormat(pix_fmt_file="rgb24", channels=3, bits_per_channel=8),
+        FFmpegFormat(pix_fmt_file="rgb32", channels=4, bits_per_channel=8),
+        FFmpegFormat(pix_fmt_file="gray16le", channels=1, bits_per_channel=16),
+        FFmpegFormat(pix_fmt_file="gbrp16le", channels=3, bits_per_channel=16),
+        FFmpegFormat(pix_fmt_file="rgba64le", channels=4, bits_per_channel=16),
+    )
+}
+
+
+def find_format(channels: int, bits_per_channel: int = 8) -> Optional[str]:
+    """find a defined FFmpegFormat that satisfies the requirements
+
+    Among all suitable formats, the one with the fewest bits per channel is chosen.
+    Ties are broken by the fewest channels and then by the order in which the formats
+    are defined, so the result does not depend on a pairwise comparison order.
+
+    Args:
+        channels (int):
+            Minimal number of color channels
+        bits_per_channel (int):
+            Minimal number of bits per channel
+
+    Returns:
+        str: Identifier for a format that satisfies the requirements (but might have
+        more channels or more bits per channel than requested). `None` is returned if
+        no format can be identified.
+    """
+    # collect all formats that satisfy the requirements
+    candidates = [
+        name
+        for name, fmt in formats.items()
+        if fmt.channels >= channels and fmt.bits_per_channel >= bits_per_channel
+    ]
+    # `min` returns the first minimal item, so ties keep the definition order
+    return min(
+        candidates,
+        key=lambda name: (formats[name].bits_per_channel, formats[name].channels),
+        default=None,
+    )
diff --git a/tests/storage/test_generic_storages.py b/tests/storage/test_generic_storages.py
@@ -32,7 +32,7 @@ def storage_factory(tmp_path, storage_class):
         # provide factory that initializes a MovieStorage with a file
         if not module_available("ffmpeg"):
             pytest.skip("No module `ffmpeg-python`")
-        file_path = tmp_path / "test_storage_write.mp4"
+        file_path = tmp_path / "test_storage_write.avi"
         return functools.partial(MovieStorage, file_path, vmax=5)
 
     # simply return the storage class assuming it is a factory function already