Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@
TAG_GEO_ASCII_PARAMS = 34737


# Human-readable names for tags that turn up in error messages. Only
# tags that legitimately use RATIONAL / SRATIONAL are interesting here;
# anything else gets formatted as the bare numeric id.
_TAG_NAMES = {
TAG_X_RESOLUTION: "XResolution",
TAG_Y_RESOLUTION: "YResolution",
TAG_RESOLUTION_UNIT: "ResolutionUnit",
}


def _tag_label(tag: int | None) -> str:
"""Format a tag id for error messages."""
if tag is None:
return "<unknown>"
name = _TAG_NAMES.get(tag)
if name is not None:
return f"{name} (tag={tag})"
return f"tag={tag}"


@dataclass
class TIFFHeader:
"""Parsed TIFF file header."""
Expand Down Expand Up @@ -427,8 +447,15 @@ def parse_header(data: bytes | memoryview) -> TIFFHeader:


def _read_value(data: bytes | memoryview, offset: int, type_id: int,
count: int, bo: str) -> Any:
"""Read a typed value array from data at the given offset."""
count: int, bo: str, tag: int | None = None) -> Any:
"""Read a typed value array from data at the given offset.

A zero-denominator RATIONAL or SRATIONAL is rejected with a
`ValueError` rather than silently coerced to 0.0. The TIFF spec
treats denominator-zero rationals as malformed, and quietly mapping
them to 0.0 lets corrupted `XResolution` / `YResolution` metadata
round-trip through the reader as if the file were valid.
"""
type_size = TIFF_TYPE_SIZES.get(type_id, 1)

if type_id == ASCII:
Expand All @@ -445,7 +472,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int,
off = offset + i * 8
num = struct.unpack_from(f'{bo}I', data, off)[0]
den = struct.unpack_from(f'{bo}I', data, off + 4)[0]
values.append(num / den if den != 0 else 0.0)
if den == 0:
raise ValueError(
f"Malformed RATIONAL on {_tag_label(tag)}: "
f"numerator={num} denominator={den} at element {i}; "
f"refusing to parse possibly malformed TIFF"
)
values.append(num / den)
return tuple(values) if count > 1 else values[0]

if type_id == SRATIONAL:
Expand All @@ -454,7 +487,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int,
off = offset + i * 8
num = struct.unpack_from(f'{bo}i', data, off)[0]
den = struct.unpack_from(f'{bo}i', data, off + 4)[0]
values.append(num / den if den != 0 else 0.0)
if den == 0:
raise ValueError(
f"Malformed SRATIONAL on {_tag_label(tag)}: "
f"numerator={num} denominator={den} at element {i}; "
f"refusing to parse possibly malformed TIFF"
)
values.append(num / den)
return tuple(values) if count > 1 else values[0]

fmt_char = TIFF_TYPE_STRUCT_CODES.get(type_id)
Expand Down Expand Up @@ -633,7 +672,7 @@ def parse_ifd(data: bytes | memoryview, offset: int,
# cap still fires.
continue
try:
dims[tag] = _read_value(data, value_area_offset, type_id, count, bo)
dims[tag] = _read_value(data, value_area_offset, type_id, count, bo, tag=tag)
except (struct.error, ValueError):
continue

Expand Down Expand Up @@ -686,7 +725,7 @@ def parse_ifd(data: bytes | memoryview, offset: int,
)

if total_size <= inline_max:
value = _read_value(data, value_area_offset, type_id, count, bo)
value = _read_value(data, value_area_offset, type_id, count, bo, tag=tag)
else:
if is_big:
ptr = struct.unpack_from(f'{bo}Q', data, value_area_offset)[0]
Expand All @@ -702,7 +741,7 @@ def parse_ifd(data: bytes | memoryview, offset: int,
f"[{ptr}, {ptr + total_size}) exceeds file length "
f"{data_len}"
)
value = _read_value(data, ptr, type_id, count, bo)
value = _read_value(data, ptr, type_id, count, bo, tag=tag)

entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value)

Expand Down
37 changes: 28 additions & 9 deletions xrspatial/geotiff/tests/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import pytest

from xrspatial.geotiff._dtypes import RATIONAL, SRATIONAL
from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, _read_value, parse_all_ifds,
parse_header, parse_ifd)
from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, TAG_X_RESOLUTION, TAG_Y_RESOLUTION,
_read_value, parse_all_ifds, parse_header, parse_ifd)

from .conftest import make_minimal_tiff

Expand Down Expand Up @@ -250,16 +250,35 @@ def test_self_loop_raises(self):
class TestReadValueRationals:
"""T-8 coverage for RATIONAL / SRATIONAL edge cases in _read_value."""

def test_rational_denominator_zero_returns_zero(self):
# numerator=5, denominator=0 -- by convention return 0.0
def test_rational_denominator_zero_raises(self):
# numerator=5, denominator=0 -- malformed, reject instead of
# silently mapping to 0.0 (issue #2313).
buf = struct.pack('<II', 5, 0)
result = _read_value(buf, 0, RATIONAL, 1, '<')
assert result == 0.0
with pytest.raises(ValueError, match="Malformed RATIONAL"):
_read_value(buf, 0, RATIONAL, 1, '<')

def test_rational_denominator_zero_names_tag(self):
# When the tag is known, the error message names it.
buf = struct.pack('<II', 5, 0)
with pytest.raises(ValueError, match="XResolution"):
_read_value(buf, 0, RATIONAL, 1, '<', tag=TAG_X_RESOLUTION)

def test_srational_denominator_zero_returns_zero(self):
def test_srational_denominator_zero_raises(self):
buf = struct.pack('<ii', -3, 0)
result = _read_value(buf, 0, SRATIONAL, 1, '<')
assert result == 0.0
with pytest.raises(ValueError, match="Malformed SRATIONAL"):
_read_value(buf, 0, SRATIONAL, 1, '<')

def test_srational_denominator_zero_names_tag(self):
buf = struct.pack('<ii', -3, 0)
with pytest.raises(ValueError, match="YResolution"):
_read_value(buf, 0, SRATIONAL, 1, '<', tag=TAG_Y_RESOLUTION)

def test_rational_denominator_zero_in_array_raises(self):
# Second element has denominator=0; the failure points at the
# offending element index.
buf = struct.pack('<IIII', 1, 1, 5, 0)
with pytest.raises(ValueError, match="element 1"):
_read_value(buf, 0, RATIONAL, 2, '<')

def test_rational_normal_value(self):
buf = struct.pack('<II', 22, 7)
Expand Down
176 changes: 176 additions & 0 deletions xrspatial/geotiff/tests/test_rational_zero_denominator_2313.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""Regression tests for issue #2313.

A RATIONAL or SRATIONAL tag with a zero denominator is malformed by
the TIFF spec. The reader used to coerce it to 0.0 silently, which
let corrupted `XResolution` / `YResolution` metadata round-trip as if
the file were valid. After the fix the reader raises `ValueError`
with the tag name and the denominator in the message.
"""
from __future__ import annotations

import io
import struct

import pytest

from xrspatial.geotiff import open_geotiff
from xrspatial.geotiff._dtypes import RATIONAL, SRATIONAL
from xrspatial.geotiff._header import (TAG_X_RESOLUTION, TAG_Y_RESOLUTION,
parse_all_ifds, parse_header)


def _build_tiff_with_malformed_resolution(numerator: int, denominator: int,
*, which: int = TAG_X_RESOLUTION,
srational: bool = False) -> bytes:
"""Build a minimal little-endian TIFF whose `which` tag is a single
RATIONAL (or SRATIONAL) pointing at `(numerator, denominator)`.

`which` should be `TAG_X_RESOLUTION` or `TAG_Y_RESOLUTION`. The
other resolution tag is filled with a valid 72/1 so the IFD is
only malformed in the one place the test cares about.

The file lays out:
- 8-byte TIFF header
- IFD with the minimum tags a parser will accept plus
X/YResolution; rationals are 8 bytes so they live in an
overflow block after the IFD entry table
- 16 bytes of strip data so StripOffsets / StripByteCounts
are consistent
"""
if which not in (TAG_X_RESOLUTION, TAG_Y_RESOLUTION):
raise ValueError(
f"which must be TAG_X_RESOLUTION or TAG_Y_RESOLUTION, got {which}"
)

bo = '<'
rat_type = SRATIONAL if srational else RATIONAL
rat_fmt = f'{bo}ii' if srational else f'{bo}II'

# The malformed rational goes on `which`; the other resolution tag
# gets a healthy 72/1 so it doesn't trip the parser first.
if which == TAG_X_RESOLUTION:
xres = struct.pack(rat_fmt, numerator, denominator)
yres = struct.pack(rat_fmt, 72, 1)
else:
xres = struct.pack(rat_fmt, 72, 1)
yres = struct.pack(rat_fmt, numerator, denominator)

out = bytearray()
# Header: little-endian, classic TIFF, first IFD at offset 8.
out.extend(b'II')
out.extend(struct.pack(f'{bo}H', 42))
out.extend(struct.pack(f'{bo}I', 8))

tags = [
# (tag, type_id, count, raw_bytes)
(256, 3, 1, struct.pack(f'{bo}H', 4)), # ImageWidth
(257, 3, 1, struct.pack(f'{bo}H', 4)), # ImageLength
(258, 3, 1, struct.pack(f'{bo}H', 8)), # BitsPerSample
(259, 3, 1, struct.pack(f'{bo}H', 1)), # Compression
(262, 3, 1, struct.pack(f'{bo}H', 1)), # PhotometricInterpretation
(273, 4, 1, b'\x00\x00\x00\x00'), # StripOffsets (patched)
(277, 3, 1, struct.pack(f'{bo}H', 1)), # SamplesPerPixel
(278, 3, 1, struct.pack(f'{bo}H', 4)), # RowsPerStrip
(279, 4, 1, struct.pack(f'{bo}I', 16)), # StripByteCounts
(TAG_X_RESOLUTION, rat_type, 1, xres),
(TAG_Y_RESOLUTION, rat_type, 1, yres),
]
tags.sort(key=lambda t: t[0])

num_entries = len(tags)
ifd_start = 8
ifd_size = 2 + 12 * num_entries + 4
overflow_start = ifd_start + ifd_size

# Lay out overflow values for any tag whose raw bytes exceed 4.
overflow_buf = bytearray()
tag_offsets: dict[int, int | None] = {}
for tag, typ, count, raw in tags:
if len(raw) > 4:
tag_offsets[tag] = len(overflow_buf)
overflow_buf.extend(raw)
if len(overflow_buf) % 2:
overflow_buf.append(0)
else:
tag_offsets[tag] = None

pixel_data_start = overflow_start + len(overflow_buf)
# Patch StripOffsets to point at the actual pixel block.
patched = []
for tag, typ, count, raw in tags:
if tag == 273:
patched.append((tag, typ, count,
struct.pack(f'{bo}I', pixel_data_start)))
else:
patched.append((tag, typ, count, raw))
tags = patched

# IFD entries.
out.extend(struct.pack(f'{bo}H', num_entries))
for tag, typ, count, raw in tags:
out.extend(struct.pack(f'{bo}HHI', tag, typ, count))
if len(raw) <= 4:
out.extend(raw.ljust(4, b'\x00'))
else:
ptr = overflow_start + tag_offsets[tag]
out.extend(struct.pack(f'{bo}I', ptr))
# Next IFD pointer (none).
out.extend(struct.pack(f'{bo}I', 0))
# Overflow block.
out.extend(overflow_buf)
# Pad to pixel_data_start.
while len(out) < pixel_data_start:
out.append(0)
# 16 bytes of pixel payload (matches StripByteCounts above).
out.extend(b'\x00' * 16)

return bytes(out)


class TestRationalZeroDenominator:
"""Issue #2313: zero-denominator rationals must fail loudly."""

def test_rational_zero_denominator_surfaces_from_parse_all_ifds(self):
data = _build_tiff_with_malformed_resolution(72, 0)
header = parse_header(data)
with pytest.raises(ValueError, match="XResolution"):
parse_all_ifds(data, header)

def test_rational_zero_denominator_message_includes_denominator(self):
data = _build_tiff_with_malformed_resolution(150, 0)
header = parse_header(data)
with pytest.raises(ValueError) as exc:
parse_all_ifds(data, header)
message = str(exc.value)
assert "Malformed RATIONAL" in message
assert "XResolution" in message
assert "denominator=0" in message
assert "numerator=150" in message

def test_srational_zero_denominator_surfaces_from_parse_all_ifds(self):
data = _build_tiff_with_malformed_resolution(-5, 0, srational=True)
header = parse_header(data)
with pytest.raises(ValueError, match="Malformed SRATIONAL"):
parse_all_ifds(data, header)

def test_rational_zero_denominator_fails_open_geotiff(self):
# The public read entry point should fail loudly too, not just
# the low-level header parser.
data = _build_tiff_with_malformed_resolution(72, 0)
buf = io.BytesIO(data)
with pytest.raises(ValueError, match="XResolution"):
open_geotiff(buf)

def test_yresolution_zero_denominator_named_in_error(self):
# Same path, different tag.
data = _build_tiff_with_malformed_resolution(
72, 0, which=TAG_Y_RESOLUTION
)
header = parse_header(data)
with pytest.raises(ValueError, match="YResolution"):
parse_all_ifds(data, header)

# Sanity check: the helpers we use to assert tag ids actually exist.
def test_tag_constants_present(self):
assert TAG_X_RESOLUTION == 282
assert TAG_Y_RESOLUTION == 283
Loading