From f9d6179cfd24d56165b6ea60b7c040d517e59e7a Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 25 Jun 2026 11:47:16 -0700 Subject: [PATCH 1/2] Stop a short-row PAM RAT from crashing open_geotiff read_pam_sidecar() is contracted to never raise on a bad sidecar. open_geotiff() reads the PAM .aux.xml for any local string source, and a malformed one should degrade to {} rather than break the read. _parse_rat() indexes RAT cells by column index (fields[name_col] and friends). A with fewer cells than the highest column index raises IndexError, which the read_pam_sidecar() except clause did not cover (it caught OSError, ValueError, TypeError). The IndexError escaped and crashed open_geotiff for any TIFF carrying such a sidecar. Add IndexError to the caught exceptions so a short-row RAT falls back to {} like every other malformed case. Closes #3520 --- xrspatial/geotiff/_pam.py | 7 +++++- .../tests/test_rasterize_categorical_3482.py | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/xrspatial/geotiff/_pam.py b/xrspatial/geotiff/_pam.py index aa7255bca..51a773abf 100644 --- a/xrspatial/geotiff/_pam.py +++ b/xrspatial/geotiff/_pam.py @@ -144,9 +144,14 @@ def read_pam_sidecar(path): if colors is not None: out['category_colors'] = colors return out - except (OSError, ValueError, TypeError): + except (OSError, ValueError, TypeError, IndexError): # A missing, malformed, or foreign sidecar is non-fatal auxiliary # metadata, not a read error -- never let it break open_geotiff. + # IndexError covers a thematic RAT whose carries fewer + # cells than its highest column index (e.g. a Name column at index + # 1 paired with a single-cell row), which would otherwise escape + # _parse_rat and crash the open_geotiff call that reads the + # adjacent sidecar. return {} diff --git a/xrspatial/tests/test_rasterize_categorical_3482.py b/xrspatial/tests/test_rasterize_categorical_3482.py index 2aa1e26bf..507340578 100644 --- a/xrspatial/tests/test_rasterize_categorical_3482.py +++ b/xrspatial/tests/test_rasterize_categorical_3482.py @@ -257,6 +257,31 @@ def test_malformed_sidecar_returns_empty(self, tmp_path): # Must never raise; worst case returns {}. assert isinstance(read_pam_sidecar(path), dict) + def test_short_row_thematic_rat_returns_empty(self, tmp_path): + """A thematic RAT row with fewer cells than the Name column + index must not crash read_pam_sidecar. + + The Name column sits at index 1 (after a Value column at index 0), + but the row carries a single cell, so indexing the name cell raises + IndexError inside _parse_rat. read_pam_sidecar must swallow it and + fall back to {} like every other malformed-sidecar case, since + open_geotiff reads this sidecar for any local string source. + """ + from xrspatial.geotiff._pam import read_pam_sidecar + path = str(tmp_path / 'short_row.tif') + with open(path + '.aux.xml', 'w') as fh: + fh.write('' + '' + 'Value0' + '5' + 'Class2' + '2' + '0' + '' + '') + # Must never raise; worst case returns {}. + assert read_pam_sidecar(path) == {} + # --------------------------------------------------------------------------- # GPU backend parity From 0a5d8f8122fa9bb7d0d89ebf184077db1937d4ad Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 25 Jun 2026 12:59:09 -0700 Subject: [PATCH 2/2] Also swallow XML ParseError in read_pam_sidecar The IndexError fix left a sibling gap: safe_fromstring() raises xml.etree.ElementTree.ParseError on any non-well-formed sidecar, and ParseError subclasses SyntaxError, so it was not covered by the (OSError, ValueError, TypeError, IndexError) tuple. A truncated or corrupt .aux.xml therefore still escaped read_pam_sidecar and crashed the open_geotiff call -- the same contract violation, reached through a different exception and far easier to trigger than a hand-crafted short RAT row. Add ParseError to the caught exceptions (it is the same class for both the defusedxml and stdlib parse paths) so a non-well-formed sidecar degrades to {} like every other malformed case. Adds test_non_well_formed_xml_sidecar_returns_empty, which fails with ParseError before the change. Surfaced by the follow-up review on this PR. --- xrspatial/geotiff/_pam.py | 8 ++++++-- .../tests/test_rasterize_categorical_3482.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/xrspatial/geotiff/_pam.py b/xrspatial/geotiff/_pam.py index 51a773abf..6b52b9894 100644 --- a/xrspatial/geotiff/_pam.py +++ b/xrspatial/geotiff/_pam.py @@ -15,6 +15,7 @@ from __future__ import annotations import os +from xml.etree.ElementTree import ParseError from xml.sax.saxutils import escape from ._safe_xml import safe_fromstring @@ -144,14 +145,17 @@ def read_pam_sidecar(path): if colors is not None: out['category_colors'] = colors return out - except (OSError, ValueError, TypeError, IndexError): + except (OSError, ValueError, TypeError, IndexError, ParseError): # A missing, malformed, or foreign sidecar is non-fatal auxiliary # metadata, not a read error -- never let it break open_geotiff. # IndexError covers a thematic RAT whose carries fewer # cells than its highest column index (e.g. a Name column at index # 1 paired with a single-cell row), which would otherwise escape # _parse_rat and crash the open_geotiff call that reads the - # adjacent sidecar. + # adjacent sidecar. ParseError covers a truncated or otherwise + # non-well-formed sidecar: safe_fromstring raises it (a SyntaxError + # subclass, so not covered by the types above) and it would + # likewise escape and crash the read. return {} diff --git a/xrspatial/tests/test_rasterize_categorical_3482.py b/xrspatial/tests/test_rasterize_categorical_3482.py index 507340578..4cce25899 100644 --- a/xrspatial/tests/test_rasterize_categorical_3482.py +++ b/xrspatial/tests/test_rasterize_categorical_3482.py @@ -282,6 +282,21 @@ def test_short_row_thematic_rat_returns_empty(self, tmp_path): # Must never raise; worst case returns {}. assert read_pam_sidecar(path) == {} + def test_non_well_formed_xml_sidecar_returns_empty(self, tmp_path): + """A truncated / non-well-formed .aux.xml must not crash the read. + + safe_fromstring raises xml.etree.ElementTree.ParseError, which is a + SyntaxError subclass (not an OSError/ValueError/TypeError/IndexError), + so it would otherwise escape read_pam_sidecar and break the + open_geotiff call that reads the sidecar for any local string source. + """ + from xrspatial.geotiff._pam import read_pam_sidecar + path = str(tmp_path / 'truncated.tif') + with open(path + '.aux.xml', 'w') as fh: + fh.write('oops') + # Must never raise; worst case returns {}. + assert read_pam_sidecar(path) == {} + # --------------------------------------------------------------------------- # GPU backend parity