From e75e928cbb8c993f4f185e038038e54a39bab0b7 Mon Sep 17 00:00:00 2001
From: Vincent Gao
Date: Thu, 25 Jun 2026 14:04:08 +0200
Subject: [PATCH] fix: suffixes_prefixes_titles always reflects current set state

The `suffixes_prefixes_titles` property on `Constants` cached its result
in `_pst` after the first access. Any subsequent `add()` or `remove()`
call on `titles`, `prefixes`, `suffix_acronyms`, or `suffix_not_acronyms`
was not reflected in the cached union, so `is_rootname()` kept returning
a stale result.

Concretely, a word added to `C.titles` after the property was first
accessed would still be treated as a rootname (first/middle/last) by
`join_on_conjunctions`, even though `is_title()` correctly returned
`True` for it. This contradicts the documented behaviour of per-instance
config customisation described in AGENTS.md.

Fix: drop the `_pst` cache entirely and compute the union afresh on every
access. Computing the four-set union is cheap, and recomputing it each
time is the simplest correct approach.

Add five tests asserting that the property and `is_rootname` stay
consistent with the live sets after `add()`/`remove()` calls.
--- nameparser/config/__init__.py | 6 +----- tests/test_constants.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index ce9da44..624ef6b 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -179,7 +179,6 @@ class Constants: capitalization_exceptions: TupleManager[str] regexes: RegexTupleManager - _pst: Set[str] | None string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" """ @@ -262,13 +261,10 @@ def __init__(self, self.conjunctions = SetManager(conjunctions) self.capitalization_exceptions = TupleManager(capitalization_exceptions) self.regexes = RegexTupleManager(regexes) - self._pst = None @property def suffixes_prefixes_titles(self) -> Set[str]: - if not self._pst: - self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles - return self._pst + return self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles def __repr__(self) -> str: return "" diff --git a/tests/test_constants.py b/tests/test_constants.py index 9f619c3..ca13dc6 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -104,3 +104,35 @@ def test_add_constant_with_explicit_encoding(self) -> None: c = Constants() c.titles.add_with_encoding(b'b\351ck', encoding='latin_1') self.assertIn('béck', c.titles) + + def test_suffixes_prefixes_titles_reflects_add_title(self) -> None: + """suffixes_prefixes_titles must include titles added after construction.""" + c = Constants() + c.titles.add('emerita') + self.assertIn('emerita', c.suffixes_prefixes_titles) + + def test_suffixes_prefixes_titles_reflects_add_prefix(self) -> None: + """suffixes_prefixes_titles must include prefixes added after construction.""" + c = Constants() + c.prefixes.add('xpfx') + self.assertIn('xpfx', c.suffixes_prefixes_titles) + + def test_suffixes_prefixes_titles_reflects_remove_title(self) -> None: + 
"""suffixes_prefixes_titles must not include a word that was only in titles and is then removed.""" + c = Constants() + c.titles.add('emerita') + self.assertIn('emerita', c.suffixes_prefixes_titles) + c.titles.remove('emerita') + self.assertFalse('emerita' in c.suffixes_prefixes_titles) + + def test_is_rootname_consistent_with_is_title(self) -> None: + """is_rootname must return False for words recognised by is_title.""" + hn = HumanName("", constants=None) + hn.C.titles.add('emerita') + self.assertFalse(hn.is_rootname('emerita')) + + def test_is_rootname_consistent_with_is_prefix(self) -> None: + """is_rootname must return False for words recognised by is_prefix.""" + hn = HumanName("", constants=None) + hn.C.prefixes.add('xpfx') + self.assertFalse(hn.is_rootname('xpfx'))