DataFog · sidmohan0 · Jul 3, 2026
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 4.7.0
+current_version = 4.7.1
 commit = True
 tag = True
 tag_name = v{new_version}

diff --git a/CHANGELOG.MD b/CHANGELOG.MD
@@ -2,6 +2,28 @@
 
 ## [2026-07-02]
 
+### `datafog-python` [4.7.1]
+
+#### Behavior change (precision)
+
+- **Bare digit runs are no longer treated as SSN or PHONE by default.**
+  Undelimited nine-digit runs and unformatted ten-digit runs no longer match
+  as `SSN` and `PHONE` respectively; delimited SSNs (`NNN-NN-NNNN`,
+  `NNN NN NNNN`) and formatted phone numbers (separators, parentheses, or a
+  `+country` prefix) still match. This fixes the dominant false-positive
+  class in structured tool output — tab ids, database row ids, epoch
+  timestamps, ticket numbers — reported in
+  [#158](https://github.com/DataFog/datafog-python/issues/158). Spaces are
+  now also accepted as an SSN delimiter.
+- **Migration:** to restore the previous behavior (undelimited nine-digit
+  SSNs and bare ten-digit phone numbers), pass `strict_numeric=False` to
+  `scan()` / `redact()`. The Claude Code hook and LiteLLM guardrail run in
+  strict mode; the hook additionally supports the allowlist pattern
+  `^\d{9}$|^\d{10}$` as a belt-and-braces safeguard.
+- Broader structural validation (SSA area/group ranges, NANP area/exchange
+  rules) is deferred to the v5 validator layer; this release only changes
+  the delimiter/formatting requirement.
+
 ### `datafog-python` [4.7.0]
 
 #### Added

diff --git a/datafog/__about__.py b/datafog/__about__.py
@@ -1 +1 @@
-__version__ = "4.7.0"
+__version__ = "4.7.1"
diff --git a/datafog/__init__.py b/datafog/__init__.py
@@ -155,6 +155,7 @@ def scan(
     locales: list[str] | None = None,
     allowlist: list[str] | None = None,
     allowlist_patterns: list[str] | None = None,
+    strict_numeric: bool = True,
 ) -> ScanResult:
     """
     v5-preview scan entrypoint.
@@ -165,7 +166,10 @@ def scan(
     ``allowlist`` exempts exact entity texts (your own support address, doc
     placeholders); ``allowlist_patterns`` exempts entities whose full text
     matches a regex (e.g. ``^\\d{10}$`` so unix timestamps stop matching as
-    phone numbers).
+    phone numbers). ``strict_numeric`` (default True) requires SSNs to be
+    delimited and phone numbers to carry formatting cues, so bare digit runs
+    (tab ids, row ids, timestamps) are not flagged; set False to also detect
+    undelimited nine-digit SSNs and bare ten-digit phone numbers.
     """
     return _scan(
         text=text,
@@ -174,6 +178,7 @@ def scan(
         locales=locales,
         allowlist=allowlist,
         allowlist_patterns=allowlist_patterns,
+        strict_numeric=strict_numeric,
     )
 
 
@@ -187,6 +192,7 @@ def redact(
     locales: list[str] | None = None,
     allowlist: list[str] | None = None,
     allowlist_patterns: list[str] | None = None,
+    strict_numeric: bool = True,
 ) -> RedactResult:
     """
     v5-preview redaction entrypoint.
@@ -195,7 +201,9 @@ def redact(
     using the selected engine and redact the detected entities. ``allowlist``
     and ``allowlist_patterns`` exempt findings from redaction (exact text and
     full-text regex match respectively); they apply to the scan path and are
-    rejected when explicit ``entities`` are supplied.
+    rejected when explicit ``entities`` are supplied. ``strict_numeric``
+    behaves as in ``scan``: bare digit runs are not treated as SSN/PHONE
+    unless it is set to False.
     """
     if preset is not None:
         try:
@@ -220,6 +228,7 @@ def redact(
         locales=locales,
         allowlist=allowlist,
         allowlist_patterns=allowlist_patterns,
+        strict_numeric=strict_numeric,
     )
 
 

diff --git a/datafog/__init___lean.py b/datafog/__init___lean.py
@@ -15,15 +15,10 @@
 """
 
 from .__about__ import __version__
-
 # Core imports - always available
 from .models.annotator import AnnotationResult, AnnotatorRequest
-from .models.anonymizer import (
-    AnonymizationResult,
-    Anonymizer,
-    AnonymizerRequest,
-    AnonymizerType,
-)
+from .models.anonymizer import (AnonymizationResult, Anonymizer,
+                                AnonymizerRequest, AnonymizerType)
 from .models.common import EntityTypes
 from .processing.text_processing.regex_annotator import RegexAnnotator
 

diff --git a/datafog/__init___original.py b/datafog/__init___original.py
@@ -11,23 +11,18 @@
 from .client import app
 from .config import OperationType, get_config
 from .main import DataFog, TextPIIAnnotator
-from .models.annotator import (
-    AnalysisExplanation,
-    AnnotationResult,
-    AnnotationResultWithAnaysisExplanation,
-    AnnotatorRequest,
-)
-from .models.anonymizer import (
-    AnonymizationResult,
-    Anonymizer,
-    AnonymizerRequest,
-    AnonymizerType,
-)
-from .models.common import AnnotatorMetadata, EntityTypes, Pattern, PatternRecognizer
+from .models.annotator import (AnalysisExplanation, AnnotationResult,
+                               AnnotationResultWithAnaysisExplanation,
+                               AnnotatorRequest)
+from .models.anonymizer import (AnonymizationResult, Anonymizer,
+                                AnonymizerRequest, AnonymizerType)
+from .models.common import (AnnotatorMetadata, EntityTypes, Pattern,
+                            PatternRecognizer)
 from .models.spacy_nlp import SpacyAnnotator
 from .processing.image_processing.donut_processor import DonutProcessor
 from .processing.image_processing.image_downloader import ImageDownloader
-from .processing.image_processing.pytesseract_processor import PytesseractProcessor
+from .processing.image_processing.pytesseract_processor import \
+    PytesseractProcessor
 from .processing.text_processing.spacy_pii_annotator import SpacyPIIAnnotator
 from .services.image_service import ImageService
 from .services.spark_service import SparkService

diff --git a/datafog/client.py b/datafog/client.py
@@ -199,9 +199,8 @@ def download_model(
 
     elif engine == "gliner":
         try:
-            from datafog.processing.text_processing.gliner_annotator import (
-                GLiNERAnnotator,
-            )
+            from datafog.processing.text_processing.gliner_annotator import \
+                GLiNERAnnotator
 
             GLiNERAnnotator.download_model(model_name)
             typer.echo(f"GLiNER model {model_name} downloaded and cached successfully.")
@@ -310,9 +309,8 @@ def list_entities():
         typer.echo(annotator.list_entities())
     except ModuleNotFoundError as e:
         try:
-            from .processing.text_processing.spacy_pii_annotator import (
-                PII_ANNOTATION_LABELS,
-            )
+            from .processing.text_processing.spacy_pii_annotator import \
+                PII_ANNOTATION_LABELS
 
             typer.echo(PII_ANNOTATION_LABELS)
         except Exception:

diff --git a/datafog/core.py b/datafog/core.py
@@ -51,11 +51,9 @@ def detect_pii(
             pii_dict[entity.type].append(entity.text)
 
         try:
-            from datafog.telemetry import (
-                _get_duration_bucket,
-                _get_text_length_bucket,
-                track_function_call,
-            )
+            from datafog.telemetry import (_get_duration_bucket,
+                                           _get_text_length_bucket,
+                                           track_function_call)
 
             _duration = (_time.monotonic() - _start) * 1000
             entity_count = sum(len(v) for v in pii_dict.values())
@@ -132,11 +130,9 @@ def anonymize_text(
         )
 
         try:
-            from datafog.telemetry import (
-                _get_duration_bucket,
-                _get_text_length_bucket,
-                track_function_call,
-            )
+            from datafog.telemetry import (_get_duration_bucket,
+                                           _get_text_length_bucket,
+                                           track_function_call)
 
             _duration = (_time.monotonic() - _start) * 1000
             track_function_call(

diff --git a/datafog/engine.py b/datafog/engine.py
@@ -188,8 +188,13 @@ def _regex_entities(
     text: str,
     entity_types: Optional[list[str]] = None,
     locales: Optional[list[str]] = None,
+    strict_numeric: bool = True,
 ) -> list[Entity]:
-    annotator = RegexAnnotator(locales=locales, enabled_labels=entity_types)
+    annotator = RegexAnnotator(
+        locales=locales,
+        enabled_labels=entity_types,
+        strict_numeric=strict_numeric,
+    )
     _, structured = annotator.annotate_with_spans(text)
     entities: list[Entity] = []
     for span in structured.spans:
@@ -363,12 +368,17 @@ def scan(
     locales: Optional[list[str]] = None,
     allowlist: Optional[list[str]] = None,
     allowlist_patterns: Optional[list[str]] = None,
+    strict_numeric: bool = True,
 ) -> ScanResult:
     """Scan text for PII entities.
 
     ``allowlist`` exempts exact entity texts (e.g. your own support email);
     ``allowlist_patterns`` exempts entities whose full text matches a regex
     (e.g. ``^\\d{10}$`` to stop unix timestamps matching as phone numbers).
+    ``strict_numeric`` (default True) requires SSNs to be delimited and phone
+    numbers to carry formatting cues, so bare digit runs are not matched; set
+    False to also detect undelimited nine-digit SSNs and bare ten-digit phone
+    numbers.
     """
     if not isinstance(text, str):
         raise TypeError("text must be a string")
@@ -384,6 +394,7 @@ def scan(
         text,
         entity_types=entity_types,
         locales=locales,
+        strict_numeric=strict_numeric,
     )
 
     if engine == "regex":
@@ -524,6 +535,7 @@ def scan_and_redact(
     locales: Optional[list[str]] = None,
     allowlist: Optional[list[str]] = None,
     allowlist_patterns: Optional[list[str]] = None,
+    strict_numeric: bool = True,
 ) -> RedactResult:
     """Convenience wrapper: scan then redact."""
     scan_result = scan(
@@ -533,5 +545,6 @@ def scan_and_redact(
         locales=locales,
         allowlist=allowlist,
         allowlist_patterns=allowlist_patterns,
+        strict_numeric=strict_numeric,
     )
     return redact(text=text, entities=scan_result.entities, strategy=strategy)
diff --git a/datafog/main.py b/datafog/main.py
@@ -141,7 +141,8 @@ def run_text_pipeline_sync(self, str_list: List[str]) -> List:
                 _pipeline_result = str_list
 
             try:
-                from .telemetry import _get_duration_bucket, track_function_call
+                from .telemetry import (_get_duration_bucket,
+                                        track_function_call)
 
                 _duration = (_time.monotonic() - _start) * 1000
                 track_function_call(
@@ -194,11 +195,9 @@ def detect(self, text: str) -> dict:
             result.setdefault(label, []).append(entity.text)
 
         try:
-            from .telemetry import (
-                _get_duration_bucket,
-                _get_text_length_bucket,
-                track_function_call,
-            )
+            from .telemetry import (_get_duration_bucket,
+                                    _get_text_length_bucket,
+                                    track_function_call)
 
             _duration = (_time.monotonic() - _start) * 1000
             entity_count = sum(len(v) for v in result.values())

diff --git a/datafog/processing/image_processing/donut_processor.py b/datafog/processing/image_processing/donut_processor.py
@@ -88,7 +88,8 @@ async def extract_text_from_image(self, image: "Image.Image") -> str:
                 raise ImportError(self._missing_dependency_message("torch")) from exc
 
             try:
-                from transformers import DonutProcessor as TransformersDonutProcessor
+                from transformers import \
+                    DonutProcessor as TransformersDonutProcessor
                 from transformers import VisionEncoderDecoderModel
             except ImportError as e:
                 raise ImportError(

diff --git a/datafog/processing/text_processing/regex_annotator/__init__.py b/datafog/processing/text_processing/regex_annotator/__init__.py
@@ -1,7 +1,4 @@
 from datafog.processing.text_processing.regex_annotator.regex_annotator import (
-    AnnotationResult,
-    RegexAnnotator,
-    Span,
-)
+    AnnotationResult, RegexAnnotator, Span)
 
 __all__ = ["RegexAnnotator", "Span", "AnnotationResult"]