From 9e742bc59adee81310d0f570275a7c73829965a3 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Thu, 18 Jun 2026 12:41:04 +1000 Subject: [PATCH 1/3] Add Markdown footnote support to from_markdown Parse standard Markdown footnotes (`text[^label]` references and `[^label]: definition` lines) into Substack's footnoteAnchor inline nodes and footnote blocks. Footnotes are numbered by order of first reference and labels may be numeric or named. Also adds Post.footnote_anchor() and Post.footnote() helpers for building footnotes manually, plus tests. --- README.md | 15 +++ substack/post.py | 151 ++++++++++++++++++++++++ tests/substack/test_footnotes.py | 191 +++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 tests/substack/test_footnotes.py diff --git a/README.md b/README.md index c4f57fa..cfe0611 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,21 @@ This is a paragraph with **bold** and *italic* text. """ post.from_markdown(markdown_content, api=api) +# Markdown footnotes are supported too. References become inline anchors and +# definitions become footnote blocks, numbered by order of first appearance. +# Labels can be numbers or names (e.g. [^1] or [^source]). +footnote_markdown = """ +A claim that needs support.[^1] Another, with a named label.[^source] + +[^1]: The supporting detail, with a [link](https://example.com). +[^source]: Author, *Title* (2025). +""" +post.from_markdown(footnote_markdown, api=api) + +# Or build footnotes manually: +post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(1) +post.footnote(1, "The note text, with **formatting** allowed.") + draft = api.post_draft(post.get_draft()) # set section (can only be done after first posting the draft) diff --git a/substack/post.py b/substack/post.py index 8a9d55d..8d190f3 100644 --- a/substack/post.py +++ b/substack/post.py @@ -12,6 +12,10 @@ from substack.exceptions import SectionNotExistsException +# Markdown footnotes: ``text.[^label]`` references and ``[^label]: definition`` lines. +FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]") +FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$") + def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]: """Convert parse_inline() tokens to ProseMirror text nodes. @@ -543,6 +547,135 @@ def code_block(self, content, attrs=None): return self + def footnote_anchor(self, number: int): + """ + + Add an inline footnote reference (the superscript marker) to the last block. + + Args: + number: The footnote number this anchor points to. + + Returns: + Self for method chaining. + + """ + content = self.draft_body["content"][-1].get("content", []) + content += [{"type": "footnoteAnchor", "attrs": {"number": number}}] + self.draft_body["content"][-1]["content"] = content + return self + + def footnote(self, number: int, content=None): + """ + + Append a footnote block (the note shown at the foot of the post). + + Args: + number: The footnote number, matching a footnote_anchor. + content: Text string or list of inline token dicts. A plain string is + parsed for inline Markdown; a parse_inline() token list or a list + of ready text nodes is also accepted. + + Returns: + Self for method chaining. + + """ + if isinstance(content, str): + text_nodes = tokens_to_text_nodes(parse_inline(content)) + elif isinstance(content, list): + # Accept either parse_inline tokens ({"content": ...}) or text nodes. + if content and content[0].get("type") == "text": + text_nodes = content + else: + text_nodes = tokens_to_text_nodes(content) + else: + text_nodes = [] + + node: Dict = { + "type": "footnote", + "attrs": {"number": number}, + "content": [{"type": "paragraph", "content": text_nodes}], + } + self.draft_body["content"] = self.draft_body.get("content", []) + [node] + return self + + @staticmethod + def _extract_footnote_definitions(markdown_content: str): + """ + + Pull ``[^label]: definition`` lines out of the Markdown. + + Definitions may wrap onto indented continuation lines. Returns the body + with definitions removed plus a {label: definition_text} mapping. + + """ + lines = markdown_content.split("\n") + body_lines: List[str] = [] + definitions: Dict[str, str] = {} + i = 0 + while i < len(lines): + match = FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) + if match: + label, first = match.group(1), match.group(2) + parts = [first] + i += 1 + # Continuation lines are indented and neither blank nor a new def. + while i < len(lines) and lines[i].strip() and lines[i][:1] in (" ", "\t"): + parts.append(lines[i].strip()) + i += 1 + definitions[label] = " ".join(p for p in parts if p).strip() + else: + body_lines.append(lines[i]) + i += 1 + return "\n".join(body_lines), definitions + + @staticmethod + def _number_footnotes(markdown_content: str, definitions: Dict[str, str]): + """Number footnotes by order of first inline reference in the body.""" + order: List[str] = [] + for match in FOOTNOTE_REFERENCE_PATTERN.finditer(markdown_content): + label = match.group(1) + if label in definitions and label not in order: + order.append(label) + # Defined-but-unreferenced footnotes go last, in definition order. + for label in definitions: + if label not in order: + order.append(label) + return {label: index + 1 for index, label in enumerate(order)} + + def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]): + """Recursively replace ``[^label]`` in text nodes with footnoteAnchor nodes.""" + content = node.get("content") + if not isinstance(content, list): + return + new_content: List[Dict] = [] + for child in content: + text = child.get("text", "") + if child.get("type") == "text" and FOOTNOTE_REFERENCE_PATTERN.search(text): + marks = child.get("marks") + last = 0 + for match in FOOTNOTE_REFERENCE_PATTERN.finditer(text): + label = match.group(1) + if label not in numbers_by_label: + continue # Unknown label: leave the literal text in place. + if match.start() > last: + segment = {"type": "text", "text": text[last:match.start()]} + if marks: + segment["marks"] = marks + new_content.append(segment) + new_content.append( + {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}} + ) + last = match.end() + if last < len(text): + segment = {"type": "text", "text": text[last:]} + if marks: + segment["marks"] = marks + new_content.append(segment) + else: + self._inject_footnote_anchors(child, numbers_by_label) + new_content.append(child) + node["content"] = new_content + def from_markdown(self, markdown_content: str, api=None): """ Parse Markdown content and add it to the post. @@ -559,6 +692,10 @@ def from_markdown(self, markdown_content: str, api=None): - Ordered lists: Lines starting with '1.', '2.', etc. - Horizontal rules: Lines with ---, ***, or ___ - Inline formatting: **bold**, *italic*, ***bold+italic***, `code`, ~~strikethrough~~ + - Footnotes: ``text.[^label]`` references plus ``[^label]: definition`` + lines. References become inline anchors and definitions become + footnote blocks, numbered by order of first appearance. Labels may be + numbers or names (e.g. ``[^1]`` or ``[^agi-book]``). Args: markdown_content: Markdown string to parse and add to the post. @@ -572,6 +709,13 @@ def from_markdown(self, markdown_content: str, api=None): >>> post = Post("Title", "Subtitle", user_id) >>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).") """ + # Footnotes: extract ``[^label]: ...`` definitions and number them by + # order of first reference before parsing the rest of the body. + markdown_content, footnote_definitions = self._extract_footnote_definitions( + markdown_content + ) + footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions) + lines = markdown_content.split("\n") blocks = [] current_block: List[str] = [] @@ -844,4 +988,11 @@ def flush_ordered(): tokens = parse_inline(text_content) self.add({"type": "paragraph", "content": tokens}) + # Footnotes: turn ``[^label]`` references into inline anchors, then append + # the footnote blocks in numbered order. + if footnote_numbers: + self._inject_footnote_anchors(self.draft_body, footnote_numbers) + for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]): + self.footnote(number, footnote_definitions[label]) + return self diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py new file mode 100644 index 0000000..f9db725 --- /dev/null +++ b/tests/substack/test_footnotes.py @@ -0,0 +1,191 @@ +"""Tests for Markdown footnote support in post.py.""" + +from substack.post import Post + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_post(): + """Create a fresh Post instance for testing.""" + return Post(title="Test", subtitle="Sub", user_id=1) + + +def body_content(post): + """Return the content list from the post's draft body.""" + return post.draft_body["content"] + + +def find_nodes(node, node_type, acc=None): + """Recursively collect every node of a given type from a doc tree.""" + if acc is None: + acc = [] + if isinstance(node, dict): + if node.get("type") == node_type: + acc.append(node) + for value in node.values(): + find_nodes(value, node_type, acc) + elif isinstance(node, list): + for value in node: + find_nodes(value, node_type, acc) + return acc + + +def anchors(post): + return find_nodes(post.draft_body, "footnoteAnchor") + + +def footnotes(post): + return find_nodes(post.draft_body, "footnote") + + +# --------------------------------------------------------------------------- +# TestFootnoteHelpers +# --------------------------------------------------------------------------- + +class TestFootnoteHelpers: + def test_footnote_anchor_added_inline(self): + post = make_post() + post.paragraph(content=[{"content": "See here."}]) + post.footnote_anchor(1) + para = body_content(post)[0] + assert para["content"][-1] == {"type": "footnoteAnchor", "attrs": {"number": 1}} + + def test_footnote_block_from_string(self): + post = make_post() + post.footnote(1, "A simple note.") + block = body_content(post)[-1] + assert block["type"] == "footnote" + assert block["attrs"] == {"number": 1} + assert block["content"][0]["type"] == "paragraph" + assert block["content"][0]["content"][0]["text"] == "A simple note." + + def test_footnote_block_parses_inline_markdown(self): + post = make_post() + post.footnote(2, "See [the source](https://example.com).") + block = footnotes(post)[0] + text_nodes = block["content"][0]["content"] + link_node = next(n for n in text_nodes if n.get("marks")) + assert link_node["text"] == "the source" + assert link_node["marks"] == [{"type": "link", "attrs": {"href": "https://example.com"}}] + + +# --------------------------------------------------------------------------- +# TestFromMarkdownFootnotes +# --------------------------------------------------------------------------- + +class TestFromMarkdownFootnotes: + def test_basic_reference_and_definition(self): + post = make_post() + post.from_markdown("A claim.[^1]\n\n[^1]: The supporting detail.") + assert len(anchors(post)) == 1 + assert anchors(post)[0]["attrs"]["number"] == 1 + blocks = footnotes(post) + assert len(blocks) == 1 + assert blocks[0]["attrs"]["number"] == 1 + assert blocks[0]["content"][0]["content"][0]["text"] == "The supporting detail." + + def test_definition_removed_from_body(self): + post = make_post() + post.from_markdown("A claim.[^1]\n\n[^1]: The note.") + # The definition line must not leak into a paragraph. + paragraphs = find_nodes(post.draft_body, "paragraph") + body_text = " ".join( + n.get("text", "") + for p in paragraphs + for n in p.get("content", []) + ) + assert "[^1]:" not in body_text + + def test_anchor_injected_mid_sentence(self): + post = make_post() + post.from_markdown("Before[^1] and after.\n\n[^1]: Note.") + para = find_nodes(post.draft_body, "paragraph")[0] + types = [c["type"] for c in para["content"]] + assert types == ["text", "footnoteAnchor", "text"] + assert para["content"][0]["text"] == "Before" + assert para["content"][2]["text"] == " and after." + + def test_named_labels_numbered_by_first_appearance(self): + post = make_post() + md = ( + "First[^book] then second[^study].\n\n" + "[^study]: Second definition.\n" + "[^book]: First definition.\n" + ) + post.from_markdown(md) + nums = [a["attrs"]["number"] for a in anchors(post)] + assert nums == [1, 2] # order of reference, not of definition + blocks = sorted(footnotes(post), key=lambda b: b["attrs"]["number"]) + assert blocks[0]["content"][0]["content"][0]["text"] == "First definition." + assert blocks[1]["content"][0]["content"][0]["text"] == "Second definition." + + def test_repeated_reference_reuses_number(self): + post = make_post() + post.from_markdown("One[^a] two[^a].\n\n[^a]: Note.") + nums = [a["attrs"]["number"] for a in anchors(post)] + assert nums == [1, 1] + assert len(footnotes(post)) == 1 + + def test_link_inside_definition_preserved(self): + post = make_post() + post.from_markdown("Claim.[^1]\n\n[^1]: See [docs](https://example.com).") + block = footnotes(post)[0] + link_node = next( + n for n in block["content"][0]["content"] if n.get("marks") + ) + assert link_node["marks"][0]["attrs"]["href"] == "https://example.com" + + def test_multiline_definition(self): + post = make_post() + md = "Claim.[^1]\n\n[^1]: First line\n continued on the next line." + post.from_markdown(md) + text = footnotes(post)[0]["content"][0]["content"][0]["text"] + assert text == "First line continued on the next line." + + def test_unreferenced_definition_still_appended(self): + post = make_post() + post.from_markdown("No references here.\n\n[^1]: Orphan note.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 1 + + def test_reference_without_definition_left_as_text(self): + post = make_post() + post.from_markdown("A dangling[^missing] reference.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 0 + para = find_nodes(post.draft_body, "paragraph")[0] + assert "[^missing]" in para["content"][0]["text"] + + def test_definition_in_middle_moves_to_end(self): + post = make_post() + md = ( + "First paragraph.[^1]\n\n" + "[^1]: First footnote.\n\n" + "Second paragraph." + ) + post.from_markdown(md) + + types = [node["type"] for node in body_content(post)] + # Both paragraphs come first; the footnote block is last regardless of + # where the definition appeared in the source. + assert types == ["paragraph", "paragraph", "footnote"] + + paragraphs = find_nodes(post.draft_body, "paragraph") + assert paragraphs[0]["content"][0]["text"] == "First paragraph." + # The definition line did not become a paragraph in the body. + assert paragraphs[1]["content"][0]["text"] == "Second paragraph." + + assert len(anchors(post)) == 1 + block = footnotes(post)[0] + assert block["content"][0]["content"][0]["text"] == "First footnote." + + def test_no_footnotes_is_unchanged(self): + post = make_post() + post.from_markdown("Just a plain paragraph.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 0 + assert find_nodes(post.draft_body, "paragraph")[0]["content"][0]["text"] == ( + "Just a plain paragraph." + ) From 698e098032d205c06d49afc1ea24a737338cc5b1 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Fri, 26 Jun 2026 11:43:00 +1000 Subject: [PATCH 2/3] Keep footnote handling out of code blocks and inline code Footnote definition extraction now skips fenced code blocks, and anchor injection skips codeBlock nodes and text marked as inline code. This fixes footnote-like text inside code being removed or rewritten. Adds regression tests for fenced and inline code cases. --- substack/post.py | 22 ++++++++++++++++++++-- tests/substack/test_footnotes.py | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/substack/post.py b/substack/post.py index 8d190f3..8816292 100644 --- a/substack/post.py +++ b/substack/post.py @@ -611,9 +611,17 @@ def _extract_footnote_definitions(markdown_content: str): lines = markdown_content.split("\n") body_lines: List[str] = [] definitions: Dict[str, str] = {} + in_code_fence = False i = 0 while i < len(lines): - match = FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) + # Track fenced code blocks so footnote-like lines inside them are + # left untouched. + if lines[i].lstrip().startswith("```"): + in_code_fence = not in_code_fence + body_lines.append(lines[i]) + i += 1 + continue + match = None if in_code_fence else FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) if match: label, first = match.group(1), match.group(2) parts = [first] @@ -644,13 +652,23 @@ def _number_footnotes(markdown_content: str, definitions: Dict[str, str]): def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]): """Recursively replace ``[^label]`` in text nodes with footnoteAnchor nodes.""" + # Never rewrite the contents of a code block. + if node.get("type") == "codeBlock": + return content = node.get("content") if not isinstance(content, list): return new_content: List[Dict] = [] for child in content: text = child.get("text", "") - if child.get("type") == "text" and FOOTNOTE_REFERENCE_PATTERN.search(text): + has_code_mark = any( + mark.get("type") == "code" for mark in (child.get("marks") or []) + ) + if ( + child.get("type") == "text" + and not has_code_mark + and FOOTNOTE_REFERENCE_PATTERN.search(text) + ): marks = child.get("marks") last = 0 for match in FOOTNOTE_REFERENCE_PATTERN.finditer(text): diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py index f9db725..eccb3dd 100644 --- a/tests/substack/test_footnotes.py +++ b/tests/substack/test_footnotes.py @@ -181,6 +181,29 @@ def test_definition_in_middle_moves_to_end(self): block = footnotes(post)[0] assert block["content"][0]["content"][0]["text"] == "First footnote." + def test_footnote_definition_inside_fenced_code_stays_code(self): + post = make_post() + post.from_markdown("```\n[^1]: not a footnote\n```") + content = body_content(post) + assert len(content) == 1 + assert content[0]["type"] == "codeBlock" + assert content[0]["content"][0]["text"] == "[^1]: not a footnote" + + def test_footnote_reference_inside_fenced_code_stays_text(self): + post = make_post() + post.from_markdown("```\ncode [^1]\n```\n\n[^1]: note") + content = body_content(post) + assert content[0]["type"] == "codeBlock" + assert content[0]["content"][0]["text"] == "code [^1]" + + def test_footnote_reference_inside_inline_code_stays_text(self): + post = make_post() + post.from_markdown("`code [^1]`\n\n[^1]: note") + content = body_content(post) + assert content[0]["type"] == "paragraph" + assert content[0]["content"][0]["text"] == "code [^1]" + assert content[0]["content"][0]["marks"] == [{"type": "code"}] + def test_no_footnotes_is_unchanged(self): post = make_post() post.from_markdown("Just a plain paragraph.") From 5495067ac1a7394f3fadc790065ddca78fa44fb2 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Fri, 26 Jun 2026 12:24:50 +1000 Subject: [PATCH 3/3] Support multi-paragraph footnote definitions Footnote definitions can now contain multiple paragraphs (a blank line followed by an indented block); previously a second paragraph leaked into the post body and only the first was kept. Extraction preserves paragraph breaks and Post.footnote() splits blank-line-separated content into multiple paragraph nodes (verified accepted/rendered by Substack). Adds regression tests. --- substack/post.py | 61 ++++++++++++++++++++++++-------- tests/substack/test_footnotes.py | 32 +++++++++++++++++ 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/substack/post.py b/substack/post.py index 8816292..690b402 100644 --- a/substack/post.py +++ b/substack/post.py @@ -572,28 +572,38 @@ def footnote(self, number: int, content=None): Args: number: The footnote number, matching a footnote_anchor. content: Text string or list of inline token dicts. A plain string is - parsed for inline Markdown; a parse_inline() token list or a list - of ready text nodes is also accepted. + parsed for inline Markdown and may contain blank-line-separated + paragraphs; a parse_inline() token list or a list of ready text + nodes is also accepted (single paragraph). Returns: Self for method chaining. """ + paragraphs: List[Dict] = [] if isinstance(content, str): - text_nodes = tokens_to_text_nodes(parse_inline(content)) + # Blank lines separate paragraphs within the footnote. + for chunk in re.split(r"\n\s*\n", content): + chunk = chunk.strip() + if chunk: + paragraphs.append( + {"type": "paragraph", "content": tokens_to_text_nodes(parse_inline(chunk))} + ) elif isinstance(content, list): # Accept either parse_inline tokens ({"content": ...}) or text nodes. if content and content[0].get("type") == "text": text_nodes = content else: text_nodes = tokens_to_text_nodes(content) - else: - text_nodes = [] + paragraphs.append({"type": "paragraph", "content": text_nodes}) + + if not paragraphs: + paragraphs = [{"type": "paragraph", "content": []}] node: Dict = { "type": "footnote", "attrs": {"number": number}, - "content": [{"type": "paragraph", "content": text_nodes}], + "content": paragraphs, } self.draft_body["content"] = self.draft_body.get("content", []) + [node] return self @@ -604,8 +614,10 @@ def _extract_footnote_definitions(markdown_content: str): Pull ``[^label]: definition`` lines out of the Markdown. - Definitions may wrap onto indented continuation lines. Returns the body - with definitions removed plus a {label: definition_text} mapping. + Definitions may wrap onto indented continuation lines and may contain + multiple paragraphs (blank line followed by an indented block). Returns + the body with definitions removed plus a {label: definition_text} mapping, + where paragraphs are separated by a blank line. """ lines = markdown_content.split("\n") @@ -624,13 +636,34 @@ def _extract_footnote_definitions(markdown_content: str): match = None if in_code_fence else FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) if match: label, first = match.group(1), match.group(2) - parts = [first] + paragraphs: List[str] = [] + current = [first.strip()] if first.strip() else [] i += 1 - # Continuation lines are indented and neither blank nor a new def. - while i < len(lines) and lines[i].strip() and lines[i][:1] in (" ", "\t"): - parts.append(lines[i].strip()) - i += 1 - definitions[label] = " ".join(p for p in parts if p).strip() + while i < len(lines): + line = lines[i] + if line.strip() == "": + # A blank line stays in the footnote only if the next + # non-empty line is indented (a further paragraph). + nxt = i + 1 + if ( + nxt < len(lines) + and lines[nxt].strip() + and lines[nxt][:1] in (" ", "\t") + ): + if current: + paragraphs.append(" ".join(current)) + current = [] + i += 1 + continue + break + if line[:1] in (" ", "\t"): + current.append(line.strip()) + i += 1 + else: + break + if current: + paragraphs.append(" ".join(current)) + definitions[label] = "\n\n".join(paragraphs) else: body_lines.append(lines[i]) i += 1 diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py index eccb3dd..172d152 100644 --- a/tests/substack/test_footnotes.py +++ b/tests/substack/test_footnotes.py @@ -204,6 +204,38 @@ def test_footnote_reference_inside_inline_code_stays_text(self): assert content[0]["content"][0]["text"] == "code [^1]" assert content[0]["content"][0]["marks"] == [{"type": "code"}] + def test_multiparagraph_definition(self): + post = make_post() + md = "Claim.[^1]\n\n[^1]: First para.\n\n Second para." + post.from_markdown(md) + # The second paragraph must stay in the footnote, not leak into the body. + assert [n["type"] for n in body_content(post)] == ["paragraph", "footnote"] + block = footnotes(post)[0] + assert len(block["content"]) == 2 + assert block["content"][0]["content"][0]["text"] == "First para." + assert block["content"][1]["content"][0]["text"] == "Second para." + + def test_multiparagraph_definition_in_middle(self): + post = make_post() + md = ( + "First.[^1]\n\n" + "[^1]: Note para one.\n\n" + " Note para two.\n\n" + "Back to the body." + ) + post.from_markdown(md) + types = [n["type"] for n in body_content(post)] + assert types == ["paragraph", "paragraph", "footnote"] + assert body_content(post)[1]["content"][0]["text"] == "Back to the body." + assert len(footnotes(post)[0]["content"]) == 2 + + def test_footnote_helper_splits_paragraphs(self): + post = make_post() + post.footnote(1, "Para one.\n\nPara two.") + block = footnotes(post)[0] + assert len(block["content"]) == 2 + assert block["content"][1]["content"][0]["text"] == "Para two." + def test_no_footnotes_is_unchanged(self): post = make_post() post.from_markdown("Just a plain paragraph.")