fix: Improve rendering of TOC panel

feat: Add additional example legislation
fix: Correctly convert single paragraph in FastAPI
2025-05-20 12:08:14 +02:00 · 2025-05-20 12:07:58 +02:00 · 2025-05-20 09:14:47 +02:00 · 2025-05-20 09:05:32 +02:00 · 2025-05-20 08:37:16 +02:00
9 changed files with 571 additions and 457 deletions
--- a/frontend/src/components/CelexSelector/CelexSelector.test.tsx
+++ b/frontend/src/components/CelexSelector/CelexSelector.test.tsx
@@ -12,7 +12,13 @@ describe("CelexSelector", () => {
    expect(getByLabelText("Select example:")).toBeInTheDocument();
    expect(getByRole("combobox")).toBeInTheDocument();
-    const options = getAllByRole("option");
+    const [def, ...options] = getAllByRole("option");
    // First option is the disabled placeholder option
    expect(def).toHaveValue("");
    expect(def).toHaveTextContent("Select an example");
    expect(def).toBeDisabled();
    expect(options).toHaveLength(examples.length);
    for (const i in examples) {
      expect(options[i]).toHaveValue(examples[i].id);
--- a/frontend/src/components/TOC/TOC.module.css
+++ b/frontend/src/components/TOC/TOC.module.css
@@ -1,16 +1,15 @@
 .toc {
  font-size: 0.8rem;
-  min-width: 25vw;
+  flex: 1 0 25vw;
  flex: 1 auto;
  &.hidden {
-    flex: 0 0;
+    display: none;
    min-width: 0;
  }
  transition: flex-basis 0.1s ease-in-out;
  overflow-y: scroll;
  overflow-x: wrap;
  height: 100vh;
  .tocDivision {
    margin-block: 0.5rem;
--- a/frontend/src/components/TOC/TOC.tsx
+++ b/frontend/src/components/TOC/TOC.tsx
@@ -55,15 +55,17 @@ function TOC({ toc }: TOCProps) {
  const [isVisible, setIsVisible] = useState(true);
  return (
-    <nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}>
+    <>
      <button
        onClick={() => setIsVisible(!isVisible)}
        className={styles.toggleButton}
      >
        {isVisible ? "<" : ">"}
      </button>
      <nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}>
        {toc.map((division) => renderDivision(division))}
      </nav>
    </>
  );
 }
 export default TOC;
--- a/frontend/src/lib/examples.ts
+++ b/frontend/src/lib/examples.ts
@@ -1,5 +1,9 @@
 /**
  * Example legislation entries offered for selection in the UI.
  *
  * Each entry pairs a human-readable `name` with an `id` that is used as the
  * option value (presumably the document's CELEX number — confirm against the
  * backend lookup).
  */
 export const examples = [
  { name: "GDPR", id: "32016R0679" },
  { name: "AI Act", id: "32024R1689" },
  { name: "Cybersecurity Act", id: "32019R0881" },
  { name: "Cyber Resilience Act", id: "32024R2847" },
  { name: "Medical Device Regulation", id: "32017R0745" },
  { name: "NIS 2 Directive", id: "32022L2555" },
  { name: "Digital Services Act", id: "32022R2065" },
 ];
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,3 +20,8 @@ formex-viewer = "formex_viewer:main"
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 [dependency-groups]
 dev = [
    "pytest>=8.3.5",
 ]
--- a/src/formex_viewer/formex4.py
+++ b/src/formex_viewer/formex4.py
@@ -2,7 +2,7 @@ import html
 import re
 import warnings
 from dataclasses import dataclass
-from typing import Literal, Optional, Union
+from typing import Literal, Optional, Union, cast
 import lxml.etree
 from lxml import etree as ET
@@ -10,7 +10,7 @@ from lxml import etree as ET
 from formex_viewer.main import Language
-def text_content(el: lxml.etree.Element) -> str:
+def text_content(el: ET._Element) -> str:
    """Get the text content of an XML element, including all child elements."""
    def _iterate(el):
@@ -35,7 +35,7 @@ class CrossReference:
    paragraph: int | None = None
-def extract_xrefs(el: lxml.etree.Element, language: Language) -> list[CrossReference]:
+def extract_xrefs(el: ET._Element, language: Language) -> list[CrossReference]:
    """Extract cross-references from an XML element.
    Args:
@@ -80,19 +80,23 @@ def extract_xrefs(el: lxml.etree.Element, language: Language) -> list[CrossRefer
                match.group("art_num") if key == "article" else match.group("annex_num")
            )
            parag_num = match.groupdict().get("parag_num")
            if key not in ["article", "annex"]:
                raise RuntimeError()
            crossref_text = match.group(0)
            crossrefs.append(
                CrossReference(
                    target=key,
                    id=crossref_id,
-                    paragraph=parag_num,
+                    paragraph=int(parag_num) if parag_num else None,
                    text=crossref_text,
                )
            )
    return crossrefs
-def extract_article(doc: ET.ElementBase, article_id: int) -> ET.ElementBase | None:
+def extract_article(doc: ET._Element, article_id: int) -> ET._Element | None:
    """Extract a specific article from a Formex document.
    Args:
@@ -109,8 +113,8 @@ def extract_article(doc: ET.ElementBase, article_id: int) -> ET.ElementBase | No
 def extract_paragraph(
-    doc: ET.ElementBase, article_id: int, paragraph_id: int
+    doc: ET._Element, article_id: int, paragraph_id: int
-) -> ET.ElementBase | None:
+) -> ET._Element | None:
    """Extract a specific paragraph from an article in a Formex document.
    Args:
@@ -146,7 +150,7 @@ class FormexArticleConverter:
        """Get the tag name with namespace if available."""
        return f"{self.ns_prefix}{tag}"
-    def _get_text(self, element: ET.Element) -> str:
+    def _get_text(self, element: ET._Element) -> str:
        """Get the text content of an element, including all nested text.
        This uses lxml's text_content() method when available, falling back to
@@ -161,7 +165,7 @@ class FormexArticleConverter:
        except AttributeError:
            # Fall back to manual traversal if text_content() is not available
            text = element.text or ""
-            for child in element:
+            for child in element.iterchildren(tag="*"):
                text += self._get_text(child)
                if child.tail:
                    text += child.tail
@@ -182,7 +186,7 @@ class FormexArticleConverter:
        )
        return text
-    def _convert_btx(self, element: ET.Element) -> str:
+    def _convert_btx(self, element: ET._Element) -> str:
        """
        Convert basic text elements (t_btx, t_btx.seq) to HTML.
@@ -202,7 +206,7 @@ class FormexArticleConverter:
                # Replace the cross-reference text with a link
                result = self._replace_xref(result, xref)
-        for child in element:
+        for child in element.iterchildren(tag="*"):
            child_tag = child.tag.replace(self.ns_prefix, "")
            # Process common inline elements
@@ -309,7 +313,7 @@ class FormexArticleConverter:
        return result
-    def _convert_list(self, list_element: ET.Element) -> str:
+    def _convert_list(self, list_element: ET._Element) -> str:
        """Convert a Formex LIST element to HTML list items."""
        result = ""
        # Using lxml's xpath to get direct child ITEM elements
@@ -347,41 +351,40 @@ class FormexArticleConverter:
        return result
-    def _convert_alinea(self, alinea: ET.Element) -> str:
+    def _convert_alinea(self, alinea: ET._Element) -> str:
        """Convert an ALINEA element to HTML."""
        return f'<p class="alinea">{self._convert_btx(alinea)}</p>'
-    def _convert_parag(self, parag: ET.Element) -> str:
+    def _convert_parag(self, parag: ET._Element) -> str:
        """Convert a PARAG (paragraph) element to HTML."""
        identifier = parag.get("IDENTIFIER", "")
        parag_id = self._create_id(identifier) if identifier else ""
        # Get the paragraph number using XPath
        no_parag_elems = parag.xpath(f"./{self._get_tag('NO.PARAG')}")
        parag_num = self._get_text(no_parag_elems[0]) if no_parag_elems else ""
        # Process the alineas within the paragraph
        content = ""
-        for alinea in parag.xpath(f"./{self._get_tag('ALINEA')}"):
+        for child in parag.iterchildren(tag="*"):
-            content += self._convert_alinea(alinea)
+            child_tag = child.tag.replace(self.ns_prefix, "")
-
+            if child_tag == "ALINEA":
-        # Process any comments
+                content += self._convert_alinea(child)
-        for comment in parag.xpath(f"./{self._get_tag('COMMENT')}"):
+            elif child_tag == "COMMENT":
-            content += f'<div class="comment">{self._convert_btx(comment)}</div>'
+                content += f'<div class="comment">{self._convert_btx(child)}</div>'
-
+            elif child_tag == "QUOT.S":
-        # Process any quotations
+                content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
-        for quot in parag.xpath(f"./{self._get_tag('QUOT.S')}"):
+            elif child_tag == "NO.PARAG":
                content += (
-                f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
+                    f'<span class="paragraph-number">{self._convert_btx(child)}</span>'
                )
            else:
                raise RuntimeError(
                    f"Unexpected child element '{child_tag}' in PARAG: {text_content(child)}"
                )
-        return f'<div class="paragraph" data-paragraph-id="{parag_id}"><span class="paragraph-number">{parag_num}</span>{content}</div>'
+        return f'<div class="paragraph" data-paragraph-id="{parag_id}">{content}</div>'
-    def _convert_subdiv(self, subdiv: ET.Element) -> str:
+    def _convert_subdiv(self, subdiv: ET._Element) -> str:
-        """Convert a SUBDIV (subdivision) element to HTML."""
+        """Convert a SUBDIV (subdivision) element to HTML, preserving child order."""
-        # Get the title using XPath
+        # Get the title using XPath (should be the first TITLE child if present)
        title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
        title = ""
        title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
        if title_elems:
            title_elem = title_elems[0]
            # Process TI (title) and STI (subtitle) elements
@@ -396,34 +399,30 @@ class FormexArticleConverter:
            if sti_list:
                title += f'<h5 class="subdivision-subtitle">{" ".join(sti_list)}</h5>'
-        # Process content: either paragraphs, alineas, or nested subdivisions
+        # Process all children in order, skipping TITLE (already handled)
        content = ""
-
+        for child in subdiv.iterchildren(tag="*"):
-        # Process paragraphs directly under this subdivision
+            child_tag = child.tag.replace(self.ns_prefix, "")
-        for parag in subdiv.xpath(f"./{self._get_tag('PARAG')}"):
+            if child_tag == "TITLE":
-            content += self._convert_parag(parag)
+                continue  # already handled
-
+            elif child_tag == "PARAG":
-        # Process alineas directly under this subdivision
+                content += self._convert_parag(child)
-        for alinea in subdiv.xpath(f"./{self._get_tag('ALINEA')}"):
+            elif child_tag == "ALINEA":
-            content += self._convert_alinea(alinea)
+                content += self._convert_alinea(child)
-
+            elif child_tag == "COMMENT":
-        # Process comments directly under this subdivision
+                content += f'<div class="comment">{self._convert_btx(child)}</div>'
-        for comment in subdiv.xpath(f"./{self._get_tag('COMMENT')}"):
+            elif child_tag == "QUOT.S":
-            content += f'<div class="comment">{self._convert_btx(comment)}</div>'
+                content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
-
+            elif child_tag == "SUBDIV":
-        # Process quotations directly under this subdivision
+                content += self._convert_subdiv(child)
-        for quot in subdiv.xpath(f"./{self._get_tag('QUOT.S')}"):
+            else:
-            content += (
+                raise RuntimeError(
-                f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
+                    f"Unexpected child element '{child_tag}' in SUBDIV: {text_content(child)}"
                )
        # Process nested subdivisions directly under this subdivision
        for sub in subdiv.xpath(f"./{self._get_tag('SUBDIV')}"):
            content += self._convert_subdiv(sub)
        return f'<section class="subdivision">{title}{content}</section>'
-    def convert_article(self, article: Union[str, ET.Element]) -> str:
+    def convert_article(self, article: Union[str, ET._Element]) -> str:
        """
        Convert a Formex <ARTICLE> element to HTML5.
@@ -437,7 +436,9 @@ class FormexArticleConverter:
        if isinstance(article, str):
            try:
                parser = ET.XMLParser(remove_blank_text=True)
-                article = ET.fromstring(article.encode("utf-8"), parser)
+                article = cast(
                    ET._Element, ET.fromstring(article.encode("utf-8"), parser)
                )
            except ET.XMLSyntaxError as e:
                return f"<p>Error parsing XML: {e}</p>"
@@ -471,35 +472,25 @@ class FormexArticleConverter:
        # Process the content based on what's present
        content = ""
-        # Check if we have alineas directly under the article
+        # Process all child elements (except TI.ART/STI.ART, which are already handled) in tree order
-        alineas = article.xpath(f"./{self._get_tag('ALINEA')}")
+        for child in article.iterchildren(tag="*"):
-        if alineas:
+            child_tag = child.tag.replace(self.ns_prefix, "")
-            for alinea in alineas:
+            if child_tag in ["TI.ART", "STI.ART"]:
-                content += self._convert_alinea(alinea)
+                continue  # already handled
-
+            elif child_tag == "ALINEA":
-        # Check if we have paragraphs directly under the article
+                content += self._convert_alinea(child)
-        parags = article.xpath(f"./{self._get_tag('PARAG')}")
+            elif child_tag == "PARAG":
-        if parags:
+                content += self._convert_parag(child)
-            for parag in parags:
+            elif child_tag == "COMMENT":
-                content += self._convert_parag(parag)
+                content += f'<div class="comment">{self._convert_btx(child)}</div>'
-
+            elif child_tag == "QUOT.S":
-        # Check for comments directly under the article
+                content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
-        comments = article.xpath(f"./{self._get_tag('COMMENT')}")
+            elif child_tag == "SUBDIV":
-        if comments:
+                content += self._convert_subdiv(child)
-            for comment in comments:
+            else:
-                content += f'<div class="comment">{self._convert_btx(comment)}</div>'
+                raise RuntimeError(
-
+                    f"Unexpected child element '{child_tag}' in ARTICLE: {text_content(child)}"
-        # Check for quotations directly under the article
+                )
        quots = article.xpath(f"./{self._get_tag('QUOT.S')}")
        if quots:
            for quot in quots:
                content += f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
        # Check for subdivisions directly under the article
        subdivs = article.xpath(f"./{self._get_tag('SUBDIV')}")
        if subdivs:
            for subdiv in subdivs:
                content += self._convert_subdiv(subdiv)
        # Assemble the complete article
        return f'<article id="{article_id}" class="formex-article">{header}<div class="article-content">{content}</div></article>'
--- a/src/formex_viewer/server.py
+++ b/src/formex_viewer/server.py
@@ -160,9 +160,14 @@ def paragraph(
    """
    xml = _get_fmx4_data(celex_id, language)
    parag = extract_paragraph(xml, article_id=article_id, paragraph_id=parag_id)
    if parag is None:
        return Response(
            "Paragraph not found",
            status_code=404,
        )
    return Response(
-        FormexArticleConverter(language=language).convert_article(parag),
+        FormexArticleConverter(language=language)._convert_parag(parag),
        media_type="text/html",
    )
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -0,0 +1,52 @@
 import pytest
 from lxml import etree as ET
 from formex_viewer.formex4 import FormexArticleConverter
 from formex_viewer.main import Language
@pytest.fixture
 def converter():
    """Provide a FormexArticleConverter constructed with ``Language.ENG`` for the tests."""
    return FormexArticleConverter(language=Language.ENG)
 def test_convert_tree_order(converter):
    """Test that the order of HTML blocks in the converted article matches the order of elements in the XML tree."""
    xml = """
    <ARTICLE>
        <SUBDIV>
            <TITLE>
                <TI>Subdivision Title</TI>
                <STI>Subdivision Subtitle</STI>
            </TITLE>
            <PARAG IDENTIFIER="001.001">
                <NO.PARAG>1</NO.PARAG>
                <ALINEA>Paragraph 1 text.</ALINEA>
            </PARAG>
            <COMMENT>Comment text.</COMMENT>
            <ALINEA>Alinea text.</ALINEA>
            <QUOT.S>Quotation text.</QUOT.S>
            <SUBDIV>
                <TITLE>
                    <TI>Nested Subdivision</TI>
                </TITLE>
                <ALINEA>Nested alinea.</ALINEA>
            </SUBDIV>
        </SUBDIV>
    </ARTICLE>
    """
    parser = ET.XMLParser(remove_blank_text=True)
    el = ET.fromstring(xml, parser)
    html = converter.convert_article(el)
    # Check that the order of HTML blocks matches the order of elements in the XML tree.
    # str.index raises ValueError when the marker is absent, so each lookup also
    # verifies that the corresponding block was rendered at all.
    idx_title = html.index("Subdivision Title")
    idx_parag = html.index('class="paragraph"')
    idx_comment = html.index("Comment text.")
    idx_alinea = html.index("Alinea text.")
    idx_quot = html.index("Quotation text.")
    idx_nested = html.index("Nested Subdivision")
    # The order in the XML: title, parag, comment, alinea, quot, nested subdiv
    assert idx_title < idx_parag < idx_comment < idx_alinea < idx_quot < idx_nested
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
Adrian Rumpold	58bd1160c1	fix: Improve rendering of TOC panel	2025-05-20 12:08:14 +02:00
Adrian Rumpold	debaf567ea	feat: Add additional example legislation	2025-05-20 12:07:58 +02:00
Adrian Rumpold	56d271d0df	fix: Correctly convert single paragraph in FastAPI	2025-05-20 09:14:47 +02:00
Adrian Rumpold	f0d4214d17	fix: Preserve XML tree order in Formex parser	2025-05-20 09:05:32 +02:00
Adrian Rumpold	56b5e3e3a4	fix: Type hints in Formex parser	2025-05-20 08:37:16 +02:00