Paragraph-level links, preview tooltips
This commit is contained in:
@@ -2,7 +2,7 @@ import html
|
||||
import re
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Union
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
import lxml.etree
|
||||
from lxml import etree as ET
|
||||
@@ -29,9 +29,10 @@ def text_content(el: lxml.etree.Element) -> str:
|
||||
|
||||
@dataclass
|
||||
class CrossReference:
|
||||
id: str
|
||||
target: Literal["article", "annex"]
|
||||
text: str
|
||||
target: str
|
||||
id: str
|
||||
paragraph: int | None = None
|
||||
|
||||
|
||||
def extract_xrefs(el: lxml.etree.Element, language: Language) -> list[CrossReference]:
|
||||
@@ -69,8 +70,8 @@ def extract_xrefs(el: lxml.etree.Element, language: Language) -> list[CrossRefer
|
||||
# Also, match only at word boundaries to prevent partial matches
|
||||
parts = PATTERN_PARTS[language]
|
||||
patterns = {
|
||||
"article": rf"\b{parts["article"]}\s+(?P<art_num>\d+){parts["exclusion"]}\b",
|
||||
"annex": rf"\b{parts["annex"]}\s+(?P<annex_num>[DILMVX]+){parts["exclusion"]}\b",
|
||||
"article": rf"\b{parts["article"]}\s+(?P<art_num>\d+)(?:[(](?P<parag_num>\d+)[)])?(?:{parts["exclusion"]})",
|
||||
"annex": rf"\b{parts["annex"]}\s+(?P<annex_num>[DILMVX]+)(?:{parts["exclusion"]})",
|
||||
}
|
||||
for key, pattern in patterns.items():
|
||||
matches = re.finditer(pattern, text, flags=re.IGNORECASE)
|
||||
@@ -78,13 +79,54 @@ def extract_xrefs(el: lxml.etree.Element, language: Language) -> list[CrossRefer
|
||||
crossref_id = (
|
||||
match.group("art_num") if key == "article" else match.group("annex_num")
|
||||
)
|
||||
parag_num = match.groupdict().get("parag_num")
|
||||
crossref_text = match.group(0)
|
||||
crossrefs.append(
|
||||
CrossReference(id=crossref_id, text=crossref_text, target=key)
|
||||
CrossReference(
|
||||
target=key,
|
||||
id=crossref_id,
|
||||
paragraph=parag_num,
|
||||
text=crossref_text,
|
||||
)
|
||||
)
|
||||
return crossrefs
|
||||
|
||||
|
||||
def extract_article(doc: ET.ElementBase, article_id: int) -> ET.ElementBase | None:
    """Extract a specific article from a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number. It is zero-padded to three digits
            to build the ``IDENTIFIER`` attribute value that is searched for.

    Returns:
        The first matching ``ARTICLE`` element, or ``None`` when no element
        matches.
    """
    # Evaluate the XPath query once and reuse the result instead of running
    # the same search twice (once for the emptiness check, once for indexing).
    matches = doc.xpath(f".//ARTICLE[@IDENTIFIER='{article_id:03d}']")
    return matches[0] if matches else None
|
||||
|
||||
|
||||
def extract_paragraph(
    doc: ET.ElementBase, article_id: int, paragraph_id: int
) -> ET.ElementBase | None:
    """Extract a specific paragraph from an article in a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number.
        paragraph_id: The paragraph number. Both numbers are zero-padded to
            three digits and joined with a dot (``AAA.PPP``) to build the
            ``IDENTIFIER`` attribute value that is searched for.

    Returns:
        The first matching ``PARAG`` element, or ``None`` when no element
        matches.
    """
    # Evaluate the XPath query once and reuse the result instead of running
    # the same search twice (once for the emptiness check, once for indexing).
    identifier = f"{article_id:03d}.{paragraph_id:03d}"
    matches = doc.xpath(f".//PARAG[@IDENTIFIER='{identifier}']")
    return matches[0] if matches else None
|
||||
|
||||
|
||||
class FormexArticleConverter:
|
||||
"""Converts Formex XML <ARTICLE> elements to semantic HTML5."""
|
||||
|
||||
@@ -136,7 +178,7 @@ class FormexArticleConverter:
|
||||
# Replace the cross-reference text with a link
|
||||
text = text.replace(
|
||||
xref.text,
|
||||
f'<a href="" data-target="{xref.target}" data-id="{xref.id}" class="cross-ref">{xref.text}</a>',
|
||||
f'<a href="" data-target="{xref.target}" data-id="{xref.id}" data-paragraph-id="{xref.paragraph or ''}" class="cross-ref">{xref.text}</a>',
|
||||
)
|
||||
return text
|
||||
|
||||
@@ -418,10 +460,13 @@ class FormexArticleConverter:
|
||||
article_subtitle = self._convert_btx(sti_art) if sti_art is not None else ""
|
||||
|
||||
# Build the header section
|
||||
header = f'<header><h3 class="article-title">{article_title}</h3>'
|
||||
if article_subtitle:
|
||||
header += f'<h4 class="article-subtitle">{article_subtitle}</h4>'
|
||||
header += "</header>"
|
||||
if article_title and article_subtitle:
|
||||
header = f'<header><h3 class="article-title">{article_title}</h3>'
|
||||
if article_subtitle:
|
||||
header += f'<h4 class="article-subtitle">{article_subtitle}</h4>'
|
||||
header += "</header>"
|
||||
else:
|
||||
header = ""
|
||||
|
||||
# Process the content based on what's present
|
||||
content = ""
|
||||
|
||||
@@ -2,7 +2,11 @@ import lxml.etree as ET
|
||||
from fastapi import APIRouter, FastAPI, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from formex_viewer.formex4 import FormexArticleConverter
|
||||
from formex_viewer.formex4 import (
|
||||
FormexArticleConverter,
|
||||
extract_article,
|
||||
extract_paragraph,
|
||||
)
|
||||
from formex_viewer.main import (
|
||||
CellarClient,
|
||||
CellarIdentifier,
|
||||
@@ -121,21 +125,46 @@ def toc(celex_id: str, language: Language = Language.ENG):
|
||||
|
||||
|
||||
@api_router.get("/{celex_id}/articles/{article_id}/{language}")
|
||||
def article(celex_id: str, article_id: int, language: Language = Language.ENG):
|
||||
def article(
|
||||
celex_id: str,
|
||||
article_id: int,
|
||||
language: Language = Language.ENG,
|
||||
):
|
||||
"""
|
||||
Fetch an article from the server.
|
||||
"""
|
||||
xml = _get_fmx4_data(celex_id, language)
|
||||
article = extract_article(xml, article_id=article_id)
|
||||
|
||||
article_xpath = "//ARTICLE"
|
||||
articles = xml.xpath(article_xpath)
|
||||
for article in articles:
|
||||
num = article.get("IDENTIFIER").lstrip("0")
|
||||
if num == str(article_id):
|
||||
return Response(
|
||||
FormexArticleConverter(language=language).convert_article(article),
|
||||
media_type="text/html",
|
||||
)
|
||||
if article is None:
|
||||
return Response(
|
||||
"Article not found",
|
||||
status_code=404,
|
||||
)
|
||||
|
||||
return Response(
|
||||
FormexArticleConverter(language=language).convert_article(article),
|
||||
media_type="text/html",
|
||||
)
|
||||
|
||||
|
||||
@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Fetch a paragraph within an article from the server.

    Responds with the paragraph rendered as HTML, or with a 404 response when
    the requested paragraph cannot be found in the document.
    """
    xml = _get_fmx4_data(celex_id, language)
    parag = extract_paragraph(xml, article_id=article_id, paragraph_id=parag_id)

    # extract_paragraph returns None when nothing matches; answer with a 404
    # (as the article endpoint does) instead of handing None to the converter.
    if parag is None:
        return Response(
            "Paragraph not found",
            status_code=404,
        )

    # The paragraph element is rendered through the article converter.
    return Response(
        FormexArticleConverter(language=language).convert_article(parag),
        media_type="text/html",
    )
|
||||
|
||||
|
||||
app.include_router(api_router, prefix="/api")
|
||||
|
||||
Reference in New Issue
Block a user