# Source: formex-viewer/src/formex_viewer/server.py
# Snapshot: 2025-05-20 09:14:47 +02:00
# Repository viewer metadata: 176 lines, 4.5 KiB, Python

import lxml.etree as ET
from fastapi import APIRouter, FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
from formex_viewer.formex4 import (
FormexArticleConverter,
extract_article,
extract_paragraph,
)
from formex_viewer.main import (
CellarClient,
CellarIdentifier,
ContentType,
Language,
SystemName,
)
origins = [
"http://localhost:5173",
]
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
type CacheKey = tuple[str, Language]
CACHE: dict[CacheKey, str] = {}
def _get_fmx4_data(celex_id: str, language: Language) -> ET.Element:
    """
    Return the parsed FMX4 document for a CELEX identifier and language.

    The module-level ``CACHE`` is consulted first. On a miss the document
    text is requested through ``CellarClient``, parsed with lxml, stored in
    the cache under ``(celex_id, language)`` and returned.
    """
    key = (celex_id, language)
    try:
        return CACHE[key]
    except KeyError:
        pass  # Not cached yet: fetch and parse below.

    cellar_client = CellarClient(language)
    identifier = CellarIdentifier(
        system_name=SystemName.CELEX,
        system_id=celex_id,
    )
    raw_text = cellar_client.publication_text(identifier, ContentType.ZIP_FMX4)

    # The text is handed to lxml as UTF-8 encoded bytes rather than as str.
    root = ET.fromstring(raw_text.encode("utf-8"))
    CACHE[key] = root
    return root
# Router that collects the document endpoints; it is mounted on `app` under "/api".
api_router = APIRouter()
@api_router.get("/{celex_id}/articles")
def article_ids(celex_id: str, language: Language = Language.ENG):
    """
    List the article numbers of a document in ascending order.

    Every ``IDENTIFIER`` attribute of an ``ARTICLE`` element anywhere in the
    document is converted to an integer.

    Args:
        celex_id: CELEX identifier of the document.
        language: Language version to load (query parameter, defaults to
            ``Language.ENG``).

    Returns:
        A sorted list of integers.

    Raises:
        ValueError: If an identifier is not a plain decimal number.
    """
    xml = _get_fmx4_data(celex_id, language)
    identifiers = xml.xpath("//ARTICLE/@IDENTIFIER")
    # int() already accepts zero-padded input ("001" -> 1). Stripping the
    # zeros first turned an all-zero identifier such as "000" into "" and
    # made the conversion fail.
    return sorted(int(identifier) for identifier in identifiers)
@api_router.get("/{celex_id}/toc/{language}")
def toc(celex_id: str, language: Language = Language.ENG):
    """
    Fetch the table of contents from the server.

    The result has one entry per ``DIVISION`` found directly under
    ``ENACTING.TERMS``. Each entry is a dict with the keys ``type``
    (``"division"``), ``title``, ``subtitle``, ``level`` (0 for top-level
    divisions, +1 per nesting step) and ``content``. ``content`` lists the
    nested divisions first, followed by the division's own articles as dicts
    with the keys ``id``, ``type`` (``"article"``), ``title`` and
    ``subtitle``.

    Articles that are not inside a ``DIVISION`` are not listed.
    """

    def _extract_text(root: ET.Element, tag: str) -> str:
        """
        Extract text from the given tag in the XML element.

        All text nodes below ``tag`` (relative to ``root``) are concatenated;
        the result is an empty string when nothing matches.
        """
        return "".join(root.xpath(f"{tag}//text()"))

    def _handle_division(division: ET.Element, level: int):
        """Convert a DIVISION element, including nested ones, to a TOC entry."""
        title = _extract_text(division, "TITLE/TI")
        subtitle = _extract_text(division, "TITLE/STI")

        subdivisions = [
            _handle_division(subdivision, level + 1)
            for subdivision in division.xpath("DIVISION")
        ]

        articles = []
        for article in division.xpath("ARTICLE"):
            art_id = article.get("IDENTIFIER")
            if not art_id:
                # Articles without an identifier cannot be addressed; skip.
                continue
            articles.append(
                {
                    # int() copes with zero padding itself; stripping the
                    # zeros first broke all-zero identifiers ("000" -> "").
                    "id": int(art_id),
                    "type": "article",
                    "title": _extract_text(article, "TI.ART"),
                    "subtitle": _extract_text(article, "STI.ART"),
                }
            )

        return {
            "type": "division",
            "title": title,
            "subtitle": subtitle,
            "level": level,
            "content": subdivisions + articles,
        }

    xml = _get_fmx4_data(celex_id, language)
    return [
        _handle_division(division, 0)
        for division in xml.xpath("//ENACTING.TERMS/DIVISION")
    ]
@api_router.get("/{celex_id}/articles/{article_id}/{language}")
def article(
    celex_id: str,
    article_id: int,
    language: Language = Language.ENG,
):
    """
    Return a single article of a document rendered as HTML.

    The document is loaded via ``_get_fmx4_data`` and the article is looked
    up with ``extract_article``. When the lookup yields ``None`` the response
    is a 404 with the body "Article not found"; otherwise the converted
    article is returned with media type ``text/html``.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_article(document, article_id=article_id)

    if node is not None:
        converter = FormexArticleConverter(language=language)
        html = converter.convert_article(node)
        return Response(html, media_type="text/html")

    return Response("Article not found", status_code=404)
@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Return a single paragraph of an article rendered as HTML.

    The document is loaded via ``_get_fmx4_data`` and the paragraph is looked
    up with ``extract_paragraph``. When the lookup yields ``None`` the
    response is a 404 with the body "Paragraph not found"; otherwise the
    converted paragraph is returned with media type ``text/html``.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_paragraph(
        document,
        article_id=article_id,
        paragraph_id=parag_id,
    )

    if node is not None:
        converter = FormexArticleConverter(language=language)
        # NOTE(review): relies on the converter's underscore-prefixed method.
        html = converter._convert_parag(node)
        return Response(html, media_type="text/html")

    return Response("Paragraph not found", status_code=404)
# Expose every route registered on `api_router` under the "/api" prefix.
app.include_router(api_router, prefix="/api")