# formex-viewer/src/formex_viewer/server.py

import lxml.etree as ET
from fastapi import APIRouter, FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware

from formex_viewer.formex4 import (
    FormexArticleConverter,
    extract_article,
    extract_paragraph,
)
from formex_viewer.main import (
    CellarClient,
    CellarIdentifier,
    ContentType,
    Language,
    SystemName,
)

# Origins allowed to make cross-origin requests to this API.
origins = [
    "http://localhost:5173",
]
app = FastAPI()
# CORS policy: only the origins listed above are accepted, credentials are
# allowed, and no restriction is placed on HTTP methods or request headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

type CacheKey = tuple[str, Language]

CACHE: dict[CacheKey, str] = {}


def _get_fmx4_data(celex_id: str, language: Language) -> ET.Element:
    """
    Return the parsed FMX4 document for a CELEX id in the given language.

    The module-level CACHE is consulted first. On a miss, the publication
    text is requested through a CellarClient, parsed with lxml, stored in
    the cache under (celex_id, language) and then returned.
    """
    key = (celex_id, language)

    try:
        return CACHE[key]
    except KeyError:
        # Not fetched yet: fall through and download the document.
        pass

    cellar = CellarClient(language)
    identifier = CellarIdentifier(
        system_name=SystemName.CELEX,
        system_id=celex_id,
    )
    raw_text = cellar.publication_text(identifier, ContentType.ZIP_FMX4)

    # lxml is fed bytes here, so the text is encoded before parsing.
    root = ET.fromstring(raw_text.encode("utf-8"))
    CACHE[key] = root
    return root


# Router collecting the endpoints defined below; it is mounted on `app`
# under the "/api" prefix at the end of this module.
api_router = APIRouter()


@api_router.get("/{celex_id}/articles")
def article_ids(celex_id: str, language: Language = Language.ENG):
    """
    List the article numbers of a document in ascending order.

    Args:
        celex_id: CELEX identifier of the document.
        language: Language of the document; defaults to Language.ENG.

    Returns:
        A sorted list with the integer value of every non-empty
        ARTICLE/@IDENTIFIER attribute found in the document.

    Raises:
        ValueError: if an identifier is not a valid integer literal.
    """
    xml = _get_fmx4_data(celex_id, language)

    # int() already accepts zero-padded strings ("007" -> 7), so the
    # identifier is converted as-is. Stripping the zeros first would turn
    # "0" / "000" into "" and make int() raise. Empty identifiers are
    # skipped, mirroring how the table of contents treats them.
    return sorted(
        int(identifier)
        for identifier in xml.xpath("//ARTICLE/@IDENTIFIER")
        if identifier
    )


@api_router.get("/{celex_id}/toc/{language}")
def toc(celex_id: str, language: Language = Language.ENG):
    """
    Build the table of contents of a document.

    Every DIVISION found directly under ENACTING.TERMS becomes a top-level
    entry (level 0). Each entry carries its title, subtitle, nesting level
    and a "content" list holding its sub-divisions followed by its articles.

    Args:
        celex_id: CELEX identifier of the document.
        language: Language of the document.

    Returns:
        A list of nested "division" dictionaries.

    Raises:
        ValueError: if an article IDENTIFIER is not a valid integer literal.
    """

    def _extract_text(root: ET.Element, tag: str) -> str:
        """
        Concatenate every text node found under the relative path `tag`.

        Returns an empty string when nothing matches.
        """
        return "".join(root.xpath(f"{tag}//text()"))

    def _handle_division(division: ET.Element, level: int):
        """
        Convert a DIVISION element (and, recursively, its children) into a
        table-of-contents entry at the given nesting level.
        """
        subdivisions = [
            _handle_division(subdivision, level + 1)
            for subdivision in division.xpath("DIVISION")
        ]

        articles = []
        for article in division.xpath("ARTICLE"):
            art_id = article.get("IDENTIFIER")
            if not art_id:
                # Articles without an identifier cannot be addressed.
                continue

            articles.append(
                {
                    # int() handles zero-padded identifiers itself;
                    # stripping the zeros first would break on "0"/"000".
                    "id": int(art_id),
                    "type": "article",
                    "title": _extract_text(article, "TI.ART"),
                    "subtitle": _extract_text(article, "STI.ART"),
                }
            )

        return {
            "type": "division",
            "title": _extract_text(division, "TITLE/TI"),
            "subtitle": _extract_text(division, "TITLE/STI"),
            "level": level,
            "content": subdivisions + articles,
        }

    xml = _get_fmx4_data(celex_id, language)
    return [
        _handle_division(division, 0)
        for division in xml.xpath("//ENACTING.TERMS/DIVISION")
    ]


@api_router.get("/{celex_id}/articles/{article_id}/{language}")
def article(
    celex_id: str,
    article_id: int,
    language: Language = Language.ENG,
):
    """
    Return a single article of a document converted to HTML.

    Responds with status 404 and the body "Article not found" when
    extract_article yields None for the requested article_id.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_article(document, article_id=article_id)

    if node is not None:
        converter = FormexArticleConverter(language=language)
        return Response(
            converter.convert_article(node),
            media_type="text/html",
        )

    return Response("Article not found", status_code=404)


@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Return one paragraph of an article converted to HTML.

    Responds with status 404 and the body "Paragraph not found" when
    extract_paragraph yields None for the requested article/paragraph pair.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_paragraph(
        document,
        article_id=article_id,
        paragraph_id=parag_id,
    )

    if node is not None:
        converter = FormexArticleConverter(language=language)
        return Response(
            converter._convert_parag(node),
            media_type="text/html",
        )

    return Response("Paragraph not found", status_code=404)


# Mount the endpoints registered on api_router under the "/api" prefix.
app.include_router(api_router, prefix="/api")