176 lines
4.5 KiB
Python
176 lines
4.5 KiB
Python
import lxml.etree as ET
|
|
from fastapi import APIRouter, FastAPI, Response
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
from formex_viewer.formex4 import (
|
|
FormexArticleConverter,
|
|
extract_article,
|
|
extract_paragraph,
|
|
)
|
|
from formex_viewer.main import (
|
|
CellarClient,
|
|
CellarIdentifier,
|
|
ContentType,
|
|
Language,
|
|
SystemName,
|
|
)
|
|
|
|
# Origins permitted to call this API cross-origin.
# NOTE(review): presumably the local frontend dev server — confirm before deploying.
origins = ["http://localhost:5173"]

# Application instance; the CORS middleware is registered right after creation.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],  # every HTTP method
    allow_headers=["*"],  # every request header
)
|
|
|
|
type CacheKey = tuple[str, Language]
|
|
|
|
CACHE: dict[CacheKey, str] = {}
|
|
|
|
|
|
def _get_fmx4_data(celex_id: str, language: Language) -> ET.Element:
    """
    Return the parsed FMX4 document for a CELEX identifier and language.

    The module-level ``CACHE`` is consulted first. On a miss the document is
    requested through ``CellarClient.publication_text`` with
    ``ContentType.ZIP_FMX4``, parsed with lxml, stored in ``CACHE`` and
    returned.
    """
    key = (celex_id, language)

    try:
        return CACHE[key]
    except KeyError:
        # Not cached yet: fall through and fetch it.
        pass

    identifier = CellarIdentifier(
        system_name=SystemName.CELEX,
        system_id=celex_id,
    )
    raw_text = CellarClient(language).publication_text(
        identifier, ContentType.ZIP_FMX4
    )

    # The text is encoded to bytes before parsing.
    # NOTE(review): presumably because lxml rejects ``str`` input that carries
    # an XML encoding declaration — confirm.
    root = ET.fromstring(raw_text.encode("utf-8"))
    CACHE[key] = root

    return root
|
|
|
|
|
|
# Router that collects the endpoints declared below with ``@api_router.get``.
api_router = APIRouter()
|
|
|
|
|
|
@api_router.get("/{celex_id}/articles")
def article_ids(celex_id: str, language: Language = Language.ENG):
    """
    Fetch the article IDs from the server.

    Collects the ``IDENTIFIER`` attribute of every ``ARTICLE`` element in the
    document and returns the values as a sorted list of integers.

    Raises:
        ValueError: if an identifier is not a plain integer literal.
    """
    xml = _get_fmx4_data(celex_id, language)

    identifiers = xml.xpath("//ARTICLE/@IDENTIFIER")

    # ``int`` already accepts zero-padded input ("007" -> 7). Stripping the
    # zeros first turned an all-zero identifier into "" and raised ValueError.
    return sorted(int(identifier) for identifier in identifiers)
|
|
|
|
|
|
@api_router.get("/{celex_id}/toc/{language}")
def toc(celex_id: str, language: Language = Language.ENG):
    """
    Fetch the table of contents from the server.

    Returns a list with one entry per ``DIVISION`` found directly under
    ``ENACTING.TERMS``. Each entry is a dict with the keys ``type``
    (``"division"``), ``title``, ``subtitle``, ``level`` and ``content``;
    ``content`` lists the nested divisions first, followed by the division's
    articles (dicts with ``id``, ``type``, ``title`` and ``subtitle``).

    Raises:
        ValueError: if an article identifier is not a plain integer literal.
    """

    def _extract_text(root: ET.Element, tag: str) -> str:
        """
        Extract text from the given tag in the XML element.

        All text nodes below ``tag`` are concatenated; the result is an empty
        string when nothing matches.
        """
        return "".join(root.xpath(f"{tag}//text()"))

    def _handle_division(division: ET.Element, level: int):
        """
        Convert one ``DIVISION`` element, and recursively its sub-divisions,
        into the dict structure described in the ``toc`` docstring.
        """
        subdivisions = [
            _handle_division(subdivision, level + 1)
            for subdivision in division.xpath("DIVISION")
        ]

        articles = []
        for article in division.xpath("ARTICLE"):
            art_id = article.get("IDENTIFIER")
            if not art_id:
                # Articles without an identifier cannot be addressed; skip.
                continue

            articles.append(
                {
                    # ``int`` accepts zero-padded input directly; stripping
                    # the zeros first crashed on an all-zero identifier.
                    "id": int(art_id),
                    "type": "article",
                    "title": _extract_text(article, "TI.ART"),
                    "subtitle": _extract_text(article, "STI.ART"),
                }
            )

        return {
            "type": "division",
            "title": _extract_text(division, "TITLE/TI"),
            "subtitle": _extract_text(division, "TITLE/STI"),
            "level": level,
            "content": subdivisions + articles,
        }

    xml = _get_fmx4_data(celex_id, language)

    return [
        _handle_division(division, 0)
        for division in xml.xpath("//ENACTING.TERMS/DIVISION")
    ]
|
|
|
|
|
|
@api_router.get("/{celex_id}/articles/{article_id}/{language}")
def article(
    celex_id: str,
    article_id: int,
    language: Language = Language.ENG,
):
    """
    Fetch an article from the server.

    Responds with the article converted by ``FormexArticleConverter`` as
    ``text/html``, or with a 404 response when ``extract_article`` yields
    ``None``.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_article(document, article_id=article_id)

    if node is not None:
        converter = FormexArticleConverter(language=language)
        return Response(
            converter.convert_article(node),
            media_type="text/html",
        )

    return Response("Article not found", status_code=404)
|
|
|
|
|
|
@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Fetch a paragraph within an article from the server.

    Responds with the paragraph converted by ``FormexArticleConverter`` as
    ``text/html``, or with a 404 response when ``extract_paragraph`` yields
    ``None``.
    """
    document = _get_fmx4_data(celex_id, language)
    node = extract_paragraph(
        document,
        article_id=article_id,
        paragraph_id=parag_id,
    )

    if node is not None:
        converter = FormexArticleConverter(language=language)
        return Response(
            converter._convert_parag(node),
            media_type="text/html",
        )

    return Response("Paragraph not found", status_code=404)
|
|
|
|
|
|
# Mount every route registered on ``api_router`` under the ``/api`` prefix.
app.include_router(api_router, prefix="/api")
|