import lxml.etree as ET from fastapi import APIRouter, FastAPI, Response from fastapi.middleware.cors import CORSMiddleware from formex_viewer.formex4 import ( FormexArticleConverter, extract_article, extract_paragraph, ) from formex_viewer.main import ( CellarClient, CellarIdentifier, ContentType, Language, SystemName, ) origins = [ "http://localhost:5173", ] app = FastAPI() app.add_middleware( CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) type CacheKey = tuple[str, Language] CACHE: dict[CacheKey, str] = {} def _get_fmx4_data(celex_id: str, language: Language) -> ET.Element: """ Fetch the FMX4 data from the server. """ if (celex_id, language) in CACHE: return CACHE[(celex_id, language)] client = CellarClient(language) cellar_id = CellarIdentifier( system_name=SystemName.CELEX, system_id=celex_id, ) fmx4_data = client.publication_text(cellar_id, ContentType.ZIP_FMX4) xml = ET.fromstring(fmx4_data.encode("utf-8")) CACHE[(celex_id, language)] = xml return xml api_router = APIRouter() @api_router.get("/{celex_id}/articles") def article_ids(celex_id: str, language: Language = Language.ENG): """ Fetch the article IDs from the server. """ xml = _get_fmx4_data(celex_id, language) article_xpath = "//ARTICLE/@IDENTIFIER" article_ids = xml.xpath(article_xpath) article_ids = [int(article_id.lstrip("0")) for article_id in article_ids] article_ids.sort() return article_ids @api_router.get("/{celex_id}/toc/{language}") def toc(celex_id: str, language: Language = Language.ENG): def _extract_text(root: ET.Element, tag: str) -> str: """ Extract text from the given tag in the XML element. """ text = root.xpath(f"{tag}//text()") return "".join(text) if text else "" def _handle_division(division: ET.Element, level: int): title = _extract_text(division, "TITLE/TI") subtitle = _extract_text(division, "TITLE/STI") subdivisions = [] for subdivision in division.xpath("DIVISION") or []: subdivisions.append(_handle_division(subdivision, level + 1)) articles = [] for article in division.xpath("ARTICLE") or []: art_id = article.get("IDENTIFIER") if not art_id: continue art_title = _extract_text(article, "TI.ART") art_subtitle = _extract_text(article, "STI.ART") articles.append( { "id": int(art_id.lstrip("0")), "type": "article", "title": art_title, "subtitle": art_subtitle, } ) return { "type": "division", "title": title, "subtitle": subtitle, "level": level, "content": subdivisions + articles, } """ Fetch the table of contents from the server. """ xml = _get_fmx4_data(celex_id, language) toc = [] for division in xml.xpath("//ENACTING.TERMS/DIVISION"): toc.append(_handle_division(division, 0)) return toc @api_router.get("/{celex_id}/articles/{article_id}/{language}") def article( celex_id: str, article_id: int, language: Language = Language.ENG, ): """ Fetch an article from the server. """ xml = _get_fmx4_data(celex_id, language) article = extract_article(xml, article_id=article_id) if article is None: return Response( "Article not found", status_code=404, ) return Response( FormexArticleConverter(language=language).convert_article(article), media_type="text/html", ) @api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}") def paragraph( celex_id: str, article_id: int, parag_id: int, language: Language = Language.ENG, ): """ Fetch a paragraph within an article from the server. """ xml = _get_fmx4_data(celex_id, language) parag = extract_paragraph(xml, article_id=article_id, paragraph_id=parag_id) if parag is None: return Response( "Paragraph not found", status_code=404, ) return Response( FormexArticleConverter(language=language)._convert_parag(parag), media_type="text/html", ) app.include_router(api_router, prefix="/api")