Compare commits

..

22 Commits

Author SHA1 Message Date
Adrian Rumpold
58bd1160c1 fix: Improve rendering of TOC panel 2025-05-20 12:08:14 +02:00
Adrian Rumpold
debaf567ea feat: Add additional example legislation 2025-05-20 12:07:58 +02:00
Adrian Rumpold
56d271d0df fix: Correctly convert single paragraph in FastAPI 2025-05-20 09:14:47 +02:00
Adrian Rumpold
f0d4214d17 fix: Preserve XML tree order in Formex parser 2025-05-20 09:05:32 +02:00
Adrian Rumpold
56b5e3e3a4 fix: Type hints in Formex parser 2025-05-20 08:37:16 +02:00
Adrian Rumpold
1d467c827a Fix panels to equal size 2025-05-07 13:00:45 +02:00
Adrian Rumpold
1aa2f541dc Keep history 2025-04-30 12:17:43 +02:00
Adrian Rumpold
2886989835 Fix history state manipulation 2025-04-30 12:15:44 +02:00
Adrian Rumpold
7dd913df7b Paragraph-level links, preview tooltips 2025-04-30 12:04:38 +02:00
Adrian Rumpold
ea7885eeee Update README 2025-04-30 08:32:11 +02:00
Adrian Rumpold
894d4f50ef Add high-level architecture to README 2025-04-29 14:12:32 +02:00
Adrian Rumpold
8aced6c67a Fix cross-ref hyperlink behavior 2025-04-29 10:15:00 +02:00
Adrian Rumpold
860c67b00b Fix xref links 2025-04-29 09:41:51 +02:00
Adrian Rumpold
04f46e3893 Article cross-reference linking 2025-04-29 09:34:14 +02:00
Adrian Rumpold
9597ccc3bd Fix Panel test setup 2025-04-29 07:37:15 +02:00
Adrian Rumpold
f113c72c10 Switch to Axios, Panel component test 2025-04-29 07:33:01 +02:00
Adrian Rumpold
3e5d465356 Tests and fixes for ArticleSelector component 2025-04-25 12:42:29 +02:00
Adrian Rumpold
727622755f More component tests 2025-04-25 11:31:31 +02:00
Adrian Rumpold
b281059218 Add test for the language switcher 2025-04-25 11:14:28 +02:00
Adrian Rumpold
00c32f72c2 Fix static asset serving in Modal app 2025-04-25 09:24:46 +02:00
Adrian Rumpold
abb415c380 Improved article selector with titles and structure 2025-04-25 09:18:53 +02:00
Adrian Rumpold
f21158c6c0 Correctly extract TOC text entries
The previous code could not correctly handle nested XML elements in the TOC text entries.
2025-04-25 09:18:23 +02:00
43 changed files with 6128 additions and 610 deletions

View File

@@ -0,0 +1,36 @@
## High-level architecture
```mermaid
flowchart TD
Cellar[Cellar API]
Frontend[React Frontend]
subgraph "FastAPI backend"
client[Cellar Client]
conv[Formex -> HTML renderer]
FastAPI[REST API]
end
Cellar --> meta@{ shape: docs, label: "XML/RDF Metadata"}
Cellar --> docs@{ shape: docs, label: "Formex 4 content"}
meta --> client
docs --> client
client -- Formex --> FastAPI
client -- Formex --> conv
conv -- HTML --> FastAPI
FastAPI -- TOC, HTML article text --> Frontend
```
## Resources
- Formex 4
- [XML schema](https://op.europa.eu/documents/3938058/5910419/formex_manual_on_screen_version.html/)
- [Physical specifications](https://op.europa.eu/documents/3938058/5896514/physical-specifications/)
- [Cellar](https://op.europa.eu/en/web/cellar/home)
- [Publications API](https://op.europa.eu/en/web/cellar/cellar-data/publications)
- [Metadata REST API](https://op.europa.eu/en/web/cellar/cellar-data/metadata/metadata-notices)
- [Metadata SPARQL API](https://op.europa.eu/en/web/cellar/cellar-data/metadata/knowledge-graph)

View File

@@ -0,0 +1,64 @@
import { act } from "@testing-library/react";
import type * as ZustandExportedTypes from "zustand";
export * from "zustand";
// Grab the real zustand factories; the mocked `create`/`createStore` in this file delegate to them.
const { create: actualCreate, createStore: actualCreateStore } =
jest.requireActual<typeof ZustandExportedTypes>("zustand");
// Registry of reset callbacks, one per store created through the mocked factories in this file.
export const storeResetFns = new Set<() => void>();
/**
 * Creates a store with the real zustand `create` and registers a callback
 * that puts the store back into its initial state.
 *
 * @param stateCreator - The state creator passed on to zustand's `create`.
 * @returns The store returned by the real `create`.
 */
function createUncurried<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) {
  const boundStore = actualCreate(stateCreator);
  // Capture the initial state once, at creation time.
  const pristineState = boundStore.getInitialState();
  const restore = () => {
    // `true` replaces the whole state instead of merging into it.
    boundStore.setState(pristineState, true);
  };
  storeResetFns.add(restore);
  return boundStore;
}
/**
 * Mocked `create`: behaves like the real one, but every store it creates
 * also gets a reset callback registered in `storeResetFns`.
 */
export const create = (<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  console.log("zustand create mock");
  // Curried usage passes no state creator here; hand back the factory itself.
  if (typeof stateCreator !== "function") {
    return createUncurried;
  }
  return createUncurried(stateCreator);
}) as typeof ZustandExportedTypes.create;
/**
 * Creates a store with the real zustand `createStore` and registers a
 * callback that puts the store back into its initial state.
 *
 * @param stateCreator - The state creator passed on to zustand's `createStore`.
 * @returns The store returned by the real `createStore`.
 */
function createStoreUncurried<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) {
  const vanillaStore = actualCreateStore(stateCreator);
  // Capture the initial state once, at creation time.
  const pristineState = vanillaStore.getInitialState();
  const restore = () => {
    // `true` replaces the whole state instead of merging into it.
    vanillaStore.setState(pristineState, true);
  };
  storeResetFns.add(restore);
  return vanillaStore;
}
/**
 * Mocked `createStore`: behaves like the real one, but every store it creates
 * also gets a reset callback registered in `storeResetFns`.
 */
export const createStore = (<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  console.log("zustand createStore mock");
  // Curried usage passes no state creator here; hand back the factory itself.
  if (typeof stateCreator !== "function") {
    return createStoreUncurried;
  }
  return createStoreUncurried(stateCreator);
}) as typeof ZustandExportedTypes.createStore;
// After every test, run each registered reset callback inside `act`
// so that all stores are back in their initial state.
afterEach(() => {
  act(() => {
    for (const restoreStore of storeResetFns) {
      restoreStore();
    }
  });
});

View File

@@ -1,28 +1,33 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import js from "@eslint/js";
import pluginQuery from "@tanstack/eslint-plugin-query";
import reactHooks from "eslint-plugin-react-hooks";
import reactRefresh from "eslint-plugin-react-refresh";
import globals from "globals";
import tseslint from "typescript-eslint";
export default tseslint.config(
{ ignores: ['dist'] },
{ ignores: ["dist"] },
{
extends: [js.configs.recommended, ...tseslint.configs.recommended],
files: ['**/*.{ts,tsx}'],
extends: [
js.configs.recommended,
...tseslint.configs.recommended,
...pluginQuery.configs["flat/recommended"],
],
files: ["**/*.{ts,tsx}"],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
},
plugins: {
'react-hooks': reactHooks,
'react-refresh': reactRefresh,
"react-hooks": reactHooks,
"react-refresh": reactRefresh,
},
rules: {
...reactHooks.configs.recommended.rules,
'react-refresh/only-export-components': [
'warn',
"react-refresh/only-export-components": [
"warn",
{ allowConstantExport: true },
],
},
},
)
}
);

12
frontend/jest.config.js Normal file
View File

@@ -0,0 +1,12 @@
/** @type {import('ts-jest').JestConfigWithTsJest} **/
export default {
// Base configuration comes from the ts-jest preset.
preset: "ts-jest",
// Component tests need a DOM, so run them in jsdom.
testEnvironment: "jsdom",
transform: {
// Compile .ts/.tsx files through ts-jest using the app's TypeScript config.
"^.+\\.tsx?$": ["ts-jest", { tsconfig: "tsconfig.app.json" }],
},
moduleNameMapper: {
// Stylesheet imports are redirected to jest-css-modules instead of being loaded as code.
"\\.(css|less|scss|sss|styl)$": "<rootDir>/node_modules/jest-css-modules",
},
// Extra setup executed for each test file once the test framework is installed.
setupFilesAfterEnv: ["<rootDir>/jest.setup.ts"],
};

1
frontend/jest.setup.ts Normal file
View File

@@ -0,0 +1 @@
import "@testing-library/jest-dom";

File diff suppressed because it is too large Load Diff

View File

@@ -7,11 +7,14 @@
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
"preview": "vite preview",
"test": "jest"
},
"dependencies": {
"@floating-ui/react": "^0.27.8",
"@tanstack/react-query": "^5.74.4",
"@tanstack/react-query-devtools": "^5.74.6",
"axios": "^1.9.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-router-dom": "^7.5.1",
@@ -19,14 +22,25 @@
},
"devDependencies": {
"@eslint/js": "^9.22.0",
"@jest/globals": "^29.7.0",
"@tanstack/eslint-plugin-query": "^5.73.3",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/jest": "^29.5.14",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.2",
"@vitejs/plugin-react": "^4.3.4",
"eslint": "^9.22.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0",
"jest": "^29.7.0",
"jest-css-modules": "^2.1.0",
"jest-environment-jsdom": "^29.7.0",
"ts-jest": "^29.3.2",
"ts-node": "^10.9.2",
"typescript": "~5.7.2",
"typescript-eslint": "^8.26.1",
"typescript-plugin-css-modules": "^5.1.0",

View File

@@ -1,64 +1,42 @@
import { useQueries } from "@tanstack/react-query";
import { getArticleIds, getToc } from "./lib/api";
import { Language } from "./lib/types";
import ArticleSelector from "./components/ArticleSelector/ArticleSelector";
import Panel from "./components/Panel/Panel";
import TOC from "./components/TOC/TOC";
import useNavState from "./store/navStore";
import useUIStore from "./store/uiStore";
import styles from "./App.module.css";
import CelexSelector from "./components/CelexSelector/CelexSelector";
import { useTOC } from "./hooks/toc";
function App() {
const { numPanels, addPanel } = useUIStore();
const { celexId, articleId } = useNavState();
const results = useQueries({
queries: [
{
queryKey: ["articleIds", celexId],
queryFn: () => getArticleIds(celexId!),
enabled: !!celexId,
},
{
queryKey: ["toc", celexId],
queryFn: () => getToc(celexId!, Language.ENG),
enabled: !!celexId,
},
],
});
const isPending = results.some((result) => result.isPending);
const error = results.find((result) => result.isError);
const numPanels = useUIStore((state) => state.numPanels);
const addPanel = useUIStore((state) => state.addPanel);
const { data: toc, isPending, error } = useTOC();
if (isPending) {
return <div>Loading...</div>;
}
if (error) {
return <div>Error: {error.error?.message}</div>;
return <div>Error: {error.message}</div>;
}
return (
<div className={styles.App}>
<div className={styles.controls}>
<CelexSelector />
<ArticleSelector articleIds={results[0].data!} />
<ArticleSelector toc={toc} />
<button onClick={addPanel}>Add Panel</button>
</div>
<div className={styles.panelContainer}>
<TOC toc={results[1].data!} />
<TOC toc={toc} />
{Array.from({ length: numPanels }, (_, index) => (
<Panel
key={index}
celexId={celexId!}
language={
Object.values(Language)[index % Object.values(Language).length]
}
articleId={articleId!}
/>
))}
</div>

View File

@@ -0,0 +1,4 @@
/* Fixed-width article dropdown; over-long labels are meant to end in an ellipsis. */
.articleSelector {
width: 40ch;
/* NOTE(review): text-overflow normally needs clipped overflow to have an effect — confirm it applies to this <select>. */
text-overflow: ellipsis;
}

View File

@@ -0,0 +1,60 @@
import { render } from "@testing-library/react";
import type { Division } from "../../lib/types";
import ArticleSelector from "./ArticleSelector";
describe("ArticleSelector", () => {
  // Fixture: a top-level division (level 0) that contains one nested
  // division (level 2) holding two articles.
  const mockDivision: Division = {
    type: "division",
    title: "Chapter 1",
    subtitle: "Introduction",
    level: 0,
    content: [
      {
        type: "division",
        title: "Section 1.1",
        subtitle: "Overview",
        level: 2,
        content: [
          { type: "article", id: 1, title: "Article 1", subtitle: "Details" },
          { type: "article", id: 2, title: "Article 2", subtitle: "Summary" },
        ],
      },
    ],
  };

  // Renders the selector with the fixture as its only TOC entry.
  const renderSelector = () =>
    render(<ArticleSelector toc={[mockDivision]} />);

  test("renders a top-level division as an optgroup", () => {
    const view = renderSelector();

    expect(
      view.getByRole("group", { name: "Chapter 1: Introduction" })
    ).toBeInTheDocument();
  });

  test("renders nested divisions as options", () => {
    const view = renderSelector();

    expect(view.getByText("Article 1: Details")).toBeInTheDocument();
    expect(view.getByText("Article 2: Summary")).toBeInTheDocument();
  });

  test("flattens nested divisions correctly", () => {
    const view = renderSelector();

    // Two options rendered
    expect(view.container.querySelectorAll("option")).toHaveLength(2);
  });
});

View File

@@ -1,33 +1,57 @@
import useNavState from "../../store/navStore";
import { Fragment, JSX } from "react";
import type { Division } from "../../lib/types";
import useNavStore from "../../store/navStore";
import styles from "./ArticleSelector.module.css";
type ArticleSelectorProps = {
articleIds: number[];
toc: Division[];
};
function ArticleSelector({ articleIds }: ArticleSelectorProps) {
const { articleId, setArticleId } = useNavState();
/**
 * Renders a division and its contents for use inside a `select` element.
 *
 * A top-level division (`level === 0`) becomes an `optgroup` labelled
 * "title: subtitle". HTML does not support nested optgroups, so any deeper
 * division is flattened into a fragment that contributes only its options.
 *
 * @param div - The division to render.
 * @returns The rendered division, suitable for use inside a `select` tag.
 */
function renderDivision(div: Division): JSX.Element {
  const contents = div.content.map((c) => {
    if (c.type === "division") {
      return renderDivision(c);
    }
    // Key options by the article id rather than by their display text:
    // the id is the article's identity, whereas two entries with the same
    // title/subtitle would otherwise end up with duplicate React keys.
    return (
      <option key={c.id} value={c.id}>
        {`${c.title}: ${c.subtitle}`}
      </option>
    );
  });

  const title = `${div.title}: ${div.subtitle}`;
  if (div.level === 0) {
    // For top-level divisions, we can use optgroup
    return (
      <optgroup key={title} label={title}>
        {contents}
      </optgroup>
    );
  }
  // HTML does not support nested optgroups, so we need to flatten the structure
  return <Fragment key={title}>{contents}</Fragment>;
}
function ArticleSelector({ toc }: ArticleSelectorProps) {
const articleId = useNavStore.use.articleId();
const setArticleId = useNavStore.use.setArticleId();
return (
<>
{articleId && articleId > 1 && (
<button onClick={() => setArticleId(articleId - 1)}>prev</button>
)}
<select
value={articleId || undefined}
onChange={(e) => {
const id = parseInt(e.currentTarget.value);
setArticleId(id);
}}
className={styles.articleSelector}
onChange={(e) => setArticleId(parseInt(e.target.value))}
>
{articleIds.map((id) => (
<option key={id} value={id}>
Article {id}
</option>
))}
{toc.map(renderDivision)}
</select>
{articleId && articleId < articleIds[articleIds.length - 1] && (
<button onClick={() => setArticleId(articleId + 1)}>next</button>
)}
</>
);
}

View File

@@ -0,0 +1,40 @@
import { fireEvent, render } from "@testing-library/react";
import { examples } from "../../lib/examples";
import useNavStore from "../../store/navStore";
import CelexSelector from "./CelexSelector";
describe("CelexSelector", () => {
  it("renders the dropdown with options", () => {
    const { getByLabelText, getAllByRole, getByRole } = render(
      <CelexSelector />
    );

    expect(getByLabelText("Select example:")).toBeInTheDocument();
    expect(getByRole("combobox")).toBeInTheDocument();

    const [def, ...options] = getAllByRole("option");

    // First option is the disabled placeholder option
    expect(def).toHaveValue("");
    expect(def).toHaveTextContent("Select an example");
    expect(def).toBeDisabled();

    // The remaining options mirror the examples list, in the same order.
    expect(options).toHaveLength(examples.length);
    // Iterate with numeric indices; `for..in` would yield string keys and
    // also walk any enumerable properties added to the array.
    examples.forEach((example, index) => {
      expect(options[index]).toHaveValue(example.id);
      expect(options[index]).toHaveTextContent(example.name);
    });
  });

  // Nothing is awaited here, so the test is synchronous.
  it("calls setCelexId and setArticleId on selection change", () => {
    const celexId = examples[2].id;
    const { getByRole } = render(<CelexSelector />);

    fireEvent.change(getByRole("combobox"), {
      target: { value: celexId },
    });

    // Selecting an example stores its CELEX id and sets the article id to 1.
    expect(useNavStore.getState().celexId).toEqual(celexId);
    expect(useNavStore.getState().articleId).toEqual(1);
  });
});

View File

@@ -1,13 +1,10 @@
import useNavState from "../../store/navStore";
const examples = [
{ name: "GDPR", id: "32016R0679" },
{ name: "AI Act", id: "32024R1689" },
{ name: "Cyber Resilience Act", id: "32024R2847" },
];
import { examples } from "../../lib/examples";
import useNavStore from "../../store/navStore";
function CelexSelector() {
const { celexId, setCelexId, setArticleId } = useNavState();
const celexId = useNavStore.use.celexId();
const setCelexId = useNavStore.use.setCelexId();
const setArticleId = useNavStore.use.setArticleId();
return (
<div>
@@ -20,6 +17,9 @@ function CelexSelector() {
setArticleId(1);
}}
>
<option value="" disabled>
Select an example
</option>
{examples.map((example) => (
<option key={example.id} value={example.id}>
{example.name}

View File

@@ -0,0 +1,44 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { Language } from "../../lib/types";
import LanguageSwitcher from "./LanguageSwitcher";
describe("LanguageSwitcher", () => {
  const mockOnChange = jest.fn();

  // Mounts the switcher; both arguments can be overridden per test.
  function renderSwitcher(
    onChange = mockOnChange,
    defaultLang = Language.ENG
  ) {
    render(<LanguageSwitcher defaultLang={defaultLang} onChange={onChange} />);
  }

  // Start every test with clean mock call records.
  beforeEach(() => {
    jest.clearAllMocks();
  });

  test("renders with correct default language", async () => {
    renderSwitcher();

    const select = screen.getByRole("combobox");
    expect(select).toHaveValue(Language.ENG);
  });

  test("calls onChange handler when a new language is selected", async () => {
    renderSwitcher();

    const select = screen.getByRole("combobox");
    await userEvent.selectOptions(select, Language.ESP);

    expect(mockOnChange).toHaveBeenCalledWith(Language.ESP);
  });

  test("renders all language options correctly", () => {
    renderSwitcher();

    const renderedOptions = screen.getAllByRole("option");
    const expectedLanguages = Object.values(Language);

    expect(renderedOptions).toHaveLength(expectedLanguages.length);
    // One option per language, in enum order, labelled with the upper-cased value.
    for (const [position, lang] of expectedLanguages.entries()) {
      expect(renderedOptions[position]).toHaveValue(lang);
      expect(renderedOptions[position]).toHaveTextContent(lang.toUpperCase());
    }
  });
});

View File

@@ -10,6 +10,7 @@ function LanguageSwitcher({
}) {
return (
<select
data-testid="language-switcher"
defaultValue={defaultLang}
onChange={(ev) => onChange(ev.currentTarget.value as Language)}
className={styles.languageSwitcher}

View File

@@ -1,5 +1,5 @@
.panel {
flex: 1 auto;
flex: auto;
padding: 1rem;
border-radius: 8px;
border: 1px solid #ccc;

View File

@@ -0,0 +1,114 @@
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { fireEvent, render } from "@testing-library/react";
import React from "react";
import { getArticle } from "../../lib/api";
import { Language } from "../../lib/types";
import useNavStore from "../../store/navStore";
import useUIStore from "../../store/uiStore";
import Panel from "./Panel";
// Auto-mock the stores and the API client so the tests control their return values.
jest.mock("../../store/uiStore");
jest.mock("../../store/navStore");
jest.mock("../../lib/api");
// A `jest.mock` factory must return the mocked module synchronously. Returning
// a Promise would make the Promise itself the module, leaving `API_URL` undefined.
jest.mock("../../constants", () => ({
  API_URL: "http://localhost:8000/api", // Mock the API_URL to a local server for testing
}));
// Query client shared by all tests in this file; retries are switched off
// so a failing query reports its error without being retried.
const queryClient = new QueryClient({
  defaultOptions: { queries: { retry: false } },
});

/** Render wrapper that supplies the shared query client to the component under test. */
function wrapper({ children }: { children: React.ReactNode }) {
  return (
    <QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
  );
}
describe("Panel Component", () => {
  // Typed handle on the auto-mocked API call. Used everywhere instead of
  // mixing `jest.mocked(...)` with untyped `as jest.Mock` casts.
  const mockedGetArticle = jest.mocked(getArticle);

  const mockSetSelectedParagraphId = jest.fn();
  const mockUseUIStore = {
    selectedParagraphId: null,
    setSelectedParagraphId: mockSetSelectedParagraphId,
  };
  const mockNavState = {
    celexId: "123",
    articleId: 1,
  };

  beforeEach(() => {
    jest.mocked(useNavStore).mockReturnValue(mockNavState);
    jest.mocked(useUIStore).mockReturnValue(mockUseUIStore);
  });

  afterEach(() => {
    jest.clearAllMocks();
    // Drop cached query results so tests cannot see each other's data.
    queryClient.clear();
  });

  test("renders loading state", () => {
    // A promise that never settles keeps the query pending.
    mockedGetArticle.mockReturnValue(new Promise<string>(() => {}));

    const { getByText } = render(<Panel />, { wrapper });

    expect(getByText("Loading...")).toBeInTheDocument();
  });

  test("renders error state", async () => {
    mockedGetArticle.mockRejectedValue(new Error("Failed to fetch"));

    const { findByText } = render(<Panel />, { wrapper });

    expect(
      await findByText("An error has occurred: Failed to fetch")
    ).toBeInTheDocument();
  });

  test("renders article content", async () => {
    const mockData = `<div class='paragraph' data-paragraph-id='1'>Test Content</div>`;
    mockedGetArticle.mockResolvedValue(mockData);

    const result = render(<Panel />, { wrapper });

    expect(await result.findByText("Test Content")).toBeInTheDocument();
  });

  test("highlights a paragraph on click", async () => {
    const mockData = `
<div class='paragraph' data-paragraph-id='1'>Paragraph 1</div>
<div class='paragraph' data-paragraph-id='2'>Paragraph 2</div>
`;
    mockedGetArticle.mockResolvedValue(mockData);

    const result = render(<Panel />, { wrapper });
    const paragraph1 = await result.findByText("Paragraph 1");
    const paragraph2 = await result.findByText("Paragraph 2");

    fireEvent.click(paragraph1);
    expect(paragraph1.classList.contains("highlight")).toBe(true);
    expect(paragraph2.classList.contains("highlight")).toBe(false);
    expect(mockSetSelectedParagraphId).toHaveBeenCalledWith("1");

    // Clicking another paragraph moves the highlight.
    fireEvent.click(paragraph2);
    expect(paragraph1.classList.contains("highlight")).toBe(false);
    expect(paragraph2.classList.contains("highlight")).toBe(true);
    expect(mockSetSelectedParagraphId).toHaveBeenCalledWith("2");
  });

  test("renders LanguageSwitcher and updates language", async () => {
    mockedGetArticle.mockResolvedValue(
      "<div class='paragraph' data-paragraph-id='1'>Test Content</div>"
    );

    const result = render(<Panel language={Language.FRA} />, { wrapper });
    const languageSwitcher = await result.findByRole("combobox");
    expect(languageSwitcher).toBeInTheDocument();

    fireEvent.change(languageSwitcher, { target: { value: Language.ENG } });

    // The article is requested again for the newly selected language.
    expect(mockedGetArticle).toHaveBeenCalledWith("123", 1, Language.ENG);
  });
});

View File

@@ -1,35 +1,70 @@
import { useQuery } from "@tanstack/react-query";
import { useEffect, useRef, useState } from "react";
import { getArticle } from "../../lib/api";
import { Language } from "../../lib/types";
import useUIStore from "../../store/uiStore";
import LanguageSwitcher from "../LanguageSwitcher/LanguageSwitcher";
import { useArticle } from "../../hooks/useArticle";
import useNavStore from "../../store/navStore";
import "../../styles/PanelContent.css";
import ParagraphPreview from "../ParagraphPreview/ParagraphPreview";
import { Tooltip, TooltipContent } from "../Tooltip/Tooltip";
import styles from "./Panel.module.css";
type PanelProps = {
celexId: string;
language?: Language;
articleId: number;
};
function Panel({ celexId, language, articleId }: PanelProps) {
function Panel({ language }: PanelProps) {
const { selectedParagraphId, setSelectedParagraphId } = useUIStore();
const [lang, setLang] = useState(language || Language.ENG);
const articleRef = useRef<HTMLDivElement>(null);
const { data, isPending, error } = useQuery({
queryKey: ["article", celexId, articleId, lang],
queryFn: () => getArticle(celexId, articleId, lang),
enabled: !!celexId && !!articleId,
});
const { articleId, celexId, setArticleId } = useNavStore();
const { data, isPending, error } = useArticle(celexId, articleId, lang);
const [hoverArticleId, setHoverArticleId] = useState<number | null>(null);
const [hoverParagraphId, setHoverParagraphId] = useState<number | null>(null);
const [isTooltipOpen, setIsTooltipOpen] = useState(false);
useEffect(() => {
const articleElement = articleRef.current;
if (!articleElement) return;
// Replace cross-reference links with page navigation
const crossRefs = articleElement.querySelectorAll(
"a.cross-ref"
) as NodeListOf<HTMLAnchorElement>;
crossRefs.forEach((link) => {
const target = link.getAttribute("data-target");
const targetId = link.getAttribute("data-id");
const paragraphId = link.getAttribute("data-paragraph-id");
if (target && targetId) {
if (target === "article") {
// Replace link for easier copying, but navigate in-place to maintain UI state
link.setAttribute("href", `/${celexId}/articles/${targetId}`);
link.onclick = () => {
setArticleId(parseInt(targetId));
return false;
};
if (paragraphId) {
link.onmouseover = () => {
setHoverArticleId(parseInt(targetId));
setHoverParagraphId(parseInt(paragraphId));
setIsTooltipOpen(true);
};
link.onmouseout = () => {
setIsTooltipOpen(false);
};
}
}
} else {
console.warn("No target or ID found for link:", link);
}
});
const paragraphs = articleElement.querySelectorAll(".paragraph");
// Highlight the selected paragraph
@@ -66,11 +101,15 @@ function Panel({ celexId, language, articleId }: PanelProps) {
// Cleanup event listeners
return () => {
console.log("Cleaning up event listeners");
// crossRefs.forEach((link) => {
// link.onmouseover = null;
// link.onmouseout = null;
// });
paragraphs.forEach((element) => {
element.removeEventListener("click", handleClick(element));
});
};
}, [articleRef, data, selectedParagraphId, setSelectedParagraphId]);
});
if (isPending) return "Loading...";
if (error) return "An error has occurred: " + error.message;
@@ -81,10 +120,24 @@ function Panel({ celexId, language, articleId }: PanelProps) {
defaultLang={lang}
onChange={setLang}
></LanguageSwitcher>
<Tooltip
open={isTooltipOpen}
placement="right-start"
onOpenChange={setIsTooltipOpen}
>
<TooltipContent>
<ParagraphPreview
celexId={celexId!}
articleId={hoverArticleId!}
paragraphId={hoverParagraphId!}
lang={lang}
/>
</TooltipContent>
</Tooltip>
<div
ref={articleRef}
lang={lang.substring(0, 2)}
dangerouslySetInnerHTML={{ __html: data || "" }}
dangerouslySetInnerHTML={{ __html: data }}
/>
</div>
);

View File

@@ -0,0 +1,34 @@
import { useParagraph } from "../../hooks/useParagraph";
import { Language } from "../../lib/types";
type ParagraphPreviewProps = {
celexId: string;
articleId: number;
paragraphId: number;
lang: Language;
};
/**
 * Loads a single paragraph through `useParagraph` and renders the returned
 * HTML. A placeholder is shown while the request is pending, and the error
 * message if it fails.
 */
function ParagraphPreview(props: ParagraphPreviewProps) {
  const { celexId, articleId, paragraphId, lang } = props;
  const {
    data: html,
    isPending,
    error,
  } = useParagraph(celexId, articleId, paragraphId, lang);

  if (isPending) {
    return <p>Loading...</p>;
  }
  if (error) {
    return <p>Error loading paragraph: {error.message}</p>;
  }

  // The fetched markup is injected as-is.
  return <div dangerouslySetInnerHTML={{ __html: html }} />;
}
export default ParagraphPreview;

View File

@@ -1,16 +1,15 @@
.toc {
font-size: 0.8rem;
min-width: 25vw;
flex: 1 auto;
flex: 1 0 25vw;
&.hidden {
flex: 0 0;
min-width: 0;
display: none;
}
transition: flex-basis 0.1s ease-in-out;
overflow-y: scroll;
overflow-x: wrap;
height: 100vh;
.tocDivision {
margin-block: 0.5rem;

View File

@@ -1,16 +1,14 @@
import { useState } from "react";
import { Division } from "../../lib/types";
import useNavState from "../../store/navStore";
import useNavStore from "../../store/navStore";
import styles from "./TOC.module.css";
type TOC = Division[];
type TOCProps = {
toc: TOC;
toc: Division[];
};
function TOC({ toc }: TOCProps) {
const { articleId, setArticleId } = useNavState();
const { articleId, setArticleId } = useNavStore();
function containsArticle(division: Division, articleId: number): boolean {
return division.content.some((c) => {
@@ -57,15 +55,17 @@ function TOC({ toc }: TOCProps) {
const [isVisible, setIsVisible] = useState(true);
return (
<nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}>
<>
<button
onClick={() => setIsVisible(!isVisible)}
className={styles.toggleButton}
>
{isVisible ? "<" : ">"}
</button>
<nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}>
{toc.map((division) => renderDivision(division))}
</nav>
</>
);
}
export default TOC;

View File

@@ -0,0 +1,10 @@
/* Floating tooltip bubble: dark background, light text, slightly reduced font size. */
.Tooltip {
background-color: #444;
color: white;
font-size: 90%;
padding: 4px 8px;
border-radius: 4px;
box-sizing: border-box;
/* Size to the content, but never wider than 60 characters. */
width: max-content;
max-width: 60ch;
}

View File

@@ -0,0 +1,163 @@
import type { Placement } from "@floating-ui/react";
import {
autoUpdate,
flip,
FloatingPortal,
offset,
shift,
useDismiss,
useFloating,
useFocus,
useHover,
useInteractions,
useMergeRefs,
useRole,
} from "@floating-ui/react";
import * as React from "react";
import styles from "./Tooltip.module.css";
/** Options accepted by `useTooltip` and the `Tooltip` component. */
interface TooltipOptions {
/** Initial open state when the tooltip manages its own state (defaults to `false` in `useTooltip`). */
initialOpen?: boolean;
/** Placement passed to floating-ui (defaults to `"top"` in `useTooltip`). */
placement?: Placement;
/** When provided, the open state is controlled by the caller. */
open?: boolean;
/** Open-state change callback; used instead of the internal setter when provided. */
onOpenChange?: (open: boolean) => void;
}
/**
 * Sets up floating-ui positioning and interaction handlers for a tooltip.
 *
 * The tooltip is controlled when `open` is provided and otherwise keeps its
 * own open state, starting from `initialOpen`.
 *
 * @returns A memoized object combining `open`, `setOpen`, the interaction
 * prop getters and the `useFloating` result.
 */
export function useTooltip({
initialOpen = false,
placement = "top",
open: controlledOpen,
onOpenChange: setControlledOpen,
}: TooltipOptions = {}) {
const [uncontrolledOpen, setUncontrolledOpen] = React.useState(initialOpen);
// Caller-provided state and setter take precedence over the internal ones.
const open = controlledOpen ?? uncontrolledOpen;
const setOpen = setControlledOpen ?? setUncontrolledOpen;
const data = useFloating({
placement,
open,
onOpenChange: setOpen,
whileElementsMounted: autoUpdate,
middleware: [
offset(5),
flip({
// Cross-axis flipping is only enabled for aligned placements such as "right-start".
crossAxis: placement.includes("-"),
fallbackAxisSideDirection: "start",
padding: 5,
}),
shift({ padding: 5 }),
],
});
const context = data.context;
// Hover and focus only drive the open state when the tooltip is uncontrolled.
const hover = useHover(context, {
move: false,
enabled: controlledOpen == null,
});
const focus = useFocus(context, {
enabled: controlledOpen == null,
});
const dismiss = useDismiss(context);
const role = useRole(context, { role: "tooltip" });
const interactions = useInteractions([hover, focus, dismiss, role]);
return React.useMemo(
() => ({
open,
setOpen,
...interactions,
...data,
}),
[open, setOpen, interactions, data]
);
}
/** Value carried by the tooltip context: the `useTooltip` result, or `null` when no provider is present. */
type ContextType = ReturnType<typeof useTooltip> | null;
// Context that shares one tooltip's state with the components rendered inside its provider.
const TooltipContext = React.createContext<ContextType>(null);
/**
 * Returns the tooltip state provided by the enclosing tooltip context.
 *
 * @throws Error when no tooltip context value is available.
 */
export const useTooltipContext = () => {
  const tooltip = React.useContext(TooltipContext);
  if (tooltip != null) {
    return tooltip;
  }
  throw new Error("Tooltip components must be wrapped in <Tooltip />");
};
/**
 * Provides tooltip state to its children through the tooltip context.
 * Every prop other than `children` is forwarded to `useTooltip` as an option
 * (e.g. `placement`, `open`, `onOpenChange`).
 */
export function Tooltip(
  props: { children: React.ReactNode } & TooltipOptions
) {
  const { children, ...options } = props;
  const tooltipState = useTooltip(options);

  return (
    <TooltipContext.Provider value={tooltipState}>
      {children}
    </TooltipContext.Provider>
  );
}
/**
 * Returns the ref attached to `children`, or `undefined` when there is none.
 *
 * Only React elements can carry a ref; `children` may just as well be text,
 * `null` or `undefined`. React 19 passes `ref` as a regular prop and warns
 * when the legacy `element.ref` accessor is read, so that accessor is only
 * consulted on older React versions.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getChildrenRef(children: React.ReactNode): any {
  if (!React.isValidElement(children)) {
    return undefined;
  }
  const propsRef = (children.props as { ref?: unknown } | null | undefined)
    ?.ref;
  if (propsRef !== undefined) {
    return propsRef;
  }
  if (Number.parseInt(React.version, 10) >= 19) {
    return undefined;
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return (children as any).ref ?? undefined;
}

/**
 * Anchor element for a tooltip.
 *
 * By default the children are wrapped in a `button`. With `asChild`, a single
 * React element child is cloned and used as the anchor instead. In both cases
 * the anchor receives the floating-ui reference props and a `data-state`
 * attribute ("open" / "closed").
 */
export const TooltipTrigger = React.forwardRef<
  HTMLElement,
  React.HTMLProps<HTMLElement> & { asChild?: boolean }
>(function TooltipTrigger({ children, asChild = false, ...props }, propRef) {
  const context = useTooltipContext();
  // Guarded lookup: does not throw when `children` is missing.
  const childrenRef = getChildrenRef(children);
  // One ref that feeds floating-ui, the forwarded ref and the child's own ref.
  const ref = useMergeRefs([context.refs.setReference, propRef, childrenRef]);

  // `asChild` allows the user to pass any element as the anchor
  if (asChild && React.isValidElement(children)) {
    return React.cloneElement(
      children,
      context.getReferenceProps({
        ref,
        ...props,
        ...children.props,
        "data-state": context.open ? "open" : "closed",
      })
    );
  }

  return (
    <button
      ref={ref}
      // The user can style the trigger based on the state
      data-state={context.open ? "open" : "closed"}
      {...context.getReferenceProps(props)}
    >
      {children}
    </button>
  );
});
/**
 * Floating tooltip body. Renders nothing while the tooltip is closed;
 * otherwise renders a positioned `div` inside a `FloatingPortal`.
 * A caller-supplied `style` is applied on top of the computed floating styles.
 */
export const TooltipContent = React.forwardRef<
  HTMLDivElement,
  React.HTMLProps<HTMLDivElement>
>(function TooltipContent({ style, ...props }, propRef) {
  const tooltip = useTooltipContext();
  const ref = useMergeRefs([tooltip.refs.setFloating, propRef]);

  if (!tooltip.open) {
    return null;
  }

  // Caller styles come last so they win over the computed position styles.
  const mergedStyle = { ...tooltip.floatingStyles, ...style };

  return (
    <FloatingPortal>
      <div
        ref={ref}
        className={styles.Tooltip}
        style={mergedStyle}
        {...tooltip.getFloatingProps(props)}
      />
    </FloatingPortal>
  );
});

View File

@@ -0,0 +1,3 @@
// API base URL, read from the VITE_API_URL environment variable.
const API_URL = import.meta.env.VITE_API_URL;

export { API_URL };

14
frontend/src/hooks/toc.ts Normal file
View File

@@ -0,0 +1,14 @@
import { useQuery } from "@tanstack/react-query";
import { getToc } from "../lib/api";
import { Language } from "../lib/types";
import useNavStore from "../store/navStore";
/**
 * Queries the table of contents for the CELEX id currently held in the nav store.
 * The TOC is always requested in English, and the query stays disabled
 * until a CELEX id is set.
 */
export const useTOC = () => {
  const celexId = useNavStore((state) => state.celexId);
  const hasCelexId = Boolean(celexId);

  return useQuery({
    queryKey: ["toc", celexId],
    // Only runs while `enabled` is true, i.e. when `celexId` is set.
    queryFn: () => getToc(celexId!, Language.ENG),
    enabled: hasCelexId,
  });
};

View File

@@ -1,11 +1,15 @@
import { useEffect } from "react";
import { useNavigate, useParams } from "react-router-dom";
import useNavState from "../store/navStore";
import useNavStore from "../store/navStore";
export const useUrlSync = () => {
const navigate = useNavigate();
const { celexId: paramCelexId, articleId: paramArticleId } = useParams();
const { articleId, celexId, setArticleId, setCelexId } = useNavState();
const celexId = useNavStore.use.celexId();
const setCelexId = useNavStore.use.setCelexId();
const articleId = useNavStore.use.articleId();
const setArticleId = useNavStore.use.setArticleId();
// Effect to read from URL when URL changes
useEffect(() => {
@@ -26,13 +30,13 @@ export const useUrlSync = () => {
// Update the URL when Zustand changes
useEffect(() => {
if (celexId === null) {
if (!celexId) {
return;
}
let targetUrl = `/${celexId}`;
if (articleId !== null) {
targetUrl += `/articles/${articleId}`;
}
navigate(targetUrl, { replace: true });
navigate(targetUrl);
}, [navigate, celexId, articleId]); // Only sync URL when Zustand changes
};

View File

@@ -0,0 +1,15 @@
import { useQuery } from "@tanstack/react-query";
import { getArticle } from "../lib/api";
import { Language } from "../lib/types";
/**
 * Queries the content of one article in the given language.
 * The query stays disabled until both `celexId` and `articleId` are truthy.
 *
 * @param celexId - CELEX id of the document, or `null` if none is selected.
 * @param articleId - Id of the article, or `null` if none is selected.
 * @param lang - Language to request the article in.
 */
export const useArticle = (
  celexId: string | null,
  articleId: number | null,
  lang: Language
) => {
  const canFetch = Boolean(celexId) && Boolean(articleId);

  return useQuery({
    queryKey: ["article", celexId, articleId, lang],
    // Only runs while `enabled` is true, so both ids are set here.
    queryFn: () => getArticle(celexId!, articleId!, lang),
    enabled: canFetch,
  });
};

View File

@@ -0,0 +1,16 @@
import { useQuery } from "@tanstack/react-query";
import { getParagraph } from "../lib/api";
import { Language } from "../lib/types";
/**
 * React Query hook that loads a single paragraph of an article via
 * `getParagraph`.
 *
 * The query stays disabled while any of the three identifiers is falsy
 * (null, an empty CELEX ID, or a number equal to 0).
 *
 * @param celexId - CELEX ID of the document, or null when none is selected.
 * @param articleId - Article number, or null when none is selected.
 * @param paragraphId - Paragraph number within the article, or null.
 * @param lang - Language to request the paragraph in.
 * @returns The `useQuery` result for the paragraph request.
 */
export const useParagraph = (
  celexId: string | null,
  articleId: number | null,
  paragraphId: number | null,
  lang: Language
) => {
  const canFetch =
    Boolean(celexId) && Boolean(articleId) && Boolean(paragraphId);
  const fetchParagraph = () =>
    getParagraph(celexId!, articleId!, paragraphId!, lang);

  return useQuery({
    queryKey: ["paragraph", celexId, articleId, paragraphId, lang],
    queryFn: fetchParagraph,
    enabled: canFetch,
  });
};

View File

@@ -1,7 +1,14 @@
import TOC from "../components/TOC/TOC";
import { Language } from "./types";
import Axios from "axios";
import { API_URL } from "../constants";
import { Division, Language } from "./types";
const API_URL = import.meta.env.VITE_API_URL;
const axios = Axios.create({
baseURL: API_URL,
timeout: 5000,
headers: {
"Content-Type": "application/json",
},
});
async function getArticle(
celexId: string,
@@ -11,22 +18,40 @@ async function getArticle(
console.debug(
`Fetching article ${article} for CELEX ID ${celexId} in ${language} language`
);
const response = await fetch(
`${API_URL}/${celexId}/articles/${article}/${language}`
const response = await axios.get<string>(
`${celexId}/articles/${article}/${language}`
);
return await response.text();
return response.data;
}
/**
 * Fetch one paragraph of an article from the API.
 *
 * The request path is relative, so it is resolved against the base URL of the
 * module's shared axios instance.
 *
 * @param celexId - CELEX ID of the document.
 * @param article - Article number.
 * @param paragraph - Paragraph number within the article.
 * @param language - Language code appended as the last path segment.
 * @returns The response body as a string.
 */
async function getParagraph(
  celexId: string,
  article: number,
  paragraph: number,
  language: string
): Promise<string> {
  console.debug(
    `Fetching article ${article} paragraph ${paragraph} for CELEX ID ${celexId} in ${language} language`
  );

  const path = `${celexId}/articles/${article}/${paragraph}/${language}`;
  const { data } = await axios.get<string>(path);
  return data;
}
async function getArticleIds(celexId: string): Promise<number[]> {
console.debug(`Fetching article list for CELEX ID ${celexId}`);
const response = await fetch(`${API_URL}/${celexId}/articles`);
return await response.json();
const response = await axios.get<number[]>(`${celexId}/articles`);
return response.data;
}
async function getToc(celexId: string, language: Language): Promise<TOC> {
async function getToc(
celexId: string,
language: Language
): Promise<Division[]> {
console.debug(`Fetching TOC for CELEX ID ${celexId}`);
const response = await fetch(`${API_URL}/${celexId}/toc/${language}`);
return await response.json();
const response = await axios.get<Division[]>(`${celexId}/toc/${language}`);
return response.data;
}
export { getArticle, getArticleIds, getToc };
export { getArticle, getArticleIds, getParagraph, getToc };

View File

@@ -0,0 +1,9 @@
/**
 * Example pieces of legislation offered to the user.
 *
 * Each entry pairs a display name with a document identifier
 * (presumably the CELEX ID used by the rest of the app; confirm at the call site).
 */
export const examples: { name: string; id: string }[] = [
  { id: "32016R0679", name: "GDPR" },
  { id: "32024R1689", name: "AI Act" },
  { id: "32019R0881", name: "Cybersecurity Act" },
  { id: "32024R2847", name: "Cyber Resilience Act" },
  { id: "32017R0745", name: "Medical Device Regulation" },
  { id: "32022L2555", name: "NIS 2 Directive" },
  { id: "32022R2065", name: "Digital Services Act" },
];

View File

@@ -16,7 +16,7 @@ type Division = {
title: string;
subtitle: string;
level: number;
content: Article[] | Division[];
content: (Article | Division)[];
};
export { Language };

View File

@@ -1,10 +1,10 @@
import App from "../App";
import { useUrlSync } from "../hooks/urlSync";
import useNavState from "../store/navStore";
import useNavStore from "../store/navStore";
function MainView() {
useUrlSync();
const celexId = useNavState((state) => state.celexId);
const celexId = useNavStore.use.celexId();
if (!celexId) {
return <div>Error: No CELEX ID provided</div>;

View File

@@ -1,17 +1,19 @@
import { create } from "zustand";
import { create, StateCreator } from "zustand";
import { createSelectors } from "../util/zustand";
interface NavState {
export type NavStore = {
celexId: string | null;
articleId: number | null;
setCelexId: (celexId: string) => void;
setArticleId: (articleId: number | null) => void;
}
};
const useNavState = create<NavState>((set) => ({
celexId: null,
articleId: null,
const navStoreCreator: StateCreator<NavStore> = (set) => ({
celexId: "",
articleId: 1,
setCelexId: (celexId) => set({ celexId }),
setArticleId: (articleId) => set({ articleId }),
}));
});
export default useNavState;
const useNavStore = createSelectors(create<NavStore>()(navStoreCreator));
export default useNavStore;

View File

@@ -11,11 +11,11 @@ interface UIState {
const useUIStore = create<UIState>((set) => ({
numPanels: 1,
selectedParagraphId: null,
addPanel: () => set((state) => ({ numPanels: state.numPanels + 1 })),
removePanel: () =>
set((state) => ({ numPanels: Math.max(state.numPanels - 1, 1) })),
setSelectedParagraphId: (selectedParagraphId: string | null) =>
set({ selectedParagraphId }),
setSelectedParagraphId: (selectedParagraphId) => set({ selectedParagraphId }),
}));
export default useUIStore;

View File

@@ -5,6 +5,8 @@
}
article {
max-width: 64ch;
.list-lower-alpha {
list-style-type: lower-alpha;
}

View File

@@ -0,0 +1,18 @@
import { StoreApi, UseBoundStore } from "zustand";
/**
 * A store hook type extended with a `use` object that exposes one
 * zero-argument selector hook per key of the store's state.
 */
type WithSelectors<S> = S extends { getState: () => infer T }
  ? S & { use: { [K in keyof T]: () => T[K] } }
  : never;

/**
 * Attach auto-generated selector hooks to a zustand store hook.
 *
 * For every key present in the store's state at the time of the call, a
 * function `store.use.<key>()` is added that subscribes to exactly that key.
 * The passed store object itself is mutated and returned.
 *
 * @param _store - The bound zustand store hook to augment.
 * @returns The same store, typed with the additional `use` property.
 */
export const createSelectors = <S extends UseBoundStore<StoreApi<object>>>(
  _store: S
) => {
  const store = _store as WithSelectors<typeof _store>;
  store.use = {};

  Object.keys(store.getState()).forEach((key) => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (store.use as any)[key] = () =>
      store((state) => state[key as keyof typeof state]);
  });

  return store;
};

View File

@@ -24,5 +24,5 @@
"plugins": [{ "name": "typescript-plugin-css-modules" }]
},
"include": ["src"]
"include": ["src", "./jest.setup.ts", "__mocks__"]
}

View File

@@ -19,7 +19,7 @@ app = modal.App("formex-viewer", image=app_image)
)
@modal.asgi_app()
def fastapi_app():
import fastapi.staticfiles
import fastapi
from formex_viewer.server import app as formex_app
@@ -28,7 +28,7 @@ def fastapi_app():
@formex_app.get("/{path:path}")
async def frontend_handler(path: str):
fp = assets_path / path
if not fp.exists():
if not fp.exists() or not fp.is_file():
fp = assets_path / "index.html"
return fastapi.responses.FileResponse(fp)

View File

@@ -20,3 +20,8 @@ formex-viewer = "formex_viewer:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[dependency-groups]
dev = [
"pytest>=8.3.5",
]

View File

@@ -1,12 +1,16 @@
import html
import re
from typing import Optional, Union
import warnings
from dataclasses import dataclass
from typing import Literal, Optional, Union, cast
import lxml.etree
from lxml import etree as ET
from formex_viewer.main import Language
def text_content(el: lxml.etree.Element) -> str:
def text_content(el: ET._Element) -> str:
"""Get the text content of an XML element, including all child elements."""
def _iterate(el):
@@ -23,24 +27,130 @@ def text_content(el: lxml.etree.Element) -> str:
return "".join(_iterate(el))
@dataclass
class CrossReference:
    """A reference to an article or annex detected in running text."""

    # Kind of element the reference points to.
    target: Literal["article", "annex"]
    # The matched text exactly as it appeared in the source, e.g. "Article 5(1)".
    text: str
    # Article number (decimal digits) or annex numeral (Roman letters), as matched.
    id: str
    # Paragraph number for references of the form "Article N(P)"; None otherwise.
    paragraph: int | None = None
def extract_xrefs(el: ET._Element, language: Language) -> list[CrossReference]:
    """Extract article and annex cross-references from the text of an XML element.

    References that are followed by the language's "other text" marker
    (e.g. "Article 12 of Regulation ...", but not "Article 12 of this ...")
    are skipped, since they point outside the current document.

    Args:
        el: The XML element whose text content is scanned.
        language: Language of the text; selects the keywords to look for.

    Returns:
        A list of cross-references: all article references in order of
        appearance, followed by all annex references in order of appearance.
        The list is empty (and a warning is issued) if the language is not
        supported.
    """
    pattern_parts = {
        Language.ENG: {
            "article": r"(Art\.|Articles?)",
            "annex": r"(Ann\.|Annex)",
            "exclusion": r"(?! of(?! this))",
        },
        Language.DEU: {
            "article": r"(Art\.|Artikels?)",
            "annex": r"(Anhang)",
            "exclusion": r"(?! von)",
        },
    }

    if language not in pattern_parts:
        warnings.warn(
            f"Language '{language}' not supported for cross-reference extraction. Returning empty list."
        )
        return []

    parts = pattern_parts[language]
    article_kw = parts["article"]
    annex_kw = parts["annex"]
    exclusion = parts["exclusion"]

    # Prevent zealous matching of references to other texts by using a negative
    # lookahead, and match only at word boundaries to prevent partial matches.
    #
    # The number itself must be matched in full before the exclusion lookahead
    # is tested. Otherwise the regex engine backtracks to a shorter number
    # ("Article 12 of Regulation" -> "Article 1", "Annex IV of ..." -> "Annex I")
    # at which point the lookahead no longer sees " of" and wrongly succeeds.
    #  - article: no further digit may follow the number, and an "(n)" paragraph
    #    suffix is either consumed or must be absent.
    #  - annex: the Roman numeral must end at a word boundary, which also stops
    #    case-insensitive matches inside ordinary words ("annex describes").
    patterns: list[tuple[Literal["article", "annex"], str, str]] = [
        (
            "article",
            "art_num",
            rf"\b{article_kw}\s+(?P<art_num>\d+)(?!\d)(?:[(](?P<parag_num>\d+)[)]|(?![(]\d+[)]))(?:{exclusion})",
        ),
        (
            "annex",
            "annex_num",
            rf"\b{annex_kw}\s+(?P<annex_num>[DILMVX]+)\b(?:{exclusion})",
        ),
    ]

    text = text_content(el)
    crossrefs: list[CrossReference] = []
    for target, id_group, pattern in patterns:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            # Only the article pattern defines "parag_num"; .get() covers annexes.
            parag_num = match.groupdict().get("parag_num")
            crossrefs.append(
                CrossReference(
                    target=target,
                    id=match.group(id_group),
                    paragraph=int(parag_num) if parag_num else None,
                    text=match.group(0),
                )
            )

    return crossrefs
def extract_article(doc: ET._Element, article_id: int) -> ET._Element | None:
    """Extract a specific article from a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number.

    Returns:
        The first matching article element, or None if the document contains
        no article with that number.
    """
    # IDENTIFIER is compared against the zero-padded, three-digit article number.
    # The query is evaluated once; it searches the whole subtree of `doc`.
    matches = doc.xpath(f".//ARTICLE[@IDENTIFIER='{article_id:03d}']")
    return matches[0] if matches else None
def extract_paragraph(
    doc: ET._Element, article_id: int, paragraph_id: int
) -> ET._Element | None:
    """Extract a specific paragraph from an article in a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number.
        paragraph_id: The paragraph number.

    Returns:
        The first matching paragraph element, or None if no such paragraph
        exists in the document.
    """
    # IDENTIFIER has the form "<article>.<paragraph>", each part zero-padded to
    # three digits. The query is evaluated once over the subtree of `doc`.
    matches = doc.xpath(
        f".//PARAG[@IDENTIFIER='{article_id:03d}.{paragraph_id:03d}']"
    )
    return matches[0] if matches else None
class FormexArticleConverter:
"""Converts Formex XML <ARTICLE> elements to semantic HTML5."""
def __init__(self, namespace: Optional[str] = None):
def __init__(self, language: Language, namespace: Optional[str] = None):
"""
Initialize the converter.
Args:
language: Language object to determine the language for cross-reference extraction
namespace: Optional XML namespace to use when parsing elements
"""
self.ns = namespace
self.language = language
self.ns_prefix = f"{{{namespace}}}" if namespace else ""
def _get_tag(self, tag: str) -> str:
"""Get the tag name with namespace if available."""
return f"{self.ns_prefix}{tag}"
def _get_text(self, element: ET.Element) -> str:
def _get_text(self, element: ET._Element) -> str:
"""Get the text content of an element, including all nested text.
This uses lxml's text_content() method when available, falling back to
@@ -55,7 +165,7 @@ class FormexArticleConverter:
except AttributeError:
# Fall back to manual traversal if text_content() is not available
text = element.text or ""
for child in element:
for child in element.iterchildren(tag="*"):
text += self._get_text(child)
if child.tail:
text += child.tail
@@ -67,7 +177,16 @@ class FormexArticleConverter:
clean_id = re.sub(r"[^a-zA-Z0-9-]", "-", identifier)
return f"art-{clean_id}"
def _convert_btx(self, element: ET.Element) -> str:
def _replace_xref(self, text: str, xref: CrossReference) -> str:
"""Replace a cross-reference instance with semantic markup in the text."""
# Replace the cross-reference text with a link
text = text.replace(
xref.text,
f'<a href="" data-target="{xref.target}" data-id="{xref.id}" data-paragraph-id="{xref.paragraph or ''}" class="cross-ref">{xref.text}</a>',
)
return text
def _convert_btx(self, element: ET._Element) -> str:
"""
Convert basic text elements (t_btx, t_btx.seq) to HTML.
@@ -79,7 +198,15 @@ class FormexArticleConverter:
result = element.text or ""
for child in element:
is_title = element.tag in ("TI", "STI", "TI.ART", "STI.ART")
if not is_title and not element.getchildren():
# Cross-references should be treated at the deepest level
xrefs = extract_xrefs(element, self.language)
for xref in xrefs:
# Replace the cross-reference text with a link
result = self._replace_xref(result, xref)
for child in element.iterchildren(tag="*"):
child_tag = child.tag.replace(self.ns_prefix, "")
# Process common inline elements
@@ -176,11 +303,17 @@ class FormexArticleConverter:
result += self._convert_btx(child)
if child.tail:
result += child.tail
xrefs = extract_xrefs(child, self.language)
tail_text = child.tail
for xref in xrefs:
# Replace the cross-reference text with a link
tail_text = self._replace_xref(tail_text, xref)
result += tail_text
return result
def _convert_list(self, list_element: ET.Element) -> str:
def _convert_list(self, list_element: ET._Element) -> str:
"""Convert a Formex LIST element to HTML list items."""
result = ""
# Using lxml's xpath to get direct child ITEM elements
@@ -200,6 +333,12 @@ class FormexArticleConverter:
if no_p is not None and txt is not None:
num = self._get_text(no_p)
text = self._get_text(txt)
# Handle cross-references within the text
xrefs = extract_xrefs(txt, self.language)
for xref in xrefs:
text = self._replace_xref(text, xref)
item_content += f'<span class="item-number">{num}</span> {text}'
elif child_tag == "P":
# Regular paragraph
@@ -212,41 +351,40 @@ class FormexArticleConverter:
return result
def _convert_alinea(self, alinea: ET.Element) -> str:
def _convert_alinea(self, alinea: ET._Element) -> str:
"""Convert an ALINEA element to HTML."""
return f'<p class="alinea">{self._convert_btx(alinea)}</p>'
def _convert_parag(self, parag: ET.Element) -> str:
def _convert_parag(self, parag: ET._Element) -> str:
"""Convert a PARAG (paragraph) element to HTML."""
identifier = parag.get("IDENTIFIER", "")
parag_id = self._create_id(identifier) if identifier else ""
# Get the paragraph number using XPath
no_parag_elems = parag.xpath(f"./{self._get_tag('NO.PARAG')}")
parag_num = self._get_text(no_parag_elems[0]) if no_parag_elems else ""
# Process the alineas within the paragraph
content = ""
for alinea in parag.xpath(f"./{self._get_tag('ALINEA')}"):
content += self._convert_alinea(alinea)
# Process any comments
for comment in parag.xpath(f"./{self._get_tag('COMMENT')}"):
content += f'<div class="comment">{self._convert_btx(comment)}</div>'
# Process any quotations
for quot in parag.xpath(f"./{self._get_tag('QUOT.S')}"):
for child in parag.iterchildren(tag="*"):
child_tag = child.tag.replace(self.ns_prefix, "")
if child_tag == "ALINEA":
content += self._convert_alinea(child)
elif child_tag == "COMMENT":
content += f'<div class="comment">{self._convert_btx(child)}</div>'
elif child_tag == "QUOT.S":
content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
elif child_tag == "NO.PARAG":
content += (
f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
f'<span class="paragraph-number">{self._convert_btx(child)}</span>'
)
else:
raise RuntimeError(
f"Unexpected child element '{child_tag}' in PARAG: {text_content(child)}"
)
return f'<div class="paragraph" data-paragraph-id="{parag_id}"><span class="paragraph-number">{parag_num}</span>{content}</div>'
return f'<div class="paragraph" data-paragraph-id="{parag_id}">{content}</div>'
def _convert_subdiv(self, subdiv: ET.Element) -> str:
"""Convert a SUBDIV (subdivision) element to HTML."""
# Get the title using XPath
title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
def _convert_subdiv(self, subdiv: ET._Element) -> str:
"""Convert a SUBDIV (subdivision) element to HTML, preserving child order."""
# Get the title using XPath (should be the first TITLE child if present)
title = ""
title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
if title_elems:
title_elem = title_elems[0]
# Process TI (title) and STI (subtitle) elements
@@ -261,34 +399,30 @@ class FormexArticleConverter:
if sti_list:
title += f'<h5 class="subdivision-subtitle">{" ".join(sti_list)}</h5>'
# Process content: either paragraphs, alineas, or nested subdivisions
# Process all children in order, skipping TITLE (already handled)
content = ""
# Process paragraphs directly under this subdivision
for parag in subdiv.xpath(f"./{self._get_tag('PARAG')}"):
content += self._convert_parag(parag)
# Process alineas directly under this subdivision
for alinea in subdiv.xpath(f"./{self._get_tag('ALINEA')}"):
content += self._convert_alinea(alinea)
# Process comments directly under this subdivision
for comment in subdiv.xpath(f"./{self._get_tag('COMMENT')}"):
content += f'<div class="comment">{self._convert_btx(comment)}</div>'
# Process quotations directly under this subdivision
for quot in subdiv.xpath(f"./{self._get_tag('QUOT.S')}"):
content += (
f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
for child in subdiv.iterchildren(tag="*"):
child_tag = child.tag.replace(self.ns_prefix, "")
if child_tag == "TITLE":
continue # already handled
elif child_tag == "PARAG":
content += self._convert_parag(child)
elif child_tag == "ALINEA":
content += self._convert_alinea(child)
elif child_tag == "COMMENT":
content += f'<div class="comment">{self._convert_btx(child)}</div>'
elif child_tag == "QUOT.S":
content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
elif child_tag == "SUBDIV":
content += self._convert_subdiv(child)
else:
raise RuntimeError(
f"Unexpected child element '{child_tag}' in SUBDIV: {text_content(child)}"
)
# Process nested subdivisions directly under this subdivision
for sub in subdiv.xpath(f"./{self._get_tag('SUBDIV')}"):
content += self._convert_subdiv(sub)
return f'<section class="subdivision">{title}{content}</section>'
def convert_article(self, article: Union[str, ET.Element]) -> str:
def convert_article(self, article: Union[str, ET._Element]) -> str:
"""
Convert a Formex <ARTICLE> element to HTML5.
@@ -302,7 +436,9 @@ class FormexArticleConverter:
if isinstance(article, str):
try:
parser = ET.XMLParser(remove_blank_text=True)
article = ET.fromstring(article.encode("utf-8"), parser)
article = cast(
ET._Element, ET.fromstring(article.encode("utf-8"), parser)
)
except ET.XMLSyntaxError as e:
return f"<p>Error parsing XML: {e}</p>"
@@ -325,43 +461,36 @@ class FormexArticleConverter:
article_subtitle = self._convert_btx(sti_art) if sti_art is not None else ""
# Build the header section
if article_title and article_subtitle:
header = f'<header><h3 class="article-title">{article_title}</h3>'
if article_subtitle:
header += f'<h4 class="article-subtitle">{article_subtitle}</h4>'
header += "</header>"
else:
header = ""
# Process the content based on what's present
content = ""
# Check if we have alineas directly under the article
alineas = article.xpath(f"./{self._get_tag('ALINEA')}")
if alineas:
for alinea in alineas:
content += self._convert_alinea(alinea)
# Check if we have paragraphs directly under the article
parags = article.xpath(f"./{self._get_tag('PARAG')}")
if parags:
for parag in parags:
content += self._convert_parag(parag)
# Check for comments directly under the article
comments = article.xpath(f"./{self._get_tag('COMMENT')}")
if comments:
for comment in comments:
content += f'<div class="comment">{self._convert_btx(comment)}</div>'
# Check for quotations directly under the article
quots = article.xpath(f"./{self._get_tag('QUOT.S')}")
if quots:
for quot in quots:
content += f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
# Check for subdivisions directly under the article
subdivs = article.xpath(f"./{self._get_tag('SUBDIV')}")
if subdivs:
for subdiv in subdivs:
content += self._convert_subdiv(subdiv)
# Process all child elements (except TITLE) in tree order
for child in article.iterchildren(tag="*"):
child_tag = child.tag.replace(self.ns_prefix, "")
if child_tag in ["TI.ART", "STI.ART"]:
continue # already handled
elif child_tag == "ALINEA":
content += self._convert_alinea(child)
elif child_tag == "PARAG":
content += self._convert_parag(child)
elif child_tag == "COMMENT":
content += f'<div class="comment">{self._convert_btx(child)}</div>'
elif child_tag == "QUOT.S":
content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
elif child_tag == "SUBDIV":
content += self._convert_subdiv(child)
else:
raise RuntimeError(
f"Unexpected child element '{child_tag}' in ARTICLE: {text_content(child)}"
)
# Assemble the complete article
return f'<article id="{article_id}" class="formex-article">{header}<div class="article-content">{content}</div></article>'

View File

@@ -2,7 +2,11 @@ import lxml.etree as ET
from fastapi import APIRouter, FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
from formex_viewer.formex4 import FormexArticleConverter
from formex_viewer.formex4 import (
FormexArticleConverter,
extract_article,
extract_paragraph,
)
from formex_viewer.main import (
CellarClient,
CellarIdentifier,
@@ -68,9 +72,16 @@ def article_ids(celex_id: str, language: Language = Language.ENG):
@api_router.get("/{celex_id}/toc/{language}")
def toc(celex_id: str, language: Language = Language.ENG):
def _extract_text(root: ET.Element, tag: str) -> str:
"""
Extract text from the given tag in the XML element.
"""
text = root.xpath(f"{tag}//text()")
return "".join(text) if text else ""
def _handle_division(division: ET.Element, level: int):
title = ti_el[0] if (ti_el := division.xpath("TITLE/TI//text()")) else ""
subtitle = sti_el[0] if (sti_el := division.xpath("TITLE/STI//text()")) else ""
title = _extract_text(division, "TITLE/TI")
subtitle = _extract_text(division, "TITLE/STI")
subdivisions = []
for subdivision in division.xpath("DIVISION") or []:
@@ -81,10 +92,9 @@ def toc(celex_id: str, language: Language = Language.ENG):
art_id = article.get("IDENTIFIER")
if not art_id:
continue
art_title = ti_el[0] if (ti_el := article.xpath("TI.ART//text()")) else ""
art_subtitle = (
sti_el[0] if (sti_el := article.xpath("STI.ART//text()")) else ""
)
art_title = _extract_text(article, "TI.ART")
art_subtitle = _extract_text(article, "STI.ART")
articles.append(
{
"id": int(art_id.lstrip("0")),
@@ -115,19 +125,49 @@ def toc(celex_id: str, language: Language = Language.ENG):
@api_router.get("/{celex_id}/articles/{article_id}/{language}")
def article(celex_id: str, article_id: int, language: Language = Language.ENG):
def article(
celex_id: str,
article_id: int,
language: Language = Language.ENG,
):
"""
Fetch an article from the server.
"""
xml = _get_fmx4_data(celex_id, language)
article = extract_article(xml, article_id=article_id)
article_xpath = "//ARTICLE"
articles = xml.xpath(article_xpath)
for article in articles:
num = article.get("IDENTIFIER").lstrip("0")
if num == str(article_id):
if article is None:
return Response(
FormexArticleConverter().convert_article(article),
"Article not found",
status_code=404,
)
return Response(
FormexArticleConverter(language=language).convert_article(article),
media_type="text/html",
)
@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Serve a single paragraph of an article, rendered as HTML.

    Responds with status 404 and a plain message when the document contains
    no paragraph with the requested article/paragraph numbers.
    """
    document = _get_fmx4_data(celex_id, language)
    element = extract_paragraph(
        document, article_id=article_id, paragraph_id=parag_id
    )

    if element is not None:
        converter = FormexArticleConverter(language=language)
        return Response(
            converter._convert_parag(element),
            media_type="text/html",
        )

    return Response(
        "Paragraph not found",
        status_code=404,
    )

52
tests/test_parser.py Normal file
View File

@@ -0,0 +1,52 @@
import pytest
from lxml import etree as ET
from formex_viewer.formex4 import FormexArticleConverter
from formex_viewer.main import Language
@pytest.fixture
def converter():
    """Provide an article converter configured for English text."""
    language = Language.ENG
    return FormexArticleConverter(language=language)
def test_convert_tree_order(converter):
    """The HTML blocks of a converted article appear in the same order as their XML source elements."""
    xml = """
<ARTICLE>
<SUBDIV>
<TITLE>
<TI>Subdivision Title</TI>
<STI>Subdivision Subtitle</STI>
</TITLE>
<PARAG IDENTIFIER="001.001">
<NO.PARAG>1</NO.PARAG>
<ALINEA>Paragraph 1 text.</ALINEA>
</PARAG>
<COMMENT>Comment text.</COMMENT>
<ALINEA>Alinea text.</ALINEA>
<QUOT.S>Quotation text.</QUOT.S>
<SUBDIV>
<TITLE>
<TI>Nested Subdivision</TI>
</TITLE>
<ALINEA>Nested alinea.</ALINEA>
</SUBDIV>
</SUBDIV>
</ARTICLE>
"""
    article = ET.fromstring(xml, ET.XMLParser(remove_blank_text=True))
    rendered = converter.convert_article(article)

    # One marker per block, listed in XML document order:
    # title, parag, comment, alinea, quot, nested subdiv
    markers = [
        "Subdivision Title",
        'class="paragraph"',
        "Comment text.",
        "Alinea text.",
        "Quotation text.",
        "Nested Subdivision",
    ]
    # str.index raises ValueError if a marker is missing from the output.
    positions = [rendered.index(marker) for marker in markers]

    # Positions must be strictly increasing, i.e. the output keeps the XML order.
    assert all(
        earlier < later for earlier, later in zip(positions, positions[1:])
    )

764
uv.lock generated

File diff suppressed because it is too large Load Diff