Compare commits

...

22 Commits

Author SHA1 Message Date
Adrian Rumpold
58bd1160c1 fix: Improve rendering of TOC panel 2025-05-20 12:08:14 +02:00
Adrian Rumpold
debaf567ea feat: Add additional example legislation 2025-05-20 12:07:58 +02:00
Adrian Rumpold
56d271d0df fix: Correctly convert single paragraph in FastAPI 2025-05-20 09:14:47 +02:00
Adrian Rumpold
f0d4214d17 fix: Preserve XML tree order in Formex parser 2025-05-20 09:05:32 +02:00
Adrian Rumpold
56b5e3e3a4 fix: Type hints in Formex parser 2025-05-20 08:37:16 +02:00
Adrian Rumpold
1d467c827a Fix panels to equal size 2025-05-07 13:00:45 +02:00
Adrian Rumpold
1aa2f541dc Keep history 2025-04-30 12:17:43 +02:00
Adrian Rumpold
2886989835 Fix history state manipulation 2025-04-30 12:15:44 +02:00
Adrian Rumpold
7dd913df7b Paragraph-level links, preview tooltips 2025-04-30 12:04:38 +02:00
Adrian Rumpold
ea7885eeee Update README 2025-04-30 08:32:11 +02:00
Adrian Rumpold
894d4f50ef Add high-level architecture to README 2025-04-29 14:12:32 +02:00
Adrian Rumpold
8aced6c67a Fix cross-ref hyperlink behavior 2025-04-29 10:15:00 +02:00
Adrian Rumpold
860c67b00b Fix xref links 2025-04-29 09:41:51 +02:00
Adrian Rumpold
04f46e3893 Article cross-reference linking 2025-04-29 09:34:14 +02:00
Adrian Rumpold
9597ccc3bd Fix Panel test setup 2025-04-29 07:37:15 +02:00
Adrian Rumpold
f113c72c10 Switch to Axios, Panel component test 2025-04-29 07:33:01 +02:00
Adrian Rumpold
3e5d465356 Tests and fixes for ArticleSelector component 2025-04-25 12:42:29 +02:00
Adrian Rumpold
727622755f More component tests 2025-04-25 11:31:31 +02:00
Adrian Rumpold
b281059218 Add test for the language switcher 2025-04-25 11:14:28 +02:00
Adrian Rumpold
00c32f72c2 Fix static asset serving in Modal app 2025-04-25 09:24:46 +02:00
Adrian Rumpold
abb415c380 Improved article selector with titles and structure 2025-04-25 09:18:53 +02:00
Adrian Rumpold
f21158c6c0 Correctly extract TOC text entries
The previous code could not correctly handle nested XML elements in the TOC text entries.
2025-04-25 09:18:23 +02:00
43 changed files with 6128 additions and 610 deletions

View File

@@ -0,0 +1,36 @@
## High-level architecture
```mermaid
flowchart TD
Cellar[Cellar API]
Frontend[React Frontend]
subgraph "FastAPI backend"
client[Cellar Client]
conv[Formex -> HTML renderer]
FastAPI[REST API]
end
Cellar --> meta@{ shape: docs, label: "XML/RDF Metadata"}
Cellar --> docs@{ shape: docs, label: "Formex 4 content"}
meta --> client
docs --> client
client -- Formex --> FastAPI
client -- Formex --> conv
conv -- HTML --> FastAPI
FastAPI -- TOC, HTML article text --> Frontend
```
## Resources
- Formex 4
  - [XML schema](https://op.europa.eu/documents/3938058/5910419/formex_manual_on_screen_version.html/)
  - [Physical specifications](https://op.europa.eu/documents/3938058/5896514/physical-specifications/)
- [Cellar](https://op.europa.eu/en/web/cellar/home)
  - [Publications API](https://op.europa.eu/en/web/cellar/cellar-data/publications)
  - [Metadata REST API](https://op.europa.eu/en/web/cellar/cellar-data/metadata/metadata-notices)
  - [Metadata SPARQL API](https://op.europa.eu/en/web/cellar/cellar-data/metadata/knowledge-graph)

View File

@@ -0,0 +1,64 @@
import { act } from "@testing-library/react";
import type * as ZustandExportedTypes from "zustand";
export * from "zustand";
// Grab the real (unmocked) zustand factories; the mocked `create`/`createStore`
// exported from this module wrap them.
const { create: actualCreate, createStore: actualCreateStore } =
jest.requireActual<typeof ZustandExportedTypes>("zustand");
// Holds one reset function per store created through this mock, so that every
// store declared in the app can be restored to its initial state between tests.
export const storeResetFns = new Set<() => void>();
/**
 * Builds a real zustand hook store and registers a function that puts it back
 * into its initial state.
 *
 * @param stateCreator - The state creator passed on to the real `create`.
 * @returns The store produced by the real `create`.
 */
const createUncurried = <T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  const hookStore = actualCreate(stateCreator);
  const pristineState = hookStore.getInitialState();

  // `true` replaces the whole state instead of merging into it.
  const restore = (): void => {
    hookStore.setState(pristineState, true);
  };
  storeResetFns.add(restore);

  return hookStore;
};
/**
 * Mocked replacement for zustand's `create`.
 *
 * Every store created through it has a reset function registered in
 * `storeResetFns`, so tests can restore all stores to their initial state.
 * Both call styles are supported: `create(stateCreator)` and the curried
 * `create()(stateCreator)`.
 *
 * @param stateCreator - The state creator, or nothing when the curried form is used.
 * @returns The created store, or the store factory for the curried form.
 */
export const create = (<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  // The curried form calls `create()` without arguments and passes the state
  // creator to the returned function instead.
  return typeof stateCreator === "function"
    ? createUncurried(stateCreator)
    : createUncurried;
}) as typeof ZustandExportedTypes.create;
/**
 * Builds a real vanilla zustand store and registers a function that puts it
 * back into its initial state.
 *
 * @param stateCreator - The state creator passed on to the real `createStore`.
 * @returns The store produced by the real `createStore`.
 */
const createStoreUncurried = <T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  const vanillaStore = actualCreateStore(stateCreator);
  const pristineState = vanillaStore.getInitialState();

  // `true` replaces the whole state instead of merging into it.
  const restore = (): void => {
    vanillaStore.setState(pristineState, true);
  };
  storeResetFns.add(restore);

  return vanillaStore;
};
/**
 * Mocked replacement for zustand's `createStore`.
 *
 * Every store created through it has a reset function registered in
 * `storeResetFns`, so tests can restore all stores to their initial state.
 * Both call styles are supported: `createStore(stateCreator)` and the curried
 * `createStore()(stateCreator)`.
 *
 * @param stateCreator - The state creator, or nothing when the curried form is used.
 * @returns The created store, or the store factory for the curried form.
 */
export const createStore = (<T>(
  stateCreator: ZustandExportedTypes.StateCreator<T>
) => {
  // The curried form calls `createStore()` without arguments and passes the
  // state creator to the returned function instead.
  return typeof stateCreator === "function"
    ? createStoreUncurried(stateCreator)
    : createStoreUncurried;
}) as typeof ZustandExportedTypes.createStore;
// After each test, run every registered reset function inside `act` so that
// the resulting state updates are flushed before the next test starts.
afterEach(() => {
  const resetAllStores = (): void => {
    storeResetFns.forEach((reset) => reset());
  };
  act(resetAllStores);
});

View File

@@ -1,28 +1,33 @@
import js from '@eslint/js' import js from "@eslint/js";
import globals from 'globals' import pluginQuery from "@tanstack/eslint-plugin-query";
import reactHooks from 'eslint-plugin-react-hooks' import reactHooks from "eslint-plugin-react-hooks";
import reactRefresh from 'eslint-plugin-react-refresh' import reactRefresh from "eslint-plugin-react-refresh";
import tseslint from 'typescript-eslint' import globals from "globals";
import tseslint from "typescript-eslint";
export default tseslint.config( export default tseslint.config(
{ ignores: ['dist'] }, { ignores: ["dist"] },
{ {
extends: [js.configs.recommended, ...tseslint.configs.recommended], extends: [
files: ['**/*.{ts,tsx}'], js.configs.recommended,
...tseslint.configs.recommended,
...pluginQuery.configs["flat/recommended"],
],
files: ["**/*.{ts,tsx}"],
languageOptions: { languageOptions: {
ecmaVersion: 2020, ecmaVersion: 2020,
globals: globals.browser, globals: globals.browser,
}, },
plugins: { plugins: {
'react-hooks': reactHooks, "react-hooks": reactHooks,
'react-refresh': reactRefresh, "react-refresh": reactRefresh,
}, },
rules: { rules: {
...reactHooks.configs.recommended.rules, ...reactHooks.configs.recommended.rules,
'react-refresh/only-export-components': [ "react-refresh/only-export-components": [
'warn', "warn",
{ allowConstantExport: true }, { allowConstantExport: true },
], ],
}, },
}, }
) );

12
frontend/jest.config.js Normal file
View File

@@ -0,0 +1,12 @@
// Jest configuration for the frontend test suite.
/** @type {import('ts-jest').JestConfigWithTsJest} **/
export default {
// Start from the ts-jest preset so TypeScript sources can be executed by Jest.
preset: "ts-jest",
// Run tests in a jsdom environment so components have a DOM to render into.
testEnvironment: "jsdom",
transform: {
// Compile .ts/.tsx files with ts-jest, using the options from tsconfig.app.json.
"^.+\\.tsx?$": ["ts-jest", { tsconfig: "tsconfig.app.json" }],
},
moduleNameMapper: {
// Resolve stylesheet imports to jest-css-modules instead of the real files.
"\\.(css|less|scss|sss|styl)$": "<rootDir>/node_modules/jest-css-modules",
},
// Setup module loaded for every test file after the test framework is installed.
setupFilesAfterEnv: ["<rootDir>/jest.setup.ts"],
};

1
frontend/jest.setup.ts Normal file
View File

@@ -0,0 +1 @@
import "@testing-library/jest-dom";

File diff suppressed because it is too large Load Diff

View File

@@ -7,11 +7,14 @@
"dev": "vite", "dev": "vite",
"build": "tsc -b && vite build", "build": "tsc -b && vite build",
"lint": "eslint .", "lint": "eslint .",
"preview": "vite preview" "preview": "vite preview",
"test": "jest"
}, },
"dependencies": { "dependencies": {
"@floating-ui/react": "^0.27.8",
"@tanstack/react-query": "^5.74.4", "@tanstack/react-query": "^5.74.4",
"@tanstack/react-query-devtools": "^5.74.6", "@tanstack/react-query-devtools": "^5.74.6",
"axios": "^1.9.0",
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0", "react-dom": "^19.0.0",
"react-router-dom": "^7.5.1", "react-router-dom": "^7.5.1",
@@ -19,14 +22,25 @@
}, },
"devDependencies": { "devDependencies": {
"@eslint/js": "^9.22.0", "@eslint/js": "^9.22.0",
"@jest/globals": "^29.7.0",
"@tanstack/eslint-plugin-query": "^5.73.3", "@tanstack/eslint-plugin-query": "^5.73.3",
"@types/react": "^19.0.10", "@testing-library/dom": "^10.4.0",
"@types/react-dom": "^19.0.4", "@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/jest": "^29.5.14",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.2",
"@vitejs/plugin-react": "^4.3.4", "@vitejs/plugin-react": "^4.3.4",
"eslint": "^9.22.0", "eslint": "^9.22.0",
"eslint-plugin-react-hooks": "^5.2.0", "eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19", "eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0", "globals": "^16.0.0",
"jest": "^29.7.0",
"jest-css-modules": "^2.1.0",
"jest-environment-jsdom": "^29.7.0",
"ts-jest": "^29.3.2",
"ts-node": "^10.9.2",
"typescript": "~5.7.2", "typescript": "~5.7.2",
"typescript-eslint": "^8.26.1", "typescript-eslint": "^8.26.1",
"typescript-plugin-css-modules": "^5.1.0", "typescript-plugin-css-modules": "^5.1.0",

View File

@@ -1,64 +1,42 @@
import { useQueries } from "@tanstack/react-query";
import { getArticleIds, getToc } from "./lib/api";
import { Language } from "./lib/types"; import { Language } from "./lib/types";
import ArticleSelector from "./components/ArticleSelector/ArticleSelector"; import ArticleSelector from "./components/ArticleSelector/ArticleSelector";
import Panel from "./components/Panel/Panel"; import Panel from "./components/Panel/Panel";
import TOC from "./components/TOC/TOC"; import TOC from "./components/TOC/TOC";
import useNavState from "./store/navStore";
import useUIStore from "./store/uiStore"; import useUIStore from "./store/uiStore";
import styles from "./App.module.css"; import styles from "./App.module.css";
import CelexSelector from "./components/CelexSelector/CelexSelector"; import CelexSelector from "./components/CelexSelector/CelexSelector";
import { useTOC } from "./hooks/toc";
function App() { function App() {
const { numPanels, addPanel } = useUIStore(); const numPanels = useUIStore((state) => state.numPanels);
const { celexId, articleId } = useNavState(); const addPanel = useUIStore((state) => state.addPanel);
const { data: toc, isPending, error } = useTOC();
const results = useQueries({
queries: [
{
queryKey: ["articleIds", celexId],
queryFn: () => getArticleIds(celexId!),
enabled: !!celexId,
},
{
queryKey: ["toc", celexId],
queryFn: () => getToc(celexId!, Language.ENG),
enabled: !!celexId,
},
],
});
const isPending = results.some((result) => result.isPending);
const error = results.find((result) => result.isError);
if (isPending) { if (isPending) {
return <div>Loading...</div>; return <div>Loading...</div>;
} }
if (error) { if (error) {
return <div>Error: {error.error?.message}</div>; return <div>Error: {error.message}</div>;
} }
return ( return (
<div className={styles.App}> <div className={styles.App}>
<div className={styles.controls}> <div className={styles.controls}>
<CelexSelector /> <CelexSelector />
<ArticleSelector articleIds={results[0].data!} /> <ArticleSelector toc={toc} />
<button onClick={addPanel}>Add Panel</button> <button onClick={addPanel}>Add Panel</button>
</div> </div>
<div className={styles.panelContainer}> <div className={styles.panelContainer}>
<TOC toc={results[1].data!} /> <TOC toc={toc} />
{Array.from({ length: numPanels }, (_, index) => ( {Array.from({ length: numPanels }, (_, index) => (
<Panel <Panel
key={index} key={index}
celexId={celexId!}
language={ language={
Object.values(Language)[index % Object.values(Language).length] Object.values(Language)[index % Object.values(Language).length]
} }
articleId={articleId!}
/> />
))} ))}
</div> </div>

View File

@@ -0,0 +1,4 @@
/* Fixed-width article dropdown; labels longer than the box should end in an ellipsis. */
.articleSelector {
width: 40ch;
/* NOTE(review): text-overflow usually only takes effect together with
   overflow and white-space settings; confirm it truncates as intended on
   this <select> in the target browsers. */
text-overflow: ellipsis;
}

View File

@@ -0,0 +1,60 @@
import { render } from "@testing-library/react";
import type { Division } from "../../lib/types";
import ArticleSelector from "./ArticleSelector";
describe("ArticleSelector", () => {
  // Fixture: a top-level chapter (level 0) wrapping one nested section that
  // holds two articles.
  const chapter: Division = {
    type: "division",
    title: "Chapter 1",
    subtitle: "Introduction",
    level: 0,
    content: [
      {
        type: "division",
        title: "Section 1.1",
        subtitle: "Overview",
        level: 2,
        content: [
          {
            type: "article",
            id: 1,
            title: "Article 1",
            subtitle: "Details",
          },
          {
            type: "article",
            id: 2,
            title: "Article 2",
            subtitle: "Summary",
          },
        ],
      },
    ],
  };

  // Renders the selector with the fixture as its only TOC entry.
  const renderSelector = () => render(<ArticleSelector toc={[chapter]} />);

  test("renders a top-level division as an optgroup", () => {
    const view = renderSelector();

    expect(
      view.getByRole("group", { name: "Chapter 1: Introduction" })
    ).toBeInTheDocument();
  });

  test("renders nested divisions as options", () => {
    const view = renderSelector();

    const firstArticle = view.getByText("Article 1: Details");
    const secondArticle = view.getByText("Article 2: Summary");

    expect(firstArticle).toBeInTheDocument();
    expect(secondArticle).toBeInTheDocument();
  });

  test("flattens nested divisions correctly", () => {
    const view = renderSelector();

    // Only the two articles become options; the nested section adds none.
    expect(view.container.querySelectorAll("option")).toHaveLength(2);
  });
});

View File

@@ -1,33 +1,57 @@
import useNavState from "../../store/navStore"; import { Fragment, JSX } from "react";
import type { Division } from "../../lib/types";
import useNavStore from "../../store/navStore";
import styles from "./ArticleSelector.module.css";
type ArticleSelectorProps = { type ArticleSelectorProps = {
articleIds: number[]; toc: Division[];
}; };
function ArticleSelector({ articleIds }: ArticleSelectorProps) { /**
const { articleId, setArticleId } = useNavState(); * Renders a division and its contents as a nested structure of optgroups and options.
* @param {Division} div - The division to render.
* @returns {JSX.Element} The rendered division, suitable for use inside a `select` tag.
*/
function renderDivision(div: Division): JSX.Element {
  // Recursively turn the division's children into <option> elements
  // (articles) or nested renderings (sub-divisions).
  const contents = div.content.map((c) => {
    if (c.type === "division") {
      return renderDivision(c);
    }

    // Key articles by their id, which is also the option value, rather than
    // by the display label: the label is free text and is not guaranteed to
    // be unique among siblings.
    return (
      <option key={c.id} value={c.id}>
        {`${c.title}: ${c.subtitle}`}
      </option>
    );
  });

  const title = `${div.title}: ${div.subtitle}`;

  if (div.level === 0) {
    return (
      // For top-level divisions, we can use optgroup
      <optgroup key={title} label={title}>
        {contents}
      </optgroup>
    );
  }

  // HTML does not support nested optgroups, so we need to flatten the structure
  return <Fragment key={title}>{contents}</Fragment>;
}
function ArticleSelector({ toc }: ArticleSelectorProps) {
const articleId = useNavStore.use.articleId();
const setArticleId = useNavStore.use.setArticleId();
return ( return (
<> <select
{articleId && articleId > 1 && ( value={articleId || undefined}
<button onClick={() => setArticleId(articleId - 1)}>prev</button> className={styles.articleSelector}
)} onChange={(e) => setArticleId(parseInt(e.target.value))}
<select >
value={articleId || undefined} {toc.map(renderDivision)}
onChange={(e) => { </select>
const id = parseInt(e.currentTarget.value);
setArticleId(id);
}}
>
{articleIds.map((id) => (
<option key={id} value={id}>
Article {id}
</option>
))}
</select>
{articleId && articleId < articleIds[articleIds.length - 1] && (
<button onClick={() => setArticleId(articleId + 1)}>next</button>
)}
</>
); );
} }

View File

@@ -0,0 +1,40 @@
import { fireEvent, render } from "@testing-library/react";
import { examples } from "../../lib/examples";
import useNavStore from "../../store/navStore";
import CelexSelector from "./CelexSelector";
describe("CelexSelector", () => {
  it("renders the dropdown with options", () => {
    const { getByLabelText, getAllByRole, getByRole } = render(
      <CelexSelector />
    );

    expect(getByLabelText("Select example:")).toBeInTheDocument();
    expect(getByRole("combobox")).toBeInTheDocument();

    const [def, ...options] = getAllByRole("option");

    // First option is the disabled placeholder option
    expect(def).toHaveValue("");
    expect(def).toHaveTextContent("Select an example");
    expect(def).toBeDisabled();

    // The remaining options mirror the example list, in the same order.
    // Iterate with numeric indices; `for..in` would yield string keys.
    expect(options).toHaveLength(examples.length);
    examples.forEach((example, index) => {
      expect(options[index]).toHaveValue(example.id);
      expect(options[index]).toHaveTextContent(example.name);
    });
  });

  // Nothing is awaited here: fireEvent dispatches synchronously.
  it("calls setCelexId and setArticleId on selection change", () => {
    const celexId = examples[2].id;
    const { getByRole } = render(<CelexSelector />);

    fireEvent.change(getByRole("combobox"), {
      target: { value: celexId },
    });

    // Selecting an example stores its CELEX id and jumps back to article 1.
    expect(useNavStore.getState().celexId).toEqual(celexId);
    expect(useNavStore.getState().articleId).toEqual(1);
  });
});

View File

@@ -1,13 +1,10 @@
import useNavState from "../../store/navStore"; import { examples } from "../../lib/examples";
import useNavStore from "../../store/navStore";
const examples = [
{ name: "GDPR", id: "32016R0679" },
{ name: "AI Act", id: "32024R1689" },
{ name: "Cyber Resilience Act", id: "32024R2847" },
];
function CelexSelector() { function CelexSelector() {
const { celexId, setCelexId, setArticleId } = useNavState(); const celexId = useNavStore.use.celexId();
const setCelexId = useNavStore.use.setCelexId();
const setArticleId = useNavStore.use.setArticleId();
return ( return (
<div> <div>
@@ -20,6 +17,9 @@ function CelexSelector() {
setArticleId(1); setArticleId(1);
}} }}
> >
<option value="" disabled>
Select an example
</option>
{examples.map((example) => ( {examples.map((example) => (
<option key={example.id} value={example.id}> <option key={example.id} value={example.id}>
{example.name} {example.name}

View File

@@ -0,0 +1,44 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { Language } from "../../lib/types";
import LanguageSwitcher from "./LanguageSwitcher";
describe("LanguageSwitcher", () => {
  // Shared change handler; cleared before every test.
  const handleChange = jest.fn();

  // Renders the switcher, defaulting to the shared handler and English.
  function renderSwitcher(
    onChange = handleChange,
    defaultLang = Language.ENG
  ) {
    render(<LanguageSwitcher defaultLang={defaultLang} onChange={onChange} />);
  }

  beforeEach(() => {
    jest.clearAllMocks();
  });

  test("renders with correct default language", async () => {
    renderSwitcher();

    const select = screen.getByRole("combobox");
    expect(select).toHaveValue(Language.ENG);
  });

  test("calls onChange handler when a new language is selected", async () => {
    renderSwitcher();

    const select = screen.getByRole("combobox");
    await userEvent.selectOptions(select, Language.ESP);

    expect(handleChange).toHaveBeenCalledWith(Language.ESP);
  });

  test("renders all language options correctly", () => {
    renderSwitcher();

    const renderedOptions = screen.getAllByRole("option");
    const expectedLanguages = Object.values(Language);

    expect(renderedOptions).toHaveLength(expectedLanguages.length);

    // Options appear in enum order, labelled with the upper-cased code.
    expectedLanguages.forEach((lang, position) => {
      const option = renderedOptions[position];
      expect(option).toHaveValue(lang);
      expect(option).toHaveTextContent(lang.toUpperCase());
    });
  });
});

View File

@@ -10,6 +10,7 @@ function LanguageSwitcher({
}) { }) {
return ( return (
<select <select
data-testid="language-switcher"
defaultValue={defaultLang} defaultValue={defaultLang}
onChange={(ev) => onChange(ev.currentTarget.value as Language)} onChange={(ev) => onChange(ev.currentTarget.value as Language)}
className={styles.languageSwitcher} className={styles.languageSwitcher}

View File

@@ -1,5 +1,5 @@
.panel { .panel {
flex: 1 auto; flex: auto;
padding: 1rem; padding: 1rem;
border-radius: 8px; border-radius: 8px;
border: 1px solid #ccc; border: 1px solid #ccc;

View File

@@ -0,0 +1,114 @@
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { fireEvent, render } from "@testing-library/react";
import React from "react";
import { getArticle } from "../../lib/api";
import { Language } from "../../lib/types";
import useNavStore from "../../store/navStore";
import useUIStore from "../../store/uiStore";
import Panel from "./Panel";
// Auto-mock the stores and the API client so each test controls their behaviour.
jest.mock("../../store/uiStore");
jest.mock("../../store/navStore");
jest.mock("../../lib/api");
// Replace the constants module with a fixed value. The factory must return the
// module object itself: returning a Promise would make the mocked module a
// Promise, leaving `API_URL` undefined for importers.
jest.mock("../../constants", () => ({
  API_URL: "http://localhost:8000/api", // Mock the API_URL to a local server for testing
}));
// Query client shared by all tests in this file (cleared after each test).
// Retries are disabled, presumably so a rejected query reaches its error state
// right away instead of being retried first.
const queryClient = new QueryClient({
defaultOptions: {
queries: {
retry: false,
},
},
});
// Wrapper passed to `render` so the component under test has a
// QueryClientProvider ancestor.
const wrapper = ({ children }: { children: React.ReactNode }) => (
<QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
);
describe("Panel Component", () => {
  const setSelectedParagraphIdSpy = jest.fn();

  // Values returned by the mocked store hooks.
  const uiStoreState = {
    selectedParagraphId: null,
    setSelectedParagraphId: setSelectedParagraphIdSpy,
  };
  const navStoreState = {
    celexId: "123",
    articleId: 1,
  };

  // Typed handle on the auto-mocked API function.
  const getArticleMock = jest.mocked(getArticle);

  // Renders a Panel inside the query-client wrapper.
  const renderPanel = (props: React.ComponentProps<typeof Panel> = {}) =>
    render(<Panel {...props} />, { wrapper });

  const isHighlighted = (element: HTMLElement): boolean =>
    element.classList.contains("highlight");

  beforeEach(() => {
    jest.mocked(useNavStore).mockReturnValue(navStoreState);
    jest.mocked(useUIStore).mockReturnValue(uiStoreState);
  });

  afterEach(() => {
    jest.clearAllMocks();
    queryClient.clear();
  });

  test("renders loading state", () => {
    // A promise that never settles keeps the query pending.
    getArticleMock.mockReturnValue(new Promise<never>(() => {}));

    const view = renderPanel();

    expect(view.getByText("Loading...")).toBeInTheDocument();
  });

  test("renders error state", async () => {
    getArticleMock.mockRejectedValue(new Error("Failed to fetch"));

    const view = renderPanel();

    const message = await view.findByText(
      "An error has occurred: Failed to fetch"
    );
    expect(message).toBeInTheDocument();
  });

  test("renders article content", async () => {
    const mockData = `<div class='paragraph' data-paragraph-id='1'>Test Content</div>`;
    getArticleMock.mockResolvedValue(mockData);

    const view = renderPanel();

    expect(await view.findByText("Test Content")).toBeInTheDocument();
  });

  test("highlights a paragraph on click", async () => {
    const mockData = `
<div class='paragraph' data-paragraph-id='1'>Paragraph 1</div>
<div class='paragraph' data-paragraph-id='2'>Paragraph 2</div>
`;
    getArticleMock.mockResolvedValue(mockData);

    const view = renderPanel();
    const firstParagraph = await view.findByText("Paragraph 1");
    const secondParagraph = await view.findByText("Paragraph 2");

    // Clicking the first paragraph highlights only that paragraph.
    fireEvent.click(firstParagraph);
    expect(isHighlighted(firstParagraph)).toBe(true);
    expect(isHighlighted(secondParagraph)).toBe(false);
    expect(setSelectedParagraphIdSpy).toHaveBeenCalledWith("1");

    // Clicking the second paragraph moves the highlight.
    fireEvent.click(secondParagraph);
    expect(isHighlighted(firstParagraph)).toBe(false);
    expect(isHighlighted(secondParagraph)).toBe(true);
    expect(setSelectedParagraphIdSpy).toHaveBeenCalledWith("2");
  });

  test("renders LanguageSwitcher and updates language", async () => {
    getArticleMock.mockResolvedValue(
      "<div class='paragraph' data-paragraph-id='1'>Test Content</div>"
    );

    const view = renderPanel({ language: Language.FRA });
    const languageSwitcher = await view.findByRole("combobox");
    expect(languageSwitcher).toBeInTheDocument();

    // Switching language must request the article in the new language.
    fireEvent.change(languageSwitcher, { target: { value: Language.ENG } });
    expect(getArticleMock).toHaveBeenCalledWith("123", 1, Language.ENG);
  });
});

View File

@@ -1,35 +1,70 @@
import { useQuery } from "@tanstack/react-query";
import { useEffect, useRef, useState } from "react"; import { useEffect, useRef, useState } from "react";
import { getArticle } from "../../lib/api";
import { Language } from "../../lib/types"; import { Language } from "../../lib/types";
import useUIStore from "../../store/uiStore"; import useUIStore from "../../store/uiStore";
import LanguageSwitcher from "../LanguageSwitcher/LanguageSwitcher"; import LanguageSwitcher from "../LanguageSwitcher/LanguageSwitcher";
import { useArticle } from "../../hooks/useArticle";
import useNavStore from "../../store/navStore";
import "../../styles/PanelContent.css"; import "../../styles/PanelContent.css";
import ParagraphPreview from "../ParagraphPreview/ParagraphPreview";
import { Tooltip, TooltipContent } from "../Tooltip/Tooltip";
import styles from "./Panel.module.css"; import styles from "./Panel.module.css";
type PanelProps = { type PanelProps = {
celexId: string;
language?: Language; language?: Language;
articleId: number;
}; };
function Panel({ celexId, language, articleId }: PanelProps) { function Panel({ language }: PanelProps) {
const { selectedParagraphId, setSelectedParagraphId } = useUIStore(); const { selectedParagraphId, setSelectedParagraphId } = useUIStore();
const [lang, setLang] = useState(language || Language.ENG); const [lang, setLang] = useState(language || Language.ENG);
const articleRef = useRef<HTMLDivElement>(null); const articleRef = useRef<HTMLDivElement>(null);
const { data, isPending, error } = useQuery({ const { articleId, celexId, setArticleId } = useNavStore();
queryKey: ["article", celexId, articleId, lang], const { data, isPending, error } = useArticle(celexId, articleId, lang);
queryFn: () => getArticle(celexId, articleId, lang),
enabled: !!celexId && !!articleId, const [hoverArticleId, setHoverArticleId] = useState<number | null>(null);
}); const [hoverParagraphId, setHoverParagraphId] = useState<number | null>(null);
const [isTooltipOpen, setIsTooltipOpen] = useState(false);
useEffect(() => { useEffect(() => {
const articleElement = articleRef.current; const articleElement = articleRef.current;
if (!articleElement) return; if (!articleElement) return;
// Replace cross-reference links with page navigation
const crossRefs = articleElement.querySelectorAll(
"a.cross-ref"
) as NodeListOf<HTMLAnchorElement>;
crossRefs.forEach((link) => {
const target = link.getAttribute("data-target");
const targetId = link.getAttribute("data-id");
const paragraphId = link.getAttribute("data-paragraph-id");
if (target && targetId) {
if (target === "article") {
// Replace link for easier copying, but navigate in-place to maintain UI state
link.setAttribute("href", `/${celexId}/articles/${targetId}`);
link.onclick = () => {
setArticleId(parseInt(targetId));
return false;
};
if (paragraphId) {
link.onmouseover = () => {
setHoverArticleId(parseInt(targetId));
setHoverParagraphId(parseInt(paragraphId));
setIsTooltipOpen(true);
};
link.onmouseout = () => {
setIsTooltipOpen(false);
};
}
}
} else {
console.warn("No target or ID found for link:", link);
}
});
const paragraphs = articleElement.querySelectorAll(".paragraph"); const paragraphs = articleElement.querySelectorAll(".paragraph");
// Highlight the selected paragraph // Highlight the selected paragraph
@@ -66,11 +101,15 @@ function Panel({ celexId, language, articleId }: PanelProps) {
// Cleanup event listeners // Cleanup event listeners
return () => { return () => {
console.log("Cleaning up event listeners"); console.log("Cleaning up event listeners");
// crossRefs.forEach((link) => {
// link.onmouseover = null;
// link.onmouseout = null;
// });
paragraphs.forEach((element) => { paragraphs.forEach((element) => {
element.removeEventListener("click", handleClick(element)); element.removeEventListener("click", handleClick(element));
}); });
}; };
}, [articleRef, data, selectedParagraphId, setSelectedParagraphId]); });
if (isPending) return "Loading..."; if (isPending) return "Loading...";
if (error) return "An error has occurred: " + error.message; if (error) return "An error has occurred: " + error.message;
@@ -81,10 +120,24 @@ function Panel({ celexId, language, articleId }: PanelProps) {
defaultLang={lang} defaultLang={lang}
onChange={setLang} onChange={setLang}
></LanguageSwitcher> ></LanguageSwitcher>
<Tooltip
open={isTooltipOpen}
placement="right-start"
onOpenChange={setIsTooltipOpen}
>
<TooltipContent>
<ParagraphPreview
celexId={celexId!}
articleId={hoverArticleId!}
paragraphId={hoverParagraphId!}
lang={lang}
/>
</TooltipContent>
</Tooltip>
<div <div
ref={articleRef} ref={articleRef}
lang={lang.substring(0, 2)} lang={lang.substring(0, 2)}
dangerouslySetInnerHTML={{ __html: data || "" }} dangerouslySetInnerHTML={{ __html: data }}
/> />
</div> </div>
); );

View File

@@ -0,0 +1,34 @@
import { useParagraph } from "../../hooks/useParagraph";
import { Language } from "../../lib/types";
type ParagraphPreviewProps = {
  celexId: string;
  articleId: number;
  paragraphId: number;
  lang: Language;
};

/**
 * Shows a single paragraph, loaded through `useParagraph`.
 *
 * Renders a loading placeholder while the query is pending, an error message
 * if it failed, and otherwise the returned markup.
 */
function ParagraphPreview(props: ParagraphPreviewProps) {
  const { celexId, articleId, paragraphId, lang } = props;
  const query = useParagraph(celexId, articleId, paragraphId, lang);

  if (query.isPending) {
    return <p>Loading...</p>;
  }

  if (query.error) {
    return <p>Error loading paragraph: {query.error.message}</p>;
  }

  // NOTE(review): the markup is injected as-is; this assumes the source of the
  // HTML is trusted. Confirm against the API that produces it.
  return <div dangerouslySetInnerHTML={{ __html: query.data }} />;
}
export default ParagraphPreview;

View File

@@ -1,16 +1,15 @@
.toc { .toc {
font-size: 0.8rem; font-size: 0.8rem;
min-width: 25vw; flex: 1 0 25vw;
flex: 1 auto;
&.hidden { &.hidden {
flex: 0 0; display: none;
min-width: 0;
} }
transition: flex-basis 0.1s ease-in-out;
overflow-y: scroll; overflow-y: scroll;
overflow-x: wrap; overflow-x: wrap;
height: 100vh;
.tocDivision { .tocDivision {
margin-block: 0.5rem; margin-block: 0.5rem;

View File

@@ -1,16 +1,14 @@
import { useState } from "react"; import { useState } from "react";
import { Division } from "../../lib/types"; import { Division } from "../../lib/types";
import useNavState from "../../store/navStore"; import useNavStore from "../../store/navStore";
import styles from "./TOC.module.css"; import styles from "./TOC.module.css";
type TOC = Division[];
type TOCProps = { type TOCProps = {
toc: TOC; toc: Division[];
}; };
function TOC({ toc }: TOCProps) { function TOC({ toc }: TOCProps) {
const { articleId, setArticleId } = useNavState(); const { articleId, setArticleId } = useNavStore();
function containsArticle(division: Division, articleId: number): boolean { function containsArticle(division: Division, articleId: number): boolean {
return division.content.some((c) => { return division.content.some((c) => {
@@ -57,15 +55,17 @@ function TOC({ toc }: TOCProps) {
const [isVisible, setIsVisible] = useState(true); const [isVisible, setIsVisible] = useState(true);
return ( return (
<nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}> <>
<button <button
onClick={() => setIsVisible(!isVisible)} onClick={() => setIsVisible(!isVisible)}
className={styles.toggleButton} className={styles.toggleButton}
> >
{isVisible ? "<" : ">"} {isVisible ? "<" : ">"}
</button> </button>
{toc.map((division) => renderDivision(division))} <nav className={[styles.toc, isVisible ? "" : styles.hidden].join(" ")}>
</nav> {toc.map((division) => renderDivision(division))}
</nav>
</>
); );
} }
export default TOC; export default TOC;

View File

@@ -0,0 +1,10 @@
/* Appearance of the floating tooltip bubble: light text on a dark background. */
.Tooltip {
background-color: #444;
color: white;
font-size: 90%;
padding: 4px 8px;
border-radius: 4px;
box-sizing: border-box;
/* Size to the content, but never wider than 60 characters. */
width: max-content;
max-width: 60ch;
}

View File

@@ -0,0 +1,163 @@
import type { Placement } from "@floating-ui/react";
import {
autoUpdate,
flip,
FloatingPortal,
offset,
shift,
useDismiss,
useFloating,
useFocus,
useHover,
useInteractions,
useMergeRefs,
useRole,
} from "@floating-ui/react";
import * as React from "react";
import styles from "./Tooltip.module.css";
/** Options accepted by `useTooltip` and, through it, by the `Tooltip` component. */
interface TooltipOptions {
/** Initial open state when the tooltip manages its own state (defaults to `false`). */
initialOpen?: boolean;
/** Preferred placement handed to floating-ui (defaults to `"top"`). */
placement?: Placement;
/** Controlled open state; when set, it takes precedence over the internal state. */
open?: boolean;
/** Called with the requested open state; when set, it replaces the internal setter. */
onOpenChange?: (open: boolean) => void;
}
/**
 * Wires up floating-ui positioning and interactions for a tooltip.
 *
 * The tooltip works uncontrolled (internal open state, opened by hover and
 * focus) or controlled (`open` supplied by the caller, in which case hover
 * and focus handling are disabled).
 *
 * @param options - See `TooltipOptions`.
 * @returns The open state and its setter, merged with the interaction prop
 *   getters and the `useFloating` result.
 */
export function useTooltip(options: TooltipOptions = {}) {
  const {
    initialOpen = false,
    placement = "top",
    open: controlledOpen,
    onOpenChange: setControlledOpen,
  } = options;

  const [uncontrolledOpen, setUncontrolledOpen] = React.useState(initialOpen);

  // Caller-supplied state and setter win over the internal ones.
  const open = controlledOpen ?? uncontrolledOpen;
  const setOpen = setControlledOpen ?? setUncontrolledOpen;

  const floating = useFloating({
    placement,
    open,
    onOpenChange: setOpen,
    whileElementsMounted: autoUpdate,
    middleware: [
      offset(5),
      flip({
        // Only flip on the cross axis for aligned placements such as "right-start".
        crossAxis: placement.includes("-"),
        fallbackAxisSideDirection: "start",
        padding: 5,
      }),
      shift({ padding: 5 }),
    ],
  });
  const { context } = floating;

  // Hover and focus only drive the tooltip when the caller does not control it.
  const isUncontrolled = controlledOpen == null;

  const interactions = useInteractions([
    useHover(context, { move: false, enabled: isUncontrolled }),
    useFocus(context, { enabled: isUncontrolled }),
    useDismiss(context),
    useRole(context, { role: "tooltip" }),
  ]);

  return React.useMemo(
    () => ({
      open,
      setOpen,
      ...interactions,
      ...floating,
    }),
    [open, setOpen, interactions, floating]
  );
}
// The context carries whatever `useTooltip` returns; `null` is the default
// value seen when no provider is present.
type ContextType = ReturnType<typeof useTooltip> | null;
const TooltipContext = React.createContext<ContextType>(null);
/**
 * Reads the tooltip state provided by the nearest `<Tooltip />`.
 *
 * @returns The value produced by `useTooltip` in the enclosing `<Tooltip />`.
 * @throws Error if called outside of a `<Tooltip />`.
 */
export const useTooltipContext = () => {
  const tooltip = React.useContext(TooltipContext);

  if (tooltip != null) {
    return tooltip;
  }

  throw new Error("Tooltip components must be wrapped in <Tooltip />");
};
/**
 * Root tooltip component: creates the tooltip state from the given options
 * (e.g. `placement`) and provides it to its children through context.
 */
export function Tooltip(
  props: { children: React.ReactNode } & TooltipOptions
) {
  const { children, ...options } = props;
  const tooltip = useTooltip(options);

  return (
    <TooltipContext.Provider value={tooltip}>
      {children}
    </TooltipContext.Provider>
  );
}
/**
 * Anchor element of the tooltip.
 *
 * By default the children are rendered inside a `<button>`. With `asChild`
 * and a single valid React element as child, that element is cloned and used
 * as the anchor instead. In both cases the element receives the reference
 * props and a `data-state` attribute ("open" | "closed") usable for styling.
 */
export const TooltipTrigger = React.forwardRef<
  HTMLElement,
  React.HTMLProps<HTMLElement> & { asChild?: boolean }
>(function TooltipTrigger({ children, asChild = false, ...props }, propRef) {
  const context = useTooltipContext();

  // Only React elements carry a `ref`. Reading it unconditionally would throw
  // a TypeError when the trigger is rendered with `null`/`undefined` children.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const childrenRef = React.isValidElement(children) ? (children as any).ref : undefined;
  const ref = useMergeRefs([context.refs.setReference, propRef, childrenRef]);

  // `asChild` allows the user to pass any element as the anchor
  if (asChild && React.isValidElement(children)) {
    return React.cloneElement(
      children,
      context.getReferenceProps({
        ref,
        ...props,
        ...children.props,
        "data-state": context.open ? "open" : "closed",
      })
    );
  }

  return (
    <button
      ref={ref}
      // The user can style the trigger based on the state
      data-state={context.open ? "open" : "closed"}
      {...context.getReferenceProps(props)}
    >
      {children}
    </button>
  );
});
/**
 * Floating tooltip body. Renders nothing while the tooltip is closed;
 * otherwise renders a portalled `<div>` carrying the floating styles, with
 * any caller-supplied `style` entries taking precedence.
 */
export const TooltipContent = React.forwardRef<
  HTMLDivElement,
  React.HTMLProps<HTMLDivElement>
>(function TooltipContent({ style, ...props }, propRef) {
  const { open, refs, floatingStyles, getFloatingProps } = useTooltipContext();
  const ref = useMergeRefs([refs.setFloating, propRef]);

  if (!open) {
    return null;
  }

  // Caller styles are spread last so they can override the computed position
  const mergedStyle = { ...floatingStyles, ...style };

  return (
    <FloatingPortal>
      <div
        ref={ref}
        className={styles.Tooltip}
        style={mergedStyle}
        {...getFloatingProps(props)}
      />
    </FloatingPortal>
  );
});

View File

@@ -0,0 +1,3 @@
/** API base URL, read from the `VITE_API_URL` entry of `import.meta.env`. */
export const API_URL = import.meta.env.VITE_API_URL;

14
frontend/src/hooks/toc.ts Normal file
View File

@@ -0,0 +1,14 @@
import { useQuery } from "@tanstack/react-query";
import { getToc } from "../lib/api";
import { Language } from "../lib/types";
import useNavStore from "../store/navStore";
/**
 * Query hook for the table of contents of the CELEX document currently held
 * in the navigation store. The TOC is always requested in English, and the
 * query stays disabled until a CELEX ID is set.
 */
export const useTOC = () => {
  const celexId = useNavStore((state) => state.celexId);
  const hasDocument = Boolean(celexId);

  return useQuery({
    queryKey: ["toc", celexId],
    // `enabled` guarantees a non-empty CELEX ID by the time this runs
    queryFn: () => getToc(celexId!, Language.ENG),
    enabled: hasDocument,
  });
};

View File

@@ -1,11 +1,15 @@
import { useEffect } from "react"; import { useEffect } from "react";
import { useNavigate, useParams } from "react-router-dom"; import { useNavigate, useParams } from "react-router-dom";
import useNavState from "../store/navStore"; import useNavStore from "../store/navStore";
export const useUrlSync = () => { export const useUrlSync = () => {
const navigate = useNavigate(); const navigate = useNavigate();
const { celexId: paramCelexId, articleId: paramArticleId } = useParams(); const { celexId: paramCelexId, articleId: paramArticleId } = useParams();
const { articleId, celexId, setArticleId, setCelexId } = useNavState();
const celexId = useNavStore.use.celexId();
const setCelexId = useNavStore.use.setCelexId();
const articleId = useNavStore.use.articleId();
const setArticleId = useNavStore.use.setArticleId();
// Effect to read from URL when URL changes // Effect to read from URL when URL changes
useEffect(() => { useEffect(() => {
@@ -26,13 +30,13 @@ export const useUrlSync = () => {
// Update the URL when Zustand changes // Update the URL when Zustand changes
useEffect(() => { useEffect(() => {
if (celexId === null) { if (!celexId) {
return; return;
} }
let targetUrl = `/${celexId}`; let targetUrl = `/${celexId}`;
if (articleId !== null) { if (articleId !== null) {
targetUrl += `/articles/${articleId}`; targetUrl += `/articles/${articleId}`;
} }
navigate(targetUrl, { replace: true }); navigate(targetUrl);
}, [navigate, celexId, articleId]); // Only sync URL when Zustand changes }, [navigate, celexId, articleId]); // Only sync URL when Zustand changes
}; };

View File

@@ -0,0 +1,15 @@
import { useQuery } from "@tanstack/react-query";
import { getArticle } from "../lib/api";
import { Language } from "../lib/types";
/**
 * Query hook that loads one article of a document in the given language.
 * The query is disabled while either `celexId` or `articleId` is falsy.
 */
export const useArticle = (
  celexId: string | null,
  articleId: number | null,
  lang: Language
) => {
  const canFetch = Boolean(celexId) && Boolean(articleId);

  return useQuery({
    queryKey: ["article", celexId, articleId, lang],
    // `enabled` guarantees both identifiers are set by the time this runs
    queryFn: () => getArticle(celexId!, articleId!, lang),
    enabled: canFetch,
  });
};

View File

@@ -0,0 +1,16 @@
import { useQuery } from "@tanstack/react-query";
import { getParagraph } from "../lib/api";
import { Language } from "../lib/types";
/**
 * Query hook that loads a single paragraph of an article in the given
 * language. The query is disabled while any of the three identifiers is falsy.
 */
export const useParagraph = (
  celexId: string | null,
  articleId: number | null,
  paragraphId: number | null,
  lang: Language
) => {
  const canFetch =
    Boolean(celexId) && Boolean(articleId) && Boolean(paragraphId);

  return useQuery({
    queryKey: ["paragraph", celexId, articleId, paragraphId, lang],
    // `enabled` guarantees all identifiers are set by the time this runs
    queryFn: () => getParagraph(celexId!, articleId!, paragraphId!, lang),
    enabled: canFetch,
  });
};

View File

@@ -1,7 +1,14 @@
import TOC from "../components/TOC/TOC"; import Axios from "axios";
import { Language } from "./types"; import { API_URL } from "../constants";
import { Division, Language } from "./types";
const API_URL = import.meta.env.VITE_API_URL; const axios = Axios.create({
baseURL: API_URL,
timeout: 5000,
headers: {
"Content-Type": "application/json",
},
});
async function getArticle( async function getArticle(
celexId: string, celexId: string,
@@ -11,22 +18,40 @@ async function getArticle(
console.debug( console.debug(
`Fetching article ${article} for CELEX ID ${celexId} in ${language} language` `Fetching article ${article} for CELEX ID ${celexId} in ${language} language`
); );
const response = await fetch( const response = await axios.get<string>(
`${API_URL}/${celexId}/articles/${article}/${language}` `${celexId}/articles/${article}/${language}`
); );
return await response.text(); return response.data;
}
async function getParagraph(
celexId: string,
article: number,
paragraph: number,
language: string
): Promise<string> {
console.debug(
`Fetching article ${article} paragraph ${paragraph} for CELEX ID ${celexId} in ${language} language`
);
const response = await axios.get<string>(
`${celexId}/articles/${article}/${paragraph}/${language}`
);
return response.data;
} }
async function getArticleIds(celexId: string): Promise<number[]> { async function getArticleIds(celexId: string): Promise<number[]> {
console.debug(`Fetching article list for CELEX ID ${celexId}`); console.debug(`Fetching article list for CELEX ID ${celexId}`);
const response = await fetch(`${API_URL}/${celexId}/articles`); const response = await axios.get<number[]>(`${celexId}/articles`);
return await response.json(); return response.data;
} }
async function getToc(celexId: string, language: Language): Promise<TOC> { async function getToc(
celexId: string,
language: Language
): Promise<Division[]> {
console.debug(`Fetching TOC for CELEX ID ${celexId}`); console.debug(`Fetching TOC for CELEX ID ${celexId}`);
const response = await fetch(`${API_URL}/${celexId}/toc/${language}`); const response = await axios.get<Division[]>(`${celexId}/toc/${language}`);
return await response.json(); return response.data;
} }
export { getArticle, getArticleIds, getToc }; export { getArticle, getArticleIds, getParagraph, getToc };

View File

@@ -0,0 +1,9 @@
/** Example legislation entries: a display name paired with its CELEX identifier. */
export const examples = [
  { name: "GDPR", id: "32016R0679" },
  { name: "AI Act", id: "32024R1689" },
  { name: "Cybersecurity Act", id: "32019R0881" },
  { name: "Cyber Resilience Act", id: "32024R2847" },
  { name: "Medical Device Regulation", id: "32017R0745" },
  { name: "NIS 2 Directive", id: "32022L2555" },
  { name: "Digital Services Act", id: "32022R2065" },
];

View File

@@ -16,7 +16,7 @@ type Division = {
title: string; title: string;
subtitle: string; subtitle: string;
level: number; level: number;
content: Article[] | Division[]; content: (Article | Division)[];
}; };
export { Language }; export { Language };

View File

@@ -1,10 +1,10 @@
import App from "../App"; import App from "../App";
import { useUrlSync } from "../hooks/urlSync"; import { useUrlSync } from "../hooks/urlSync";
import useNavState from "../store/navStore"; import useNavStore from "../store/navStore";
function MainView() { function MainView() {
useUrlSync(); useUrlSync();
const celexId = useNavState((state) => state.celexId); const celexId = useNavStore.use.celexId();
if (!celexId) { if (!celexId) {
return <div>Error: No CELEX ID provided</div>; return <div>Error: No CELEX ID provided</div>;

View File

@@ -1,17 +1,19 @@
import { create } from "zustand"; import { create, StateCreator } from "zustand";
import { createSelectors } from "../util/zustand";
interface NavState { export type NavStore = {
celexId: string | null; celexId: string | null;
articleId: number | null; articleId: number | null;
setCelexId: (celexId: string) => void; setCelexId: (celexId: string) => void;
setArticleId: (articleId: number | null) => void; setArticleId: (articleId: number | null) => void;
} };
const useNavState = create<NavState>((set) => ({ const navStoreCreator: StateCreator<NavStore> = (set) => ({
celexId: null, celexId: "",
articleId: null, articleId: 1,
setCelexId: (celexId) => set({ celexId }), setCelexId: (celexId) => set({ celexId }),
setArticleId: (articleId) => set({ articleId }), setArticleId: (articleId) => set({ articleId }),
})); });
export default useNavState; const useNavStore = createSelectors(create<NavStore>()(navStoreCreator));
export default useNavStore;

View File

@@ -11,11 +11,11 @@ interface UIState {
const useUIStore = create<UIState>((set) => ({ const useUIStore = create<UIState>((set) => ({
numPanels: 1, numPanels: 1,
selectedParagraphId: null, selectedParagraphId: null,
addPanel: () => set((state) => ({ numPanels: state.numPanels + 1 })), addPanel: () => set((state) => ({ numPanels: state.numPanels + 1 })),
removePanel: () => removePanel: () =>
set((state) => ({ numPanels: Math.max(state.numPanels - 1, 1) })), set((state) => ({ numPanels: Math.max(state.numPanels - 1, 1) })),
setSelectedParagraphId: (selectedParagraphId: string | null) => setSelectedParagraphId: (selectedParagraphId) => set({ selectedParagraphId }),
set({ selectedParagraphId }),
})); }));
export default useUIStore; export default useUIStore;

View File

@@ -5,6 +5,8 @@
} }
article { article {
max-width: 64ch;
.list-lower-alpha { .list-lower-alpha {
list-style-type: lower-alpha; list-style-type: lower-alpha;
} }

View File

@@ -0,0 +1,18 @@
import { StoreApi, UseBoundStore } from "zustand";
/** Store type augmented with a `use` object holding one selector hook per state key. */
type WithSelectors<S> = S extends { getState: () => infer T }
  ? S & { use: { [K in keyof T]: () => T[K] } }
  : never;

/**
 * Adds a `use` object to a zustand store, with one hook per key of the
 * store's current state (e.g. `store.use.foo()` selects `state.foo`).
 * The store is augmented in place and returned.
 */
export const createSelectors = <S extends UseBoundStore<StoreApi<object>>>(
  _store: S
) => {
  const store = _store as WithSelectors<typeof _store>;
  store.use = {};

  Object.keys(store.getState()).forEach((key) => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (store.use as any)[key] = () =>
      store((state) => state[key as keyof typeof state]);
  });

  return store;
};

View File

@@ -24,5 +24,5 @@
"plugins": [{ "name": "typescript-plugin-css-modules" }] "plugins": [{ "name": "typescript-plugin-css-modules" }]
}, },
"include": ["src"] "include": ["src", "./jest.setup.ts", "__mocks__"]
} }

View File

@@ -19,7 +19,7 @@ app = modal.App("formex-viewer", image=app_image)
) )
@modal.asgi_app() @modal.asgi_app()
def fastapi_app(): def fastapi_app():
import fastapi.staticfiles import fastapi
from formex_viewer.server import app as formex_app from formex_viewer.server import app as formex_app
@@ -28,7 +28,7 @@ def fastapi_app():
@formex_app.get("/{path:path}") @formex_app.get("/{path:path}")
async def frontend_handler(path: str): async def frontend_handler(path: str):
fp = assets_path / path fp = assets_path / path
if not fp.exists(): if not fp.exists() or not fp.is_file():
fp = assets_path / "index.html" fp = assets_path / "index.html"
return fastapi.responses.FileResponse(fp) return fastapi.responses.FileResponse(fp)

View File

@@ -20,3 +20,8 @@ formex-viewer = "formex_viewer:main"
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
build-backend = "hatchling.build" build-backend = "hatchling.build"
[dependency-groups]
dev = [
"pytest>=8.3.5",
]

View File

@@ -1,12 +1,16 @@
import html import html
import re import re
from typing import Optional, Union import warnings
from dataclasses import dataclass
from typing import Literal, Optional, Union, cast
import lxml.etree import lxml.etree
from lxml import etree as ET from lxml import etree as ET
from formex_viewer.main import Language
def text_content(el: lxml.etree.Element) -> str:
def text_content(el: ET._Element) -> str:
"""Get the text content of an XML element, including all child elements.""" """Get the text content of an XML element, including all child elements."""
def _iterate(el): def _iterate(el):
@@ -23,24 +27,130 @@ def text_content(el: lxml.etree.Element) -> str:
return "".join(_iterate(el)) return "".join(_iterate(el))
@dataclass
class CrossReference:
target: Literal["article", "annex"]
text: str
id: str
paragraph: int | None = None
def extract_xrefs(el: ET._Element, language: Language) -> list[CrossReference]:
    """Extract cross-references from an XML element.

    The text content of the element is scanned for references to articles
    (optionally with a paragraph, e.g. "Article 6(1)") and to annexes.
    References that are followed by the language-specific exclusion phrase
    (e.g. "Article 12 of Directive ...") are skipped, since they presumably
    point to a different legal text.

    Args:
        el: The XML element to extract cross-references from.
        language: Language of the text; selects the keyword patterns.

    Returns:
        A list of cross-references: all article references in text order,
        followed by all annex references. The list is empty (and a warning is
        issued) when the language is not supported.
    """
    PATTERN_PARTS = {
        Language.ENG: {
            "article": r"(Art\.|Articles?)",
            "annex": r"(Ann\.|Annex)",
            "exclusion": r"(?! of(?! this))",
        },
        Language.DEU: {
            "article": r"(Art\.|Artikels?)",
            "annex": r"(Anhang)",
            "exclusion": r"(?! von)",
        },
    }

    if language not in PATTERN_PARTS:
        warnings.warn(
            f"Language '{language}' not supported for cross-reference extraction. Returning empty list."
        )
        return []

    parts = PATTERN_PARTS[language]
    article_kw = parts["article"]
    annex_kw = parts["annex"]
    exclusion = parts["exclusion"]

    # Prevent zealous matching of references to other texts by using a negative lookahead
    # Also, match only at word boundaries to prevent partial matches
    #
    # The number must be consumed completely before the exclusion lookahead
    # runs. Otherwise the regex engine backtracks to a shorter number (or
    # drops the paragraph part) to step around the exclusion, turning
    # "Article 12 of Directive" into a bogus reference to "Article 1".
    patterns: dict[Literal["article", "annex"], str] = {
        "article": (
            rf"\b{article_kw}\s+(?P<art_num>\d+)(?!\d)"
            # Either consume a "(n)" paragraph, or assert that none follows
            rf"(?:[(](?P<parag_num>\d+)[)]|(?![(]\d+[)]))"
            rf"(?:{exclusion})"
        ),
        # Trailing \b keeps the numeral from matching a word prefix
        # (e.g. "annex listed" under IGNORECASE)
        "annex": rf"\b{annex_kw}\s+(?P<annex_num>[DILMVX]+)\b(?:{exclusion})",
    }
    id_groups = {"article": "art_num", "annex": "annex_num"}

    text = text_content(el)
    crossrefs: list[CrossReference] = []
    for key, pattern in patterns.items():
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            # Only the article pattern defines the `parag_num` group
            parag_num = match.groupdict().get("parag_num")
            crossrefs.append(
                CrossReference(
                    target=key,
                    id=match.group(id_groups[key]),
                    paragraph=int(parag_num) if parag_num else None,
                    text=match.group(0),
                )
            )

    return crossrefs
def extract_article(doc: ET._Element, article_id: int) -> ET._Element | None:
    """Extract a specific article from a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number. It is zero-padded to three digits to
            form the ``IDENTIFIER`` attribute value that is looked up.

    Returns:
        The first matching ``ARTICLE`` element, or ``None`` if there is none.
    """
    # Evaluate the XPath query once and reuse the result
    matches = doc.xpath(f".//ARTICLE[@IDENTIFIER='{article_id:03d}']")
    return matches[0] if matches else None
def extract_paragraph(
    doc: ET._Element, article_id: int, paragraph_id: int
) -> ET._Element | None:
    """Extract a specific paragraph from an article in a Formex document.

    Args:
        doc: The XML document to extract from.
        article_id: The article number.
        paragraph_id: The paragraph number. Both numbers are zero-padded to
            three digits and joined with a dot to form the ``IDENTIFIER``
            attribute value that is looked up (e.g. ``001.002``).

    Returns:
        The first matching ``PARAG`` element, or ``None`` if there is none.
    """
    # Evaluate the XPath query once and reuse the result
    matches = doc.xpath(
        f".//PARAG[@IDENTIFIER='{article_id:03d}.{paragraph_id:03d}']"
    )
    return matches[0] if matches else None
class FormexArticleConverter: class FormexArticleConverter:
"""Converts Formex XML <ARTICLE> elements to semantic HTML5.""" """Converts Formex XML <ARTICLE> elements to semantic HTML5."""
def __init__(self, namespace: Optional[str] = None): def __init__(self, language: Language, namespace: Optional[str] = None):
""" """
Initialize the converter. Initialize the converter.
Args: Args:
language: Language object to determine the language for cross-reference extraction
namespace: Optional XML namespace to use when parsing elements namespace: Optional XML namespace to use when parsing elements
""" """
self.ns = namespace self.ns = namespace
self.language = language
self.ns_prefix = f"{{{namespace}}}" if namespace else "" self.ns_prefix = f"{{{namespace}}}" if namespace else ""
def _get_tag(self, tag: str) -> str: def _get_tag(self, tag: str) -> str:
"""Get the tag name with namespace if available.""" """Get the tag name with namespace if available."""
return f"{self.ns_prefix}{tag}" return f"{self.ns_prefix}{tag}"
def _get_text(self, element: ET.Element) -> str: def _get_text(self, element: ET._Element) -> str:
"""Get the text content of an element, including all nested text. """Get the text content of an element, including all nested text.
This uses lxml's text_content() method when available, falling back to This uses lxml's text_content() method when available, falling back to
@@ -55,7 +165,7 @@ class FormexArticleConverter:
except AttributeError: except AttributeError:
# Fall back to manual traversal if text_content() is not available # Fall back to manual traversal if text_content() is not available
text = element.text or "" text = element.text or ""
for child in element: for child in element.iterchildren(tag="*"):
text += self._get_text(child) text += self._get_text(child)
if child.tail: if child.tail:
text += child.tail text += child.tail
@@ -67,7 +177,16 @@ class FormexArticleConverter:
clean_id = re.sub(r"[^a-zA-Z0-9-]", "-", identifier) clean_id = re.sub(r"[^a-zA-Z0-9-]", "-", identifier)
return f"art-{clean_id}" return f"art-{clean_id}"
def _convert_btx(self, element: ET.Element) -> str: def _replace_xref(self, text: str, xref: CrossReference) -> str:
"""Replace a cross-reference instance with semantic markup in the text."""
# Replace the cross-reference text with a link
text = text.replace(
xref.text,
f'<a href="" data-target="{xref.target}" data-id="{xref.id}" data-paragraph-id="{xref.paragraph or ''}" class="cross-ref">{xref.text}</a>',
)
return text
def _convert_btx(self, element: ET._Element) -> str:
""" """
Convert basic text elements (t_btx, t_btx.seq) to HTML. Convert basic text elements (t_btx, t_btx.seq) to HTML.
@@ -79,7 +198,15 @@ class FormexArticleConverter:
result = element.text or "" result = element.text or ""
for child in element: is_title = element.tag in ("TI", "STI", "TI.ART", "STI.ART")
if not is_title and not element.getchildren():
# Cross-references should be treated at the deepest level
xrefs = extract_xrefs(element, self.language)
for xref in xrefs:
# Replace the cross-reference text with a link
result = self._replace_xref(result, xref)
for child in element.iterchildren(tag="*"):
child_tag = child.tag.replace(self.ns_prefix, "") child_tag = child.tag.replace(self.ns_prefix, "")
# Process common inline elements # Process common inline elements
@@ -176,11 +303,17 @@ class FormexArticleConverter:
result += self._convert_btx(child) result += self._convert_btx(child)
if child.tail: if child.tail:
result += child.tail xrefs = extract_xrefs(child, self.language)
tail_text = child.tail
for xref in xrefs:
# Replace the cross-reference text with a link
tail_text = self._replace_xref(tail_text, xref)
result += tail_text
return result return result
def _convert_list(self, list_element: ET.Element) -> str: def _convert_list(self, list_element: ET._Element) -> str:
"""Convert a Formex LIST element to HTML list items.""" """Convert a Formex LIST element to HTML list items."""
result = "" result = ""
# Using lxml's xpath to get direct child ITEM elements # Using lxml's xpath to get direct child ITEM elements
@@ -200,6 +333,12 @@ class FormexArticleConverter:
if no_p is not None and txt is not None: if no_p is not None and txt is not None:
num = self._get_text(no_p) num = self._get_text(no_p)
text = self._get_text(txt) text = self._get_text(txt)
# Handle cross-references within the text
xrefs = extract_xrefs(txt, self.language)
for xref in xrefs:
text = self._replace_xref(text, xref)
item_content += f'<span class="item-number">{num}</span> {text}' item_content += f'<span class="item-number">{num}</span> {text}'
elif child_tag == "P": elif child_tag == "P":
# Regular paragraph # Regular paragraph
@@ -212,41 +351,40 @@ class FormexArticleConverter:
return result return result
def _convert_alinea(self, alinea: ET.Element) -> str: def _convert_alinea(self, alinea: ET._Element) -> str:
"""Convert an ALINEA element to HTML.""" """Convert an ALINEA element to HTML."""
return f'<p class="alinea">{self._convert_btx(alinea)}</p>' return f'<p class="alinea">{self._convert_btx(alinea)}</p>'
def _convert_parag(self, parag: ET.Element) -> str: def _convert_parag(self, parag: ET._Element) -> str:
"""Convert a PARAG (paragraph) element to HTML.""" """Convert a PARAG (paragraph) element to HTML."""
identifier = parag.get("IDENTIFIER", "") identifier = parag.get("IDENTIFIER", "")
parag_id = self._create_id(identifier) if identifier else "" parag_id = self._create_id(identifier) if identifier else ""
# Get the paragraph number using XPath
no_parag_elems = parag.xpath(f"./{self._get_tag('NO.PARAG')}")
parag_num = self._get_text(no_parag_elems[0]) if no_parag_elems else ""
# Process the alineas within the paragraph
content = "" content = ""
for alinea in parag.xpath(f"./{self._get_tag('ALINEA')}"): for child in parag.iterchildren(tag="*"):
content += self._convert_alinea(alinea) child_tag = child.tag.replace(self.ns_prefix, "")
if child_tag == "ALINEA":
content += self._convert_alinea(child)
elif child_tag == "COMMENT":
content += f'<div class="comment">{self._convert_btx(child)}</div>'
elif child_tag == "QUOT.S":
content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
elif child_tag == "NO.PARAG":
content += (
f'<span class="paragraph-number">{self._convert_btx(child)}</span>'
)
else:
raise RuntimeError(
f"Unexpected child element '{child_tag}' in PARAG: {text_content(child)}"
)
# Process any comments return f'<div class="paragraph" data-paragraph-id="{parag_id}">{content}</div>'
for comment in parag.xpath(f"./{self._get_tag('COMMENT')}"):
content += f'<div class="comment">{self._convert_btx(comment)}</div>'
# Process any quotations def _convert_subdiv(self, subdiv: ET._Element) -> str:
for quot in parag.xpath(f"./{self._get_tag('QUOT.S')}"): """Convert a SUBDIV (subdivision) element to HTML, preserving child order."""
content += ( # Get the title using XPath (should be the first TITLE child if present)
f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
)
return f'<div class="paragraph" data-paragraph-id="{parag_id}"><span class="paragraph-number">{parag_num}</span>{content}</div>'
def _convert_subdiv(self, subdiv: ET.Element) -> str:
"""Convert a SUBDIV (subdivision) element to HTML."""
# Get the title using XPath
title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
title = "" title = ""
title_elems = subdiv.xpath(f"./{self._get_tag('TITLE')}")
if title_elems: if title_elems:
title_elem = title_elems[0] title_elem = title_elems[0]
# Process TI (title) and STI (subtitle) elements # Process TI (title) and STI (subtitle) elements
@@ -261,34 +399,30 @@ class FormexArticleConverter:
if sti_list: if sti_list:
title += f'<h5 class="subdivision-subtitle">{" ".join(sti_list)}</h5>' title += f'<h5 class="subdivision-subtitle">{" ".join(sti_list)}</h5>'
# Process content: either paragraphs, alineas, or nested subdivisions # Process all children in order, skipping TITLE (already handled)
content = "" content = ""
for child in subdiv.iterchildren(tag="*"):
# Process paragraphs directly under this subdivision child_tag = child.tag.replace(self.ns_prefix, "")
for parag in subdiv.xpath(f"./{self._get_tag('PARAG')}"): if child_tag == "TITLE":
content += self._convert_parag(parag) continue # already handled
elif child_tag == "PARAG":
# Process alineas directly under this subdivision content += self._convert_parag(child)
for alinea in subdiv.xpath(f"./{self._get_tag('ALINEA')}"): elif child_tag == "ALINEA":
content += self._convert_alinea(alinea) content += self._convert_alinea(child)
elif child_tag == "COMMENT":
# Process comments directly under this subdivision content += f'<div class="comment">{self._convert_btx(child)}</div>'
for comment in subdiv.xpath(f"./{self._get_tag('COMMENT')}"): elif child_tag == "QUOT.S":
content += f'<div class="comment">{self._convert_btx(comment)}</div>' content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
elif child_tag == "SUBDIV":
# Process quotations directly under this subdivision content += self._convert_subdiv(child)
for quot in subdiv.xpath(f"./{self._get_tag('QUOT.S')}"): else:
content += ( raise RuntimeError(
f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>' f"Unexpected child element '{child_tag}' in SUBDIV: {text_content(child)}"
) )
# Process nested subdivisions directly under this subdivision
for sub in subdiv.xpath(f"./{self._get_tag('SUBDIV')}"):
content += self._convert_subdiv(sub)
return f'<section class="subdivision">{title}{content}</section>' return f'<section class="subdivision">{title}{content}</section>'
def convert_article(self, article: Union[str, ET.Element]) -> str: def convert_article(self, article: Union[str, ET._Element]) -> str:
""" """
Convert a Formex <ARTICLE> element to HTML5. Convert a Formex <ARTICLE> element to HTML5.
@@ -302,7 +436,9 @@ class FormexArticleConverter:
if isinstance(article, str): if isinstance(article, str):
try: try:
parser = ET.XMLParser(remove_blank_text=True) parser = ET.XMLParser(remove_blank_text=True)
article = ET.fromstring(article.encode("utf-8"), parser) article = cast(
ET._Element, ET.fromstring(article.encode("utf-8"), parser)
)
except ET.XMLSyntaxError as e: except ET.XMLSyntaxError as e:
return f"<p>Error parsing XML: {e}</p>" return f"<p>Error parsing XML: {e}</p>"
@@ -325,43 +461,36 @@ class FormexArticleConverter:
article_subtitle = self._convert_btx(sti_art) if sti_art is not None else "" article_subtitle = self._convert_btx(sti_art) if sti_art is not None else ""
# Build the header section # Build the header section
header = f'<header><h3 class="article-title">{article_title}</h3>' if article_title and article_subtitle:
if article_subtitle: header = f'<header><h3 class="article-title">{article_title}</h3>'
header += f'<h4 class="article-subtitle">{article_subtitle}</h4>' if article_subtitle:
header += "</header>" header += f'<h4 class="article-subtitle">{article_subtitle}</h4>'
header += "</header>"
else:
header = ""
# Process the content based on what's present # Process the content based on what's present
content = "" content = ""
# Check if we have alineas directly under the article # Process all child elements (except TITLE) in tree order
alineas = article.xpath(f"./{self._get_tag('ALINEA')}") for child in article.iterchildren(tag="*"):
if alineas: child_tag = child.tag.replace(self.ns_prefix, "")
for alinea in alineas: if child_tag in ["TI.ART", "STI.ART"]:
content += self._convert_alinea(alinea) continue # already handled
elif child_tag == "ALINEA":
# Check if we have paragraphs directly under the article content += self._convert_alinea(child)
parags = article.xpath(f"./{self._get_tag('PARAG')}") elif child_tag == "PARAG":
if parags: content += self._convert_parag(child)
for parag in parags: elif child_tag == "COMMENT":
content += self._convert_parag(parag) content += f'<div class="comment">{self._convert_btx(child)}</div>'
elif child_tag == "QUOT.S":
# Check for comments directly under the article content += f'<blockquote class="quotation">{self._convert_btx(child)}</blockquote>'
comments = article.xpath(f"./{self._get_tag('COMMENT')}") elif child_tag == "SUBDIV":
if comments: content += self._convert_subdiv(child)
for comment in comments: else:
content += f'<div class="comment">{self._convert_btx(comment)}</div>' raise RuntimeError(
f"Unexpected child element '{child_tag}' in ARTICLE: {text_content(child)}"
# Check for quotations directly under the article )
quots = article.xpath(f"./{self._get_tag('QUOT.S')}")
if quots:
for quot in quots:
content += f'<blockquote class="quotation">{self._convert_btx(quot)}</blockquote>'
# Check for subdivisions directly under the article
subdivs = article.xpath(f"./{self._get_tag('SUBDIV')}")
if subdivs:
for subdiv in subdivs:
content += self._convert_subdiv(subdiv)
# Assemble the complete article # Assemble the complete article
return f'<article id="{article_id}" class="formex-article">{header}<div class="article-content">{content}</div></article>' return f'<article id="{article_id}" class="formex-article">{header}<div class="article-content">{content}</div></article>'

View File

@@ -2,7 +2,11 @@ import lxml.etree as ET
from fastapi import APIRouter, FastAPI, Response from fastapi import APIRouter, FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from formex_viewer.formex4 import FormexArticleConverter from formex_viewer.formex4 import (
FormexArticleConverter,
extract_article,
extract_paragraph,
)
from formex_viewer.main import ( from formex_viewer.main import (
CellarClient, CellarClient,
CellarIdentifier, CellarIdentifier,
@@ -68,9 +72,16 @@ def article_ids(celex_id: str, language: Language = Language.ENG):
@api_router.get("/{celex_id}/toc/{language}") @api_router.get("/{celex_id}/toc/{language}")
def toc(celex_id: str, language: Language = Language.ENG): def toc(celex_id: str, language: Language = Language.ENG):
def _extract_text(root: ET.Element, tag: str) -> str:
"""
Extract text from the given tag in the XML element.
"""
text = root.xpath(f"{tag}//text()")
return "".join(text) if text else ""
def _handle_division(division: ET.Element, level: int): def _handle_division(division: ET.Element, level: int):
title = ti_el[0] if (ti_el := division.xpath("TITLE/TI//text()")) else "" title = _extract_text(division, "TITLE/TI")
subtitle = sti_el[0] if (sti_el := division.xpath("TITLE/STI//text()")) else "" subtitle = _extract_text(division, "TITLE/STI")
subdivisions = [] subdivisions = []
for subdivision in division.xpath("DIVISION") or []: for subdivision in division.xpath("DIVISION") or []:
@@ -81,10 +92,9 @@ def toc(celex_id: str, language: Language = Language.ENG):
art_id = article.get("IDENTIFIER") art_id = article.get("IDENTIFIER")
if not art_id: if not art_id:
continue continue
art_title = ti_el[0] if (ti_el := article.xpath("TI.ART//text()")) else ""
art_subtitle = ( art_title = _extract_text(article, "TI.ART")
sti_el[0] if (sti_el := article.xpath("STI.ART//text()")) else "" art_subtitle = _extract_text(article, "STI.ART")
)
articles.append( articles.append(
{ {
"id": int(art_id.lstrip("0")), "id": int(art_id.lstrip("0")),
@@ -115,21 +125,51 @@ def toc(celex_id: str, language: Language = Language.ENG):
@api_router.get("/{celex_id}/articles/{article_id}/{language}") @api_router.get("/{celex_id}/articles/{article_id}/{language}")
def article(celex_id: str, article_id: int, language: Language = Language.ENG): def article(
celex_id: str,
article_id: int,
language: Language = Language.ENG,
):
""" """
Fetch an article from the server. Fetch an article from the server.
""" """
xml = _get_fmx4_data(celex_id, language) xml = _get_fmx4_data(celex_id, language)
article = extract_article(xml, article_id=article_id)
article_xpath = "//ARTICLE" if article is None:
articles = xml.xpath(article_xpath) return Response(
for article in articles: "Article not found",
num = article.get("IDENTIFIER").lstrip("0") status_code=404,
if num == str(article_id): )
return Response(
FormexArticleConverter().convert_article(article), return Response(
media_type="text/html", FormexArticleConverter(language=language).convert_article(article),
) media_type="text/html",
)
@api_router.get("/{celex_id}/articles/{article_id}/{parag_id}/{language}")
def paragraph(
    celex_id: str,
    article_id: int,
    parag_id: int,
    language: Language = Language.ENG,
):
    """
    Fetch a single paragraph of an article, rendered as HTML.

    Responds with status 404 when the paragraph cannot be found in the document.
    """
    document = _get_fmx4_data(celex_id, language)
    parag = extract_paragraph(document, article_id=article_id, paragraph_id=parag_id)

    if parag is not None:
        html_fragment = FormexArticleConverter(language=language)._convert_parag(parag)
        return Response(html_fragment, media_type="text/html")

    return Response("Paragraph not found", status_code=404)
app.include_router(api_router, prefix="/api") app.include_router(api_router, prefix="/api")

52
tests/test_parser.py Normal file
View File

@@ -0,0 +1,52 @@
import pytest
from lxml import etree as ET
from formex_viewer.formex4 import FormexArticleConverter
from formex_viewer.main import Language
@pytest.fixture
def converter():
    """Provide a ``FormexArticleConverter`` configured for ``Language.ENG``."""
    eng_converter = FormexArticleConverter(language=Language.ENG)
    return eng_converter
def test_convert_tree_order(converter):
    """Test that HTML blocks in the converted article follow XML document order.

    The fixture article holds a subdivision whose children are, in order: a
    title, a numbered paragraph, a comment, an alinea, a quotation and a nested
    subdivision. Each is located in the converted HTML by a marker unique to
    it, and the marker positions must be strictly increasing.
    """
    xml = """
<ARTICLE>
<SUBDIV>
<TITLE>
<TI>Subdivision Title</TI>
<STI>Subdivision Subtitle</STI>
</TITLE>
<PARAG IDENTIFIER="001.001">
<NO.PARAG>1</NO.PARAG>
<ALINEA>Paragraph 1 text.</ALINEA>
</PARAG>
<COMMENT>Comment text.</COMMENT>
<ALINEA>Alinea text.</ALINEA>
<QUOT.S>Quotation text.</QUOT.S>
<SUBDIV>
<TITLE>
<TI>Nested Subdivision</TI>
</TITLE>
<ALINEA>Nested alinea.</ALINEA>
</SUBDIV>
</SUBDIV>
</ARTICLE>
"""
    # Whitespace-only text between elements is dropped while parsing.
    parser = ET.XMLParser(remove_blank_text=True)
    el = ET.fromstring(xml, parser)
    html = converter.convert_article(el)

    # Locate one marker per block; str.index raises ValueError if a block is
    # missing from the output, which also fails the test.
    idx_title = html.index("Subdivision Title")
    idx_parag = html.index('class="paragraph"')
    idx_comment = html.index("Comment text.")
    idx_alinea = html.index("Alinea text.")
    idx_quot = html.index("Quotation text.")
    idx_nested = html.index("Nested Subdivision")

    # The order in the XML: title, parag, comment, alinea, quot, nested subdiv
    assert idx_title < idx_parag < idx_comment < idx_alinea < idx_quot < idx_nested

764
uv.lock generated

File diff suppressed because it is too large Load Diff