from langchain_core.documents import Document
from langchain_core.language_models import BaseChatModel

# JSON schema for the reply requested from the model: an object whose
# "categories" key holds a list of strings. It is handed to
# ``with_structured_output`` so the call also satisfies chat models whose
# signature requires a schema argument.
_CATEGORIES_SCHEMA = {
    "title": "Categories",
    "description": "Categories that describe a document.",
    "type": "object",
    "properties": {
        "categories": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["categories"],
}


def _clean_categories(raw: object, limit: int) -> set[str]:
    """Return at most *limit* distinct, non-empty category names found in *raw*.

    *raw* is whatever the structured-output call produced. A dict is read
    through its ``"categories"`` key, a bare list/tuple is used directly, and
    a bare string is treated as one category. Anything else (``None``,
    numbers, ...) yields an empty set instead of raising.
    """
    if isinstance(raw, dict):
        raw = raw.get("categories")
    if isinstance(raw, str):
        # ``set("sports")`` would split the name into single characters.
        raw = [raw]
    if not isinstance(raw, (list, tuple)):
        return set()

    names: list[str] = []
    for item in raw:
        # Skip non-string entries (e.g. nested dicts/lists): they are not
        # category names and may be unhashable.
        if not isinstance(item, str):
            continue
        name = item.strip()
        if name and name not in names:
            names.append(name)

    # Cap in reply order before converting to a set so the kept names are
    # deterministic for a given reply.
    return set(names[: max(limit, 0)])


def categorize(
    doc: Document,
    llm: BaseChatModel,
    *,
    max_categories: int = 3,
) -> set[str]:
    """Ask *llm* for the categories that best describe *doc*.

    Args:
        doc: Document to categorize. Its ``page_content`` and the ``"title"``
            entry of its ``metadata`` (default ``"No title"``) are embedded in
            the prompt.
        llm: Chat model used through ``with_structured_output`` in
            ``"json_mode"``.
        max_categories: Upper bound on the number of categories requested from
            the model and returned to the caller.

    Returns:
        A set of at most ``max_categories`` stripped, non-empty category
        names; empty when the reply holds no usable categories.

    Any exception raised by the model call itself is propagated unchanged.
    """
    title = doc.metadata.get("title", "No title")

    # The reply is parsed as a JSON object with a "categories" key, so the
    # prompt has to request exactly that shape rather than a bare list.
    # NOTE: the "..." after the content is sent literally; the content itself
    # is not truncated here.
    prompt = (
        f"Extract up to {max_categories} relevant categories "
        "from the following document.\n"
        'Return only a JSON object with a single key "categories" whose value '
        "is a list of category names as JSON strings.\n"
        "If you cannot find any relevant categories, use an empty list.\n"
        "\n"
        f"Title: {title}\n"
        f"Content: {doc.page_content}...\n"
        "\n"
        "Categories:"
    )

    structured_llm = llm.with_structured_output(
        _CATEGORIES_SCHEMA,
        method="json_mode",
    )
    result = structured_llm.invoke(prompt)
    return _clean_categories(result, max_categories)