diff --git a/README.md b/README.md index d7ab617..22bd838 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ The code retrieves Hacker News front page stories, categorizes them, stores them - `OPENAI_API_KEY`: Your OpenAI API key for chat and embedding models. - `JINA_AI_KEY`: Your [Jina AI Reader](https://jina.ai/reader/) key for text extraction. + - `SLACK_BOT_TOKEN`: Your Slack bot token for sending messages (optional). 2. Start local Weaviate vector store instance: @@ -22,5 +23,5 @@ The code retrieves Hacker News front page stories, categorizes them, stores them uv run python indexing.py ``` -Adjust the constants in `indexing.py` to change the number of stories to fetch and the categories to use. +Adjust the constants in `indexing.py` to configure the behavior of the application. You can optionally enable MLflow tracing by setting `ENABLE_MLFLOW_TRACING=True` there (make sure to run `mlflow server` first). diff --git a/indexing.py b/indexing.py index 4b3dbdb..7929362 100644 --- a/indexing.py +++ b/indexing.py @@ -12,13 +12,15 @@ import langchain_openai import langchain_weaviate import langgraph.graph +import slack import weaviate from hn import HackerNewsClient, Story from scrape import JinaScraper NUM_STORIES = 20 -USER_PREFERENCES = [] -ENABLE_MLFLOW_TRACING = False # Set to True if you want to use MLflow for tracing +USER_PREFERENCES = ["Machine Learning", "Linux", "Open-Source"] +ENABLE_SLACK = False # Send updates to Slack, need to set SLACK_BOT_TOKEN env var +ENABLE_MLFLOW_TRACING = False # Use MLflow (at http://localhost:5000) for tracing llm = langchain.chat_models.init_chat_model( @@ -100,13 +102,13 @@ def generate(state: State): return {"answer": response.content} -def run_query(preferences: Iterable[str]): +def run_query(preferences: Iterable[str]) -> str: graph_builder = langgraph.graph.StateGraph(State).add_sequence([retrieve, generate]) graph_builder.add_edge(langgraph.graph.START, "retrieve") graph = graph_builder.compile() response = graph.invoke(State(preferences=preferences, context=[], answer="")) - print(response["answer"]) + return response["answer"] def get_existing_story_ids() -> set[str]: @@ -233,7 +235,10 @@ async def main(): print("No new stories to process") # 4. Query - run_query(USER_PREFERENCES) + answer = run_query(USER_PREFERENCES) + print(answer) + if ENABLE_SLACK: + slack.send_message(channel="#ragpull-demo", text=answer) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 42f63a9..afaf88f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,4 +14,5 @@ dependencies = [ "mlflow>=3.1.1", "python-dateutil>=2.9.0.post0", "readability-lxml>=0.8.4.1", + "slack-sdk>=3.35.0", ] diff --git a/slack.py b/slack.py new file mode 100644 index 0000000..672bde0 --- /dev/null +++ b/slack.py @@ -0,0 +1,55 @@ +import logging +import os + +from langchain_core.documents import Document +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError + + +def prepare_message_blocks(stories: list[Document]) -> list: + blocks = [] + for story in stories: + block = [ + { + "type": "header", + "text": {"type": "plain_text", "text": story.metadata["title"]}, + }, + { + "type": "context", + "elements": [ + { + "type": "plain_text", + "text": f"Categories: {', '.join(story.metadata.get('categories', []))}", + }, + ], + }, + { + "type": "context", + "elements": [ + { + "type": "plain_text", + "text": f"Posted on: {story.metadata['created_at']}", + } + ], + }, + {"type": "section", "text": {"type": "mrkdwn", "text": story.page_content}}, + ] + + blocks.append(block) + return blocks + + +def send_message(channel: str, text: str) -> None: + client = WebClient(token=os.environ["SLACK_BOT_TOKEN"]) + + try: + response = client.chat_postMessage( + channel=channel, + username="HN Ragandy", + text=text, + unfurl_links=False, + ) + response.validate() + logging.info(f"Message sent successfully to channel {channel}") + except SlackApiError as e: + logging.error(f"Error sending message: {e.response['error']}") diff --git a/uv.lock b/uv.lock index 6dd108d..e255705 100644 --- a/uv.lock +++ b/uv.lock @@ -875,6 +875,7 @@ dependencies = [ { name = "mlflow" }, { name = "python-dateutil" }, { name = "readability-lxml" }, + { name = "slack-sdk" }, ] [package.metadata] @@ -888,6 +889,7 @@ requires-dist = [ { name = "mlflow", specifier = ">=3.1.1" }, { name = "python-dateutil", specifier = ">=2.9.0.post0" }, { name = "readability-lxml", specifier = ">=0.8.4.1" }, + { name = "slack-sdk", specifier = ">=3.35.0" }, ] [[package]] @@ -1860,6 +1862,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "slack-sdk" +version = "3.35.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/32/a5/13077a5696ded22cc955ff6314028b7e6140b1c989b19ca27a6b26590e6e/slack_sdk-3.35.0.tar.gz", hash = "sha256:8183b6cbf26a0c1e2441478cd9c0dc4eef08d60c1394cfdc9a769e309a9b6459", size = 232887, upload-time = "2025-03-17T15:32:51.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/8e/eed71dc79a187ba32681f12a104786ab89355bc474082211d92e1fba6bcf/slack_sdk-3.35.0-py2.py3-none-any.whl", hash = "sha256:00933d171fbd8a068b321ebb5f89612cc781d3183d8e3447c85499eca9d865be", size = 293272, upload-time = "2025-03-17T15:32:50.294Z" }, +] + [[package]] name = "smmap" version = "5.0.2"