From 259c9699ad65d95dab340d41f21f906f0d78e730 Mon Sep 17 00:00:00 2001
From: Adrian Rumpold
Date: Wed, 2 Jul 2025 12:22:05 +0200
Subject: [PATCH] fix: Fix aio resource leaks

---
Review notes (free text between the three-dash marker above and the diffstat
below; not part of the commit message):

* _fetch_text() now opens an httpx.AsyncClient with "async with" on every
  call instead of keeping one client on the instance, so close() and
  __del__() are removed together with the asyncio import.
* Default request headers move from the shared client to the
  self._http_headers dict; JinaScraper adds its Authorization header there.
* The explicit timeout=httpx.Timeout(5.0) argument is not carried over to the
  new client. NOTE(review): confirm that the httpx default timeout is
  acceptable here.
* NOTE(review): close() disappears from TextScraper; any caller outside this
  patch that awaits it would need updating - not visible from this patch.

 scrape.py | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/scrape.py b/scrape.py
index 88b6b95..1432eb8 100644
--- a/scrape.py
+++ b/scrape.py
@@ -1,4 +1,3 @@
-import asyncio
 import logging
 from abc import ABC, abstractmethod
 from typing import override
@@ -8,41 +7,22 @@ import httpx
 
 class TextScraper(ABC):
     def __init__(self):
-        self._client = httpx.AsyncClient(timeout=httpx.Timeout(5.0))
+        self._http_headers = {}
 
     async def _fetch_text(self, url: str) -> str:
         """Fetch the raw HTML content from the URL."""
-        response = None
         try:
-            response = await self._client.get(url)
-            response.raise_for_status()
-            return response.text
+            async with httpx.AsyncClient(headers=self._http_headers) as client:
+                response = await client.get(url)
+                response.raise_for_status()
+                return response.text
         except Exception:
             logging.warning(f"Failed to fetch text from {url}", exc_info=True)
             raise
-        finally:
-            if response:
-                await response.aclose()
 
     @abstractmethod
     async def get_content(self, url: str) -> str: ...
 
-    async def close(self):
-        """Close the underlying HTTP client."""
-        if self._client and not self._client.is_closed:
-            await self._client.aclose()
-
-    def __del__(self):
-        """Ensure the HTTP client is closed when the object is deleted."""
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                loop.create_task(self.close())
-            else:
-                loop.run_until_complete(self.close())
-        except Exception:
-            pass
-
 
 class Html2textScraper(TextScraper):
     @override
@@ -65,7 +45,7 @@ class JinaScraper(TextScraper):
     def __init__(self, api_key: str | None = None):
         super().__init__()
         if api_key:
-            self._client.headers.update({"Authorization": f"Bearer {api_key}"})
+            self._http_headers.update({"Authorization": f"Bearer {api_key}"})
 
     @override
     async def get_content(self, url: str) -> str: