fix: Fix aio resource leaks
This commit is contained in:
28
scrape.py
28
scrape.py
@@ -1,4 +1,3 @@
-import asyncio
 import logging
 from abc import ABC, abstractmethod
 from typing import override
@@ -8,41 +7,22 @@ import httpx
 
 class TextScraper(ABC):
     def __init__(self):
-        self._client = httpx.AsyncClient(timeout=httpx.Timeout(5.0))
+        self._http_headers = {}
 
     async def _fetch_text(self, url: str) -> str:
         """Fetch the raw HTML content from the URL."""
-        response = None
         try:
-            response = await self._client.get(url)
+            async with httpx.AsyncClient(headers=self._http_headers) as client:
+                response = await client.get(url)
             response.raise_for_status()
             return response.text
         except Exception:
             logging.warning(f"Failed to fetch text from {url}", exc_info=True)
             raise
-        finally:
-            if response:
-                await response.aclose()
 
     @abstractmethod
     async def get_content(self, url: str) -> str: ...
 
-    async def close(self):
-        """Close the underlying HTTP client."""
-        if self._client and not self._client.is_closed:
-            await self._client.aclose()
-
-    def __del__(self):
-        """Ensure the HTTP client is closed when the object is deleted."""
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                loop.create_task(self.close())
-            else:
-                loop.run_until_complete(self.close())
-        except Exception:
-            pass
-
 
 class Html2textScraper(TextScraper):
     @override
@@ -65,7 +45,7 @@ class JinaScraper(TextScraper):
     def __init__(self, api_key: str | None = None):
         super().__init__()
         if api_key:
-            self._client.headers.update({"Authorization": f"Bearer {api_key}"})
+            self._http_headers.update({"Authorization": f"Bearer {api_key}"})
 
     @override
     async def get_content(self, url: str) -> str:
Reference in New Issue
Block a user