import asyncio
import logging
from abc import ABC, abstractmethod
from typing import override

import httpx


class TextScraper(ABC):
    """Abstract base for scrapers that turn a URL into text.

    Each instance owns one ``httpx.AsyncClient`` created with a 5-second
    timeout. Call :meth:`close` (or use the instance in ``async with``) when
    finished; ``__del__`` only attempts a best-effort close as a fallback.
    """

    def __init__(self):
        self._client = httpx.AsyncClient(timeout=httpx.Timeout(5.0))

    async def __aenter__(self):
        """Return the scraper itself so it can be used in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client when the ``async with`` block exits."""
        await self.close()

    async def _fetch_text(self, url: str) -> str:
        """Fetch ``url`` with the shared client and return the response text.

        Args:
            url: Address to request with GET.

        Returns:
            The ``text`` attribute of the response.

        Raises:
            Exception: Anything raised by the request or by
                ``raise_for_status()`` is logged as a warning and re-raised.
        """
        response = None
        try:
            response = await self._client.get(url)
            response.raise_for_status()
            return response.text
        except Exception:
            logging.warning("Failed to fetch text from %s", url, exc_info=True)
            raise
        finally:
            # Release the response whether or not the status check passed.
            # Compare against None explicitly instead of relying on the
            # response object's truthiness.
            if response is not None:
                await response.aclose()

    @abstractmethod
    async def get_content(self, url: str) -> str: ...

    async def close(self):
        """Close the underlying HTTP client; safe to call more than once."""
        # getattr: __init__ may have failed before _client was assigned.
        client = getattr(self, "_client", None)
        if client is not None and not client.is_closed:
            await client.aclose()

    def __del__(self):
        """Best-effort attempt to close the HTTP client on garbage collection.

        Prefer calling :meth:`close` explicitly. This fallback never raises.
        """
        client = getattr(self, "_client", None)
        if client is None or client.is_closed:
            # Nothing to release; avoid creating a coroutine that would
            # never be awaited.
            return
        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                loop = None
            if loop is not None:
                # Inside a running loop we cannot block, so schedule the close.
                loop.create_task(self.close())
            else:
                # No loop is running in this thread: drive the close on a
                # private loop so the thread's current-loop setting is left
                # untouched. The coroutine is created only after the loop
                # exists, so a failure here cannot leave it un-awaited.
                temp_loop = asyncio.new_event_loop()
                try:
                    temp_loop.run_until_complete(self.close())
                finally:
                    temp_loop.close()
        except Exception:
            # Finalizers must not raise; keep a trace for debugging instead
            # of swallowing the failure silently.
            logging.debug("Failed to close HTTP client in __del__", exc_info=True)

class Html2textScraper(TextScraper):
    """Scraper whose content is the fetched text passed through ``html2text``."""

    @override
    async def get_content(self, url: str) -> str:
        """Fetch ``url`` and return the result of ``html2text.html2text`` on it.

        Errors from the underlying fetch are not caught here.
        """
        # Imported at call time so the module loads without html2text present.
        import html2text

        raw_html = await self._fetch_text(url)
        converted = html2text.html2text(raw_html)
        return converted


class ReadabilityScraper(TextScraper):
    """Scraper whose content is a ``readability.Document`` summary of the page."""

    @override
    async def get_content(self, url: str) -> str:
        """Fetch ``url`` and return ``Document.summary(html_partial=True)``.

        Errors from the underlying fetch are not caught here.
        """
        # Imported at call time so the module loads without readability present.
        import readability

        page_source = await self._fetch_text(url)
        document = readability.Document(page_source)
        return document.summary(html_partial=True)


class JinaScraper(TextScraper):
    """Scraper that requests pages through the ``https://r.jina.ai/`` endpoint.

    Args:
        api_key: Optional key. When truthy it is added to the client's
            default headers as ``Authorization: Bearer <api_key>``.
    """

    def __init__(self, api_key: str | None = None):
        super().__init__()
        if api_key:
            self._client.headers.update({"Authorization": f"Bearer {api_key}"})

    @override
    async def get_content(self, url: str) -> str:
        """Return the text fetched for ``url`` via the r.jina.ai endpoint.

        This method is best-effort: any exception raised while fetching is
        logged as a warning and an empty string is returned instead.
        """
        # Progress goes through logging like the rest of this file, rather
        # than being printed to stdout from library code.
        logging.info("Fetching content from: %s", url)
        try:
            return await self._fetch_text(f"https://r.jina.ai/{url}")
        except Exception:
            logging.warning("Failed to fetch content from %s", url, exc_info=True)
            return ""