3 Commits

Author SHA1 Message Date
Adrian Rumpold
2f73be9010 chore: Refactor duplicated HTTP fetching code 2021-08-11 08:12:48 +02:00
Adrian Rumpold
b6184be32f fix: Enable commented out code 2021-08-10 20:11:23 +02:00
Adrian Rumpold
70fa0619d4 fix: Simplify HTTP error handling 2021-08-10 20:11:14 +02:00
2 changed files with 24 additions and 29 deletions

View File

@@ -78,6 +78,22 @@ class CovidCrawler(CovidCrawlerBase):
def __init__(self, hass=None) -> None: def __init__(self, hass=None) -> None:
self.hass = hass self.hass = hass
async def _fetch(self, url: str) -> str:
"""Fetch a URL, using either the current Home Assistant instance or requests"""
if self.hass:
from homeassistant.helpers import aiohttp_client
result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
soup = BeautifulSoup(await result.text(), "html.parser")
else:
import requests
result = requests.get(url)
result.raise_for_status()
soup = BeautifulSoup(result.text, "html.parser")
return soup
async def crawl_incidence(self) -> IncidenceData: async def crawl_incidence(self) -> IncidenceData:
""" """
Fetch COVID-19 infection data from the target website. Fetch COVID-19 infection data from the target website.
@@ -88,18 +104,7 @@ class CovidCrawler(CovidCrawlerBase):
url = ( url = (
"https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen" "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
) )
if self.hass: soup = await self._fetch(url)
from homeassistant.helpers import aiohttp_client
result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
soup = BeautifulSoup(await result.text(), "html.parser")
else:
import requests
result = requests.get(url)
if not result.ok:
result.raise_for_status()
soup = BeautifulSoup(result.text, "html.parser")
match = soup.find(class_="frame--type-textpic") match = soup.find(class_="frame--type-textpic")
text = match.p.text text = match.p.text
@@ -153,22 +158,12 @@ class CovidCrawler(CovidCrawlerBase):
async def crawl_vaccination(self) -> VaccinationData: async def crawl_vaccination(self) -> VaccinationData:
_log.info("Fetching COVID-19 vaccination data update") _log.info("Fetching COVID-19 vaccination data update")
url = "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/impfzentrum" url = (
"https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/impfzentrum"
)
soup = await self._fetch(url)
container_id = "c1088140" container_id = "c1088140"
if self.hass:
from homeassistant.helpers import aiohttp_client
result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
soup = BeautifulSoup(await result.text(), "html.parser")
else:
import requests
result = requests.get(url)
if not result.ok:
result.raise_for_status()
soup = BeautifulSoup(result.text, "html.parser")
result = soup.find(id=container_id) result = soup.find(id=container_id)
text = re.sub(r"\s+", " ", result.text) text = re.sub(r"\s+", " ", result.text)
regexes = [ regexes = [

View File

@@ -3,8 +3,8 @@ from .crawler import CovidCrawler
async def main(): async def main():
crawler = CovidCrawler() crawler = CovidCrawler()
# result = await crawler.crawl() result = await crawler.crawl_incidence()
# print(result) print(result)
result = await crawler.crawl_vaccination() result = await crawler.crawl_vaccination()
print(result) print(result)