Initial commit
This commit is contained in:
94
custom_components/home_assistant_covid19_augsburg/crawler.py
Normal file
94
custom_components/home_assistant_covid19_augsburg/crawler.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import datetime
|
||||
import locale
|
||||
import logging
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class IncidenceData:
    """Container for one set of COVID-19 figures for a single location.

    ``location``, ``date`` and ``incidence`` are required. The four case
    counters are optional and default to ``0`` when not supplied.
    """

    # Required fields.
    location: str
    date: datetime.date
    incidence: float

    # Optional case counters.
    total_cases: int = 0
    num_infected: int = 0
    num_recovered: int = 0
    num_dead: int = 0
|
||||
|
||||
|
||||
class CovidCrawlerBase(ABC):
    """Abstract interface for crawlers that produce an ``IncidenceData``."""

    @abstractmethod
    def crawl(self) -> IncidenceData:
        """Fetch the current figures and return them as ``IncidenceData``.

        Concrete subclasses must override this method; the base
        implementation does nothing.
        """
|
||||
|
||||
|
||||
class CovidCrawler(CovidCrawlerBase):
    """Scrape the COVID-19 figures for Augsburg from the city's web page.

    The page is parsed without touching the process-wide locale: the decimal
    comma and the German month names are handled explicitly, so ``crawl`` is
    safe to call from a host application that relies on its own locale.
    """

    # Seconds to wait for the web server. Without a timeout ``requests.get``
    # may block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 30

    # CSS class of the page frames that hold the figures.
    _FRAME_CLASS = "frame--type-textpic"

    # German month names (lower-cased) mapped to their month number.
    _GERMAN_MONTHS = {
        "januar": 1,
        "februar": 2,
        "märz": 3,
        "april": 4,
        "mai": 5,
        "juni": 6,
        "juli": 7,
        "august": 8,
        "september": 9,
        "oktober": 10,
        "november": 11,
        "dezember": 12,
    }

    # One pattern per optional counter; the group name is the matching
    # ``IncidenceData`` field name.
    _CASE_PATTERNS = (
        r"Insgesamt: (?P<total_cases>[0-9.]+)",
        r"genesen: (?P<num_recovered>[0-9.]+)",
        r"infiziert: (?P<num_infected>[0-9.]+)",
        r"verstorben: (?P<num_dead>[0-9.]+)",
    )

    def __init__(self) -> None:
        self.url = (
            "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
        )

    def crawl(self) -> IncidenceData:
        """Fetch COVID-19 infection data from the target website.

        Returns:
            The parsed figures with ``location`` set to ``"Augsburg"``.
            Counters that cannot be found on the page keep their default.

        Raises:
            requests.HTTPError: The server answered with an error status.
            ValueError: The page layout changed and the incidence, the date
                or one of the expected page sections cannot be extracted.
        """
        _log.info("Fetching COVID-19 data update")

        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, features="html.parser")

        summary = soup.find(class_=self._FRAME_CLASS)
        if summary is None or summary.p is None:
            raise ValueError("Could not extract incidence from scraped web page")
        text = summary.p.text
        _log.debug("Infection data text: %s", text)
        incidence = self._parse_incidence(text)
        _log.debug("Parsed incidence: %s", incidence)

        if summary.h2 is None:
            raise ValueError("Could not extract date from scraped web page")
        date = self._parse_date(summary.h2.text, datetime.date.today())
        _log.debug("Parsed date: %s", date)

        counts_frame = summary.find_next_sibling(class_=self._FRAME_CLASS)
        if counts_frame is None:
            raise ValueError("Could not extract case counts from scraped web page")
        text = counts_frame.text
        _log.debug("Infection counts text: %s", text)
        cases = self._parse_case_counts(text)

        # Keyword arguments keep ``date`` and ``incidence`` from being
        # swapped: the dataclass declares ``date`` before ``incidence``.
        data = IncidenceData(
            location="Augsburg", date=date, incidence=incidence, **cases
        )
        _log.debug("Result data: %s", data)

        return data

    @staticmethod
    def _parse_incidence(text: str) -> float:
        """Return the decimal-comma number preceding "Neuinfektion" in *text*."""
        found = re.search(r"(\d+,\d+) Neuinfektion", text)
        if not found:
            raise ValueError("Could not extract incidence from scraped web page")
        # The pattern guarantees exactly one comma between two digit runs.
        return float(found.group(1).replace(",", "."))

    @classmethod
    def _parse_date(cls, text: str, today: datetime.date) -> datetime.date:
        """Return the "(<day>. <German month>)" date in *text* with a year added.

        The page does not state a year. The latest year is chosen for which
        the date is not later than *today* plus one day of slack (for time
        zone differences), so a December date read in January resolves to
        the previous year.
        """
        found = re.search(r"\((\d+)\. (\w+)\)", text)
        month = cls._GERMAN_MONTHS.get(found.group(2).lower()) if found else None
        if month is None:
            raise ValueError("Could not extract date from scraped web page")

        day = int(found.group(1))
        latest_plausible = today + datetime.timedelta(days=1)
        for year in (today.year + 1, today.year, today.year - 1):
            try:
                candidate = datetime.date(year, month, day)
            except ValueError:
                continue  # e.g. 29 February in a non-leap year
            if candidate <= latest_plausible:
                return candidate
        raise ValueError("Could not extract date from scraped web page")

    @classmethod
    def _parse_case_counts(cls, text: str) -> dict:
        """Return the counters found in *text*, keyed by ``IncidenceData`` field."""
        counts = {}
        for pattern in cls._CASE_PATTERNS:
            found = re.search(pattern, text)
            if not found:
                continue
            for field, raw in found.groupdict().items():
                # Dots are thousands separators on the page.
                counts[field] = int(raw.replace(".", ""))
        return counts
|
||||
Reference in New Issue
Block a user