@@ -15,6 +15,33 @@ def parse_num(s, t=int):
         return 0
 
 
+def parse_date(
+    day: int, month: str, year=datetime.datetime.now().year
+) -> datetime.date:
+    """Parse a German medium-form date, e.g. 17. August into a datetime.date"""
+    months = [
+        "Januar",
+        "Februar",
+        "März",
+        "April",
+        "Mai",
+        "Juni",
+        "Juli",
+        "August",
+        "September",
+        "Oktober",
+        "November",
+        "Dezember",
+    ]
+    date = datetime.date(
+        year=int(year),
+        month=1 + months.index(month),
+        day=parse_num(day),
+    )
+    return date
+
+
 @dataclass
 class IncidenceData:
     location: str
@@ -26,37 +53,53 @@ class IncidenceData:
     num_dead: int = 0
 
 
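+# A single scraped snapshot of Augsburg's vaccination numbers; the ratio fields
+# are filled in by the crawler.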
+@dataclass
+class VaccinationData:
+    date: str
+
+    total_vaccinations: int = 0
+    num_vaccinated_once: int = 0
+    num_vaccinated_full: int = 0
+
+    ratio_vaccinated_once: float = 0.0
+    ratio_vaccinated_full: float = 0.0
+
+
 class CovidCrawlerBase(ABC):
     @abstractmethod
-    def crawl(self) -> IncidenceData:
+    def crawl_incidence(self) -> IncidenceData:
         pass
 
+    @abstractmethod
+    def crawl_vaccination(self) -> VaccinationData:
+        pass
+
 
 class CovidCrawler(CovidCrawlerBase):
     def __init__(self, hass=None) -> None:
-        self.url = (
-            "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
-        )
         self.hass = hass
 
-    async def crawl(self) -> IncidenceData:
+    async def crawl_incidence(self) -> IncidenceData:
         """
         Fetch COVID-19 infection data from the target website.
         """
 
         _log.info("Fetching COVID-19 data update")
 
+        url = (
+            "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
+        )
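+        # Reuse Home Assistant's shared aiohttp session when available;
+        # fall back to requests otherwise.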
         if self.hass:
             from homeassistant.helpers import aiohttp_client
 
-            result = await aiohttp_client.async_get_clientsession(self.hass).get(
-                self.url
-            )
+            result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
             soup = BeautifulSoup(await result.text(), "html.parser")
         else:
             import requests
 
-            result = requests.get(self.url)
+            result = requests.get(url)
             if not result.ok:
                 result.raise_for_status()
             soup = BeautifulSoup(result.text, "html.parser")
@@ -79,27 +122,7 @@ class CovidCrawler(CovidCrawlerBase):
         if not matches:
             raise ValueError(f"Could not extract date from scraped web page, {text=}")
 
-        months = [
-            "Januar",
-            "Februar",
-            "März",
-            "April",
-            "Mai",
-            "Juni",
-            "Juli",
-            "August",
-            "September",
-            "Oktober",
-            "November",
-            "Dezember",
-        ]
-        day = parse_num(matches.group(1))
-        month_name = matches.group(2)
-        date = datetime.date(
-            year=datetime.datetime.now().year,
-            month=1 + months.index(month_name),
-            day=day,
-        )
+        date = parse_date(matches.group(1), matches.group(2))
         _log.debug(f"Parsed date: {date}")
 
         match = match.find_next_sibling(class_="frame--type-textpic")
@@ -130,3 +153,59 @@ class CovidCrawler(CovidCrawlerBase):
         _log.debug(f"Result data: {result}")
 
         return result
+
+    async def crawl_vaccination(self) -> VaccinationData:
+        _log.info("Fetching COVID-19 vaccination data update")
+        url = "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/impfzentrum"
+        container_id = "c1088140"
+
+        if self.hass:
+            from homeassistant.helpers import aiohttp_client
+
+            result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
+            soup = BeautifulSoup(await result.text(), "html.parser")
+        else:
+            import requests
+
+            result = requests.get(url)
+            if not result.ok:
+                result.raise_for_status()
+            soup = BeautifulSoup(result.text, "html.parser")
+
+        result = soup.find(id=container_id)
+        text = re.sub(r"\s+", " ", result.text)
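+        # The page formats counts with "." as a thousands separator, hence the
+        # \d+[.]\d+ patterns and the .replace(".", "") before parsing.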
+        regexes = [
+            r"(?P<total_vaccinations>\d+[.]\d+) Impfdosen",
+            r"Weitere (?P<num_vaccinated_once>\d+[.]\d+) Personen haben die Erstimpfung erhalten",
+            r"(?P<num_vaccinated_full>\d+[.]\d+) Personen sind bereits vollständig geimpft",
+        ]
+        values = {}
+        for r in regexes:
+            matches = re.search(r, text)
+            if not matches:
+                continue
+            values.update(
+                {
+                    k: parse_num(v.replace(".", ""))
+                    for k, v in matches.groupdict().items()
+                }
+            )
+
+        matches = re.search(r"Stand (?P<day>\d+)\. (?P<month>\w+) (?P<year>\d+)", text)
+        if not matches:
+            raise ValueError(f"Could not extract date from scraped web page, {text=}")
+
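+        # Normalise the "Stand <day>. <month> <year>" stamp to an ISO-formatted date string.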
+        values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d")
+        result = VaccinationData(**values)
+
+        # Total population in Augsburg as of 2020
+        # https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf
+        population = 299021
+
+        result.ratio_vaccinated_full = result.num_vaccinated_full / population
+        result.ratio_vaccinated_once = result.num_vaccinated_once / population
+        _log.debug(f"Result data: {result}")
+
+        return result