@@ -15,6 +15,32 @@ def parse_num(s, t=int):
|
||||
return 0
|
||||
|
||||
|
||||
def parse_date(
    day: int, month: str, year=None
) -> datetime.date:
    """Parse a German medium-form date, e.g. "17. August", into a datetime.date.

    Args:
        day: Day of the month. The value is converted with ``parse_num``, so
            numeric strings taken straight from a regex match work as well.
        month: Full German month name, e.g. ``"August"`` or ``"März"``.
        year: Calendar year as an int or numeric string. When omitted (or
            ``None``), the year current at the time of the call is used.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If ``month`` is not a known German month name, or if the
            day/month/year combination does not form a valid date.
    """
    months = (
        "Januar",
        "Februar",
        "März",
        "April",
        "Mai",
        "Juni",
        "Juli",
        "August",
        "September",
        "Oktober",
        "November",
        "Dezember",
    )

    if year is None:
        # Resolve the default on every call. Writing
        # ``year=datetime.datetime.now().year`` in the signature is evaluated
        # only once, when the function is defined, so a long-running process
        # would keep using the year in which it was started.
        year = datetime.datetime.now().year
    year_number = int(year)

    try:
        month_number = 1 + months.index(month)
    except ValueError:
        # Same exception type as the bare list lookup, but with a message
        # that names the offending input.
        raise ValueError(f"Unknown German month name: {month!r}") from None

    return datetime.date(
        year=year_number,
        month=month_number,
        day=parse_num(day),
    )
|
||||
|
||||
|
||||
@dataclass
|
||||
class IncidenceData:
|
||||
location: str
|
||||
@@ -26,37 +52,51 @@ class IncidenceData:
|
||||
num_dead: int = 0
|
||||
|
||||
|
||||
@dataclass
class VaccinationData:
    """Vaccination figures for a single reporting date.

    Only ``date`` is required; every numeric field defaults to zero.
    """

    # Reporting date as a string.
    date: str

    # Absolute counts.
    total_vaccinations: int = 0
    num_vaccinated_once: int = 0
    num_vaccinated_full: int = 0

    # Fractions corresponding to the two per-person counts above.
    ratio_vaccinated_once: float = 0.0
    ratio_vaccinated_full: float = 0.0
|
||||
|
||||
|
||||
class CovidCrawlerBase(ABC):
    """Abstract interface for crawlers that provide COVID-19 statistics.

    Both methods are declared as coroutines so that the declaration matches
    the ``async def`` implementations in ``CovidCrawler``; callers are
    expected to ``await`` them.
    """

    @abstractmethod
    async def crawl_incidence(self) -> IncidenceData:
        """Fetch and return the current incidence data."""

    @abstractmethod
    async def crawl_vaccination(self) -> VaccinationData:
        """Fetch and return the current vaccination data."""
|
||||
|
||||
|
||||
class CovidCrawler(CovidCrawlerBase):
|
||||
def __init__(self, hass=None) -> None:
|
||||
self.url = (
|
||||
"https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
|
||||
)
|
||||
self.hass = hass
|
||||
|
||||
async def crawl(self) -> IncidenceData:
|
||||
async def crawl_incidence(self) -> IncidenceData:
|
||||
"""
|
||||
Fetch COVID-19 infection data from the target website.
|
||||
"""
|
||||
|
||||
_log.info("Fetching COVID-19 data update")
|
||||
|
||||
url = (
|
||||
"https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
|
||||
)
|
||||
if self.hass:
|
||||
from homeassistant.helpers import aiohttp_client
|
||||
|
||||
result = await aiohttp_client.async_get_clientsession(self.hass).get(
|
||||
self.url
|
||||
)
|
||||
result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
|
||||
soup = BeautifulSoup(await result.text(), "html.parser")
|
||||
else:
|
||||
import requests
|
||||
|
||||
result = requests.get(self.url)
|
||||
result = requests.get(url)
|
||||
if not result.ok:
|
||||
result.raise_for_status()
|
||||
soup = BeautifulSoup(result.text, "html.parser")
|
||||
@@ -79,27 +119,7 @@ class CovidCrawler(CovidCrawlerBase):
|
||||
if not matches:
|
||||
raise ValueError(f"Could not extract date from scraped web page, {text=}")
|
||||
|
||||
months = [
|
||||
"Januar",
|
||||
"Februar",
|
||||
"März",
|
||||
"April",
|
||||
"Mai",
|
||||
"Juni",
|
||||
"Juli",
|
||||
"August",
|
||||
"September",
|
||||
"Oktober",
|
||||
"November",
|
||||
"Dezember",
|
||||
]
|
||||
day = parse_num(matches.group(1))
|
||||
month_name = matches.group(2)
|
||||
date = datetime.date(
|
||||
year=datetime.datetime.now().year,
|
||||
month=1 + months.index(month_name),
|
||||
day=day,
|
||||
)
|
||||
date = parse_date(matches.group(1), matches.group(2))
|
||||
_log.debug(f"Parsed date: {date}")
|
||||
|
||||
match = match.find_next_sibling(class_="frame--type-textpic")
|
||||
@@ -130,3 +150,57 @@ class CovidCrawler(CovidCrawlerBase):
|
||||
_log.debug(f"Result data: {result}")
|
||||
|
||||
return result
|
||||
|
||||
async def crawl_vaccination(self) -> VaccinationData:
    """Fetch COVID-19 vaccination data from the city of Augsburg web page.

    The page is downloaded through the Home Assistant aiohttp session when
    ``self.hass`` is set, and through ``requests`` otherwise. The text of
    one fixed container element is then searched for the vaccination
    counts and the "Stand <day>. <month> <year>" report date.

    Returns:
        A ``VaccinationData`` with the report date formatted as
        ``YYYY-MM-DD``, the counts that were found in the page text, and
        the two ratios computed against a fixed population figure. A count
        whose sentence is not present on the page keeps its default of 0.

    Raises:
        ValueError: If the expected container element or the report date
            cannot be found in the downloaded page.
    """
    _log.info("Fetching COVID-19 vaccination data update")
    # The path segment previously read "umwelt-sozgcoiales", a garbled form
    # of the "umwelt-soziales" segment used by the case-number URL.
    url = "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/impfzentrum"
    container_id = "c1088140"

    if self.hass:
        from homeassistant.helpers import aiohttp_client

        response = await aiohttp_client.async_get_clientsession(self.hass).get(url)
        soup = BeautifulSoup(await response.text(), "html.parser")
    else:
        import requests

        response = requests.get(url)
        if not response.ok:
            response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

    container = soup.find(id=container_id)
    if container is None:
        # find() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on ``.text``.
        raise ValueError(
            f"Could not find element with id {container_id!r} in scraped web page"
        )
    # Collapse all whitespace runs so the patterns below can use plain spaces.
    text = re.sub(r"\s+", " ", container.text)

    # German-formatted integer: digits with any number of "." thousands
    # separators (e.g. "999", "12.345", "1.234.567").
    number = r"\d+(?:[.]\d+)*"
    regexes = [
        rf"(?P<total_vaccinations>{number}) Impfdosen",
        rf"Weitere (?P<num_vaccinated_once>{number}) Personen haben die Erstimpfung erhalten",
        rf"(?P<num_vaccinated_full>{number}) Personen sind bereits vollständig geimpft",
    ]
    values = {}
    for pattern in regexes:
        found = re.search(pattern, text)
        if not found:
            # Best effort: a missing sentence leaves the field at its default.
            continue
        values.update(
            {
                key: parse_num(raw.replace(".", ""))
                for key, raw in found.groupdict().items()
            }
        )

    matches = re.search(r"Stand (?P<day>\d+)\. (?P<month>\w+) (?P<year>\d+)", text)
    if not matches:
        raise ValueError(f"Could not extract date from scraped web page, {text=}")

    values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d")
    data = VaccinationData(**values)

    # Total population in Augsburg as of 2020
    # https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf
    population = 299021

    data.ratio_vaccinated_full = data.num_vaccinated_full / population
    data.ratio_vaccinated_once = data.num_vaccinated_once / population
    _log.debug(f"Result data: {data}")

    return data
|
||||
|
||||
Reference in New Issue
Block a user