@@ -59,10 +59,12 @@ class VaccinationData:
     total_vaccinations: int = 0
     num_vaccinated_once: int = 0
     num_vaccinated_full: int = 0
+    num_vaccinated_booster: int = 0
 
     ratio_vaccinated_once: float = 0.0
     ratio_vaccinated_full: float = 0.0
     ratio_vaccinated_total: float = 0.0
+    ratio_vaccinated_booster: float = 0.0
 
 
 class CovidCrawlerBase(ABC):
@@ -107,11 +109,11 @@ class CovidCrawler(CovidCrawlerBase):
         )
         soup = await self._fetch(url)
 
-        match = soup.find(class_="frame--type-textpic")
-        text = match.p.text
+        match = soup.find(id="c1067628")
+        text = match.text.strip()
         _log.debug(f"Infection data text: {text}")
 
-        matches = re.search(r"(\d+,\d+)\sNeuinfektion", text)
+        matches = re.search(r"(\d+(,\d+)?)\sNeuinfektion", text)
         if not matches:
             raise ValueError(
                 f"Could not extract incidence from scraped web page, {text=}"
@@ -120,18 +122,15 @@ class CovidCrawler(CovidCrawlerBase):
         incidence = parse_num(matches.group(1), t=float)
         _log.debug(f"Parsed incidence: {incidence}")
 
-        text = match.h2.text
-        matches = re.search(r"\((\d+)\. (\w+).*\)", text)
+        match = soup.find(id="c1052517")
+        text = match.text.strip()
+        matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text)
         if not matches:
             raise ValueError(f"Could not extract date from scraped web page, {text=}")
 
-        date = parse_date(matches.group(1), matches.group(2))
+        date = parse_date(matches.group(1), matches.group(2), matches.group(3))
         _log.debug(f"Parsed date: {date}")
 
-        match = match.find_next_sibling(class_="frame--type-textpic")
-        text = match.text
-        _log.debug(f"Infection counts text: {text}")
-
         regexes = [
             r"Insgesamt: (?P<total_cases>[0-9.]+)",
             r"genesen: (?P<num_recovered>[0-9.]+)",
@@ -168,9 +167,9 @@ class CovidCrawler(CovidCrawlerBase):
         result = soup.find(id=container_id)
         text = re.sub(r"\s+", " ", result.text)
         regexes = [
-            r"(?P<total_vaccinations>\d+[.]\d+) Impfdosen",
-            r"Weitere (?P<num_vaccinated_once>\d+[.]\d+) Personen haben die Erstimpfung erhalten",
-            r"(?P<num_vaccinated_full>\d+[.]\d+) Personen sind bereits vollständig geimpft",
+            r"(?P<total_vaccinations>\d+([.]\d+)?) Personen in Augsburg mindestens",
+            r"(?P<num_vaccinated_full>\d+([.]\d+)?) Personen sind mindestens zweimal geimpft",
+            r"(?P<num_vaccinated_booster>\d+([.]\d+)?) Personen haben eine Auffrischungsimpfung",
         ]
         values = {}
         for r in regexes:
@@ -188,18 +187,24 @@ class CovidCrawler(CovidCrawlerBase):
         if not matches:
             raise ValueError(f"Could not extract date from scraped web page, {text=}")
 
+        values["num_vaccinated_once"] = (
+            values["total_vaccinations"] - values["num_vaccinated_full"]
+        )
         values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d")
         result = VaccinationData(**values)
 
-        # Total population in Augsburg as of 2020
+        # Total population in Augsburg as listed on the crawled page
         # https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf
-        population = 298014
+        population = 299021
         result.ratio_vaccinated_full = result.num_vaccinated_full / population * 100
         result.ratio_vaccinated_once = result.num_vaccinated_once / population * 100
         result.ratio_vaccinated_total = (
             result.ratio_vaccinated_once + result.ratio_vaccinated_full
         )
+        result.ratio_vaccinated_booster = (
+            result.num_vaccinated_booster / population * 100
+        )
         _log.debug(f"Result data: {result}")
 
         return result
 