8 Commits

Author SHA1 Message Date
Adrian Rumpold
a44308a4e1 Merge pull request #9 from AdrianoKF/8-new-vaccination-report-format
Support new vaccination report format
2021-10-09 14:02:52 +02:00
Adrian Rumpold
dd5bb2916b chore: Bump manifest version number 2021-10-09 14:00:12 +02:00
Adrian Rumpold
67bb1e49ef feat(parser): Support new vaccination report format
Closes #8
2021-10-09 13:59:09 +02:00
Adrian Rumpold
aaea39657e Merge pull request #7 from AdrianoKF/6-integer-incidence-parse-failure
Correctly handle incidence values without decimals
2021-09-20 07:54:05 +02:00
Adrian Rumpold
81c19b552d fix(parser): Correctly handle incidence values without decimals
Closes #6
2021-09-20 07:51:52 +02:00
Adrian Rumpold
0d609ade9a chore: Bump version number 2021-09-17 08:36:54 +02:00
Adrian Rumpold
2b453f4b5e Merge pull request #5 from AdrianoKF/4-infection-data-parsing-error
Update infection data parser for new web page layout
2021-09-17 08:34:55 +02:00
Adrian Rumpold
62904f4c09 fix(parser): Update infection data parser for new web page layout
Closes #4
2021-09-17 08:30:48 +02:00
3 changed files with 23 additions and 18 deletions

View File

@@ -59,10 +59,12 @@ class VaccinationData:
total_vaccinations: int = 0 total_vaccinations: int = 0
num_vaccinated_once: int = 0 num_vaccinated_once: int = 0
num_vaccinated_full: int = 0 num_vaccinated_full: int = 0
num_vaccinated_booster: int = 0
ratio_vaccinated_once: float = 0.0 ratio_vaccinated_once: float = 0.0
ratio_vaccinated_full: float = 0.0 ratio_vaccinated_full: float = 0.0
ratio_vaccinated_total: float = 0.0 ratio_vaccinated_total: float = 0.0
ratio_vaccinated_booster: float = 0.0
class CovidCrawlerBase(ABC): class CovidCrawlerBase(ABC):
@@ -107,11 +109,11 @@ class CovidCrawler(CovidCrawlerBase):
) )
soup = await self._fetch(url) soup = await self._fetch(url)
match = soup.find(class_="frame--type-textpic") match = soup.find(id="c1067628")
text = match.p.text text = match.text.strip()
_log.debug(f"Infection data text: {text}") _log.debug(f"Infection data text: {text}")
matches = re.search(r"(\d+,\d+)\sNeuinfektion", text) matches = re.search(r"(\d+(,\d+)?)\sNeuinfektion", text)
if not matches: if not matches:
raise ValueError( raise ValueError(
f"Could not extract incidence from scraped web page, {text=}" f"Could not extract incidence from scraped web page, {text=}"
@@ -120,18 +122,15 @@ class CovidCrawler(CovidCrawlerBase):
incidence = parse_num(matches.group(1), t=float) incidence = parse_num(matches.group(1), t=float)
_log.debug(f"Parsed incidence: {incidence}") _log.debug(f"Parsed incidence: {incidence}")
text = match.h2.text match = soup.find(id="c1052517")
matches = re.search(r"\((\d+)\. (\w+).*\)", text) text = match.text.strip()
matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text)
if not matches: if not matches:
raise ValueError(f"Could not extract date from scraped web page, {text=}") raise ValueError(f"Could not extract date from scraped web page, {text=}")
date = parse_date(matches.group(1), matches.group(2)) date = parse_date(matches.group(1), matches.group(2), matches.group(3))
_log.debug(f"Parsed date: {date}") _log.debug(f"Parsed date: {date}")
match = match.find_next_sibling(class_="frame--type-textpic")
text = match.text
_log.debug(f"Infection counts text: {text}")
regexes = [ regexes = [
r"Insgesamt: (?P<total_cases>[0-9.]+)", r"Insgesamt: (?P<total_cases>[0-9.]+)",
r"genesen: (?P<num_recovered>[0-9.]+)", r"genesen: (?P<num_recovered>[0-9.]+)",
@@ -168,9 +167,9 @@ class CovidCrawler(CovidCrawlerBase):
result = soup.find(id=container_id) result = soup.find(id=container_id)
text = re.sub(r"\s+", " ", result.text) text = re.sub(r"\s+", " ", result.text)
regexes = [ regexes = [
r"(?P<total_vaccinations>\d+[.]\d+) Impfdosen", r"(?P<total_vaccinations>\d+([.]\d+)?) Personen in Augsburg mindestens",
r"Weitere (?P<num_vaccinated_once>\d+[.]\d+) Personen haben die Erstimpfung erhalten", r"(?P<num_vaccinated_full>\d+([.]\d+)?) Personen sind mindestens zweimal geimpft",
r"(?P<num_vaccinated_full>\d+[.]\d+) Personen sind bereits vollständig geimpft", r"(?P<num_vaccinated_booster>\d+([.]\d+)?) Personen haben eine Auffrischungsimpfung",
] ]
values = {} values = {}
for r in regexes: for r in regexes:
@@ -188,18 +187,24 @@ class CovidCrawler(CovidCrawlerBase):
if not matches: if not matches:
raise ValueError(f"Could not extract date from scraped web page, {text=}") raise ValueError(f"Could not extract date from scraped web page, {text=}")
values["num_vaccinated_once"] = values["total_vaccinations"] - (
values["num_vaccinated_full"] + values["num_vaccinated_booster"]
)
values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d") values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d")
result = VaccinationData(**values) result = VaccinationData(**values)
# Total population in Augsburg as of 2020 # Total population in Augsburg as listed on the crawled page
# https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf population = 298014
population = 299021
result.ratio_vaccinated_full = result.num_vaccinated_full / population * 100 result.ratio_vaccinated_full = result.num_vaccinated_full / population * 100
result.ratio_vaccinated_once = result.num_vaccinated_once / population * 100 result.ratio_vaccinated_once = result.num_vaccinated_once / population * 100
result.ratio_vaccinated_total = ( result.ratio_vaccinated_total = (
result.ratio_vaccinated_once + result.ratio_vaccinated_full result.ratio_vaccinated_once + result.ratio_vaccinated_full
) )
result.ratio_vaccinated_booster = (
result.num_vaccinated_booster / population * 100
)
_log.debug(f"Result data: {result}") _log.debug(f"Result data: {result}")
return result return result

View File

@@ -1,7 +1,7 @@
{ {
"domain": "covid19_augsburg", "domain": "covid19_augsburg",
"name": "COVID-19 Augsburg", "name": "COVID-19 Augsburg",
"version": "1.1.1", "version": "1.2.0",
"config_flow": true, "config_flow": true,
"documentation": "https://github.com/AdrianoKF/home-assistant-covid19-augsburg", "documentation": "https://github.com/AdrianoKF/home-assistant-covid19-augsburg",
"issue_tracker": "https://github.com/AdrianoKF/home-assistant-covid19-augsburg/issues", "issue_tracker": "https://github.com/AdrianoKF/home-assistant-covid19-augsburg/issues",

View File

@@ -1,5 +1,5 @@
[tool.poetry] [tool.poetry]
name = "git add re" name = "home_assistant_covid19_augsburg"
version = "0.1.0" version = "0.1.0"
description = "" description = ""
authors = ["Adrian Rumpold <a.rumpold@gmail.com>"] authors = ["Adrian Rumpold <a.rumpold@gmail.com>"]