From 62904f4c099d4a832fab6dd0aa09292a1f1e5985 Mon Sep 17 00:00:00 2001 From: Adrian Rumpold Date: Fri, 17 Sep 2021 08:29:39 +0200 Subject: [PATCH] fix(parser): Update infection data parser for new web page layout Closes #4 --- .../home_assistant_covid19_augsburg/crawler.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/custom_components/home_assistant_covid19_augsburg/crawler.py b/custom_components/home_assistant_covid19_augsburg/crawler.py index 36c1eb6..8dce9f4 100644 --- a/custom_components/home_assistant_covid19_augsburg/crawler.py +++ b/custom_components/home_assistant_covid19_augsburg/crawler.py @@ -107,8 +107,8 @@ class CovidCrawler(CovidCrawlerBase): ) soup = await self._fetch(url) - match = soup.find(class_="frame--type-textpic") - text = match.p.text + match = soup.find(id="c1067628") + text = match.text.strip() _log.debug(f"Infection data text: {text}") matches = re.search(r"(\d+,\d+)\sNeuinfektion", text) @@ -120,18 +120,15 @@ class CovidCrawler(CovidCrawlerBase): incidence = parse_num(matches.group(1), t=float) _log.debug(f"Parsed incidence: {incidence}") - text = match.h2.text - matches = re.search(r"\((\d+)\. (\w+).*\)", text) + match = soup.find(id="c1052517") + text = match.text.strip() + matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text) if not matches: raise ValueError(f"Could not extract date from scraped web page, {text=}") - date = parse_date(matches.group(1), matches.group(2)) + date = parse_date(matches.group(1), matches.group(2), matches.group(3)) _log.debug(f"Parsed date: {date}") - match = match.find_next_sibling(class_="frame--type-textpic") - text = match.text - _log.debug(f"Infection counts text: {text}") - regexes = [ r"Insgesamt: (?P[0-9.]+)", r"genesen: (?P[0-9.]+)",