Merge pull request #5 from AdrianoKF/4-infection-data-parsing-error

Update infection data parser for new web page layout
2021-09-17 08:34:55 +02:00
parent 35d5232d8e 62904f4c09
commit 2b453f4b5e
1 changed file with 6 additions and 9 deletions
@@ -107,8 +107,8 @@ class CovidCrawler(CovidCrawlerBase):
        )
        soup = await self._fetch(url)

-        match = soup.find(class_="frame--type-textpic")
-        text = match.p.text
+        match = soup.find(id="c1067628")
+        text = match.text.strip()
        _log.debug(f"Infection data text: {text}")

        matches = re.search(r"(\d+,\d+)\sNeuinfektion", text)
@@ -120,18 +120,15 @@ class CovidCrawler(CovidCrawlerBase):
        incidence = parse_num(matches.group(1), t=float)
        _log.debug(f"Parsed incidence: {incidence}")

-        text = match.h2.text
-        matches = re.search(r"\((\d+)\. (\w+).*\)", text)
+        match = soup.find(id="c1052517")
+        text = match.text.strip()
+        matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text)
        if not matches:
            raise ValueError(f"Could not extract date from scraped web page, {text=}")

-        date = parse_date(matches.group(1), matches.group(2))
+        date = parse_date(matches.group(1), matches.group(2), matches.group(3))
        _log.debug(f"Parsed date: {date}")

-        match = match.find_next_sibling(class_="frame--type-textpic")
-        text = match.text
-        _log.debug(f"Infection counts text: {text}")
-
        regexes = [
            r"Insgesamt: (?P<total_cases>[0-9.]+)",
            r"genesen: (?P<num_recovered>[0-9.]+)",