|  |  | @@ -59,10 +59,12 @@ class VaccinationData: | 
			
		
	
		
		
			
				
					
					|  |  |  |     total_vaccinations: int = 0 |  |  |  |     total_vaccinations: int = 0 | 
			
		
	
		
		
			
				
					
					|  |  |  |     num_vaccinated_once: int = 0 |  |  |  |     num_vaccinated_once: int = 0 | 
			
		
	
		
		
			
				
					
					|  |  |  |     num_vaccinated_full: int = 0 |  |  |  |     num_vaccinated_full: int = 0 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     num_vaccinated_booster: int = 0 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     ratio_vaccinated_once: float = 0.0 |  |  |  |     ratio_vaccinated_once: float = 0.0 | 
			
		
	
		
		
			
				
					
					|  |  |  |     ratio_vaccinated_full: float = 0.0 |  |  |  |     ratio_vaccinated_full: float = 0.0 | 
			
		
	
		
		
			
				
					
					|  |  |  |     ratio_vaccinated_total: float = 0.0 |  |  |  |     ratio_vaccinated_total: float = 0.0 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ratio_vaccinated_booster: float = 0.0 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | class CovidCrawlerBase(ABC): |  |  |  | class CovidCrawlerBase(ABC): | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -107,11 +109,11 @@ class CovidCrawler(CovidCrawlerBase): | 
			
		
	
		
		
			
				
					
					|  |  |  |         ) |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  |         soup = await self._fetch(url) |  |  |  |         soup = await self._fetch(url) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         match = soup.find(class_="frame--type-textpic") |  |  |  |         match = soup.find(id="c1067628") | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         text = match.p.text |  |  |  |         text = match.text.strip() | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         _log.debug(f"Infection data text: {text}") |  |  |  |         _log.debug(f"Infection data text: {text}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         matches = re.search(r"(\d+,\d+)\sNeuinfektion", text) |  |  |  |         matches = re.search(r"(\d+(,\d+)?)\sNeuinfektion", text) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         if not matches: |  |  |  |         if not matches: | 
			
		
	
		
		
			
				
					
					|  |  |  |             raise ValueError( |  |  |  |             raise ValueError( | 
			
		
	
		
		
			
				
					
					|  |  |  |                 f"Could not extract incidence from scraped web page, {text=}" |  |  |  |                 f"Could not extract incidence from scraped web page, {text=}" | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -120,18 +122,15 @@ class CovidCrawler(CovidCrawlerBase): | 
			
		
	
		
		
			
				
					
					|  |  |  |         incidence = parse_num(matches.group(1), t=float) |  |  |  |         incidence = parse_num(matches.group(1), t=float) | 
			
		
	
		
		
			
				
					
					|  |  |  |         _log.debug(f"Parsed incidence: {incidence}") |  |  |  |         _log.debug(f"Parsed incidence: {incidence}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         text = match.h2.text |  |  |  |         match = soup.find(id="c1052517") | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         matches = re.search(r"\((\d+)\. (\w+).*\)", text) |  |  |  |         text = match.text.strip() | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text) | 
			
		
	
		
		
			
				
					
					|  |  |  |         if not matches: |  |  |  |         if not matches: | 
			
		
	
		
		
			
				
					
					|  |  |  |             raise ValueError(f"Could not extract date from scraped web page, {text=}") |  |  |  |             raise ValueError(f"Could not extract date from scraped web page, {text=}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         date = parse_date(matches.group(1), matches.group(2)) |  |  |  |         date = parse_date(matches.group(1), matches.group(2), matches.group(3)) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         _log.debug(f"Parsed date: {date}") |  |  |  |         _log.debug(f"Parsed date: {date}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         match = match.find_next_sibling(class_="frame--type-textpic") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         text = match.text |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         _log.debug(f"Infection counts text: {text}") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         regexes = [ |  |  |  |         regexes = [ | 
			
		
	
		
		
			
				
					
					|  |  |  |             r"Insgesamt: (?P<total_cases>[0-9.]+)", |  |  |  |             r"Insgesamt: (?P<total_cases>[0-9.]+)", | 
			
		
	
		
		
			
				
					
					|  |  |  |             r"genesen: (?P<num_recovered>[0-9.]+)", |  |  |  |             r"genesen: (?P<num_recovered>[0-9.]+)", | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -168,9 +167,9 @@ class CovidCrawler(CovidCrawlerBase): | 
			
		
	
		
		
			
				
					
					|  |  |  |         result = soup.find(id=container_id) |  |  |  |         result = soup.find(id=container_id) | 
			
		
	
		
		
			
				
					
					|  |  |  |         text = re.sub(r"\s+", " ", result.text) |  |  |  |         text = re.sub(r"\s+", " ", result.text) | 
			
		
	
		
		
			
				
					
					|  |  |  |         regexes = [ |  |  |  |         regexes = [ | 
			
		
	
		
		
			
				
					
					|  |  |  |             r"(?P<total_vaccinations>\d+[.]\d+) Impfdosen", |  |  |  |             r"(?P<total_vaccinations>\d+([.]\d+)?) Personen in Augsburg mindestens", | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             r"Weitere (?P<num_vaccinated_once>\d+[.]\d+) Personen haben die Erstimpfung erhalten", |  |  |  |             r"(?P<num_vaccinated_full>\d+([.]\d+)?) Personen sind mindestens zweimal geimpft", | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             r"(?P<num_vaccinated_full>\d+[.]\d+) Personen sind bereits vollständig geimpft", |  |  |  |             r"(?P<num_vaccinated_booster>\d+([.]\d+)?) Personen haben eine Auffrischungsimpfung", | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         ] |  |  |  |         ] | 
			
		
	
		
		
			
				
					
					|  |  |  |         values = {} |  |  |  |         values = {} | 
			
		
	
		
		
			
				
					
					|  |  |  |         for r in regexes: |  |  |  |         for r in regexes: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -188,18 +187,24 @@ class CovidCrawler(CovidCrawlerBase): | 
			
		
	
		
		
			
				
					
					|  |  |  |         if not matches: |  |  |  |         if not matches: | 
			
		
	
		
		
			
				
					
					|  |  |  |             raise ValueError(f"Could not extract date from scraped web page, {text=}") |  |  |  |             raise ValueError(f"Could not extract date from scraped web page, {text=}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         values["num_vaccinated_once"] = ( | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             values["total_vaccinations"] - values["num_vaccinated_full"] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d") |  |  |  |         values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d") | 
			
		
	
		
		
			
				
					
					|  |  |  |         result = VaccinationData(**values) |  |  |  |         result = VaccinationData(**values) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         # Total population in Augsburg as of 2020 |  |  |  |         # Total population in Augsburg as listed on the crawled page | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         # https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf |  |  |  |         population = 298014 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         population = 299021 |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         result.ratio_vaccinated_full = result.num_vaccinated_full / population * 100 |  |  |  |         result.ratio_vaccinated_full = result.num_vaccinated_full / population * 100 | 
			
		
	
		
		
			
				
					
					|  |  |  |         result.ratio_vaccinated_once = result.num_vaccinated_once / population * 100 |  |  |  |         result.ratio_vaccinated_once = result.num_vaccinated_once / population * 100 | 
			
		
	
		
		
			
				
					
					|  |  |  |         result.ratio_vaccinated_total = ( |  |  |  |         result.ratio_vaccinated_total = ( | 
			
		
	
		
		
			
				
					
					|  |  |  |             result.ratio_vaccinated_once + result.ratio_vaccinated_full |  |  |  |             result.ratio_vaccinated_once + result.ratio_vaccinated_full | 
			
		
	
		
		
			
				
					
					|  |  |  |         ) |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         result.ratio_vaccinated_booster = ( | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             result.num_vaccinated_booster / population * 100 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  |         _log.debug(f"Result data: {result}") |  |  |  |         _log.debug(f"Result data: {result}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         return result |  |  |  |         return result | 
			
		
	
	
		
		
			
				
					
					| 
						
						
						
						 |  |   |