13 Commits

Author SHA1 Message Date
Adrian Rumpold
2f73be9010 chore: Refactor duplicated HTTP fetching code 2021-08-11 08:12:48 +02:00
Adrian Rumpold
b6184be32f fix: Enable commented out code 2021-08-10 20:11:23 +02:00
Adrian Rumpold
70fa0619d4 fix: Simplify HTTP error handling 2021-08-10 20:11:14 +02:00
Adrian Rumpold
f83bb077c1 feat: Crawling and parsing of vaccination data
See #2
2021-08-10 20:06:41 +02:00
Adrian Rumpold
8a97e92458 Merge pull request #1 from AdrianoKF/add-github-actions
feat(CI): Add Github actions
2021-07-28 08:30:23 +02:00
Adrian Rumpold
68c879583a fix(CI): Exclude venv from linting 2021-07-28 08:26:36 +02:00
Adrian Rumpold
5eeb0c5eae fix(CI): Create Poetry venv in project 2021-07-28 08:19:26 +02:00
Adrian Rumpold
3fa66f0289 fix(CI): Move cache action before poetry install 2021-07-28 08:15:32 +02:00
Adrian Rumpold
79ec497614 feat(tests): Add bogus test suite to prevent pytest errors 2021-07-28 08:14:04 +02:00
Adrian Rumpold
767392a3bb fix(CI): Fix cache file pattern 2021-07-28 08:09:33 +02:00
Adrian Rumpold
cb1b4ecc08 fix(CI): Fix syntax error in GH action 2021-07-28 08:08:03 +02:00
Adrian Rumpold
7ef44355ab fix(CI): Fix tool invocation with Poetry, add cache action 2021-07-28 08:06:31 +02:00
Adrian Rumpold
12762f5027 feat(CI): Add Github actions 2021-07-28 07:58:22 +02:00
5 changed files with 174 additions and 42 deletions

52
.github/workflows/python-app.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      # Cache the in-project virtualenv (see "poetry config" below), keyed on
      # the lockfile so the cache is invalidated whenever dependencies change.
      - name: Cache
        uses: actions/cache@v2.1.6
        with:
          # A list of files, directories, and wildcard patterns to cache and restore
          path: .venv
          # An explicit key for restoring and saving the cache
          key: venv-cache-${{hashFiles('**/poetry.lock')}}
          restore-keys: |
            venv-cache-${{hashFiles('**/poetry.lock')}}
            venv-cache-

      - name: Python Poetry Action
        # You may pin to the exact commit or the version.
        # uses: abatilo/actions-poetry@8284d202bc272a8d0597e26e1c0b4a0d0c73db93
        uses: abatilo/actions-poetry@v2.1.0
        with:
          # The version of poetry to install
          poetry-version: 1.1.7

      - name: Install dependencies
        run: |
          poetry config virtualenvs.in-project true
          poetry install

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          poetry run flake8 . --count --exclude .git,.venv --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          poetry run flake8 . --count --exclude .git,.venv --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Test with pytest
        run: |
          poetry run pytest

View File

@@ -69,7 +69,7 @@ async def get_coordinator(hass: HomeAssistant):
async def async_get_data() -> IncidenceData: async def async_get_data() -> IncidenceData:
crawler = CovidCrawler(hass) crawler = CovidCrawler(hass)
return await crawler.crawl() return await crawler.crawl_incidence()
hass.data[DOMAIN] = DataUpdateCoordinator( hass.data[DOMAIN] = DataUpdateCoordinator(
hass, hass,

View File

@@ -15,6 +15,32 @@ def parse_num(s, t=int):
return 0 return 0
def parse_date(day, month: str, year=None) -> datetime.date:
    """Parse a German medium-form date, e.g. "17. August", into a datetime.date.

    :param day: day of month, as an int or numeric string (parsed via parse_num)
    :param month: full German month name, e.g. "August"
    :param year: calendar year as an int or numeric string; defaults to the
        current year, resolved at call time
    :raises ValueError: if *month* is not a known German month name
    """
    # Resolve the default lazily. The previous default argument
    # ``year=datetime.datetime.now().year`` was evaluated exactly once at
    # import time, so a long-running process (e.g. a Home Assistant instance)
    # would keep using a stale year after New Year's Eve.
    if year is None:
        year = datetime.datetime.now().year
    months = [
        "Januar",
        "Februar",
        "März",
        "April",
        "Mai",
        "Juni",
        "Juli",
        "August",
        "September",
        "Oktober",
        "November",
        "Dezember",
    ]
    # Month names are 0-indexed in the list but 1-indexed in the calendar.
    return datetime.date(
        year=int(year),
        month=1 + months.index(month),
        day=parse_num(day),
    )
@dataclass @dataclass
class IncidenceData: class IncidenceData:
location: str location: str
@@ -26,40 +52,59 @@ class IncidenceData:
num_dead: int = 0 num_dead: int = 0
@dataclass
class VaccinationData:
    """COVID-19 vaccination figures for a single reporting date."""

    date: str  # reporting date, formatted "%Y-%m-%d"
    total_vaccinations: int = 0  # total doses administered
    num_vaccinated_once: int = 0  # people who received a first dose
    num_vaccinated_full: int = 0  # people fully vaccinated
    ratio_vaccinated_once: float = 0.0  # num_vaccinated_once / population
    ratio_vaccinated_full: float = 0.0  # num_vaccinated_full / population
class CovidCrawlerBase(ABC): class CovidCrawlerBase(ABC):
@abstractmethod @abstractmethod
def crawl(self) -> IncidenceData: def crawl_incidence(self) -> IncidenceData:
pass
@abstractmethod
def crawl_vaccination(self) -> VaccinationData:
pass pass
class CovidCrawler(CovidCrawlerBase): class CovidCrawler(CovidCrawlerBase):
def __init__(self, hass=None) -> None: def __init__(self, hass=None) -> None:
self.url = (
"https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
)
self.hass = hass self.hass = hass
    async def _fetch(self, url: str) -> str:
        """Fetch a URL, using either the current Home Assistant instance or requests.

        Returns the response body parsed with BeautifulSoup's "html.parser".
        NOTE(review): despite the ``-> str`` annotation, both branches return a
        ``BeautifulSoup`` object — consider fixing the annotation.
        NOTE(review): only the ``requests`` branch calls ``raise_for_status()``;
        the aiohttp branch parses whatever body the server returned even on
        HTTP error statuses — confirm this asymmetry is intended.
        """
        if self.hass:
            # Imported lazily so the module stays importable outside Home Assistant.
            from homeassistant.helpers import aiohttp_client

            result = await aiohttp_client.async_get_clientsession(self.hass).get(url)
            soup = BeautifulSoup(await result.text(), "html.parser")
        else:
            # Fallback for standalone (non-Home-Assistant) use, e.g. the CLI.
            import requests

            result = requests.get(url)
            result.raise_for_status()
            soup = BeautifulSoup(result.text, "html.parser")
        return soup
async def crawl_incidence(self) -> IncidenceData:
""" """
Fetch COVID-19 infection data from the target website. Fetch COVID-19 infection data from the target website.
""" """
_log.info("Fetching COVID-19 data update") _log.info("Fetching COVID-19 data update")
if self.hass: url = (
from homeassistant.helpers import aiohttp_client "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/fallzahlen"
)
result = await aiohttp_client.async_get_clientsession(self.hass).get( soup = await self._fetch(url)
self.url
)
soup = BeautifulSoup(await result.text(), "html.parser")
else:
import requests
result = requests.get(self.url)
if not result.ok:
result.raise_for_status()
soup = BeautifulSoup(result.text, "html.parser")
match = soup.find(class_="frame--type-textpic") match = soup.find(class_="frame--type-textpic")
text = match.p.text text = match.p.text
@@ -79,27 +124,7 @@ class CovidCrawler(CovidCrawlerBase):
if not matches: if not matches:
raise ValueError(f"Could not extract date from scraped web page, {text=}") raise ValueError(f"Could not extract date from scraped web page, {text=}")
months = [ date = parse_date(matches.group(1), matches.group(2))
"Januar",
"Februar",
"März",
"April",
"Mai",
"Juni",
"Juli",
"August",
"September",
"Oktober",
"November",
"Dezember",
]
day = parse_num(matches.group(1))
month_name = matches.group(2)
date = datetime.date(
year=datetime.datetime.now().year,
month=1 + months.index(month_name),
day=day,
)
_log.debug(f"Parsed date: {date}") _log.debug(f"Parsed date: {date}")
match = match.find_next_sibling(class_="frame--type-textpic") match = match.find_next_sibling(class_="frame--type-textpic")
@@ -130,3 +155,47 @@ class CovidCrawler(CovidCrawlerBase):
_log.debug(f"Result data: {result}") _log.debug(f"Result data: {result}")
return result return result
    async def crawl_vaccination(self) -> VaccinationData:
        """Fetch and parse COVID-19 vaccination data from the city's website.

        Scrapes the vaccination-centre page, extracts the headline counts from
        the German prose with regexes, and derives per-capita ratios.

        :raises ValueError: if no "Stand <date>" marker is found in the text
        """
        _log.info("Fetching COVID-19 vaccination data update")
        url = (
            "https://www.augsburg.de/umwelt-soziales/gesundheit/coronavirus/impfzentrum"
        )
        soup = await self._fetch(url)
        # CMS element id of the content block holding the numbers.
        # NOTE(review): hard-coded — if the page layout changes, soup.find()
        # returns None and the .text access below raises AttributeError.
        container_id = "c1088140"
        result = soup.find(id=container_id)
        # Collapse all whitespace so the regexes can match across line breaks.
        text = re.sub(r"\s+", " ", result.text)
        # One pattern per figure; the named groups map 1:1 onto VaccinationData fields.
        regexes = [
            r"(?P<total_vaccinations>\d+[.]\d+) Impfdosen",
            r"Weitere (?P<num_vaccinated_once>\d+[.]\d+) Personen haben die Erstimpfung erhalten",
            r"(?P<num_vaccinated_full>\d+[.]\d+) Personen sind bereits vollständig geimpft",
        ]
        values = {}
        for r in regexes:
            matches = re.search(r, text)
            if not matches:
                # Best effort: a figure that fails to match keeps its dataclass default.
                continue
            values.update(
                {
                    # Strip the German thousands separator ("1.234" -> 1234).
                    k: parse_num(v.replace(".", ""))
                    for k, v in matches.groupdict().items()
                }
            )
        # Reporting date, e.g. "Stand 17. August 2021".
        matches = re.search(r"Stand (?P<day>\d+)\. (?P<month>\w+) (?P<year>\d+)", text)
        if not matches:
            raise ValueError(f"Could not extract date from scraped web page, {text=}")
        values["date"] = parse_date(**matches.groupdict()).strftime("%Y-%m-%d")
        result = VaccinationData(**values)
        # Total population in Augsburg as of 2020
        # https://www.augsburg.de/fileadmin/user_upload/buergerservice_rathaus/rathaus/statisiken_und_geodaten/statistiken/Monitoring/Demografiemonitoring_der_Stadt_Augsburg_2021.pdf
        population = 299021
        result.ratio_vaccinated_full = result.num_vaccinated_full / population
        result.ratio_vaccinated_once = result.num_vaccinated_once / population
        _log.debug(f"Result data: {result}")
        return result

View File

@@ -3,7 +3,10 @@ from .crawler import CovidCrawler
async def main():
    """CLI entry point: crawl incidence and vaccination data and print both."""
    crawler = CovidCrawler()
    result = await crawler.crawl_incidence()
    print(result)
    result = await crawler.crawl_vaccination()
    print(result)

8
tests/test_example.py Normal file
View File

@@ -0,0 +1,8 @@
"""Placeholder test suite to Pytest doesn't exit with error code
TODO: Remove once other tests have been added.
"""
def test_example():
assert True