diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2aea3241..5f6d4c99 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ ci:
   autoupdate_schedule: quarterly
   skip: [pip-compile]
 default_language_version:
-  python: python3.10
+  python: python3.11
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.6.9
diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index 30d4662b..5b6ba033 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -1,5 +1,3 @@
-import re
-
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
@@ -8,24 +6,41 @@
 
 class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
+        """Yield a Person for each council member parsed from COUNCIL_PAGE."""
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('//div[@class="block text"]')
-        assert len(councillors), "No councillors found"
-        for councillor in councillors:
-            name = councillor.xpath('.//div[@class="content-writable"]//strong/text()')[0]
-            district = councillor.xpath(".//h2/text()")[0]
-
-            if "Maire" in district:
-                district = "Sainte-Anne-de-Bellevue"
-                role = "Maire"
-            else:
-                district = "District {}".format(re.search(r"\d+", district)[0])
-                role = "Conseiller"
-
-            p = Person(primary_org="legislature", name=name, district=district, role=role)
-            p.add_source(COUNCIL_PAGE)
-
-            p.image = councillor.xpath(".//@src")[0]
-            p.add_contact("email", self.get_email(councillor))
+        # Check the match list before indexing it, so a missing container fails
+        # with the assertion message instead of a bare IndexError.
+        containers = page.xpath('//div[@class="col-md-12"]')
+        assert len(containers), "No councillors found"
+        councillors = containers[0]
+
+        # The first word of each h2 heading is the role; the third is the district number.
+        headings = councillors.xpath(".//h2/text()")
+        assert len(headings), "No councillors found"
+        roles = []
+        districts = []
+        for heading in headings:
+            words = heading.split()
+            roles.append(words[0])
+            # A one-word heading has no district number, so it gets the city-wide district.
+            if len(words) == 1:
+                districts.append("Sainte-Anne-de-Bellevue")
+            else:
+                districts.append("District " + words[2])
+
+        # Each paragraph that contains an email link supplies a name and an email.
+        names = []
+        emails = []
+        for contact in councillors.xpath('.//p[a[contains(@href, "@")]]'):
+            words = contact.text_content().split()
+            names.append(" ".join(words[:2]))
+            # The fourth token is the email; remove any "Président" text stuck to it.
+            emails.append(words[3].replace("Président", ""))
+
+        # Headings and paragraphs are paired by position, so a count mismatch would
+        # attach contact details to the wrong person.
+        assert len(roles) == len(districts) == len(names) == len(emails), "Lists are not of equal length"
+        for role, district, name, email in zip(roles, districts, names, emails):
+            p = Person(primary_org="legislature", name=name, district=district, role=role)
+            p.add_source(COUNCIL_PAGE)
+            p.add_contact("email", email)
             yield p
diff --git a/requirements.txt b/requirements.txt
index 080af981..9ec2ad3f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -69,8 +69,6 @@ sqlparse==0.5.1
     # via django
 text-unidecode==1.3
     # via python-slugify
-typing-extensions==4.12.2
-    # via asgiref
 unidecode==0.4.14
     # via -r requirements.in
 urllib3==1.26.20