opencivicdata · jpmckinney · Nov 4, 2024 · Oct 31, 2024 · Oct 31, 2024 · Oct 31, 2024
diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
@@ -1,7 +1,65 @@
-from utils import CSVScraper
+from utils import CanadianPerson as Person
+from utils import CanadianScraper
 
+COUNCIL_PAGE = "https://guelph.ca/city-hall/mayor-and-council/city-council/"
+MAYOR_PAGE = "https://guelph.ca/city-hall/mayor-and-council/mayors-office/"
 
-class GuelphPersonScraper(CSVScraper):
-    # http://data.open.guelph.ca/dataset/city-of-guelph-contacts
-    csv_url = "http://data.open.guelph.ca/datafiles/guelph-mayor-and-councillors-contact-information-2018-2022.csv"
-    many_posts_per_area = True
+
+class GuelphPersonScraper(CanadianScraper):
+    """Scrape Guelph's councillors and mayor from the city's web pages."""
+
+    def scrape(self):
+        """Yield a Person for each councillor found on COUNCIL_PAGE, then the mayor.
+
+        Raises AssertionError if no councillor groups are found on the page.
+        """
+        page = self.lxmlize(COUNCIL_PAGE)
+
+        # The first "thumbnail" div is skipped (presumably not a councillor group - confirm against the page).
+        councillor_nodes = page.xpath('.//div[@class="thumbnail"]')[1:]
+        assert len(councillor_nodes), "No councillors found"
+
+        for councillor_node in councillor_nodes:
+            # The <h2> text that precedes "Councillors" is used as the district name.
+            district = councillor_node.xpath(".//h2/text()")[0].split("Councillors")[0].strip()
+
+            for councillor in councillor_node.xpath(".//div/div"):
+                headings = councillor.xpath(".//h3/text()")
+                if not headings:
+                    # Nested divs without an <h3> heading carry no role/name to parse.
+                    continue
+
+                # The heading is parsed as "<role> <name>", splitting on the first space only.
+                role, name = headings[0].split(" ", 1)
+                phone = councillor.xpath(".//p/text()")[1].strip()
+                email = self.get_email(councillor)
+                image = councillor.xpath(".//img/@src")[0]
+
+                p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
+                p.add_contact("email", email)
+                if phone:
+                    p.add_contact("voice", phone, "legislature")
+                p.add_source(COUNCIL_PAGE)
+                yield p
+
+        yield self.scrape_mayor(MAYOR_PAGE)
+
+    def scrape_mayor(self, url):
+        """Return a Person for the mayor, built from the page at `url`."""
+        page = self.lxmlize(url)
+
+        # Name and contact details are read from the last paragraph of the entry content.
+        mayor_node = page.xpath('.//div[@class="entry-content"]/p')[-1]
+        name = mayor_node.xpath(".//text()")[0].strip().split("Mayor ")[1]
+        phone = self.get_phone(mayor_node)
+        email = self.get_email(mayor_node)
+        # The leading "//" makes this query search the whole document, not only mayor_node.
+        image = mayor_node.xpath('//img[contains(@alt, "Mayor")]/@src')[0]
+
+        p = Person(primary_org="legislature", name=name, district="Guelph", role="Mayor", image=image)
+        p.add_contact("voice", phone, "legislature")
+        p.add_contact("email", email)
+        # Record the page that was actually fetched rather than the module-level constant.
+        p.add_source(url)
+
+        return p