From 3f6de35e382b23480d4bfb02521221a66922355c Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 12:06:26 -0400
Subject: [PATCH 01/21] fix

---
 ca_on_markham/people.py | 83 +++++++++++++++++++++++++++--------------
 1 file changed, 55 insertions(+), 28 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index 2b01dfd7..cc20b976 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -3,10 +3,8 @@
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
-COUNCIL_PAGE = (
-    "https://www.markham.ca/wps/portal/home/about/city-hall/regional-ward-councillors/02-regional-ward-councillors"
-)
-MAYOR_PAGE = "https://www.markham.ca/wps/portal/home/about/city-hall/mayor/00-mayors-office"
+COUNCIL_PAGE = "https://www.markham.ca/about-city-markham/city-hall/regional-ward-councillors"
+MAYOR_PAGE = "https://www.markham.ca/about-city-markham/city-hall/mayors-office"
 
 
 class MarkhamPersonScraper(CanadianScraper):
@@ -17,25 +15,19 @@ def scrape(self):
 
         yield self.scrape_mayor(MAYOR_PAGE)
 
-        councillors = page.xpath('//div[@class="col-sm-3 col-xs-6"]')
+        regional_councillors = page.xpath('//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]')[0]
+        ward_councillors = page.xpath('//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]')[1]
+        councillors = [regional_councillors, ward_councillors]
         assert len(councillors), "No councillors found"
-        for councillor in councillors:
-            name, district = councillor.xpath(".//h4/text()")[0].split(", ")
-            if "Ward" in district:
-                district = district.replace("Councillor", "").strip()
-                role = "Councillor"
-            elif "Regional" in district:
-                role = "Regional Councillor"
-                district = f"Markham (seat {regional_councillor_seat_number})"
-                regional_councillor_seat_number += 1
-            else:
-                role = district
-                district = "Markham"
+        for i, councillor in enumerate(regional_councillors):
+            name = councillor.xpath(".//h3/text()")[0].strip()
+            district = councillor.xpath(".//p/text()")[0].strip()
+            role = "Regional Councillor"
+            district = f"Markham (seat {regional_councillor_seat_number})"
+            regional_councillor_seat_number += 1
 
             image = councillor.xpath(".//img/@src")[0]
-            url = "https://www.markham.ca/wps/portal/home/about" + re.search(
-                r"(?<=about).*(?='\))", councillor.xpath(".//a/@href")[0]
-            ).group(0)
+            url = councillor.xpath(".//a/@href")[0]
 
             address, phone, email, links = self.get_contact(url)
 
@@ -52,14 +44,48 @@ def scrape(self):
                 p.add_link(link)
 
             yield p
+        
+        for i, councillor in enumerate(ward_councillors):
+            name = councillor.xpath(".//h3/text()")[0].strip()
+            district = councillor.xpath(".//p/text()")[0].strip()
+            district = district.replace("Councillor", "").strip()
+            role = "Councillor"
 
+            image = councillor.xpath(".//img/@src")[0]
+            url = councillor.xpath(".//a/@href")[0]
+
+            address, phone, email, links = self.get_contact(url)
+
+            p = Person(primary_org="legislature", name=name, district=district, role=role)
+            p.add_source(COUNCIL_PAGE)
+            p.add_source(url)
+
+            p.image = image
+            p.add_contact("address", address, "legislature")
+            p.add_contact("voice", phone, "legislature")
+            p.add_contact("email", email)
+
+            for link in links:
+                p.add_link(link)
+
+            yield p
+    
     def get_contact(self, url):
         page = self.lxmlize(url)
 
-        contact_node = page.xpath('//div[@class="vcard col-sm-6"]')[0]
+        contact_node = page.xpath('//div[@class="pd-x-16 pd-y-32 bg-white committee-right-info-section layout__region layout__region--second"]')[0]
         links = []
 
-        address = contact_node.xpath(".//p/text()")[:2]
+        if contact_node.xpath('.//span[@class="address-line1"]/text()'):
+            address = (contact_node.xpath('.//span[@class="address-line1"]/text()')[0]
+                + " " + contact_node.xpath('.//span[@class="locality"]/text()')[0]
+                + " " + contact_node.xpath('.//span[@class="administrative-area"]/text()')[0]
+                + " " + contact_node.xpath('.//span[@class="postal-code"]/text()')[0]
+                + " " + contact_node.xpath('.//span[@class="country"]/text()')[0])
+        else:
+            contact_node = page.xpath('//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]')[0]
+            address = contact_node.xpath('.//p/text()')[0] + " " + contact_node.xpath('.//p/text()')[1]
+        
         links = get_links(contact_node)
         phone = self.get_phone(contact_node)
         email = self.get_email(contact_node)
@@ -68,12 +94,13 @@ def get_contact(self, url):
 
     def scrape_mayor(self, url):
         page = self.lxmlize(url)
-        name = page.xpath('//img/@alt[contains(., "Mayor")]')[0].split(", ", 1)[1]
-        email = self.get_email(page)
-        phone = self.get_phone(page)
-
+        name = page.xpath('.//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]/h1/span/span/text()')[0]
+        contact_node = page.xpath('.//div[@class="dept-contact-info--block"]')[0]
+        email = self.get_email(contact_node)
+        phone = self.get_phone(contact_node)
+        
         p = Person(primary_org="legislature", name=name, district="Markham", role="Mayor")
-        p.image = page.xpath('//img[contains(./@alt, "Mayor")]/@src')[0]
+        p.image = page.xpath('.//div[@class="align-right media--image"]/div/img/@src')[0]
         p.add_contact("email", email)
         p.add_contact("voice", phone, "legislature")
         p.add_source(url)
@@ -86,6 +113,6 @@ def get_links(elem):
     links = elem.xpath(".//a")
     for link in links:
         link = link.attrib["href"]
-        if "http://www.markham.ca" not in link and "mail" not in link:
+        if "http://www.markham.ca" not in link and "mail" not in link and "tel" not in link:
             links_r.append(link)
     return links_r

From 2c417f86f74e25a12a3d225d780cbc4c744dbe58 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 15:05:49 -0400
Subject: [PATCH 02/21] cleanup

---
 ca_on_markham/people.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index cc20b976..5f8ae577 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -1,5 +1,3 @@
-import re
-
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
@@ -19,7 +17,7 @@ def scrape(self):
         ward_councillors = page.xpath('//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]')[1]
         councillors = [regional_councillors, ward_councillors]
         assert len(councillors), "No councillors found"
-        for i, councillor in enumerate(regional_councillors):
+        for councillor in regional_councillors:
             name = councillor.xpath(".//h3/text()")[0].strip()
             district = councillor.xpath(".//p/text()")[0].strip()
             role = "Regional Councillor"
@@ -44,8 +42,8 @@ def scrape(self):
                 p.add_link(link)
 
             yield p
-        
-        for i, councillor in enumerate(ward_councillors):
+
+        for councillor in ward_councillors:
             name = councillor.xpath(".//h3/text()")[0].strip()
             district = councillor.xpath(".//p/text()")[0].strip()
             district = district.replace("Councillor", "").strip()
@@ -69,7 +67,7 @@ def scrape(self):
                 p.add_link(link)
 
             yield p
-    
+   
     def get_contact(self, url):
         page = self.lxmlize(url)
 
@@ -85,7 +83,7 @@ def get_contact(self, url):
         else:
             contact_node = page.xpath('//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]')[0]
             address = contact_node.xpath('.//p/text()')[0] + " " + contact_node.xpath('.//p/text()')[1]
-        
+
         links = get_links(contact_node)
         phone = self.get_phone(contact_node)
         email = self.get_email(contact_node)

From 5fe2b2175cf5aa46b0f50d580d80c0ac69dc51f7 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 15:07:10 -0400
Subject: [PATCH 03/21] cleanup

---
 ca_on_markham/people.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index 5f8ae577..2def610d 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -67,7 +67,7 @@ def scrape(self):
                 p.add_link(link)
 
             yield p
-   
+
     def get_contact(self, url):
         page = self.lxmlize(url)
 
@@ -96,7 +96,7 @@ def scrape_mayor(self, url):
         contact_node = page.xpath('.//div[@class="dept-contact-info--block"]')[0]
         email = self.get_email(contact_node)
         phone = self.get_phone(contact_node)
-        
+
         p = Person(primary_org="legislature", name=name, district="Markham", role="Mayor")
         p.image = page.xpath('.//div[@class="align-right media--image"]/div/img/@src')[0]
         p.add_contact("email", email)

From 8ea25dda32894d71b35a2d97447dfae8f136bb04 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 14:48:01 -0400
Subject: [PATCH 04/21] fix

---
 ca_on_thunder_bay/people.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ca_on_thunder_bay/people.py b/ca_on_thunder_bay/people.py
index 0c09dcac..86d56487 100644
--- a/ca_on_thunder_bay/people.py
+++ b/ca_on_thunder_bay/people.py
@@ -45,4 +45,4 @@ def scrape(self):
 
     def lxmlize(self, url, encoding=None, *, user_agent=DEFAULT_USER_AGENT, cookies=None, xml=False):
         requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ":HIGH:!DH:!aNULL"  # site uses a weak DH key
-        return super().lxmlize(url, encoding, user_agent, cookies, xml)
+        return super().lxmlize(url, encoding, user_agent=user_agent, cookies=cookies, xml=xml)

From dc1fe25d24217e34c99ace0cf3b458bb7eb84bdd Mon Sep 17 00:00:00 2001
From: Samuel Pei <samuel.pei@jmaconsulting.biz>
Date: Tue, 29 Oct 2024 10:07:58 -0400
Subject: [PATCH 05/21] Fixed list index out of range

---
 ca_qc_dollard_des_ormeaux/people.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ca_qc_dollard_des_ormeaux/people.py b/ca_qc_dollard_des_ormeaux/people.py
index ef11a5f8..74045a82 100644
--- a/ca_qc_dollard_des_ormeaux/people.py
+++ b/ca_qc_dollard_des_ormeaux/people.py
@@ -29,8 +29,10 @@ def scrape(self):
 
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
-            p.image = councillor.xpath(".//@data-src")[0]
-
+            img_path = councillor.xpath(".//@data-src")
+            if img_path:
+                p.image = img_path[0]
+                print(p.image)
             p.add_contact("email", email)
             p.add_contact("voice", general_phone, "legislature")
             p.add_contact("fax", general_fax, "legislature")

From 44c9d68c98bf3278a0183f85f1f8a276f4613420 Mon Sep 17 00:00:00 2001
From: Samuel Pei <samuel.pei@jmaconsulting.biz>
Date: Tue, 29 Oct 2024 16:39:08 -0400
Subject: [PATCH 06/21] Removed unnecessary print statement

---
 ca_qc_dollard_des_ormeaux/people.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ca_qc_dollard_des_ormeaux/people.py b/ca_qc_dollard_des_ormeaux/people.py
index 74045a82..f9b2ccda 100644
--- a/ca_qc_dollard_des_ormeaux/people.py
+++ b/ca_qc_dollard_des_ormeaux/people.py
@@ -32,7 +32,6 @@ def scrape(self):
             img_path = councillor.xpath(".//@data-src")
             if img_path:
                 p.image = img_path[0]
-                print(p.image)
             p.add_contact("email", email)
             p.add_contact("voice", general_phone, "legislature")
             p.add_contact("fax", general_fax, "legislature")

From 36e54c922ee10cf915a8ffecdfa8e9cf034053ea Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Wed, 30 Oct 2024 17:37:49 -0400
Subject: [PATCH 07/21] chore: Use consistent variable name

---
 ca_qc_dollard_des_ormeaux/people.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ca_qc_dollard_des_ormeaux/people.py b/ca_qc_dollard_des_ormeaux/people.py
index f9b2ccda..1753f0ff 100644
--- a/ca_qc_dollard_des_ormeaux/people.py
+++ b/ca_qc_dollard_des_ormeaux/people.py
@@ -29,9 +29,9 @@ def scrape(self):
 
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
-            img_path = councillor.xpath(".//@data-src")
-            if img_path:
-                p.image = img_path[0]
+            image = councillor.xpath(".//@data-src")
+            if image:
+                p.image = image[0]
             p.add_contact("email", email)
             p.add_contact("voice", general_phone, "legislature")
             p.add_contact("fax", general_fax, "legislature")

From ed22c5a7e01e5e60bf2ab5228f292bb0e7244019 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:45:53 -0400
Subject: [PATCH 08/21] fix

---
 ca_on_markham/people.py | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index 2def610d..7f4384bd 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -13,8 +13,12 @@ def scrape(self):
 
         yield self.scrape_mayor(MAYOR_PAGE)
 
-        regional_councillors = page.xpath('//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]')[0]
-        ward_councillors = page.xpath('//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]')[1]
+        regional_councillors = page.xpath(
+            '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
+        )[0]
+        ward_councillors = page.xpath(
+            '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
+        )[1]
         councillors = [regional_councillors, ward_councillors]
         assert len(councillors), "No councillors found"
         for councillor in regional_councillors:
@@ -71,18 +75,28 @@ def scrape(self):
     def get_contact(self, url):
         page = self.lxmlize(url)
 
-        contact_node = page.xpath('//div[@class="pd-x-16 pd-y-32 bg-white committee-right-info-section layout__region layout__region--second"]')[0]
+        contact_node = page.xpath(
+            '//div[@class="pd-x-16 pd-y-32 bg-white committee-right-info-section layout__region layout__region--second"]'
+        )[0]
         links = []
 
         if contact_node.xpath('.//span[@class="address-line1"]/text()'):
-            address = (contact_node.xpath('.//span[@class="address-line1"]/text()')[0]
-                + " " + contact_node.xpath('.//span[@class="locality"]/text()')[0]
-                + " " + contact_node.xpath('.//span[@class="administrative-area"]/text()')[0]
-                + " " + contact_node.xpath('.//span[@class="postal-code"]/text()')[0]
-                + " " + contact_node.xpath('.//span[@class="country"]/text()')[0])
+            address = (
+                contact_node.xpath('.//span[@class="address-line1"]/text()')[0]
+                + " "
+                + contact_node.xpath('.//span[@class="locality"]/text()')[0]
+                + " "
+                + contact_node.xpath('.//span[@class="administrative-area"]/text()')[0]
+                + " "
+                + contact_node.xpath('.//span[@class="postal-code"]/text()')[0]
+                + " "
+                + contact_node.xpath('.//span[@class="country"]/text()')[0]
+            )
         else:
-            contact_node = page.xpath('//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]')[0]
-            address = contact_node.xpath('.//p/text()')[0] + " " + contact_node.xpath('.//p/text()')[1]
+            contact_node = page.xpath(
+                '//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]'
+            )[0]
+            address = contact_node.xpath(".//p/text()")[0] + " " + contact_node.xpath(".//p/text()")[1]
 
         links = get_links(contact_node)
         phone = self.get_phone(contact_node)
@@ -92,7 +106,9 @@ def get_contact(self, url):
 
     def scrape_mayor(self, url):
         page = self.lxmlize(url)
-        name = page.xpath('.//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]/h1/span/span/text()')[0]
+        name = page.xpath(
+            './/div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]/h1/span/span/text()'
+        )[0]
         contact_node = page.xpath('.//div[@class="dept-contact-info--block"]')[0]
         email = self.get_email(contact_node)
         phone = self.get_phone(contact_node)

From 5a348b393a05fd057ef5ae0d7912ff58f7190494 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 15:37:21 -0400
Subject: [PATCH 09/21] cleanup

---
 ca_bc_victoria/people.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ca_bc_victoria/people.py b/ca_bc_victoria/people.py
index 9796b6ca..12a70505 100644
--- a/ca_bc_victoria/people.py
+++ b/ca_bc_victoria/people.py
@@ -36,7 +36,10 @@ def scrape(self):
             '//ul[@class="menu menu--level-0"]//a[contains(., "Mayor") and not(contains(., "Council"))]/@href'
         )[0]
         page = self.lxmlize(mayor_url)
-        role, name = page.xpath("//h1/span")[0].text_content().split(" ", 1)
+        role = "Mayor"
+        role, name = page.xpath(
+            '//ul[@class="menu menu--level-0"]//a[contains(., "Mayor") and not(contains(., "Council"))]/text()'
+        )[0].split(" ", 1)
         photo = councillor.xpath('//div[@class="field__item"]/img/@src')[0]
         email = self.get_email(page)
         phone = self.get_phone(page)

From c5517886c31c8ebef30ff60dc5032f32e61ddddd Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:53:45 -0400
Subject: [PATCH 10/21] remove

---
 ca_bc_victoria/people.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ca_bc_victoria/people.py b/ca_bc_victoria/people.py
index 12a70505..8e35d5d9 100644
--- a/ca_bc_victoria/people.py
+++ b/ca_bc_victoria/people.py
@@ -36,7 +36,6 @@ def scrape(self):
             '//ul[@class="menu menu--level-0"]//a[contains(., "Mayor") and not(contains(., "Council"))]/@href'
         )[0]
         page = self.lxmlize(mayor_url)
-        role = "Mayor"
         role, name = page.xpath(
             '//ul[@class="menu menu--level-0"]//a[contains(., "Mayor") and not(contains(., "Council"))]/text()'
         )[0].split(" ", 1)

From aca7b8474f12897def384a2fe78d0415dff85918 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Tue, 29 Oct 2024 10:23:10 -0400
Subject: [PATCH 11/21] undo me

---
 ca_on_wilmot/people.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index d9676cfe..68497e2c 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -1,14 +1,16 @@
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
-COUNCIL_PAGE = "https://www.wilmot.ca/Modules/contact/search.aspx?s=EFHOVXSi8AOIMKMStZMNvAeQuAleQuAl"
+COUNCIL_PAGE = "https://www.wilmot.ca/en/township-office/council.aspx"
 
 
 class WilmotPersonScraper(CanadianScraper):
     def scrape(self):
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('//table[@class="contactList"]//tr')
+        # councillors = page.xpath('//div[@id="StandardOneColumnTK1_lm1723651463356"]')
+        councillors = page.xpath('//div[contains(@class, "icrtAccordion")]')
+        print("councillors", councillors)
         assert len(councillors), "No councillors found"
         for councillor in councillors:
             name, role_district = councillor.xpath(".//button/text()")[0].split(" - ", 1)

From 911613282882209c4e6854151e9f6c1cd72d14a0 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 11:58:26 -0400
Subject: [PATCH 12/21] fix

---
 ca_on_wilmot/people.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index 68497e2c..2d5ea96d 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -8,22 +8,28 @@ class WilmotPersonScraper(CanadianScraper):
     def scrape(self):
         page = self.lxmlize(COUNCIL_PAGE)
 
-        # councillors = page.xpath('//div[@id="StandardOneColumnTK1_lm1723651463356"]')
-        councillors = page.xpath('//div[contains(@class, "icrtAccordion")]')
-        print("councillors", councillors)
+        councillors = page.xpath('.//table[@class="icrtAccordion"]//tr')
+        councillors = parse_counsillors(councillors)
         assert len(councillors), "No councillors found"
         for councillor in councillors:
-            name, role_district = councillor.xpath(".//button/text()")[0].split(" - ", 1)
-            if "Mayor" in role_district:
-                yield scrape_mayor(councillor, name)
-                continue
-            role, district = role_district.split(" - ")
-
+            roleAndName, contactInfo = councillor
+            try:
+                role, name = roleAndName.text_content().strip().split("—\xa0")
+            except:
+                role, name = roleAndName.text_content().strip().split("— ")
+
+            if "Councillor" in role:
+                district = role.split(" Councillor")[0]
+                role = "Councillor"
+            else:
+                district = "Wilmot"
+
+            phone = self.get_phone(contactInfo)
+            email = self.get_email(contactInfo)
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
-
-            phone = self.get_phone(councillor).replace("/", "")
             p.add_contact("voice", phone, "legislature")
+            p.add_contact("email", email)
             yield p
 
 
@@ -39,3 +45,6 @@ def scrape_mayor(div, name):
     p.add_contact("voice", other_phone, "office")
 
     return p
+
+def parse_counsillors(councillors):
+    return [councillors[i:i + 2] for i in range(0, len(councillors), 2)]
\ No newline at end of file

From b185fdc38c5648579e0b80ea9c1d542e5a4f9bbe Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Wed, 30 Oct 2024 15:19:23 -0400
Subject: [PATCH 13/21] cleanup

---
 ca_on_wilmot/people.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index 2d5ea96d..c9c16a7f 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -12,11 +12,11 @@ def scrape(self):
         councillors = parse_counsillors(councillors)
         assert len(councillors), "No councillors found"
         for councillor in councillors:
-            roleAndName, contactInfo = councillor
-            try:
-                role, name = roleAndName.text_content().strip().split("—\xa0")
-            except:
-                role, name = roleAndName.text_content().strip().split("— ")
+            role_name, contact_info = councillor
+            if "—\xa0" in role_name.text_content().strip():
+                role, name = role_name.text_content().strip().split("—\xa0")
+            else:
+                role, name = role_name.text_content().strip().split("— ")
 
             if "Councillor" in role:
                 district = role.split(" Councillor")[0]
@@ -24,8 +24,8 @@ def scrape(self):
             else:
                 district = "Wilmot"
 
-            phone = self.get_phone(contactInfo)
-            email = self.get_email(contactInfo)
+            phone = self.get_phone(contact_info)
+            email = self.get_email(contact_info)
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
             p.add_contact("voice", phone, "legislature")
@@ -47,4 +47,4 @@ def scrape_mayor(div, name):
     return p
 
 def parse_counsillors(councillors):
-    return [councillors[i:i + 2] for i in range(0, len(councillors), 2)]
\ No newline at end of file
+    return [councillors[i:i + 2] for i in range(0, len(councillors), 2)]

From 5d93ed684174f106a5b109f2094f07a640bed818 Mon Sep 17 00:00:00 2001
From: bzhangjma <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:47:25 -0400
Subject: [PATCH 14/21] Update ca_on_wilmot/people.py

Simplify

Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com>
---
 ca_on_wilmot/people.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index c9c16a7f..1b7828ca 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -13,10 +13,11 @@ def scrape(self):
         assert len(councillors), "No councillors found"
         for councillor in councillors:
             role_name, contact_info = councillor
-            if "—\xa0" in role_name.text_content().strip():
-                role, name = role_name.text_content().strip().split("—\xa0")
+            role_name = role_name.text_content().strip()
+            if "—\xa0" in role_name:
+                role, name = role_name.split("—\xa0")
             else:
-                role, name = role_name.text_content().strip().split("— ")
+                role, name = role_name.split("— ")
 
             if "Councillor" in role:
                 district = role.split(" Councillor")[0]

From b2b178cee1a5f97cfcbff8b925bb29126fd80554 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:51:15 -0400
Subject: [PATCH 15/21] trigger pre commit ci

---
 ca_on_wilmot/people.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index 1b7828ca..7045af15 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -47,5 +47,6 @@ def scrape_mayor(div, name):
 
     return p
 
+
 def parse_counsillors(councillors):
-    return [councillors[i:i + 2] for i in range(0, len(councillors), 2)]
+    return [councillors[i : i + 2] for i in range(0, len(councillors), 2)]

From 4f95e76de491aea0b0670e3ef289d6e24ae25de6 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 1 Nov 2024 12:11:11 -0400
Subject: [PATCH 16/21] chore: Tidy Wilmot

---
 ca_on_wilmot/people.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index 7045af15..29797efe 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -9,15 +9,10 @@ def scrape(self):
         page = self.lxmlize(COUNCIL_PAGE)
 
         councillors = page.xpath('.//table[@class="icrtAccordion"]//tr')
-        councillors = parse_counsillors(councillors)
         assert len(councillors), "No councillors found"
-        for councillor in councillors:
-            role_name, contact_info = councillor
-            role_name = role_name.text_content().strip()
-            if "—\xa0" in role_name:
-                role, name = role_name.split("—\xa0")
-            else:
-                role, name = role_name.split("— ")
+        for i in range(0, len(councillors), 2):
+            role_name, contact_info = councillors[i], councillors[i + 1]
+            role, name = role_name.text_content().strip().replace("\xa0", " ").split("— ")
 
             if "Councillor" in role:
                 district = role.split(" Councillor")[0]
@@ -46,7 +41,3 @@ def scrape_mayor(div, name):
     p.add_contact("voice", other_phone, "office")
 
     return p
-
-
-def parse_counsillors(councillors):
-    return [councillors[i : i + 2] for i in range(0, len(councillors), 2)]

From b6dc6a89c497406ed8de599b31c462a5462f1e5d Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 1 Nov 2024 12:11:35 -0400
Subject: [PATCH 17/21] build: Add .python-version

---
 .python-version | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .python-version

diff --git a/.python-version b/.python-version
new file mode 100644
index 00000000..bd28b9c5
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.9

From 361c1e3c118914d1f05f1cc835a0762c3d0cc9a6 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 1 Nov 2024 12:17:37 -0400
Subject: [PATCH 18/21] ca_on_wilmot: Remove unused scrape_mayor function

---
 ca_on_wilmot/people.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/ca_on_wilmot/people.py b/ca_on_wilmot/people.py
index 29797efe..d4ac6f1c 100644
--- a/ca_on_wilmot/people.py
+++ b/ca_on_wilmot/people.py
@@ -8,15 +8,17 @@ class WilmotPersonScraper(CanadianScraper):
     def scrape(self):
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('.//table[@class="icrtAccordion"]//tr')
+        councillors = page.xpath('//table[@class="icrtAccordion"]//tr')
         assert len(councillors), "No councillors found"
         for i in range(0, len(councillors), 2):
             role_name, contact_info = councillors[i], councillors[i + 1]
             role, name = role_name.text_content().strip().replace("\xa0", " ").split("— ")
 
+            # "Ward 1 Councillor"
             if "Councillor" in role:
                 district = role.split(" Councillor")[0]
                 role = "Councillor"
+            # "Mayor", "Executive Officer to the Mayor and Council"
             else:
                 district = "Wilmot"
 
@@ -27,17 +29,3 @@ def scrape(self):
             p.add_contact("voice", phone, "legislature")
             p.add_contact("email", email)
             yield p
-
-
-def scrape_mayor(div, name):
-    p = Person(primary_org="legislature", name=name, district="Wilmot", role="Mayor")
-    p.add_source(COUNCIL_PAGE)
-
-    address = div.xpath('.//div[@class="contactListAddress"]')[0].text_content()
-    phone = div.xpath('.//div[@class="contactListMainNumber"]/a/text()')[0]
-    other_phone = div.xpath('.//div[@class="contactListPhNumber"]/a/text()')[0]
-    p.add_contact("address", address, "legislature")
-    p.add_contact("voice", phone, "legislature")
-    p.add_contact("voice", other_phone, "office")
-
-    return p

From 4853a958c7ebc5dd87b602e290334e9763fd6604 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 1 Nov 2024 12:25:53 -0400
Subject: [PATCH 19/21] ca_on_markham: Reduce repetition

---
 ca_on_markham/people.py | 81 ++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 53 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index 7f4384bd..8fe1e105 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -7,70 +7,45 @@
 
 class MarkhamPersonScraper(CanadianScraper):
     def scrape(self):
-        regional_councillor_seat_number = 1
-
-        page = self.lxmlize(COUNCIL_PAGE)
-
         yield self.scrape_mayor(MAYOR_PAGE)
 
-        regional_councillors = page.xpath(
+        groups = self.lxmlize(COUNCIL_PAGE).xpath(
             '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
-        )[0]
-        ward_councillors = page.xpath(
-            '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
-        )[1]
-        councillors = [regional_councillors, ward_councillors]
-        assert len(councillors), "No councillors found"
-        for councillor in regional_councillors:
-            name = councillor.xpath(".//h3/text()")[0].strip()
-            district = councillor.xpath(".//p/text()")[0].strip()
-            role = "Regional Councillor"
-            district = f"Markham (seat {regional_councillor_seat_number})"
-            regional_councillor_seat_number += 1
-
-            image = councillor.xpath(".//img/@src")[0]
-            url = councillor.xpath(".//a/@href")[0]
+        )
+        assert len(groups) == 2, "No councillors found"
 
-            address, phone, email, links = self.get_contact(url)
-
-            p = Person(primary_org="legislature", name=name, district=district, role=role)
-            p.add_source(COUNCIL_PAGE)
-            p.add_source(url)
-
-            p.image = image
-            p.add_contact("address", address, "legislature")
-            p.add_contact("voice", phone, "legislature")
-            p.add_contact("email", email)
-
-            for link in links:
-                p.add_link(link)
-
-            yield p
+        regional_councillor_seat_number = 1
+        for i, group in enumerate(groups):
+            for councillor in group:
+                name = councillor.xpath(".//h3/text()")[0].strip()
+                district = councillor.xpath(".//p/text()")[0].strip()
 
-        for councillor in ward_councillors:
-            name = councillor.xpath(".//h3/text()")[0].strip()
-            district = councillor.xpath(".//p/text()")[0].strip()
-            district = district.replace("Councillor", "").strip()
-            role = "Councillor"
+                if i == 0:
+                    role = "Regional Councillor"
+                    district = f"Markham (seat {regional_councillor_seat_number})"
+                    regional_councillor_seat_number += 1
+                else:
+                    role = "Councillor"
+                    district = district.replace("Councillor", "").strip()
 
-            image = councillor.xpath(".//img/@src")[0]
-            url = councillor.xpath(".//a/@href")[0]
+                image = councillor.xpath(".//img/@src")[0]
+                url = councillor.xpath(".//a/@href")[0]
 
-            address, phone, email, links = self.get_contact(url)
+                address, phone, email, links = self.get_contact(url)
 
-            p = Person(primary_org="legislature", name=name, district=district, role=role)
-            p.add_source(COUNCIL_PAGE)
-            p.add_source(url)
+                p = Person(primary_org="legislature", name=name, district=district, role=role)
+                p.add_source(COUNCIL_PAGE)
+                p.add_source(url)
 
-            p.image = image
-            p.add_contact("address", address, "legislature")
-            p.add_contact("voice", phone, "legislature")
-            p.add_contact("email", email)
+                p.image = image
+                p.add_contact("address", address, "legislature")
+                p.add_contact("voice", phone, "legislature")
+                p.add_contact("email", email)
 
-            for link in links:
-                p.add_link(link)
+                for link in links:
+                    p.add_link(link)
 
-            yield p
+                yield p
 
     def get_contact(self, url):
         page = self.lxmlize(url)

From 746369d037c5e4aef7ac07fb0a9eb5b53a714787 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 1 Nov 2024 12:37:18 -0400
Subject: [PATCH 20/21] ca_on_markham: Align with original code

---
 ca_on_markham/people.py | 81 +++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 39 deletions(-)

diff --git a/ca_on_markham/people.py b/ca_on_markham/people.py
index 8fe1e105..276f91f0 100644
--- a/ca_on_markham/people.py
+++ b/ca_on_markham/people.py
@@ -7,45 +7,50 @@
 
 class MarkhamPersonScraper(CanadianScraper):
     def scrape(self):
+        regional_councillor_seat_number = 1
+
+        page = self.lxmlize(COUNCIL_PAGE)
+
         yield self.scrape_mayor(MAYOR_PAGE)
 
-        groups = self.lxmlize(COUNCIL_PAGE).xpath(
-            '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
+        councillors = page.xpath(
+            '//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]/div'
         )
-        assert len(groups) == 2, "No councillors found"
+        assert len(councillors), "No councillors found"
 
-        regional_councillor_seat_number = 1
-        for i, group in enumerate(groups):
-            for councillor in group:
-                name = councillor.xpath(".//h3/text()")[0].strip()
-                district = councillor.xpath(".//p/text()")[0].strip()
+        for councillor in councillors:
+            name = councillor.xpath(".//h3/text()")[0].strip()
+            district = councillor.xpath(".//p/text()")[0].strip()
 
-                if i == 0:
-                    role = "Regional Councillor"
-                    district = f"Markham (seat {regional_councillor_seat_number})"
-                    regional_councillor_seat_number += 1
-                else:
-                    role = "Councillor"
-                    district = district.replace("Councillor", "").strip()
+            if "Ward" in district:
+                district = district.replace("Councillor", "").strip()
+                role = "Councillor"
+            elif "Regional" in district:
+                role = "Regional Councillor"
+                district = f"Markham (seat {regional_councillor_seat_number})"
+                regional_councillor_seat_number += 1
+            else:
+                role = district
+                district = "Markham"
 
-                image = councillor.xpath(".//img/@src")[0]
-                url = councillor.xpath(".//a/@href")[0]
+            image = councillor.xpath(".//img/@src")[0]
+            url = councillor.xpath(".//a/@href")[0]
 
-                address, phone, email, links = self.get_contact(url)
+            address, phone, email, links = self.get_contact(url)
 
-                p = Person(primary_org="legislature", name=name, district=district, role=role)
-                p.add_source(COUNCIL_PAGE)
-                p.add_source(url)
+            p = Person(primary_org="legislature", name=name, district=district, role=role)
+            p.add_source(COUNCIL_PAGE)
+            p.add_source(url)
 
-                p.image = image
-                p.add_contact("address", address, "legislature")
-                p.add_contact("voice", phone, "legislature")
-                p.add_contact("email", email)
+            p.image = image
+            p.add_contact("address", address, "legislature")
+            p.add_contact("voice", phone, "legislature")
+            p.add_contact("email", email)
 
-                for link in links:
-                    p.add_link(link)
+            for link in links:
+                p.add_link(link)
 
-                yield p
+            yield p
 
     def get_contact(self, url):
         page = self.lxmlize(url)
@@ -56,22 +61,20 @@ def get_contact(self, url):
         links = []
 
         if contact_node.xpath('.//span[@class="address-line1"]/text()'):
-            address = (
-                contact_node.xpath('.//span[@class="address-line1"]/text()')[0]
-                + " "
-                + contact_node.xpath('.//span[@class="locality"]/text()')[0]
-                + " "
-                + contact_node.xpath('.//span[@class="administrative-area"]/text()')[0]
-                + " "
-                + contact_node.xpath('.//span[@class="postal-code"]/text()')[0]
-                + " "
-                + contact_node.xpath('.//span[@class="country"]/text()')[0]
+            address = " ".join(
+                (
+                    contact_node.xpath('.//span[@class="address-line1"]/text()')[0],
+                    contact_node.xpath('.//span[@class="locality"]/text()')[0],
+                    contact_node.xpath('.//span[@class="administrative-area"]/text()')[0],
+                    contact_node.xpath('.//span[@class="postal-code"]/text()')[0],
+                    contact_node.xpath('.//span[@class="country"]/text()')[0],
+                )
             )
         else:
             contact_node = page.xpath(
                 '//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]'
             )[0]
-            address = contact_node.xpath(".//p/text()")[0] + " " + contact_node.xpath(".//p/text()")[1]
+            address = f'{contact_node.xpath(".//p/text()")[0]} {contact_node.xpath(".//p/text()")[1]}'
 
         links = get_links(contact_node)
         phone = self.get_phone(contact_node)

From b088f711986f0b3131d8dba6bd7e1f33f6d54566 Mon Sep 17 00:00:00 2001
From: bzhangjma <brighten.zhang@jmaconsulting.biz>
Date: Mon, 4 Nov 2024 14:33:15 -0500
Subject: [PATCH 21/21] Fix Guelph scraper, update csv file url (#444)

* fix

* simplify image lookup, fix name and role

* include ward in district

* scrape the correct CSV file; remove webpage scraping
---
 ca_on_guelph/people.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
index ef9488cd..24eddf6e 100644
--- a/ca_on_guelph/people.py
+++ b/ca_on_guelph/people.py
@@ -2,6 +2,6 @@
 
 
 class GuelphPersonScraper(CSVScraper):
-    # http://data.open.guelph.ca/dataset/city-of-guelph-contacts
-    csv_url = "http://data.open.guelph.ca/datafiles/guelph-mayor-and-councillors-contact-information-2018-2022.csv"
+    # https://explore.guelph.ca/documents/5ec8d85028c94e83be12a9f01d14eb7f/about
+    csv_url = "https://gismaps.guelph.ca/OpenData/guelph-city-council.csv"
     many_posts_per_area = True