From 17fd1735973de89068c86642e2441d220a495b8f Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:13:32 -0400
Subject: [PATCH 1/4] ca_on_guelph: replace 2018-2022 CSV scraper with council web page scraper

---
 ca_on_guelph/people.py | 60 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 5 deletions(-)

diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
index ef9488cd..3d7ac254 100644
--- a/ca_on_guelph/people.py
+++ b/ca_on_guelph/people.py
@@ -1,7 +1,57 @@
-from utils import CSVScraper
+from utils import CanadianPerson as Person
+from utils import CanadianScraper
 
+COUNCIL_PAGE = "https://guelph.ca/city-hall/mayor-and-council/city-council/"
+MAYOR_PAGE = "https://guelph.ca/city-hall/mayor-and-council/mayors-office/"
 
-class GuelphPersonScraper(CSVScraper):
-    # http://data.open.guelph.ca/dataset/city-of-guelph-contacts
-    csv_url = "http://data.open.guelph.ca/datafiles/guelph-mayor-and-councillors-contact-information-2018-2022.csv"
-    many_posts_per_area = True
+
+class GuelphPersonScraper(CanadianScraper):
+    def scrape(self):
+        page = self.lxmlize(COUNCIL_PAGE)
+
+        councillor_nodes = page.xpath('.//div[@class="thumbnail"]')[1:]
+        assert len(councillor_nodes), "No councillors found"
+
+        for councillor_node in councillor_nodes:
+            ward_district = councillor_node.xpath(".//h2/text()")[0].split(" Councillors")[0]
+            district = ward_district.split(" ")[-1]
+
+            councillors = councillor_node.xpath(".//div/div")
+            for councillor in councillors:
+                role_and_name = councillor.xpath(".//h3/text()")
+                if not role_and_name:
+                    continue
+
+                role_and_name = councillor.xpath(".//h3/text()")[0]
+                name, role = role_and_name.split(" ", 1)
+                contact_info = councillor.xpath(".//p/text()")
+                phone = contact_info[1].strip()
+                email = self.get_email(councillor)
+                if councillor.xpath(".//p/img/@src"):
+                    image = councillor.xpath(".//p/img/@src")[0]
+                else:
+                    image = councillor.xpath(".//div/img/@src")[0]
+
+                p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
+                p.add_contact("email", email)
+                if phone:
+                    p.add_contact("voice", phone, "legislature")
+                p.add_source(COUNCIL_PAGE)
+
+        yield self.scrape_mayor(MAYOR_PAGE)
+
+    def scrape_mayor(self, url):
+        page = self.lxmlize(url)
+
+        mayor_node = page.xpath('.//div[@class="entry-content"]/p')[-1]
+        name = mayor_node.xpath(".//text()")[0].strip().split("Mayor ")[1]
+        phone = self.get_phone(mayor_node)
+        email = self.get_email(mayor_node)
+        image = mayor_node.xpath('//img[contains(@alt, "Mayor")]/@src')[0]
+
+        p = Person(primary_org="legislature", name=name, district="Guelph", role="Mayor", image=image)
+        p.add_contact("voice", phone, "legislature")
+        p.add_contact("email", email)
+        p.add_source(MAYOR_PAGE)
+
+        return p

From 5d81fb1c72c907bb6974dac57bceec01cc94a505 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:28:42 -0400
Subject: [PATCH 2/4] simplify image lookup, fix swapped name and role

---
 ca_on_guelph/people.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
index 3d7ac254..4dbd275b 100644
--- a/ca_on_guelph/people.py
+++ b/ca_on_guelph/people.py
@@ -23,14 +23,10 @@ def scrape(self):
                     continue
 
                 role_and_name = councillor.xpath(".//h3/text()")[0]
-                name, role = role_and_name.split(" ", 1)
-                contact_info = councillor.xpath(".//p/text()")
-                phone = contact_info[1].strip()
+                role, name = role_and_name.split(" ", 1)
+                phone = councillor.xpath(".//p/text()")[1].strip()
                 email = self.get_email(councillor)
-                if councillor.xpath(".//p/img/@src"):
-                    image = councillor.xpath(".//p/img/@src")[0]
-                else:
-                    image = councillor.xpath(".//div/img/@src")[0]
+                image = councillor.xpath(".//img/@src")[0]
 
                 p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
                 p.add_contact("email", email)

From b85cd512e4d18e416b04978fecff50ac492444d9 Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Thu, 31 Oct 2024 10:37:29 -0400
Subject: [PATCH 3/4] include ward in district

---
 ca_on_guelph/people.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
index 4dbd275b..a1debbbc 100644
--- a/ca_on_guelph/people.py
+++ b/ca_on_guelph/people.py
@@ -13,8 +13,7 @@ def scrape(self):
         assert len(councillor_nodes), "No councillors found"
 
         for councillor_node in councillor_nodes:
-            ward_district = councillor_node.xpath(".//h2/text()")[0].split(" Councillors")[0]
-            district = ward_district.split(" ")[-1]
+            district = councillor_node.xpath(".//h2/text()")[0].split("Councillors")[0].strip()
 
             councillors = councillor_node.xpath(".//div/div")
             for councillor in councillors:

From 0b80560bd4ec7bd5852ff0305c570c09c71f626d Mon Sep 17 00:00:00 2001
From: Brighten Zhang <brighten.zhang@jmaconsulting.biz>
Date: Mon, 4 Nov 2024 13:33:57 -0500
Subject: [PATCH 4/4] scrape correct CSV file, remove web page scraping

---
 ca_on_guelph/people.py | 55 ++++--------------------------------------
 1 file changed, 5 insertions(+), 50 deletions(-)

diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
index a1debbbc..24eddf6e 100644
--- a/ca_on_guelph/people.py
+++ b/ca_on_guelph/people.py
@@ -1,52 +1,7 @@
-from utils import CanadianPerson as Person
-from utils import CanadianScraper
+from utils import CSVScraper
 
-COUNCIL_PAGE = "https://guelph.ca/city-hall/mayor-and-council/city-council/"
-MAYOR_PAGE = "https://guelph.ca/city-hall/mayor-and-council/mayors-office/"
 
-
-class GuelphPersonScraper(CanadianScraper):
-    def scrape(self):
-        page = self.lxmlize(COUNCIL_PAGE)
-
-        councillor_nodes = page.xpath('.//div[@class="thumbnail"]')[1:]
-        assert len(councillor_nodes), "No councillors found"
-
-        for councillor_node in councillor_nodes:
-            district = councillor_node.xpath(".//h2/text()")[0].split("Councillors")[0].strip()
-
-            councillors = councillor_node.xpath(".//div/div")
-            for councillor in councillors:
-                role_and_name = councillor.xpath(".//h3/text()")
-                if not role_and_name:
-                    continue
-
-                role_and_name = councillor.xpath(".//h3/text()")[0]
-                role, name = role_and_name.split(" ", 1)
-                phone = councillor.xpath(".//p/text()")[1].strip()
-                email = self.get_email(councillor)
-                image = councillor.xpath(".//img/@src")[0]
-
-                p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
-                p.add_contact("email", email)
-                if phone:
-                    p.add_contact("voice", phone, "legislature")
-                p.add_source(COUNCIL_PAGE)
-
-        yield self.scrape_mayor(MAYOR_PAGE)
-
-    def scrape_mayor(self, url):
-        page = self.lxmlize(url)
-
-        mayor_node = page.xpath('.//div[@class="entry-content"]/p')[-1]
-        name = mayor_node.xpath(".//text()")[0].strip().split("Mayor ")[1]
-        phone = self.get_phone(mayor_node)
-        email = self.get_email(mayor_node)
-        image = mayor_node.xpath('//img[contains(@alt, "Mayor")]/@src')[0]
-
-        p = Person(primary_org="legislature", name=name, district="Guelph", role="Mayor", image=image)
-        p.add_contact("voice", phone, "legislature")
-        p.add_contact("email", email)
-        p.add_source(MAYOR_PAGE)
-
-        return p
+class GuelphPersonScraper(CSVScraper):
+    # https://explore.guelph.ca/documents/5ec8d85028c94e83be12a9f01d14eb7f/about
+    csv_url = "https://gismaps.guelph.ca/OpenData/guelph-city-council.csv"
+    many_posts_per_area = True