Skip to content

Commit

Permalink
Use Salesforce to read data (#3)
Browse files Browse the repository at this point in the history
Before, the member data had to be loaded via a CSV that we manually
exported. Now, we dynamically read in the data through the Salesforce
REST API and the helper library
https://github.com/simple-salesforce/simple-salesforce.

A follow-up will save the results to Salesforce.

For now, I turned off Mailchimp data. Our CSV is out-of-date and all
those entries were already processed and saved in Salesforce a while
ago.

Note that the Salesforce API returns unset values as `None`, rather than
the `""` we got from the CSV export, so we change `SalesforceEntry` to
better model unset values.
  • Loading branch information
Eric-Arellano authored Jun 23, 2024
1 parent 2d29b1d commit 9ae167b
Show file tree
Hide file tree
Showing 10 changed files with 937 additions and 775 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ __pycache__
.envrc

data/mailchimp.csv
data/result.csv
data/result*.csv
data/salesforce.csv
1 change: 1 addition & 0 deletions constraints.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sqlalchemy<2
1,582 changes: 863 additions & 719 deletions default.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ enabled = false
interpreter_constraints = [">=3.10"]
enable_resolves = true
resolves = { python-default = "default.lock" }
resolves_to_constraints_file = { python-default = "constraints.txt" }
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ uszipcode
python-Levenshtein
pydantic
pytest
simple-salesforce
6 changes: 2 additions & 4 deletions src/mailchimp_entry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Optional

from pydantic import BaseModel, Field, field_validator


Expand All @@ -22,8 +20,8 @@ def normalize_longitude(cls, v: str) -> str:
def mock(
cls,
*,
latitude: Optional[str] = None,
longitude: Optional[str] = None,
latitude: str | None = None,
longitude: str | None = None,
) -> "MailchimpEntry":
return cls(
**{
Expand Down
11 changes: 5 additions & 6 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
import metro_csvs
from mailchimp_entry import MailchimpEntry
from salesforce_entry import SalesforceEntry
from salesforce_api import load_salesforce_data


def main() -> None:
with Path("data/salesforce.csv").open() as f:
entries = [SalesforceEntry(**row) for row in csv.DictReader(f)]
with Path("data/mailchimp.csv").open() as f:
mailchimp_by_email = {
row["Email Address"]: MailchimpEntry(**row) for row in csv.DictReader(f)
}
entries = load_salesforce_data()

# TODO: read in Mailchimp data
mailchimp_by_email = {}

us_zip_to_metro = metro_csvs.read_us_zip_to_metro()
us_city_and_state_to_metro = metro_csvs.read_us_city_and_state_to_metro()
Expand Down
18 changes: 18 additions & 0 deletions src/salesforce_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

from simple_salesforce import Salesforce

from salesforce_entry import SalesforceEntry


# Salesforce connection settings are read from the environment at import time.
# `pop` is called without a default, so importing this module raises KeyError
# when either variable is unset; it also removes the variable from
# `os.environ` after reading it.
INSTANCE_URL = os.environ.pop("SALESFORCE_INSTANCE_URL")
TOKEN = os.environ.pop("SALESFORCE_TOKEN")
# Module-level client shared by the query helpers in this file.
sf = Salesforce(instance_url=INSTANCE_URL, session_id=TOKEN)


def load_salesforce_data() -> list[SalesforceEntry]:
    """Query Salesforce for all Contact records and parse them.

    The selected columns are the aliases declared on the fields of
    `SalesforceEntry`, so the query always matches the model definition.

    Returns:
        One `SalesforceEntry` per record yielded by the query.
    """
    # Build the SELECT list from the model so the two cannot drift apart.
    aliases = [field.alias for field in SalesforceEntry.model_fields.values()]
    columns = ", ".join(aliases)
    soql = f"SELECT {columns} FROM Contact"

    entries = []
    for record in sf.query_all_iter(soql):
        entries.append(SalesforceEntry(**record))
    return entries
78 changes: 39 additions & 39 deletions src/salesforce_entry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Optional

from geopy import Nominatim
from uszipcode import SearchEngine
from pydantic import BaseModel, Field
Expand All @@ -9,41 +7,41 @@


class SalesforceEntry(BaseModel):
email: str = Field(..., alias="Email", frozen=True)
uid: str = Field(..., alias="Id", frozen=True)
city: str = Field(..., alias="MailingCity")
country: str = Field(..., alias="MailingCountry")
latitude: str = Field(..., alias="MailingLatitude")
longitude: str = Field(..., alias="MailingLongitude")
zipcode: str = Field(..., alias="MailingPostalCode")
state: str = Field(..., alias="MailingState")
street: str = Field(..., alias="MailingStreet")
metro: str = Field("", alias="MetropolitanArea")
email: str | None = Field(..., alias="Email", frozen=True)
city: str | None = Field(..., alias="MailingCity")
country: str | None = Field(..., alias="MailingCountry")
latitude: str | None = Field(..., alias="MailingLatitude")
longitude: str | None = Field(..., alias="MailingLongitude")
zipcode: str | None = Field(..., alias="MailingPostalCode")
state: str | None = Field(..., alias="MailingState")
street: str | None = Field(..., alias="MailingStreet")
metro: str | None = Field(..., alias="Metro_Area__c")

@classmethod
def mock(
cls,
*,
city: Optional[str] = None,
country: Optional[str] = None,
latitude: Optional[str] = None,
longitude: Optional[str] = None,
zipcode: Optional[str] = None,
state: Optional[str] = None,
street: Optional[str] = None,
metro: Optional[str] = None,
city: str | None = None,
country: str | None = None,
latitude: str | None = None,
longitude: str | None = None,
zipcode: str | None = None,
state: str | None = None,
street: str | None = None,
metro: str | None = None,
) -> "SalesforceEntry":
return cls(
Email="[email protected]",
Id="12345",
MailingCity=city or "",
MailingCountry=country or "",
MailingLatitude=latitude or "",
MailingLongitude=longitude or "",
MailingPostalCode=zipcode or "",
MailingState=state or "",
MailingStreet=street or "",
MetropolitanArea=metro or "",
MailingCity=city,
MailingCountry=country,
MailingLatitude=latitude,
MailingLongitude=longitude,
MailingPostalCode=zipcode,
MailingState=state,
MailingStreet=street,
Metro_Area__c=metro,
)

def normalize(self) -> None:
Expand All @@ -55,26 +53,29 @@ def normalize(self) -> None:
self.country = "USA"

# Convert US state names to two-letter codes.
if self.country == "USA" and len(self.state) > 2:
if self.country == "USA" and self.state and len(self.state) > 2:
if self.state not in US_STATES_TO_CODES:
raise ValueError(f"Unrecognized state {self.state} for {self.uid}")
self.state = US_STATES_TO_CODES[self.state]

# Title-case all-caps city names.
if self.city.isupper():
if self.city and self.city.isupper():
self.city = self.city.title()

# Normalize US zip codes to be 5 digits.
if self.country == "USA" and len(self.zipcode) > 5:
if self.country == "USA" and self.zipcode and len(self.zipcode) > 5:
if self.zipcode[5] != "-":
raise AssertionError(f"Unexpected zipcode for {self}")
self.zipcode = self.zipcode[:5]

def populate_via_latitude_longitude(
self, mailchimp: Optional[MailchimpEntry], geocoder: Nominatim
self, mailchimp: MailchimpEntry | None, geocoder: Nominatim
) -> None:
mailchimp_missing = mailchimp is None or not (
mailchimp.latitude and mailchimp.longitude
)
if self.zipcode or mailchimp_missing:
metro_area_can_be_computed = self.zipcode or (self.city and self.country)
if mailchimp_missing or metro_area_can_be_computed:
return

addr = geocoder.reverse(f"{mailchimp.latitude}, {mailchimp.longitude}").raw[
Expand All @@ -90,9 +91,9 @@ def populate_via_latitude_longitude(
# Also overwrite any existing values so that we don't mix the prior address
# with the new one.
self.street = None
self.country = addr.get("country_code", "").upper()
self.state = addr.get("state", "")
self.city = addr.get("city", "")
self.country = addr.get("country_code", "").upper() or None
self.state = addr.get("state")
self.city = addr.get("city")

def populate_via_zipcode(self, zipcode_search_engine: SearchEngine) -> None:
"""Look up city and state for US zip codes."""
Expand All @@ -109,7 +110,6 @@ def populate_metro_area(
) -> None:
if self.country != "USA":
return
metro = us_zip_to_metro.get(self.zipcode) or us_city_and_state_to_metro.get(
(self.city, self.state)
)
self.metro = metro or ""
self.metro = us_zip_to_metro.get(
self.zipcode
) or us_city_and_state_to_metro.get((self.city, self.state))
12 changes: 6 additions & 6 deletions src/salesforce_entry_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_normalize_city_capitalization(arg: str, expected: str) -> None:
[
("USA", "11370-2314", "11370"),
("USA", "11370", "11370"),
("USA", "", ""),
("USA", None, None),
("MEX", "11370-54", "11370-54"),
],
)
Expand All @@ -81,7 +81,7 @@ def test_normalize_zip_code_length(country: str, zip: str, expected: str) -> Non
"country,zip,expected_state,expected_city",
[
("USA", "11370", "NY", "East Elmhurst"),
("MEX", "11370", "", ""),
("MEX", "11370", None, None),
],
)
def test_populate_via_zipcode(
Expand Down Expand Up @@ -109,10 +109,10 @@ def test_populate_via_lat_long(geocoder_mock) -> None:
"country,zip,city,state,expected",
[
("USA", "11370", "Flushing", "NY", "My Metro"),
("USA", "99999", "Flushing", "NY", ""),
("USA", "", "Tempe", "AZ", "My Metro"),
("USA", "", "", "", ""),
("MEX", "11370", "Tempe", "AZ", ""),
("USA", "99999", "Flushing", "NY", None),
("USA", None, "Tempe", "AZ", "My Metro"),
("USA", None, None, None, None),
("MEX", "11370", "Tempe", "AZ", None),
],
)
def test_populate_metro_area(
Expand Down

0 comments on commit 9ae167b

Please sign in to comment.