Skip to content

Commit

Permalink
Testing the ETL pipeline in detail. Committed to a schema.
Browse files Browse the repository at this point in the history
  • Loading branch information
davepeck committed Nov 19, 2023
1 parent a40405d commit a3a4d19
Show file tree
Hide file tree
Showing 8 changed files with 943 additions and 4 deletions.
2 changes: 1 addition & 1 deletion scripts/test.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh

pre-commit run --all
pre-commit run --all-files
python -m unittest discover -s server
10 changes: 10 additions & 0 deletions server/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
"""Tools for working with all raw data files."""


# CONSIDER: the FEC publishes what amounts to a relational dataset, and I
# originally considered just dumping stuff into a massive SQLite database.
# But then I got hooked on summarizing, and building fuzzy identifiers, and
# the code took a different form. In retrospect, the existence of IGetNicknameIndex
# and IGetCommittee just screams "dude, you shoulda used SQLAlchemy and done
# some ETL on the inbound side to slim it down".
#
# So treat this comment as a TODO: revisit that decision.
29 changes: 29 additions & 0 deletions server/data/fec/committees.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ def name_for_code(cls, code: str) -> str | None:


class CommitteeColumns:
"""
Column indices for the committee master file.
See:
https://www.fec.gov/campaign-finance-data/committee-master-file-description/
"""

ID = 0 # CMTE_ID
NAME = 1 # CMTE_NM
TREASURER_NAME = 2 # TRES_NM
Expand Down Expand Up @@ -142,6 +149,28 @@ def to_data(self) -> dict:
return data


class IGetCommittee(t.Protocol):
    """Structural interface for objects that can look up a committee by id."""

    def get_committee(self, id: str) -> Committee | None:
        """Return the committee matching the given id, or None."""
        ...


class MockGetCommittee(IGetCommittee):
    """An in-memory IGetCommittee backed by a fixed set of committees."""

    # Maps each committee's id to the committee itself.
    _id_to_committee: dict[str, Committee]

    def __init__(self, committees: t.Sequence[Committee]) -> None:
        """Index the given committees by their id."""
        lookup: dict[str, Committee] = {}
        for entry in committees:
            # A later committee with a repeated id replaces the earlier one.
            lookup[entry.id] = entry
        self._id_to_committee = lookup

    def get_committee(self, id: str) -> Committee | None:
        """Return the committee stored under the given id, or None."""
        try:
            return self._id_to_committee[id]
        except KeyError:
            return None


class CommitteeManager:
"""Manages a collection of committees."""

Expand Down
Loading

0 comments on commit a3a4d19

Please sign in to comment.