From d3b0a655977c6df1d0fc70b19597d5b64d4710ff Mon Sep 17 00:00:00 2001
From: Dave
Date: Thu, 23 Nov 2023 00:01:37 -0500
Subject: [PATCH] FEC command line tool.

---

Notes (reviewer commentary; sits after the three-dash line, so it is not
part of the commit message):

    What this patch does, file by file:

    * fec.py (new, executable): a click command-line entry point. It
      defines the top-level group `fec`, the sub-groups `names` and
      `contributions`, and the commands `names clean`,
      `contributions summarize` and `contributions search`. `clean` and
      `summarize` take an optional --data directory and use
      DataManager.default() when it is omitted; each builds a manager
      from the data manager and writes its result back through
      to_jsonl_data_manager(). `search` has no body beyond `pass`.

    * server/data/fec/committees.py: from_csv_data_manager() now reads
      committees-{year}.txt instead of committees-{year}.csv.

    * server/data/fec/contributions.py: from_csv_io() no longer uses
      csv.reader; every input line is stripped and split on "|", and
      the now-unused `import csv` is removed.
      NOTE(review): csv.reader(io, delimiter="|", quoting=csv.QUOTE_NONE)
      switches off quote processing and might be the stdlib equivalent
      of the manual split. Confirm against a real contributions file
      before changing it.

    * server/data/manager.py: DataManager.__init__ accepts str as well
      as pathlib.Path, and default() resolves "data" three parents above
      manager.py instead of two.

    * server/data/names/nicknames.py: the nicknames file is read from
      and written to names/nicknames.jsonl instead of
      names/nicknames.json.
      NOTE(review): the docstring of to_jsonl_data_manager() still reads
      "Write to a json file." and is left as-is by this patch.

    NOTE(review): the indentation below was rebuilt from the Python
    syntax; verify with `git apply --check` before applying.

 fec.py                           | 68 ++++++++++++++++++++++++++++++++
 server/data/fec/committees.py    |  2 +-
 server/data/fec/contributions.py |  7 ++--
 server/data/manager.py           |  6 +--
 server/data/names/nicknames.py   |  4 +-
 5 files changed, 78 insertions(+), 9 deletions(-)
 create mode 100755 fec.py

diff --git a/fec.py b/fec.py
new file mode 100755
index 0000000..15b90f8
--- /dev/null
+++ b/fec.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# ruff: noqa: E501
+
+import click
+
+from server.data.fec.contributions import ContributionsManager
+from server.data.manager import DataManager
+from server.data.names.nicknames import MessyNicknamesManager
+
+
+@click.group()
+def fec():
+    """Work with FEC data."""
+    pass
+
+
+@fec.group()
+def names():
+    """Work with names data."""
+    pass
+
+
+@names.command()
+@click.option(
+    "--data",
+    type=click.Path(exists=True),
+    help="Path to data dir.",
+    required=False,
+    default=None,
+)
+def clean(data: str | None = None):
+    """Clean raw names data."""
+    data_manager = DataManager(data) if data is not None else DataManager.default()
+    messy_names_manager = MessyNicknamesManager.from_data_manager(data_manager)
+    nicknames_manager = messy_names_manager.nicknames_manager
+    nicknames_manager.to_jsonl_data_manager(data_manager)
+
+
+@fec.group()
+def contributions():
+    """Work with FEC contributions data."""
+    pass
+
+
+@contributions.command()
+@click.option(
+    "--data",
+    type=click.Path(exists=True),
+    help="Path to data dir.",
+    required=False,
+    default=None,
+)
+def summarize(data: str | None = None):
+    """Summarize raw FEC individual contribution data."""
+    data_manager = DataManager(data) if data is not None else DataManager.default()
+    contributions_manager = ContributionsManager.from_data_manager(data_manager)
+    summaries_manager = contributions_manager.contribution_summaries_manager
+    summaries_manager.to_jsonl_data_manager(data_manager)
+
+
+@contributions.command()
+def search():
+    """Search summarized FEC contributions data."""
+    pass
+
+
+if __name__ == "__main__":
+    fec()
diff --git a/server/data/fec/committees.py b/server/data/fec/committees.py
index 26ea9d9..99a3e37 100644
--- a/server/data/fec/committees.py
+++ b/server/data/fec/committees.py
@@ -200,7 +200,7 @@ def from_csv_data_manager(
         cls, data_manager: "DataManager", year: int = 2020
     ) -> "CommitteeManager":
         """Create a committee manager from a data manager."""
-        return cls.from_csv_path(data_manager.path / "fec" / f"committees-{year}.csv")
+        return cls.from_csv_path(data_manager.path / "fec" / f"committees-{year}.txt")
 
     @classmethod
     def from_jsonl_io(cls, io: t.TextIO) -> "CommitteeManager":
diff --git a/server/data/fec/contributions.py b/server/data/fec/contributions.py
index d47dd4a..a0fba8d 100644
--- a/server/data/fec/contributions.py
+++ b/server/data/fec/contributions.py
@@ -8,7 +8,6 @@ The schema for the individual contribution master file is available at:
 https://www.fec.gov/campaign-finance-data/contributions-individuals-file-description/
 """
 
-import csv
 import json
 import pathlib
 import typing as t
@@ -394,10 +393,12 @@ def from_csv_io(
         get_nickname_index: IGetNicknameIndex,
     ) -> "ContributionsManager":
         """Create a contributions manager from a FEC individual contributions file."""
-        reader = csv.reader(io, delimiter="|")
+        # Turns out this is not simply a CSV with a pipe delimiter. I think it comes
+        # down to escaping quotes, but I'm not sure. So we'll just split on pipes.
+        rows = (row.strip().split("|") for row in io)
         contributions = (
             contribution
-            for row in reader
+            for row in rows
             if (contribution := Contribution.from_contribution_row(row)) is not None
         )
         return cls(
diff --git a/server/data/manager.py b/server/data/manager.py
index 2dccf5a..4ecc659 100644
--- a/server/data/manager.py
+++ b/server/data/manager.py
@@ -6,8 +6,8 @@
 class DataManager:
     """Top-level manager of all content in the data/ directory."""
 
-    def __init__(self, path: pathlib.Path) -> None:
-        self._path = validate_extant_dir(path)
+    def __init__(self, path: str | pathlib.Path) -> None:
+        self._path = validate_extant_dir(pathlib.Path(path))
         self._zip_code_manager = None
 
     @property
@@ -18,4 +18,4 @@ def path(self) -> pathlib.Path:
     @classmethod
     def default(cls) -> "DataManager":
         """Return a DataManager with the default data/ directory."""
-        return cls(pathlib.Path(__file__).parent.parent / "data")
+        return cls(pathlib.Path(__file__).parent.parent.parent / "data")
diff --git a/server/data/names/nicknames.py b/server/data/names/nicknames.py
index 21e0524..fed38f2 100644
--- a/server/data/names/nicknames.py
+++ b/server/data/names/nicknames.py
@@ -186,7 +186,7 @@ def from_path(cls, path: str | pathlib.Path) -> "NicknamesManager":
     @classmethod
     def from_data_manager(cls, data_manager: DataManager) -> "NicknamesManager":
         """Create a manager from a data manager."""
-        return cls.from_path(data_manager.path / "names" / "nicknames.json")
+        return cls.from_path(data_manager.path / "names" / "nicknames.jsonl")
 
     def to_data_lines(self) -> t.Iterable[list[str]]:
         """Convert to a json-serializable object."""
@@ -206,7 +206,7 @@ def to_jsonl_path(self, path: str | pathlib.Path) -> None:
 
     def to_jsonl_data_manager(self, data_manager: DataManager) -> None:
         """Write to a json file."""
-        self.to_jsonl_path(data_manager.path / "names" / "nicknames.json")
+        self.to_jsonl_path(data_manager.path / "names" / "nicknames.jsonl")
 
     def _index_names(self) -> None:
         """Index the merged names."""