Skip to content

Commit

Permalink
FEC command line tool.
Browse files Browse the repository at this point in the history
  • Loading branch information
davepeck committed Nov 23, 2023
1 parent d7a564c commit d3b0a65
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 9 deletions.
68 changes: 68 additions & 0 deletions fec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# ruff: noqa: E501

import click

from server.data.fec.contributions import ContributionsManager
from server.data.manager import DataManager
from server.data.names.nicknames import MessyNicknamesManager


@click.group()
def fec():
    """Work with FEC data."""
    # Root command group. It performs no work itself; the `names` and
    # `contributions` sub-groups register themselves on it via `@fec.group()`.


@fec.group()
def names():
    """Work with names data."""
    # Sub-group of `fec`; only a container for the names-related commands
    # that attach to it with `@names.command()`.


@names.command()
@click.option(
    "--data",
    # The option names a directory, so let click reject regular files up
    # front with a usage error rather than passing them on to DataManager.
    type=click.Path(exists=True, file_okay=False),
    help="Path to data dir.",
    required=False,
    default=None,
)
def clean(data: str | None = None):
    """Clean raw names data."""
    # `data` carries the --data option value; None means the option was
    # omitted, in which case the default data directory is used.
    data_manager = DataManager.default() if data is None else DataManager(data)

    # Load the messy nicknames from the data directory, take the nicknames
    # manager it exposes, and write that back through the same data manager.
    messy_names_manager = MessyNicknamesManager.from_data_manager(data_manager)
    nicknames_manager = messy_names_manager.nicknames_manager
    nicknames_manager.to_jsonl_data_manager(data_manager)


@fec.group()
def contributions():
    """Work with FEC contributions data."""
    # Sub-group of `fec`; only a container for the contribution commands
    # that attach to it with `@contributions.command()`.


@contributions.command()
@click.option(
    "--data",
    # The option names a directory, so let click reject regular files up
    # front with a usage error rather than passing them on to DataManager.
    type=click.Path(exists=True, file_okay=False),
    help="Path to data dir.",
    required=False,
    default=None,
)
def summarize(data: str | None = None):
    """Summarize raw FEC individual contribution data."""
    # `data` carries the --data option value; None means the option was
    # omitted, in which case the default data directory is used.
    data_manager = DataManager.default() if data is None else DataManager(data)

    # Build the contributions manager from the data directory, take the
    # summaries manager it exposes, and write that back through the same
    # data manager.
    contributions_manager = ContributionsManager.from_data_manager(data_manager)
    summaries_manager = contributions_manager.contribution_summaries_manager
    summaries_manager.to_jsonl_data_manager(data_manager)


@contributions.command()
def search():
    """Search summarized FEC contributions data."""
    # Placeholder command: registered on the `contributions` group but it
    # currently has no implementation and returns immediately.


if __name__ == "__main__":
    # Run the root click group only when this file is executed as a script,
    # not when it is imported.
    fec()
2 changes: 1 addition & 1 deletion server/data/fec/committees.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def from_csv_data_manager(
cls, data_manager: "DataManager", year: int = 2020
) -> "CommitteeManager":
"""Create a committee manager from a data manager."""
return cls.from_csv_path(data_manager.path / "fec" / f"committees-{year}.csv")
return cls.from_csv_path(data_manager.path / "fec" / f"committees-{year}.txt")

@classmethod
def from_jsonl_io(cls, io: t.TextIO) -> "CommitteeManager":
Expand Down
7 changes: 4 additions & 3 deletions server/data/fec/contributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
The schema for the individual contribution master file is available at:
https://www.fec.gov/campaign-finance-data/contributions-individuals-file-description/
"""
import csv
import json
import pathlib
import typing as t
Expand Down Expand Up @@ -394,10 +393,12 @@ def from_csv_io(
get_nickname_index: IGetNicknameIndex,
) -> "ContributionsManager":
"""Create a contributions manager from a FEC individual contributions file."""
reader = csv.reader(io, delimiter="|")
# Turns out this is not simply a CSV with a pipe delimiter. I think it comes
# down to escaping quotes, but I'm not sure. So we'll just split on pipes.
rows = (row.strip().split("|") for row in io)
contributions = (
contribution
for row in reader
for row in rows
if (contribution := Contribution.from_contribution_row(row)) is not None
)
return cls(
Expand Down
6 changes: 3 additions & 3 deletions server/data/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
class DataManager:
"""Top-level manager of all content in the data/ directory."""

def __init__(self, path: pathlib.Path) -> None:
self._path = validate_extant_dir(path)
def __init__(self, path: str | pathlib.Path) -> None:
self._path = validate_extant_dir(pathlib.Path(path))
self._zip_code_manager = None

@property
Expand All @@ -18,4 +18,4 @@ def path(self) -> pathlib.Path:
@classmethod
def default(cls) -> "DataManager":
"""Return a DataManager with the default data/ directory."""
return cls(pathlib.Path(__file__).parent.parent / "data")
return cls(pathlib.Path(__file__).parent.parent.parent / "data")
4 changes: 2 additions & 2 deletions server/data/names/nicknames.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def from_path(cls, path: str | pathlib.Path) -> "NicknamesManager":
@classmethod
def from_data_manager(cls, data_manager: DataManager) -> "NicknamesManager":
"""Create a manager from a data manager."""
return cls.from_path(data_manager.path / "names" / "nicknames.json")
return cls.from_path(data_manager.path / "names" / "nicknames.jsonl")

def to_data_lines(self) -> t.Iterable[list[str]]:
"""Convert to a json-serializable object."""
Expand All @@ -206,7 +206,7 @@ def to_jsonl_path(self, path: str | pathlib.Path) -> None:

def to_jsonl_data_manager(self, data_manager: DataManager) -> None:
"""Write to a json file."""
self.to_jsonl_path(data_manager.path / "names" / "nicknames.json")
self.to_jsonl_path(data_manager.path / "names" / "nicknames.jsonl")

def _index_names(self) -> None:
"""Index the merged names."""
Expand Down

0 comments on commit d3b0a65

Please sign in to comment.