jgehrcke · jgehrcke · Sep 28, 2023 · Sep 28, 2023 · Sep 28, 2023 · Sep 28, 2023
diff --git a/Makefile b/Makefile
@@ -41,3 +41,7 @@ lint: ci-image
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "flake8 analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black --check analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "mypy analyze.py fetch.py"
+
+.PHONY: black
+black: ci-image
+	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black analyze.py fetch.py pdf.py"
diff --git a/analyze.py b/analyze.py
@@ -255,7 +255,6 @@ def finalize_and_render_report():
 
 
 def run_pandoc(md_report_filepath, html_template_filepath, html_output_filepath):
-
     pandoc_cmd = [
         ARGS.pandoc_command,
         # For allowing raw HTML in Markdown, ref
@@ -390,7 +389,6 @@ def _get_snapshot_time_from_path(p, basename_suffix):
 
 
 def _get_snapshot_dfs(csvpaths, basename_suffix):
-
     snapshot_dfs = []
     column_names_seen = set()
 
@@ -424,7 +422,6 @@ def _get_snapshot_dfs(csvpaths, basename_suffix):
 
 
 def _build_entity_dfs(dfa, entity_type, unique_entity_names):
-
     cmn_ename_prefix = os.path.commonprefix(list(unique_entity_names))
     log.info("_build_entity_dfs. cmn_ename_prefix: %s", cmn_ename_prefix)
     log.info("dfa:\n%s", dfa)
@@ -783,7 +780,6 @@ def _get_uens(snapshot_dfs):
 
 
 def analyse_view_clones_ts_fragments() -> pd.DataFrame:
-
     log.info("read views/clones time series fragments (CSV docs)")
 
     basename_suffix = "_views_clones_series_fragment.csv"
@@ -796,7 +792,7 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
         log.info("attempt to parse %s", p)
         snapshot_time = _get_snapshot_time_from_path(p, basename_suffix)
 
-        df = pd.read_csv(
+        df = pd.read_csv(  # type: ignore
             p,
             index_col=["time_iso8601"],
             date_parser=lambda col: pd.to_datetime(col, utc=True),
@@ -879,11 +875,13 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
     if ARGS.views_clones_aggregate_inpath:
         if os.path.exists(ARGS.views_clones_aggregate_inpath):
             log.info("read previous aggregate: %s", ARGS.views_clones_aggregate_inpath)
-            df_prev_agg = pd.read_csv(
+
+            df_prev_agg = pd.read_csv(  # type: ignore
                 ARGS.views_clones_aggregate_inpath,
                 index_col=["time_iso8601"],
                 date_parser=lambda col: pd.to_datetime(col, utc=True),
             )
+
             df_prev_agg.index.rename("time", inplace=True)
         else:
             log.info(
@@ -975,7 +973,6 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
     # )
     # agg_fpath = os.path.join(ARGS.snapshotdir, agg_fname)
     if ARGS.views_clones_aggregate_outpath:
-
         if os.path.exists(ARGS.views_clones_aggregate_outpath):
             log.info("file exists: %s", ARGS.views_clones_aggregate_outpath)
             if not ARGS.views_clones_aggregate_inpath:
@@ -1400,17 +1397,18 @@ def symlog_or_lin(df, colname, threshold):
 
 
 def read_stars_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.stargazer_ts_inpath:
         log.info("stargazer_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
 
     log.info("Parse stargazer time series (raw) CSV: %s", ARGS.stargazer_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.stargazer_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("stars_cumulative, raw data: %s", df["stars_cumulative"])
@@ -1439,17 +1437,18 @@ def read_stars_over_time_from_csv() -> pd.DataFrame:
 
 
 def read_forks_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.fork_ts_inpath:
         log.info("fork_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
 
     log.info("Parse fork time series (raw) CSV: %s", ARGS.fork_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.fork_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("forks_cumulative, raw data: %s", df["forks_cumulative"])

diff --git a/ci.Dockerfile b/ci.Dockerfile
@@ -10,7 +10,7 @@ RUN pip install -r requirements-ci.txt
 # Install bats for running cmdline tests. This is the image used when invoking
 # `make bats-test`.
 RUN git clone https://github.com/bats-core/bats-core.git && cd bats-core && \
-    git checkout v1.5.0 && ./install.sh /usr/local
+    git checkout v1.10.0 && ./install.sh /usr/local
 
 RUN mkdir -p /bats-libraries
 RUN git clone https://github.com/bats-core/bats-support /bats-libraries/bats-support

diff --git a/fetch.py b/fetch.py
@@ -144,7 +144,6 @@ def fetch_and_write_fork_ts(repo: Repository.Repository, path: str):
 def fetch_all_traffic_api_endpoints(
     repo,
 ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-
     log.info("fetch top referrers")
     df_referrers_snapshot_now = referrers_to_df(fetch_top_referrers(repo))
 
@@ -266,7 +265,6 @@ def referrers_to_df(top_referrers) -> pd.DataFrame:
 
 
 def paths_to_df(top_paths) -> pd.DataFrame:
-
     series_url_paths = []
     series_views_unique = []
     series_views_total = []
@@ -431,7 +429,6 @@ def get_stars_over_time(repo: Repository.Repository) -> pd.DataFrame:
 
 
 def handle_rate_limit_error(exc):
-
     if "wait a few minutes before you try again" in str(exc):
         log.warning("GitHub abuse mechanism triggered, wait 60 s, retry")
         return True

diff --git a/pdf.py b/pdf.py
@@ -38,7 +38,6 @@
 
 
 def main():
-
     parser = argparse.ArgumentParser(description="")
     parser.add_argument(
         "htmlpath",
@@ -70,7 +69,6 @@ def main():
 
 
 def gen_pdf_bytes(html_apath):
-
     wd_options = Options()
     wd_options.add_argument("--headless")
     wd_options.add_argument("--disable-gpu")
@@ -104,7 +102,6 @@ def gen_pdf_bytes(html_apath):
 
 
 def send_print_request(driver):
-
     # Construct chrome dev tools print request.
     # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
     # Also see https://bugs.chromium.org/p/chromium/issues/detail?id=603559 for