From 75d7e43d2cbb95ee4bd50866dff9c102ad37fd55 Mon Sep 17 00:00:00 2001
From: "Dr. Jan-Philip Gehrcke"
Date: Thu, 28 Sep 2023 17:36:05 +0200
Subject: [PATCH 1/3] Makefile: add `black` target to reformat, and reformat.

---
 Makefile   | 4 ++++
 analyze.py | 7 -------
 fetch.py   | 3 ---
 pdf.py     | 3 ---
 4 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/Makefile b/Makefile
index 98f9142d..c139a375 100644
--- a/Makefile
+++ b/Makefile
@@ -41,3 +41,7 @@ lint: ci-image
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "flake8 analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black --check analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "mypy analyze.py fetch.py"
+
+.PHONY: black
+black: ci-image
+	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black analyze.py fetch.py pdf.py"
\ No newline at end of file
diff --git a/analyze.py b/analyze.py
index 7591b9eb..249633aa 100644
--- a/analyze.py
+++ b/analyze.py
@@ -255,7 +255,6 @@ def finalize_and_render_report():
 
 
 def run_pandoc(md_report_filepath, html_template_filepath, html_output_filepath):
-
     pandoc_cmd = [
         ARGS.pandoc_command,
         # For allowing raw HTML in Markdown, ref
@@ -390,7 +389,6 @@ def _get_snapshot_time_from_path(p, basename_suffix):
 
 
 def _get_snapshot_dfs(csvpaths, basename_suffix):
-
     snapshot_dfs = []
     column_names_seen = set()
 
@@ -424,7 +422,6 @@ def _get_snapshot_dfs(csvpaths, basename_suffix):
 
 
 def _build_entity_dfs(dfa, entity_type, unique_entity_names):
-
     cmn_ename_prefix = os.path.commonprefix(list(unique_entity_names))
     log.info("_build_entity_dfs. cmn_ename_prefix: %s", cmn_ename_prefix)
     log.info("dfa:\n%s", dfa)
@@ -783,7 +780,6 @@ def _get_uens(snapshot_dfs):
 
 
 def analyse_view_clones_ts_fragments() -> pd.DataFrame:
-
     log.info("read views/clones time series fragments (CSV docs)")
 
     basename_suffix = "_views_clones_series_fragment.csv"
@@ -975,7 +971,6 @@
     # )
     # agg_fpath = os.path.join(ARGS.snapshotdir, agg_fname)
     if ARGS.views_clones_aggregate_outpath:
-
         if os.path.exists(ARGS.views_clones_aggregate_outpath):
             log.info("file exists: %s", ARGS.views_clones_aggregate_outpath)
             if not ARGS.views_clones_aggregate_inpath:
@@ -1400,7 +1395,6 @@ def symlog_or_lin(df, colname, threshold):
 
 
 def read_stars_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.stargazer_ts_inpath:
         log.info("stargazer_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
@@ -1439,7 +1433,6 @@ def read_stars_over_time_from_csv() -> pd.DataFrame:
 
 
 def read_forks_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.fork_ts_inpath:
         log.info("fork_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
diff --git a/fetch.py b/fetch.py
index 5dc3768b..8a57a016 100644
--- a/fetch.py
+++ b/fetch.py
@@ -144,7 +144,6 @@ def fetch_and_write_fork_ts(repo: Repository.Repository, path: str):
 def fetch_all_traffic_api_endpoints(
     repo,
 ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-
     log.info("fetch top referrers")
     df_referrers_snapshot_now = referrers_to_df(fetch_top_referrers(repo))
 
@@ -266,7 +265,6 @@ def referrers_to_df(top_referrers) -> pd.DataFrame:
 
 
 def paths_to_df(top_paths) -> pd.DataFrame:
-
     series_url_paths = []
     series_views_unique = []
     series_views_total = []
@@ -431,7 +429,6 @@ def get_stars_over_time(repo: Repository.Repository) -> pd.DataFrame:
 
 
 def handle_rate_limit_error(exc):
-
     if "wait a few minutes before you try again" in str(exc):
         log.warning("GitHub abuse mechanism triggered, wait 60 s, retry")
         return True
diff --git a/pdf.py b/pdf.py
index be88c42e..69d5bad6 100644
--- a/pdf.py
+++ b/pdf.py
@@ -38,7 +38,6 @@
 
 
 def main():
-
     parser = argparse.ArgumentParser(description="")
     parser.add_argument(
         "htmlpath",
@@ -70,7 +69,6 @@ def main():
 
 
 def gen_pdf_bytes(html_apath):
-
     wd_options = Options()
     wd_options.add_argument("--headless")
     wd_options.add_argument("--disable-gpu")
@@ -104,7 +102,6 @@ def gen_pdf_bytes(html_apath):
 
 
 def send_print_request(driver):
-
     # Construct chrome dev tools print request.
     # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
     # Also see https://bugs.chromium.org/p/chromium/issues/detail?id=603559 for
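Usage note for the new Makefile target: reformatting is now a single command (this assumes a local Docker setup, since the target, like `lint`, runs `black` inside the CI image):

    make black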
From 8a5118040a221349ce7e131a222e5f45033d62a4 Mon Sep 17 00:00:00 2001
From: "Dr. Jan-Philip Gehrcke"
Date: Thu, 28 Sep 2023 17:44:09 +0200
Subject: [PATCH 2/3] analyze.py: ignore typing err around pd.read_csv()

After a mypy bump there are type issues; it makes sense to think those
through after bumping pandas to 2.x. Ignore them until then.
---
 analyze.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/analyze.py b/analyze.py
index 249633aa..2ad200e6 100644
--- a/analyze.py
+++ b/analyze.py
@@ -792,7 +792,7 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
         log.info("attempt to parse %s", p)
         snapshot_time = _get_snapshot_time_from_path(p, basename_suffix)
 
-        df = pd.read_csv(
+        df = pd.read_csv(  # type: ignore
             p,
             index_col=["time_iso8601"],
             date_parser=lambda col: pd.to_datetime(col, utc=True),
@@ -875,11 +875,13 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
     if ARGS.views_clones_aggregate_inpath:
         if os.path.exists(ARGS.views_clones_aggregate_inpath):
             log.info("read previous aggregate: %s", ARGS.views_clones_aggregate_inpath)
-            df_prev_agg = pd.read_csv(
+
+            df_prev_agg = pd.read_csv(  # type: ignore
                 ARGS.views_clones_aggregate_inpath,
                 index_col=["time_iso8601"],
                 date_parser=lambda col: pd.to_datetime(col, utc=True),
             )
+
             df_prev_agg.index.rename("time", inplace=True)
         else:
             log.info(
@@ -1400,11 +1402,13 @@ def read_stars_over_time_from_csv() -> pd.DataFrame:
         return pd.DataFrame()
 
     log.info("Parse stargazer time series (raw) CSV: %s", ARGS.stargazer_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.stargazer_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("stars_cumulative, raw data: %s", df["stars_cumulative"])
@@ -1438,11 +1442,13 @@ def read_forks_over_time_from_csv() -> pd.DataFrame:
         return pd.DataFrame()
 
     log.info("Parse fork time series (raw) CSV: %s", ARGS.fork_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.fork_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("forks_cumulative, raw data: %s", df["forks_cumulative"])
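Note on the `# type: ignore` markers above: each one sits on a `pd.read_csv()` call that passes the `date_parser` argument, which pandas 2.x deprecates. A minimal sketch of what these call sites might look like after the planned pandas 2.x bump -- reading first, then parsing the index -- where `read_ts_csv` is a hypothetical helper name, not code from this repository:

    import pandas as pd

    def read_ts_csv(path: str) -> pd.DataFrame:
        # Hypothetical helper: read the CSV with the timestamp column as a
        # plain string index.
        df = pd.read_csv(path, index_col=["time_iso8601"])
        # Convert to a tz-aware DatetimeIndex after the read; this replaces
        # the deprecated date_parser=lambda col: pd.to_datetime(col, utc=True).
        df.index = pd.to_datetime(df.index, utc=True).rename("time")
        return df

Parsing after the read may also make the `# type: ignore` markers unnecessary, since the `read_csv()` overload involving `date_parser` is no longer in play.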
From 994bb063fb121aef8445633d2aa0a3504e1b10b2 Mon Sep 17 00:00:00 2001
From: "Dr. Jan-Philip Gehrcke"
Date: Thu, 28 Sep 2023 18:03:19 +0200
Subject: [PATCH 3/3] ci: bump bats: 1.5 -> 1.10

On my machine I saw a

    FileNotFoundError: [Errno 2] No such file or directory:
    'stargazers-rs.csv.tmp' -> 'stargazers-rs.csv'

during test 1. I had not seen this in earlier years, and it also does
not show up in GitHub Actions. I suspected that something about my tmp
directory setup and bats' specific way of managing directories was the
culprit. Bumping bats indeed helped. Bats has made great progress;
that's lovely to see.
---
 ci.Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci.Dockerfile b/ci.Dockerfile
index 3db54c9a..da1488ae 100644
--- a/ci.Dockerfile
+++ b/ci.Dockerfile
@@ -10,7 +10,7 @@ RUN pip install -r requirements-ci.txt
 # Install bats for running cmdline tests. This is the image used when invoking
 # `make bats-test`.
 RUN git clone https://github.com/bats-core/bats-core.git && cd bats-core && \
-    git checkout v1.5.0 && ./install.sh /usr/local
+    git checkout v1.10.0 && ./install.sh /usr/local
 
 RUN mkdir -p /bats-libraries
 RUN git clone https://github.com/bats-core/bats-support /bats-libraries/bats-support
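With the bumped bats checkout baked into the CI image, the command-line tests can be re-run via the target mentioned in the ci.Dockerfile comment (assuming the image is rebuilt first):

    make bats-test

This should confirm that the `.tmp` rename error no longer occurs with bats 1.10.0.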