diff --git a/Makefile b/Makefile
index 98f9142d..c139a375 100644
--- a/Makefile
+++ b/Makefile
@@ -41,3 +41,7 @@ lint: ci-image
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "flake8 analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black --check analyze.py fetch.py pdf.py"
 	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "mypy analyze.py fetch.py"
+
+.PHONY: black
+black: ci-image
+	docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black analyze.py fetch.py pdf.py"
\ No newline at end of file
diff --git a/analyze.py b/analyze.py
index 7591b9eb..2ad200e6 100644
--- a/analyze.py
+++ b/analyze.py
@@ -255,7 +255,6 @@ def finalize_and_render_report():
 
 
 def run_pandoc(md_report_filepath, html_template_filepath, html_output_filepath):
-
     pandoc_cmd = [
         ARGS.pandoc_command,
         # For allowing raw HTML in Markdown, ref
@@ -390,7 +389,6 @@ def _get_snapshot_time_from_path(p, basename_suffix):
 
 
 def _get_snapshot_dfs(csvpaths, basename_suffix):
-
     snapshot_dfs = []
     column_names_seen = set()
 
@@ -424,7 +422,6 @@ def _get_snapshot_dfs(csvpaths, basename_suffix):
 
 
 def _build_entity_dfs(dfa, entity_type, unique_entity_names):
-
     cmn_ename_prefix = os.path.commonprefix(list(unique_entity_names))
     log.info("_build_entity_dfs. cmn_ename_prefix: %s", cmn_ename_prefix)
     log.info("dfa:\n%s", dfa)
@@ -783,7 +780,6 @@ def _get_uens(snapshot_dfs):
 
 
 def analyse_view_clones_ts_fragments() -> pd.DataFrame:
-
     log.info("read views/clones time series fragments (CSV docs)")
 
     basename_suffix = "_views_clones_series_fragment.csv"
@@ -796,7 +792,7 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
         log.info("attempt to parse %s", p)
         snapshot_time = _get_snapshot_time_from_path(p, basename_suffix)
 
-        df = pd.read_csv(
+        df = pd.read_csv(  # type: ignore
             p,
             index_col=["time_iso8601"],
             date_parser=lambda col: pd.to_datetime(col, utc=True),
@@ -879,11 +875,13 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
     if ARGS.views_clones_aggregate_inpath:
         if os.path.exists(ARGS.views_clones_aggregate_inpath):
             log.info("read previous aggregate: %s", ARGS.views_clones_aggregate_inpath)
-            df_prev_agg = pd.read_csv(
+
+            df_prev_agg = pd.read_csv(  # type: ignore
                 ARGS.views_clones_aggregate_inpath,
                 index_col=["time_iso8601"],
                 date_parser=lambda col: pd.to_datetime(col, utc=True),
             )
+
             df_prev_agg.index.rename("time", inplace=True)
         else:
             log.info(
@@ -975,7 +973,6 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
     #     )
     # agg_fpath = os.path.join(ARGS.snapshotdir, agg_fname)
     if ARGS.views_clones_aggregate_outpath:
-
         if os.path.exists(ARGS.views_clones_aggregate_outpath):
             log.info("file exists: %s", ARGS.views_clones_aggregate_outpath)
             if not ARGS.views_clones_aggregate_inpath:
@@ -1400,17 +1397,18 @@ def symlog_or_lin(df, colname, threshold):
 
 
 def read_stars_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.stargazer_ts_inpath:
         log.info("stargazer_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
 
     log.info("Parse stargazer time series (raw) CSV: %s", ARGS.stargazer_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.stargazer_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("stars_cumulative, raw data: %s", df["stars_cumulative"])
@@ -1439,17 +1437,18 @@
 
 
 def read_forks_over_time_from_csv() -> pd.DataFrame:
-
     if not ARGS.fork_ts_inpath:
         log.info("fork_ts_inpath not provided, return emtpy df")
         return pd.DataFrame()
 
     log.info("Parse fork time series (raw) CSV: %s", ARGS.fork_ts_inpath)
-    df = pd.read_csv(
+
+    df = pd.read_csv(  # type: ignore
         ARGS.fork_ts_inpath,
         index_col=["time_iso8601"],
         date_parser=lambda col: pd.to_datetime(col, utc=True),
     )
+
     # df = df.astype(int)
     df.index.rename("time", inplace=True)
     log.info("forks_cumulative, raw data: %s", df["forks_cumulative"])
diff --git a/ci.Dockerfile b/ci.Dockerfile
index 3db54c9a..da1488ae 100644
--- a/ci.Dockerfile
+++ b/ci.Dockerfile
@@ -10,7 +10,7 @@ RUN pip install -r requirements-ci.txt
 # Install bats for running cmdline tests. This is the image used when invoking
 # `make bats-test`.
 RUN git clone https://github.com/bats-core/bats-core.git && cd bats-core && \
-    git checkout v1.5.0 && ./install.sh /usr/local
+    git checkout v1.10.0 && ./install.sh /usr/local
 
 RUN mkdir -p /bats-libraries
 RUN git clone https://github.com/bats-core/bats-support /bats-libraries/bats-support
diff --git a/fetch.py b/fetch.py
index 5dc3768b..8a57a016 100644
--- a/fetch.py
+++ b/fetch.py
@@ -144,7 +144,6 @@ def fetch_and_write_fork_ts(repo: Repository.Repository, path: str):
 def fetch_all_traffic_api_endpoints(
     repo,
 ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-
     log.info("fetch top referrers")
     df_referrers_snapshot_now = referrers_to_df(fetch_top_referrers(repo))
 
@@ -266,7 +265,6 @@ def referrers_to_df(top_referrers) -> pd.DataFrame:
 
 
 def paths_to_df(top_paths) -> pd.DataFrame:
-
     series_url_paths = []
     series_views_unique = []
     series_views_total = []
@@ -431,7 +429,6 @@ def get_stars_over_time(repo: Repository.Repository) -> pd.DataFrame:
 
 
 def handle_rate_limit_error(exc):
-
     if "wait a few minutes before you try again" in str(exc):
         log.warning("GitHub abuse mechanism triggered, wait 60 s, retry")
         return True
diff --git a/pdf.py b/pdf.py
index be88c42e..69d5bad6 100644
--- a/pdf.py
+++ b/pdf.py
@@ -38,7 +38,6 @@
 
 
 def main():
-
     parser = argparse.ArgumentParser(description="")
     parser.add_argument(
         "htmlpath",
@@ -70,7 +69,6 @@ def main():
 
 
 def gen_pdf_bytes(html_apath):
-
     wd_options = Options()
     wd_options.add_argument("--headless")
     wd_options.add_argument("--disable-gpu")
@@ -104,7 +102,6 @@ def gen_pdf_bytes(html_apath):
 
 
 def send_print_request(driver):
-
     # Construct chrome dev tools print request.
     # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
     # Also see https://bugs.chromium.org/p/chromium/issues/detail?id=603559 for