Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI maintenance #77

Merged
merged 3 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ lint: ci-image
docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "flake8 analyze.py fetch.py pdf.py"
docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black --check analyze.py fetch.py pdf.py"
docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "mypy analyze.py fetch.py"

.PHONY: black
black: ci-image
docker run -v $(shell pwd):/checkout $(CI_IMAGE) bash -c "black analyze.py fetch.py pdf.py"
21 changes: 10 additions & 11 deletions analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ def finalize_and_render_report():


def run_pandoc(md_report_filepath, html_template_filepath, html_output_filepath):

pandoc_cmd = [
ARGS.pandoc_command,
# For allowing raw HTML in Markdown, ref
Expand Down Expand Up @@ -390,7 +389,6 @@ def _get_snapshot_time_from_path(p, basename_suffix):


def _get_snapshot_dfs(csvpaths, basename_suffix):

snapshot_dfs = []
column_names_seen = set()

Expand Down Expand Up @@ -424,7 +422,6 @@ def _get_snapshot_dfs(csvpaths, basename_suffix):


def _build_entity_dfs(dfa, entity_type, unique_entity_names):

cmn_ename_prefix = os.path.commonprefix(list(unique_entity_names))
log.info("_build_entity_dfs. cmn_ename_prefix: %s", cmn_ename_prefix)
log.info("dfa:\n%s", dfa)
Expand Down Expand Up @@ -783,7 +780,6 @@ def _get_uens(snapshot_dfs):


def analyse_view_clones_ts_fragments() -> pd.DataFrame:

log.info("read views/clones time series fragments (CSV docs)")

basename_suffix = "_views_clones_series_fragment.csv"
Expand All @@ -796,7 +792,7 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
log.info("attempt to parse %s", p)
snapshot_time = _get_snapshot_time_from_path(p, basename_suffix)

df = pd.read_csv(
df = pd.read_csv( # type: ignore
p,
index_col=["time_iso8601"],
date_parser=lambda col: pd.to_datetime(col, utc=True),
Expand Down Expand Up @@ -879,11 +875,13 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
if ARGS.views_clones_aggregate_inpath:
if os.path.exists(ARGS.views_clones_aggregate_inpath):
log.info("read previous aggregate: %s", ARGS.views_clones_aggregate_inpath)
df_prev_agg = pd.read_csv(

df_prev_agg = pd.read_csv( # type: ignore
ARGS.views_clones_aggregate_inpath,
index_col=["time_iso8601"],
date_parser=lambda col: pd.to_datetime(col, utc=True),
)

df_prev_agg.index.rename("time", inplace=True)
else:
log.info(
Expand Down Expand Up @@ -975,7 +973,6 @@ def analyse_view_clones_ts_fragments() -> pd.DataFrame:
# )
# agg_fpath = os.path.join(ARGS.snapshotdir, agg_fname)
if ARGS.views_clones_aggregate_outpath:

if os.path.exists(ARGS.views_clones_aggregate_outpath):
log.info("file exists: %s", ARGS.views_clones_aggregate_outpath)
if not ARGS.views_clones_aggregate_inpath:
Expand Down Expand Up @@ -1400,17 +1397,18 @@ def symlog_or_lin(df, colname, threshold):


def read_stars_over_time_from_csv() -> pd.DataFrame:

if not ARGS.stargazer_ts_inpath:
log.info("stargazer_ts_inpath not provided, return emtpy df")
return pd.DataFrame()

log.info("Parse stargazer time series (raw) CSV: %s", ARGS.stargazer_ts_inpath)
df = pd.read_csv(

df = pd.read_csv( # type: ignore
ARGS.stargazer_ts_inpath,
index_col=["time_iso8601"],
date_parser=lambda col: pd.to_datetime(col, utc=True),
)

# df = df.astype(int)
df.index.rename("time", inplace=True)
log.info("stars_cumulative, raw data: %s", df["stars_cumulative"])
Expand Down Expand Up @@ -1439,17 +1437,18 @@ def read_stars_over_time_from_csv() -> pd.DataFrame:


def read_forks_over_time_from_csv() -> pd.DataFrame:

if not ARGS.fork_ts_inpath:
log.info("fork_ts_inpath not provided, return emtpy df")
return pd.DataFrame()

log.info("Parse fork time series (raw) CSV: %s", ARGS.fork_ts_inpath)
df = pd.read_csv(

df = pd.read_csv( # type: ignore
ARGS.fork_ts_inpath,
index_col=["time_iso8601"],
date_parser=lambda col: pd.to_datetime(col, utc=True),
)

# df = df.astype(int)
df.index.rename("time", inplace=True)
log.info("forks_cumulative, raw data: %s", df["forks_cumulative"])
Expand Down
2 changes: 1 addition & 1 deletion ci.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ RUN pip install -r requirements-ci.txt
# Install bats for running cmdline tests. This is the image used when invoking
# `make bats-test`.
RUN git clone https://github.com/bats-core/bats-core.git && cd bats-core && \
git checkout v1.5.0 && ./install.sh /usr/local
git checkout v1.10.0 && ./install.sh /usr/local

RUN mkdir -p /bats-libraries
RUN git clone https://github.com/bats-core/bats-support /bats-libraries/bats-support
Expand Down
3 changes: 0 additions & 3 deletions fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def fetch_and_write_fork_ts(repo: Repository.Repository, path: str):
def fetch_all_traffic_api_endpoints(
repo,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:

log.info("fetch top referrers")
df_referrers_snapshot_now = referrers_to_df(fetch_top_referrers(repo))

Expand Down Expand Up @@ -266,7 +265,6 @@ def referrers_to_df(top_referrers) -> pd.DataFrame:


def paths_to_df(top_paths) -> pd.DataFrame:

series_url_paths = []
series_views_unique = []
series_views_total = []
Expand Down Expand Up @@ -431,7 +429,6 @@ def get_stars_over_time(repo: Repository.Repository) -> pd.DataFrame:


def handle_rate_limit_error(exc):

if "wait a few minutes before you try again" in str(exc):
log.warning("GitHub abuse mechanism triggered, wait 60 s, retry")
return True
Expand Down
3 changes: 0 additions & 3 deletions pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@


def main():

parser = argparse.ArgumentParser(description="")
parser.add_argument(
"htmlpath",
Expand Down Expand Up @@ -70,7 +69,6 @@ def main():


def gen_pdf_bytes(html_apath):

wd_options = Options()
wd_options.add_argument("--headless")
wd_options.add_argument("--disable-gpu")
Expand Down Expand Up @@ -104,7 +102,6 @@ def gen_pdf_bytes(html_apath):


def send_print_request(driver):

# Construct chrome dev tools print request.
# https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
# Also see https://bugs.chromium.org/p/chromium/issues/detail?id=603559 for
Expand Down
Loading