Automate data updates and data downloads #13
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Refresh Data | |
on: | |
pull_request: | |
workflow_dispatch: | |
schedule: | |
- cron: '0 0 * * 0' | |
jobs: | |
refresh-data: | |
name: Refresh Data | |
runs-on: ubuntu-latest | |
container: | |
image: cansav09/metricminer | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
with: | |
token: ${{ secrets.METRICMINER_GITHUB_PAT }} | |
- name: Run metricminer get data script | |
env: | |
METRICMINER_CALENDLY: ${{ secrets.METRICMINER_CALENDLY }} | |
METRICMINER_GITHUB_PAT: ${{ secrets.METRICMINER_GITHUB_PAT }} | |
METRICMINER_GOOGLE_ACCESS: ${{ secrets.METRICMINER_GOOGLE_ACCESS }} | |
METRICMINER_GOOGLE_REFRESH: ${{ secrets.METRICMINER_GOOGLE_REFRESH }} | |
run: | | |
# Find .git root directory | |
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git")) | |
dir.create(file.path(root_dir, "data"), showWarnings = FALSE) | |
dir.create(file.path(root_dir, "plots"), showWarnings = FALSE) | |
set.seed(1234) | |
library(metricminer) | |
library("magrittr") | |
auth_from_secret("google", | |
refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"), | |
access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"), | |
cache = TRUE) | |
auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT")) | |
ga_accounts <- get_ga_user() | |
fhdsl_account_id <- ga_accounts %>% | |
dplyr::filter(name == "fhDaSL") %>% | |
dplyr::pull(id) | |
itcr_account_id <- ga_accounts %>% | |
dplyr::filter(name == "itcrtraining") %>% | |
dplyr::pull(id) | |
fhdsl_stats_list <- get_all_ga_metrics(account_id = fhdsl_account_id) | |
itcr_stats_list <- get_all_ga_metrics(account_id = itcr_account_id) | |
# There's some google analytics that aren't ITCR courses | |
not_itcr <- c("hutchdatasci", "whoiswho", "MMDS", "FH Cluster 101", "AnVIL_Researcher_Journey") | |
# Set up each data frame | |
ga_metrics <- dplyr::bind_rows(fhdsl_stats_list$metrics ,itcr_stats_list$metrics) %>% | |
dplyr::filter( | |
!(website %in%not_itcr) | |
) | |
saveRDS(ga_metrics, file.path(root_dir, "data","itcr_ga_metric_data.RDS")) | |
ga_dims <- dplyr::bind_rows(fhdsl_stats_list$dimensions, itcr_stats_list$dimensions) %>% | |
dplyr::filter( | |
!(website %in% not_itcr) | |
) | |
saveRDS(ga_dims, file.path(root_dir, "data","itcr_ga_dims_data.RDS")) | |
ga_link_clicks <- dplyr::bind_rows(fhdsl_stats_list$link_clicks,itcr_stats_list$link_clicks) %>% | |
dplyr::filter( | |
!(website %in% not_itcr) | |
) | |
saveRDS(ga_link_clicks, file.path(root_dir, "data","itcr_ga_link_click_data.RDS")) | |
manual_course_info <- googlesheets4::read_sheet( | |
"https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=1550012125", sheet = "Course_data", | |
col_types = "ccDDDciii") %>% | |
dplyr::mutate_if(is.numeric.Date, lubridate::ymd) | |
# Join this all together | |
itcr_course_data <- ga_metrics %>% | |
dplyr::left_join(manual_course_info) %>% | |
dplyr::mutate(website = dplyr::case_when( | |
website == "Advanced Reproducibility in Cancer Informatics" ~ "Advanced Reproducibility", | |
TRUE ~ website)) | |
# Save these to TSVs | |
readr::write_tsv(itcr_course_data, file.path(root_dir,"data", "itcr_course_metrics.tsv")) | |
collabs <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=0") | |
# Save this to a TSV | |
readr::write_tsv(collabs, file.path(root_dir, "data", "collabs.tsv")) | |
loqui_usage <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1G_HTU-bv2k5txExP8EH3ScUfGqtW1P3syThD84Z-g9k/edit#gid=0") | |
# Save this to a TSV | |
readr::write_tsv(loqui_usage, file.path(root_dir,"data", "loqui_usage.tsv")) | |
career_stage_counts <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1-8vox2LzkVKzhmSFXCWjwt3jFtK-wHibRAq2fqbxEyo/edit#gid=8290691", range = "Workshop attendee type") | |
readr::write_tsv(career_stage_counts, file.path(root_dir, "data", "career_stage_counts.tsv")) | |
repos <- c( | |
"fhdsl/metricminer", | |
"fhdsl/metricminer.org", | |
"jhudsl/ottrpal", | |
"jhudsl/ari", | |
"jhudsl/cow", | |
"jhudsl/ottrproject.org", | |
"jhudsl/ottr_docker", | |
"jhudsl/ottr-reports", | |
"fhdsl/conrad", | |
"jhudsl/text2speech", | |
"jhudsl/OTTR_Quizzes", | |
"jhudsl/OTTR_Template", | |
"jhudsl/OTTR_Template_Website", | |
"jhudsl/ITCR_Tables", | |
"jhudsl/ITN_Platforms", | |
"fhdsl/Choosing_Genomics_Tools", | |
"jhudsl/Informatics_Research_Leadership", | |
"jhudsl/Documentation_and_Usability", | |
"jhudsl/Reproducibility_in_Cancer_Informatics", | |
"jhudsl/Adv_Reproducibility_in_Cancer_Informatics", | |
"fhdsl/GitHub_Automation_for_Scientists", | |
"jhudsl/Computing_for_Cancer_Informatics", | |
"fhdsl/Overleaf_and_LaTeX_for_Scientific_Articles", | |
"fhdsl/Ethical_Data_Handling_for_Cancer_Research", | |
"fhdsl/AI_for_Decision_Makers", | |
"fhdsl/AI_for_Efficient_Programming", | |
"fhdsl/NIH_Data_Sharing" | |
) | |
gh_metrics <- get_multiple_repos_metrics(repo_names = repos) | |
saveRDS(gh_metrics, file.path(root_dir,"data", "itcr_gh_metrics.RDS")) | |
shell: Rscript {0} | |
- name: Commit data files | |
env: | |
GH_PAT: ${{ secrets.GH_PAT }} | |
run: | | |
git remote set-url origin https://${GH_PAT}@github.com/${GITHUB_REPOSITORY} | |
git fetch origin | |
git add --force data/* | |
git commit -m 'Refresh data' || echo "No changes to commit" | |
git pull --allow-unrelated-histories --strategy-option=ours | |
git push -u origin main || echo "No changes to push" |