Skip to content

Commit

Permalink
Improve gitignore, add stats script
Browse files Browse the repository at this point in the history
  • Loading branch information
jfy133 committed Aug 30, 2022
1 parent 9e1434b commit d14b0d8
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
Rplot001.pdf
playground/
.~lock*
AncientMetagenomeDir-*.zip
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ and this project follows to [Calendar Versioning](https://calver.org/).
- Fixed Schuenemann2011 Put the accession ID of the full plasmid sequence not partial
- Fixed Martin2013 corrected archive name (incorrectly reported in publication)
- Fixed Seersholm2016 archive ENA (from incorrectly specified as SRA)
- Fixed Vagene2018 polymerase typo
- Fixed Eisenhofer2020 date rounding error (originally added 230 rather than 200)
- Fixed BravoLopez2020 typo in site name (Inmaculate -> Immaculate)
- Fixed Willmann2018 project_name (originally missing the second 'n')
Expand Down
90 changes: 90 additions & 0 deletions assets/analysis/release-stats-script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env R

library(tidyverse)
library(zen4R)

##############################
## Release Stats Generation ##
##############################


## Download previous release from Zenodo: https://doi.org/10.5281/zenodo.3980833

## Load previous release files
old_path <- "/home/jfellows/Downloads/AncientMetagenomeDir-v21.09/SPAAM-community-AncientMetagenomeDir-d9610d3/"

old_senv <- read_tsv(paste0(old_path, "ancientmetagenome-environmental/ancientmetagenome-environmental.tsv"))
old_shos <- read_tsv(paste0(old_path, "ancientmetagenome-hostassociated/ancientmetagenome-hostassociated.tsv"))
old_ssin <- read_tsv(paste0(old_path, "ancientsinglegenome-hostassociated/ancientsinglegenome-hostassociated.tsv"))

## after v22.09 onwards
#old_senv <- read_tsv(paste0(old_path, "ancientmetagenome-environmental/samples/ancientmetagenome-environmental_samples.tsv"))
#old_shos <- read_tsv(paste0(old_path, "ancientmetagenome-hostassociated/samples/ancientmetagenome-hostassociated_samples.tsv"))
#old_ssin <- read_tsv(paste0(old_path, "ancientsinglegenome-hostassociated/samples/ancientsinglegenome-hostassociated_samples.tsv"))
#old_lenv <- read_tsv(paste0(old_path, "ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv"))
#old_lhos <- read_tsv(paste0(old_path, "ancientmetagenome-hostassociated/libraries/ancientmetagenome-hostassociated_libraries.tsv"))
#old_lsin <- read_tsv(paste0(old_path, "ancientsinglegenome-hostassociated/libraries/ancientsinglegenome-hostassociated_libraries.tsv"))

## Load new release files

new_senv <- read_tsv("ancientmetagenome-environmental/samples/ancientmetagenome-environmental_samples.tsv")
new_shos <- read_tsv("ancientmetagenome-hostassociated/samples/ancientmetagenome-hostassociated_samples.tsv")
new_ssin <- read_tsv("ancientsinglegenome-hostassociated/samples/ancientsinglegenome-hostassociated_samples.tsv")
new_lenv <- read_tsv("ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv")
new_lhos <- read_tsv("ancientmetagenome-hostassociated/libraries/ancientmetagenome-hostassociated_libraries.tsv")
new_lsin <- read_tsv("ancientsinglegenome-hostassociated/libraries/ancientsinglegenome-hostassociated_libraries.tsv")

## Find differences

## New
print("PUBLICATIONS")

old_pubs <- c(old_senv$project_name, old_shos$project_name, old_ssin$project_name) %>% unique
new_pubs <- c(new_senv$project_name, new_shos$project_name, new_ssin$project_name) %>% unique

## DOESN'T REPORT REMOVED!
print(paste("New:", length(new_pubs), "Old:", length(old_pubs), "Gain:", length(new_pubs) - length(old_pubs)))

print("SAMPLES")
for ( i in c("senv", "shos", "ssin")) {
old_res <- paste0("old_", i)
new_res <- paste0("new_", i)

old_n <- nrow(eval(as.name(old_res)))
new_n <- nrow(eval(as.name(new_res)))

new_diff <- new_n - old_n

print(paste(old_res,":", old_n))
print(paste(new_res,":", new_n))
print(paste(i, "change:", new_diff))
}

print("LIBRARIES")
for ( i in c("lenv", "lhos", "lsin")) {
#old_res <- paste0("old_", i)
new_res <- paste0("new_", i)

#old_n <- nrow(eval(as.name(old_res)))
new_n <- nrow(eval(as.name(new_res)))

#new_diff <- new_n - old_n

#print(paste(old_res,":", old_n))
print(paste(new_res,":", new_n))
#print(paste("change:", new_diff))
}

### MESSAGE

##Release v22.09 includes 7 new publications, representing XXX new ancient host-associated metagenome samples, XX new ancient microbial genomes, and XX new ancient environmental samples.

##It adds XXX new new ancient host-associated metagenome libraries, XX new ancient microbial genome libraries, and XX new ancient environmental libraries.


##This brings the repository to a total of 132 publications, 1038 new ancient host-associated metagenome samples, 368 ancient microbial genomes, and 375 new ancient environmental samples.


## Load latest release

## Compare and print stats test

0 comments on commit d14b0d8

Please sign in to comment.